Skip to content

Commit ef5bf52

Browse files
committed
Merge branch 'fx86-master'
2 parents f40d5db + daf5a25 commit ef5bf52

File tree

7 files changed

+66
-4
lines changed

7 files changed

+66
-4
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
*.todo
12
*.7z
23
*.xml
34
*.pyc

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ Schema hints are taken from [a post on Meta.StackExchange](http://meta.stackexch
2424
- `python load_into_pg.py Tags` (not present in earliest dumps)
2525
- `python load_into_pg.py Users`
2626
- `python load_into_pg.py Votes`
27+
- `python load_into_pg.py PostHistory`
28+
- `python load_into_pg.py Comments`
2729
- Finally, after all the initial tables have been created:
2830
- `psql stackoverflow < ./sql/final_post.sql`
2931
- If you used a different database name, make sure to use that instead of

load_into_pg.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
7474
if pre != '':
7575
cur.execute(pre)
7676
conn.commit()
77-
print 'Pre-processing took {} seconds'.format(time.time() - start_time)
77+
print 'Pre-processing took {:.1f} seconds'.format(time.time() - start_time)
7878

7979
# Handle content of the table
8080
start_time = time.time()
@@ -91,7 +91,7 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
9191
' VALUES\n' + valuesStr + ';'
9292
cur.execute(cmd)
9393
conn.commit()
94-
print 'Table processing took {} seconds'.format(time.time() - start_time)
94+
print 'Table processing took {:.1f} seconds'.format(time.time() - start_time)
9595

9696
# Post-processing (creation of indexes)
9797
start_time = time.time()
@@ -119,7 +119,7 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
119119
parser = argparse.ArgumentParser()
120120
parser.add_argument( 'table'
121121
, help = 'The table to work on.'
122-
, choices = ['Users', 'Badges', 'Posts', 'Tags', 'Votes']
122+
, choices = ['Users', 'Badges', 'Posts', 'Tags', 'Votes', 'PostHistory', 'Comments']
123123
)
124124

125125
parser.add_argument( '-d', '--dbname'
@@ -232,7 +232,25 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
232232
, 'ExcerptPostId'
233233
, 'WikiPostId'
234234
]
235-
235+
elif table == 'PostHistory':
236+
keys = [
237+
'Id',
238+
'PostHistoryTypeId',
239+
'PostId',
240+
'RevisionGUID',
241+
'CreationDate',
242+
'UserId',
243+
'Text'
244+
]
245+
elif table == 'Comments':
246+
keys = [
247+
'Id',
248+
'PostId',
249+
'Score',
250+
'Text',
251+
'CreationDate',
252+
'UserId',
253+
]
236254
choice = raw_input('This will drop the {} table. Are you sure [y/n]?'.format(table))
237255

238256
if len(choice) > 0 and choice[0].lower() == 'y':

sql/Comments_post.sql

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
-- hash index takes too long to create
2+
CREATE INDEX cmnts_post_type_id_idx ON Comments USING btree (Score) -- indexes Score; the name is a leftover from PostHistory_post.sql, kept for compatibility
3+
WITH (FILLFACTOR = 100);
4+
CREATE INDEX cmnts_postid_idx ON Comments USING hash (PostId)
5+
WITH (FILLFACTOR = 100);
6+
-- No RevisionGUID index here: the Comments table (sql/Comments_pre.sql) has no
7+
-- such column, so the statement copied from PostHistory_post.sql would fail.
8+
CREATE INDEX cmnts_creation_date_idx ON Comments USING btree (CreationDate)
9+
WITH (FILLFACTOR = 100);
10+
CREATE INDEX cmnts_userid_idx ON Comments USING btree (UserId)
11+
WITH (FILLFACTOR = 100);

sql/Comments_pre.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
DROP TABLE IF EXISTS Comments CASCADE; -- recreate from scratch; CASCADE also drops objects that depend on the table
2+
CREATE TABLE Comments ( -- column order mirrors the 'Comments' key list in load_into_pg.py
3+
Id int PRIMARY KEY ,
4+
PostId int ,
5+
Score int ,
6+
Post_Text text , -- presumably receives the 'Text' key; NOTE(review): PostHistory names its equivalent column PostText - confirm the difference is intended
7+
CreationDate timestamp not NULL , -- the only column besides Id that rejects NULL
8+
UserId int
9+
);

sql/PostHistory_post.sql

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
-- hash index takes too long to create
2+
CREATE INDEX ph_post_type_id_idx ON PostHistory USING btree (PostHistoryTypeId)
3+
WITH (FILLFACTOR = 100); -- FILLFACTOR 100 packs index pages completely full
4+
CREATE INDEX ph_postid_idx ON PostHistory USING hash (PostId) -- NOTE(review): hash index despite the remark at the top of this file - confirm intended
5+
WITH (FILLFACTOR = 100);
6+
CREATE INDEX ph_revguid_idx ON PostHistory USING btree (RevisionGUID)
7+
WITH (FILLFACTOR = 100);
8+
CREATE INDEX ph_creation_date_idx ON PostHistory USING btree (CreationDate)
9+
WITH (FILLFACTOR = 100);
10+
CREATE INDEX ph_userid_idx ON PostHistory USING btree (UserId)
11+
WITH (FILLFACTOR = 100);

sql/PostHistory_pre.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
DROP TABLE IF EXISTS PostHistory CASCADE; -- recreate from scratch; CASCADE also drops objects that depend on the table
2+
CREATE TABLE PostHistory ( -- column order mirrors the 'PostHistory' key list in load_into_pg.py
3+
Id int PRIMARY KEY ,
4+
PostHistoryTypeId int ,
5+
PostId int ,
6+
RevisionGUID text , -- stored as plain text rather than a uuid type
7+
CreationDate timestamp not NULL , -- the only column besides Id that rejects NULL
8+
UserId int ,
9+
PostText text -- presumably receives the 'Text' key (last entry in the key list); verify against the loader
10+
);

0 commit comments

Comments
 (0)