Skip to content

Commit a27a880

Browse files
authored
Merge branch 'master' into issue/1
2 parents e381dfa + ef5bf52 commit a27a880

File tree

7 files changed

+54
-7
lines changed

7 files changed

+54
-7
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
*.todo
12
*.7z
23
*.xml
34
*.pyc

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Schema hints are taken from [a post on Meta.StackExchange](http://meta.stackexch
2525
- `python load_into_pg.py Users`
2626
- `python load_into_pg.py Votes`
2727
- `python load_into_pg.py PostLinks`
28+
- `python load_into_pg.py PostHistory`
2829
- `python load_into_pg.py Comments`
2930
- Finally, after all the initial tables have been created:
3031
- `psql stackoverflow < ./sql/final_post.sql`

load_into_pg.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
7474
if pre != '':
7575
cur.execute(pre)
7676
conn.commit()
77-
print 'Pre-processing took {} seconds'.format(time.time() - start_time)
77+
print 'Pre-processing took {:.1f} seconds'.format(time.time() - start_time)
7878

7979
# Handle content of the table
8080
start_time = time.time()
@@ -91,7 +91,7 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
9191
' VALUES\n' + valuesStr + ';'
9292
cur.execute(cmd)
9393
conn.commit()
94-
print 'Table processing took {} seconds'.format(time.time() - start_time)
94+
print 'Table processing took {:.1f} seconds'.format(time.time() - start_time)
9595

9696
# Post-processing (creation of indexes)
9797
start_time = time.time()
@@ -119,7 +119,7 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
119119
parser = argparse.ArgumentParser()
120120
parser.add_argument( 'table'
121121
, help = 'The table to work on.'
122-
, choices = ['Users', 'Badges', 'Posts', 'Tags', 'Votes','PostLinks','Comments']
122+
, choices = ['Users', 'Badges', 'Posts', 'Tags', 'Votes', 'PostLinks', 'PostHistory', 'Comments']
123123
)
124124

125125
parser.add_argument( '-d', '--dbname'
@@ -249,7 +249,25 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
249249
, 'ExcerptPostId'
250250
, 'WikiPostId'
251251
]
252-
252+
elif table == 'PostHistory':
253+
keys = [
254+
'Id',
255+
'PostHistoryTypeId',
256+
'PostId',
257+
'RevisionGUID',
258+
'CreationDate',
259+
'UserId',
260+
'Text'
261+
]
262+
elif table == 'Comments':
263+
keys = [
264+
'Id',
265+
'PostId',
266+
'Score',
267+
'Text',
268+
'CreationDate',
269+
'UserId',
270+
]
253271
choice = raw_input('This will drop the {} table. Are you sure [y/n]?'.format(table))
254272

255273
if len(choice) > 0 and choice[0].lower() == 'y':

sql/Comments_post.sql

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
-- hash indexes take too long to create, so the indexes below use btree (PostId is the one exception)
2-
CREATE INDEX comments_post_type_id_idx ON Comments USING btree (PostId)
2+
CREATE INDEX cmnts_score_idx ON Comments USING btree (Score)
33
WITH (FILLFACTOR = 100);
4-
CREATE INDEX comments_score_idx ON Comments USING btree (Score)
4+
CREATE INDEX cmnts_postid_idx ON Comments USING hash (PostId)
55
WITH (FILLFACTOR = 100);
6+
CREATE INDEX cmnts_revguid_idx ON Comments USING btree (RevisionGUID)
7+
WITH (FILLFACTOR = 100);
8+
CREATE INDEX cmnts_creation_date_idx ON Comments USING btree (CreationDate)
9+
WITH (FILLFACTOR = 100);
10+
CREATE INDEX cmnts_userid_idx ON Comments USING btree (UserId)
11+
WITH (FILLFACTOR = 100);

sql/Comments_pre.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ CREATE TABLE Comments (
55
Score int not NULL ,
66
Text text ,
77
CreationDate timestamp not NULL ,
8-
UserId int
8+
UserId int
99
);

sql/PostHistory_post.sql

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
-- hash indexes take too long to create, so the indexes below use btree (PostId is the one exception)
2+
CREATE INDEX ph_post_type_id_idx ON PostHistory USING btree (PostHistoryTypeId)
3+
WITH (FILLFACTOR = 100);
4+
CREATE INDEX ph_postid_idx ON PostHistory USING hash (PostId)
5+
WITH (FILLFACTOR = 100);
6+
CREATE INDEX ph_revguid_idx ON PostHistory USING btree (RevisionGUID)
7+
WITH (FILLFACTOR = 100);
8+
CREATE INDEX ph_creation_date_idx ON PostHistory USING btree (CreationDate)
9+
WITH (FILLFACTOR = 100);
10+
CREATE INDEX ph_userid_idx ON PostHistory USING btree (UserId)
11+
WITH (FILLFACTOR = 100);

sql/PostHistory_pre.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
DROP TABLE IF EXISTS PostHistory CASCADE;
2+
CREATE TABLE PostHistory (
3+
Id int PRIMARY KEY ,
4+
PostHistoryTypeId int ,
5+
PostId int ,
6+
RevisionGUID text ,
7+
CreationDate timestamp not NULL ,
8+
UserId int ,
9+
PostText text
10+
);

0 commit comments

Comments
 (0)