Skip to content

Commit f26c869

Browse files
committed
Performance tests are now part of the test-suite.
By default, a small repository will be tested, which doesn't actually take that long (~20s). Additionally, this enforces the correctness tests, which previously didn't run by default. As we are handling data here, we must be sure that it's handled correctly, thus the tests should run.
1 parent 7fd369c commit f26c869

File tree

5 files changed

+43
-46
lines changed

5 files changed

+43
-46
lines changed

gitdb/test/lib.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import random
1111
from array import array
1212

13-
from io import StringIO
13+
from io import BytesIO
1414

1515
import glob
1616
import unittest
@@ -120,7 +120,7 @@ def make_memory_file(size_in_bytes, randomize=False):
120120
""":return: tuple(size_of_stream, stream)
121121
:param randomize: try to produce a very random stream"""
122122
d = make_bytes(size_in_bytes, randomize)
123-
return len(d), StringIO(d)
123+
return len(d), BytesIO(d)
124124

125125
#} END routines
126126

gitdb/test/performance/lib.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
55
"""Contains library functions"""
66
import os
7+
import logging
78
from gitdb.test.lib import TestBase
89

910

@@ -12,17 +13,6 @@
1213
#} END invariants
1314

1415

15-
#{ Utilities
16-
def resolve_or_fail(env_var):
17-
""":return: resolved environment variable or raise EnvironmentError"""
18-
try:
19-
return os.environ[env_var]
20-
except KeyError:
21-
raise EnvironmentError("Please set the %r envrionment variable and retry" % env_var)
22-
# END exception handling
23-
24-
#} END utilities
25-
2616

2717
#{ Base Classes
2818

@@ -39,14 +29,19 @@ class TestBigRepoR(TestBase):
3929
head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5'
4030
#} END invariants
4131

42-
@classmethod
43-
def setUpAll(cls):
32+
def setUp(self):
4433
try:
45-
super(TestBigRepoR, cls).setUpAll()
34+
super(TestBigRepoR, self).setUp()
4635
except AttributeError:
4736
pass
48-
cls.gitrepopath = resolve_or_fail(k_env_git_repo)
49-
assert cls.gitrepopath.endswith('.git')
37+
38+
self.gitrepopath = os.environ.get(k_env_git_repo)
39+
if not self.gitrepopath:
40+
logging.info("You can set the %s environment variable to a .git repository of your choice - defaulting to the gitdb repository")
41+
ospd = os.path.dirname
42+
self.gitrepopath = os.path.join(ospd(ospd(ospd(ospd(__file__)))), '.git')
43+
# end assure gitrepo is set
44+
assert self.gitrepopath.endswith('.git')
5045

5146

5247
#} END base classes

gitdb/test/performance/test_pack.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
# This module is part of GitDB and is released under
44
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
55
"""Performance tests for object store"""
6-
from lib import (
6+
from __future__ import print_function
7+
8+
from gitdb.test.performance.lib import (
79
TestBigRepoR
8-
)
10+
)
911

1012
from gitdb.exc import UnsupportedOperation
1113
from gitdb.db.pack import PackedDB
@@ -15,8 +17,6 @@
1517
import os
1618
from time import time
1719

18-
from nose import SkipTest
19-
2020
class TestPackedDBPerformance(TestBigRepoR):
2121

2222
def test_pack_random_access(self):
@@ -27,7 +27,7 @@ def test_pack_random_access(self):
2727
sha_list = list(pdb.sha_iter())
2828
elapsed = time() - st
2929
ns = len(sha_list)
30-
print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
30+
print("PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed), file=sys.stderr)
3131

3232
# sha lookup: best-case and worst case access
3333
pdb_pack_info = pdb._pack_info
@@ -41,7 +41,7 @@ def test_pack_random_access(self):
4141
# discard cache
4242
del(pdb._entities)
4343
pdb.entities()
44-
print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed)
44+
print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed), file=sys.stderr)
4545
# END for each random mode
4646

4747
# query info and streams only
@@ -51,7 +51,7 @@ def test_pack_random_access(self):
5151
for sha in sha_list[:max_items]:
5252
pdb_fun(sha)
5353
elapsed = time() - st
54-
print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
54+
print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed), file=sys.stderr)
5555
# END for each function
5656

5757
# retrieve stream and read all
@@ -65,13 +65,12 @@ def test_pack_random_access(self):
6565
total_size += stream.size
6666
elapsed = time() - st
6767
total_kib = total_size / 1000
68-
print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)
68+
print("PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed), file=sys.stderr)
6969

7070
def test_correctness(self):
71-
raise SkipTest("Takes too long, enable it if you change the algorithm and want to be sure you decode packs correctly")
7271
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
7372
# disabled for now as it used to work perfectly, checking big repositories takes a long time
74-
print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
73+
print("Endurance run: verify streaming of objects (crc and sha)", file=sys.stderr)
7574
for crc in range(2):
7675
count = 0
7776
st = time()
@@ -88,6 +87,6 @@ def test_correctness(self):
8887
# END for each index
8988
# END for each entity
9089
elapsed = time() - st
91-
print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed)
90+
print("PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed), file=sys.stderr)
9291
# END for each verify mode
9392

gitdb/test/performance/test_pack_streaming.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
# This module is part of GitDB and is released under
44
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
55
"""Specific test for pack streams only"""
6-
from lib import (
6+
from __future__ import print_function
7+
8+
from gitdb.test.performance.lib import (
79
TestBigRepoR
8-
)
10+
)
911

1012
from gitdb.db.pack import PackedDB
1113
from gitdb.stream import NullStream
@@ -14,7 +16,6 @@
1416
import os
1517
import sys
1618
from time import time
17-
from nose import SkipTest
1819

1920
class CountedNullStream(NullStream):
2021
__slots__ = '_bw'
@@ -36,7 +37,7 @@ def test_pack_writing(self):
3637
ostream = CountedNullStream()
3738
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
3839

39-
ni = 5000
40+
ni = 1000
4041
count = 0
4142
st = time()
4243
for sha in pdb.sha_iter():
@@ -46,17 +47,17 @@ def test_pack_writing(self):
4647
break
4748
#END gather objects for pack-writing
4849
elapsed = time() - st
49-
print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed)
50+
print("PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed), file=sys.stderr)
5051

5152
st = time()
5253
PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()), ostream.write, object_count=ni)
5354
elapsed = time() - st
5455
total_kb = ostream.bytes_written() / 1000
55-
print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed)
56+
print(sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed), sys.stderr)
5657

5758

5859
def test_stream_reading(self):
59-
raise SkipTest()
60+
# raise SkipTest()
6061
pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
6162

6263
# streaming only, meant for --with-profile runs
@@ -74,5 +75,5 @@ def test_stream_reading(self):
7475
count += 1
7576
elapsed = time() - st
7677
total_kib = total_size / 1000
77-
print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed)
78+
print(sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed), sys.stderr)
7879

gitdb/test/performance/test_stream.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
# This module is part of GitDB and is released under
44
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
55
"""Performance data streaming performance"""
6-
from lib import TestBigRepoR
6+
from __future__ import print_function
7+
8+
from gitdb.test.performance.lib import TestBigRepoR
79
from gitdb.db import LooseObjectDB
8-
from gitdb.stream import IStream
10+
from gitdb import IStream
911

1012
from gitdb.util import bin_to_hex
1113
from gitdb.fun import chunk_size
@@ -15,7 +17,7 @@
1517
import sys
1618

1719

18-
from lib import (
20+
from gitdb.test.lib import (
1921
make_memory_file,
2022
with_rw_directory
2123
)
@@ -49,11 +51,11 @@ def test_large_data_streaming(self, path):
4951
# serial mode
5052
for randomize in range(2):
5153
desc = (randomize and 'random ') or ''
52-
print >> sys.stderr, "Creating %s data ..." % desc
54+
print("Creating %s data ..." % desc, file=sys.stderr)
5355
st = time()
5456
size, stream = make_memory_file(self.large_data_size_bytes, randomize)
5557
elapsed = time() - st
56-
print >> sys.stderr, "Done (in %f s)" % elapsed
58+
print("Done (in %f s)" % elapsed, file=sys.stderr)
5759
string_ios.append(stream)
5860

5961
# writing - due to the compression it will seem faster than it is
@@ -66,7 +68,7 @@ def test_large_data_streaming(self, path):
6668

6769

6870
size_kib = size / 1000
69-
print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
71+
print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add), file=sys.stderr)
7072

7173
# reading all at once
7274
st = time()
@@ -76,7 +78,7 @@ def test_large_data_streaming(self, path):
7678

7779
stream.seek(0)
7880
assert shadata == stream.getvalue()
79-
print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
81+
print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall), file=sys.stderr)
8082

8183

8284
# reading in chunks of 1 MiB
@@ -93,10 +95,10 @@ def test_large_data_streaming(self, path):
9395
elapsed_readchunks = time() - st
9496

9597
stream.seek(0)
96-
assert ''.join(chunks) == stream.getvalue()
98+
assert b''.join(chunks) == stream.getvalue()
9799

98100
cs_kib = cs / 1000
99-
print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
101+
print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)
100102

101103
# del db file so we keep something to do
102104
os.remove(db_file)

0 commit comments

Comments
 (0)