"""Contains implementations of database retrieving objects"""
22import os
33from git .errors import InvalidDBRoot
4+ from git .utils import IndexFileSHA1Writer
5+
6+ from utils import (
7+ to_hex_sha ,
8+ exists ,
9+ hex_to_bin ,
10+ FDCompressedSha1Writer ,
11+ isdir ,
12+ mkdir ,
13+ rename ,
14+ dirname ,
15+ join
16+ )
17+
18+ import tempfile
419
520
621class iObjectDBR (object ):
@@ -9,29 +24,29 @@ class iObjectDBR(object):
924 by sha (20 bytes)"""
1025 __slots__ = tuple ()
1126
def __contains__(self, sha):
    """Support the ``sha in db`` syntax by delegating to ``has_object``

    :param sha: passed on to ``has_object`` unchanged
    :return: whatever ``has_object`` returns for the given sha"""
    # the query method has to be *called* - returning the bound method itself
    # would make every membership test evaluate to True
    return self.has_object(sha)
29+
1230 #{ Query Interface
13- def has_obj_hex (self , hexsha ):
14- """:return: True if the object identified by the given 40 byte hexsha is
15- contained in the database"""
16- raise NotImplementedError ("To be implemented in subclass" )
17-
18- def has_obj_bin (self , sha ):
19- """:return: as ``has_obj_hex``, but takes a 20 byte binary sha"""
20- raise NotImplementedError ("To be implemented in subclass" )
21-
22- def obj_hex (self , hexsha ):
23- """:return: tuple(type_string, size_in_bytes, stream) a tuple with object
24- information including its type, its size as well as a stream from which its
25- contents can be read"""
def has_object(self, sha):
    """Tell whether an object is stored in this database

    :param sha: 40 byte hexsha or 20 byte binary sha identifying the object
    :return: True if the object is contained in the database
    :raise NotImplementedError: always, subclasses provide the implementation"""
    raise NotImplementedError("To be implemented in subclass")
2736
28- def obj_bin (self , sha ):
29- """:return: as in ``obj_hex``, but takes a binary sha"""
def object(self, sha):
    """Provide access to an object's type, size and data

    :param sha: 40 bytes hexsha or 20 bytes binary sha
    :return: tuple(type_string, size_in_bytes, stream), i.e. the object's type,
        its size and a stream from which its contents can be read
    :raise NotImplementedError: always, subclasses provide the implementation"""
    raise NotImplementedError("To be implemented in subclass")
3144
32- def obj_info_hex (self , hexsha ):
33- """:return: tuple(type_string, size_in_bytes) tuple with the object's type
34- string as well as its size in bytes"""
def object_info(self, sha):
    """Provide an object's type and size without its data

    :param sha: 40 bytes hexsha or 20 bytes binary sha
    :return: tuple(type_string, size_in_bytes), i.e. the object's type string
        and its size in bytes
    :raise NotImplementedError: always, subclasses provide the implementation"""
    raise NotImplementedError("To be implemented in subclass")
3651
3752 #} END query interface
@@ -42,7 +57,7 @@ class iObjectDBW(object):
4257
4358 #{ Edit Interface
4459
45- def to_obj (self , type , size , stream , dry_run = False , sha_as_hex = True ):
60+ def to_object (self , type , size , stream , dry_run = False , sha_as_hex = True ):
4661 """Create a new object in the database
4762 :return: the sha identifying the object in the database
4863 :param type: type string identifying the object
@@ -53,7 +68,7 @@ def to_obj(self, type, size, stream, dry_run=False, sha_as_hex=True):
5368 hex encoded, not binary"""
5469 raise NotImplementedError ("To be implemented in subclass" )
5570
56- def to_objs (self , iter_info , dry_run = False , sha_as_hex = True , max_threads = 0 ):
71+ def to_objects (self , iter_info , dry_run = False , sha_as_hex = True , max_threads = 0 ):
5772 """Create multiple new objects in the database
5873 :return: sequence of shas identifying the created objects in the order in which
5974 they where given.
@@ -68,7 +83,7 @@ def to_objs(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0):
6883 # actually use multiple threads, default False of course. If the add
6984 shas = list ()
7085 for args in iter_info :
71- shas .append (self .to_obj (* args , dry_run = dry_run , sha_as_hex = sha_as_hex ))
86+ shas .append (self .to_object (* args , dry_run = dry_run , sha_as_hex = sha_as_hex ))
7287 return shas
7388
7489 #} END edit interface
@@ -95,18 +110,103 @@ def root_path(self):
95110 """:return: path at which this db operates"""
96111 return self ._root_path
97112
def db_path(self, rela_path):
    """Resolve a path given relative to the database root

    :param rela_path: path relative to the root of this database
    :return: ``rela_path`` joined onto our root path, allowing to potentially
        access datafiles"""
    root = self._root_path
    return join(root, rela_path)
98118 #} END interface
99119
100120 #{ Utiltities
101- def _root_rela_path (self , rela_path ):
102- """:return: the given relative path relative to our database root"""
103- return os .path .join (self ._root_path , rela_path )
121+
104122
105123 #} END utilities
106124
107125
class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
    """A database which operates on loose object files

    Each object is kept in a file of its own, whose location below the database
    root is derived from the object's hex sha (see ``hexsha_to_object_path``)."""
    __slots__ = ('_hexsha_to_file', )

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = 1000 * 1000

    def __init__(self, root_path):
        """Initialize the database to operate below ``root_path``

        :param root_path: handed to the base class initializer unchanged"""
        super(LooseObjectDB, self).__init__(root_path)
        # cache: hex sha -> path of the object file ``has_object`` found for it
        self._hexsha_to_file = dict()

    #{ Interface
    def hexsha_to_object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root. The first two characters of the sha
            name the directory, the remaining ones the file.
        :param hexsha: sha of the object in its hexadecimal form"""
        return join(hexsha[:2], hexsha[2:])

    #} END interface

    def has_object(self, sha):
        """
        :return: True if a file for the object identified by ``sha`` exists in
            this database. Positive results are remembered in a cache whose
            entries are never removed by this class.
        :param sha: sha of the object; it is passed through ``to_hex_sha`` first,
            presumably to accept the binary form as well - verify against utils"""
        sha = to_hex_sha(sha)
        # try cache
        if sha in self._hexsha_to_file:
            return True

        # try filesystem
        path = self.db_path(self.hexsha_to_object_path(sha))
        if exists(path):
            self._hexsha_to_file[sha] = path
            return True
        # END handle cache
        return False

    def to_object(self, type, size, stream, dry_run=False, sha_as_hex=True):
        """Write a new object into the database

        The data is streamed through a ``FDCompressedSha1Writer`` into a temporary
        file inside the database root, which is moved to the path derived from
        the resulting sha afterwards.

        :return: sha reported by the writer for everything written, as hex string
            if ``sha_as_hex`` is True, otherwise converted with ``hex_to_bin``
        :param type: type string, written into the object header
        :param size: size of the object's data in bytes, written into the header
        :param stream: object whose ``read(num_bytes)`` method provides the data;
            it is read until it returns no more data
        :param dry_run: if True, the data is written and hashed as usual, but the
            temporary file is deleted instead of being moved into place
        :param sha_as_hex: if False, the sha is returned in binary form
        :raise: any error raised while reading, writing or moving the file is
            propagated after the temporary file was removed"""
        # open a tmp file to write the data to
        fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
        writer = FDCompressedSha1Writer(fd)

        chunksize = self.stream_chunk_size
        try:
            try:
                # WRITE HEADER: type SP size NULL
                # Done inside the guarded section as well, so that a failure
                # here closes the writer and removes the temporary file too.
                writer.write("%s %i%s" % (type, size, chr(0)))

                # WRITE ALL DATA
                # Stop on the first empty read only: the loop must neither rely
                # on the return value of writer.write nor on the stream always
                # delivering full chunks, as a short read is not the stream's end.
                while True:
                    chunk = stream.read(chunksize)
                    if not chunk:
                        break
                    # END check for stream end
                    writer.write(chunk)
                # END duplicate data

                # WRITE FOOTER
                # NOTE(review): git's loose object format is header + content
                # only; this newline becomes part of the hashed data. Kept as
                # removing it would change the returned shas - confirm intent.
                writer.write('\n')
            finally:
                writer.close()
            # END assure file was closed
        except:
            # deliberately catches everything: we only clean up and re-raise
            os.remove(tmp_path)
            raise
        # END assure tmpfile removal on error

        sha = writer.sha(as_hex=True)

        if dry_run:
            # in dry-run mode, we delete the file afterwards
            os.remove(tmp_path)
        else:
            try:
                obj_path = self.db_path(self.hexsha_to_object_path(sha))
                obj_dir = dirname(obj_path)
                if not isdir(obj_dir):
                    mkdir(obj_dir)
                # END handle destination directory

                if exists(obj_path):
                    # The path is derived from the sha of the data, so the
                    # object is stored already. Renaming onto an existing file
                    # fails on some platforms, hence just drop our copy.
                    os.remove(tmp_path)
                else:
                    # rename the file into place
                    rename(tmp_path, obj_path)
                # END handle existing object
            except:
                # do not leave the temporary file behind if it could not be moved
                if exists(tmp_path):
                    os.remove(tmp_path)
                raise
            # END assure tmpfile removal on error
        # END handle dry_run

        if not sha_as_hex:
            sha = hex_to_bin(sha)
        # END handle sha format

        return sha
110210
111211
112212class PackedDB (FileDBBase , iObjectDBR ):
0 commit comments