"""Contains implementations of database retrieving objects"""
2- import os
2+ from git . utils import IndexFileSHA1Writer
33from git .errors import (
44 InvalidDBRoot ,
5- BadObject
5+ BadObject ,
6+ BadObjectType
67 )
7- from git .utils import IndexFileSHA1Writer
88
99from utils import (
10- getsize ,
10+ DecompressMemMapReader ,
11+ FDCompressedSha1Writer ,
12+ ENOENT ,
1113 to_hex_sha ,
1214 exists ,
1315 hex_to_bin ,
14- FDCompressedSha1Writer ,
1516 isdir ,
1617 mkdir ,
1718 rename ,
1819 dirname ,
1920 join
2021 )
2122
23+ from fun import (
24+ chunk_size ,
25+ loose_object_header_info ,
26+ write_object
27+ )
28+
2229import tempfile
2330import mmap
31+ import os
2432
2533
2634class iObjectDBR (object ):
@@ -36,22 +44,25 @@ def __contains__(self, sha):
def has_object(self, sha):
    """
    Abstract membership query; concrete databases have to override it.

    :param sha: 40 byte hexsha or 20 bytes binary sha identifying the object
    :return: True if the object identified by ``sha`` is contained in the
        database
    :raise BadObject: declared by the interface; the exact condition is not
        spelled out here - NOTE(review): confirm against the implementations
    :raise NotImplementedError: always, in this base implementation"""
    message = "To be implemented in subclass"
    raise NotImplementedError(message)
4150
def object(self, sha):
    """
    Abstract object lookup; concrete databases have to override it.

    :param sha: 40 bytes hexsha or 20 bytes binary sha
    :return: tuple(type_string, size_in_bytes, stream) a tuple with object
        information including its type, its size as well as a stream from
        which its contents can be read
    :raise BadObject: declared by the interface; presumably raised when the
        object cannot be found - NOTE(review): confirm against the
        implementations
    :raise NotImplementedError: always, in this base implementation"""
    message = "To be implemented in subclass"
    raise NotImplementedError(message)
4959
def object_info(self, sha):
    """
    Abstract header query; concrete databases have to override it.

    :param sha: 40 bytes hexsha or 20 bytes binary sha
    :return: tuple(type_string, size_in_bytes) tuple with the object's type
        string as well as its size in bytes
    :raise BadObject: declared by the interface; presumably raised when the
        object cannot be found - NOTE(review): confirm against the
        implementations
    :raise NotImplementedError: always, in this base implementation"""
    message = "To be implemented in subclass"
    raise NotImplementedError(message)
5667
5768 #} END query interface
@@ -70,7 +81,8 @@ def to_object(self, type, size, stream, dry_run=False, sha_as_hex=True):
7081 :param stream: stream providing the data
7182 :param dry_run: if True, the object database will not actually be changed
7283 :param sha_as_hex: if True, the returned sha identifying the object will be
73- hex encoded, not binary"""
84+ hex encoded, not binary
85+ :raise IOError: if data could not be written"""
7486 raise NotImplementedError ("To be implemented in subclass" )
7587
7688 def to_objects (self , iter_info , dry_run = False , sha_as_hex = True , max_threads = 0 ):
@@ -82,7 +94,8 @@ def to_objects(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0):
8294 :param dry_run: see ``to_obj``
8395 :param sha_as_hex: see ``to_obj``
8496 :param max_threads: if < 1, any number of threads may be started while processing
85- the request, otherwise the given number of threads will be started."""
97+ the request, otherwise the given number of threads will be started.
98+ :raise IOError: if data could not be written"""
8699 # a trivial implementation, ignoring the threads for now
87100 # TODO: add configuration to the class to determine whether we may
88101 # actually use multiple threads, default False of course. If the add
@@ -130,15 +143,19 @@ def db_path(self, rela_path):
130143
131144class LooseObjectDB (FileDBBase , iObjectDBR , iObjectDBW ):
132145 """A database which operates on loose object files"""
133- __slots__ = ('_hexsha_to_file' , )
134-
146+ __slots__ = ('_hexsha_to_file' , '_fd_open_flags' )
135147 # CONFIGURATION
136148 # chunks in which data will be copied between streams
137- stream_chunk_size = 1000 * 1000
149+ stream_chunk_size = chunk_size
150+
138151
def __init__(self, root_path):
    """
    Initialize the loose object database.

    :param root_path: path handed on unchanged to the base class initializer"""
    super(LooseObjectDB, self).__init__(root_path)
    self._hexsha_to_file = dict()
    # Additional flags OR-ed into os.open() when reading objects. They are
    # reset to 0 after the first failure they cause. Whether they work depends
    # on the mount the root lives on, which is why they are kept per instance.
    # O_NOATIME is a platform extension that is missing from the os module on
    # some systems, so fall back to "no extra flags" instead of failing with
    # an AttributeError during construction.
    self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
142159
143160 #{ Interface
144161 def object_path (self , hexsha ):
@@ -167,36 +184,46 @@ def readable_db_object_path(self, hexsha):
167184
168185 #} END interface
169186
170- def _object_header_info (self , mmap ):
171- """:return: tuple(type_string, uncompressed_size_in_bytes
172- :param mmap: newly mapped memory map at position 0. It will be
173- seeked to the actual start of the object contents, which can be used
174- to initialize a zlib decompress object."""
175- raise NotImplementedError ("todo" )
176-
def _map_loose_object(self, sha):
    """
    Open the loose object file belonging to ``sha`` and map it into memory.

    :param sha: sha of the object; it is normalized with ``to_hex_sha`` before
        the path is looked up
    :return: memory map of that file to allow random read access
    :raise BadObject: if object could not be located, or if its file could not
        be opened even without the additional open flags"""
    hexsha = to_hex_sha(sha)
    db_path = self.db_path(self.object_path(hexsha))
    try:
        fd = os.open(db_path, os.O_RDONLY | self._fd_open_flags)
    except OSError as e:
        if e.errno == ENOENT:
            # the file really does not exist, retrying cannot help
            raise BadObject(hexsha)
        # END handle missing file

        # any other error may be caused by our additional flags:
        # try again without noatime
        try:
            fd = os.open(db_path, os.O_RDONLY)
        except OSError:
            raise BadObject(hexsha)
        # END handle retry

        # didn't work because of our flag, don't try it again
        self._fd_open_flags = 0
    # END exception handling

    try:
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    finally:
        # only the map is handed out, the descriptor is always released here
        os.close(fd)
    # END assure file is closed
185212
def object_info(self, sha):
    """
    Read the header information of the loose object identified by ``sha``.

    :param sha: sha of the object, handed on to ``_map_loose_object``
    :return: the result of ``loose_object_header_info`` for the mapped file -
        presumably the (type_string, size_in_bytes) tuple promised by the
        query interface, TODO confirm
    :raise BadObject: propagated from ``_map_loose_object``"""
    mapped = self._map_loose_object(sha)
    try:
        info = loose_object_header_info(mapped)
    finally:
        mapped.close()
    # END assure release of system resources
    return info
194220
def object(self, sha):
    """
    Provide type, size and a content stream for the loose object ``sha``.

    :param sha: sha of the object, handed on to ``_map_loose_object``
    :return: tuple(type, size, reader) where type and size come from
        ``reader.initialize()`` and reader is the ``DecompressMemMapReader``
        wrapping the object's memory map
    :raise BadObject: propagated from ``_map_loose_object``"""
    # close_on_deletion presumably makes the reader responsible for closing
    # the map - NOTE(review): confirm in DecompressMemMapReader
    stream = DecompressMemMapReader(self._map_loose_object(sha), close_on_deletion=True)
    header = stream.initialize()
    obj_type, obj_size = header
    return obj_type, obj_size, stream
200227
201228 def has_object (self , sha ):
202229 try :
@@ -210,25 +237,10 @@ def to_object(self, type, size, stream, dry_run=False, sha_as_hex=True):
210237 # open a tmp file to write the data to
211238 fd , tmp_path = tempfile .mkstemp (prefix = 'obj' , dir = self ._root_path )
212239 writer = FDCompressedSha1Writer (fd )
213-
214- # WRITE HEADER: type SP size NULL
215- writer .write ("%s %i%s" % (type , size , chr (0 )))
216-
217- # WRITE ALL DATA
218- chunksize = self .stream_chunk_size
240+
219241 try :
220- try :
221- while True :
222- data_len = writer .write (stream .read (chunksize ))
223- if data_len < chunksize :
224- # WRITE FOOTER
225- writer .write ('\n ' )
226- break
227- # END check for stream end
228- # END duplicate data
229- finally :
230- writer .close ()
231- # END assure file was closed
242+ write_object (type , size , stream , writer ,
243+ close_target_stream = True , chunk_size = self .stream_chunk_size )
232244 except :
233245 os .remove (tmp_path )
234246 raise
0 commit comments