55"""Contains PackIndexFile and PackFile implementations"""
66from gitdb .exc import (
77 BadObject ,
8- UnsupportedOperation
8+ UnsupportedOperation ,
9+ ParseError
910 )
1011from util import (
1112 zlib ,
1516 )
1617
1718from fun import (
19+ create_pack_object_header ,
1820 pack_object_header_info ,
1921 is_equal_canonical_sha ,
2022 type_id_to_type_map ,
4749 DeltaApplyReader ,
4850 Sha1Writer ,
4951 NullStream ,
52+ FlexibleSha1Writer
5053 )
5154
5255from struct import (
5356 pack ,
5457 unpack ,
5558 )
5659
60+ from binascii import crc32
61+
5762from itertools import izip
5863import array
5964import os
@@ -119,10 +124,113 @@ def pack_object_at(data, offset, as_stream):
119124 return abs_data_offset , ODeltaPackInfo (offset , type_id , uncomp_size , delta_info )
120125 # END handle info
121126 # END handle stream
122-
127+
def write_stream_to_pack(read, write, zstream, want_crc=False, chunk_size=None):
    """Copy a stream as read from the ``read`` function, compress it using the
    given zlib stream object, and write the result using the ``write`` function.
    Count the number of written bytes and return it.

    :param read: callable returning at most the requested amount of bytes per call
    :param write: callable receiving the compressed bytes; its return value is ignored
    :param zstream: zlib compression object as created by ``zlib.compressobj``
    :param want_crc: if True, the crc will be generated over the compressed data
    :param chunk_size: amount of bytes to request per read call; if None, the
        module-level default ``chunk_size`` is used
    :return: tuple(no bytes read, no bytes written, crc32) crc might be 0 if want_crc
        was false"""
    if chunk_size is None:
        # fall back to the module-wide default chunk size
        chunk_size = globals()['chunk_size']
    # END handle default chunk size

    br = 0          # bytes read
    bw = 0          # bytes written
    crc = 0

    while True:
        chunk = read(chunk_size)
        br += len(chunk)
        compressed = zstream.compress(chunk)
        bw += len(compressed)
        write(compressed)           # cannot assume return value

        if want_crc:
            # crc32 is chainable: feeding the previous crc continues the checksum
            crc = crc32(compressed, crc)
        # END handle crc

        # a short read marks the end of the input stream
        if len(chunk) != chunk_size:
            break
    # END copy loop

    # flush whatever the compressor still buffers
    compressed = zstream.flush()
    bw += len(compressed)
    write(compressed)
    if want_crc:
        crc = crc32(compressed, crc)
    # END handle crc

    return (br, bw, crc)
161+
162+
123163#} END utilities
124164
125165
class IndexWriter(object):
    """Utility to cache index information, allowing to write all information later
    in one go to the given stream
    :note: currently only writes v2 indices"""
    __slots__ = '_objs'

    def __init__(self):
        # list of (binsha, crc, offset) tuples, one per indexed object
        self._objs = list()

    def append(self, binsha, crc, offset):
        """Append one piece of object information

        :param binsha: 20 byte binary sha1 of the object
        :param crc: crc32 computed over the object's compressed pack data
        :param offset: absolute offset of the object's entry within the pack"""
        self._objs.append((binsha, crc, offset))

    def write(self, pack_binsha, write):
        """Write the index file using the given write method

        :param pack_binsha: sha over the whole pack that we index
        :param write: function receiving strings with the index file data"""
        # sort for sha1 hash - the index lists objects in sha order
        self._objs.sort(key=lambda o: o[0])

        sha_writer = FlexibleSha1Writer(write)
        sha_write = sha_writer.write
        sha_write(PackIndexFile.index_v2_signature)
        sha_write(pack(">L", PackIndexFile.index_version_default))

        # fanout
        tmplist = list((0,)*256)            # fanout or list with 64 bit offsets
        for t in self._objs:
            tmplist[ord(t[0][0])] += 1
        # END prepare fanout

        # each fanout entry holds the cumulative number of objects whose first
        # sha byte is smaller than or equal to the entry's index
        for i in xrange(255):
            v = tmplist[i]
            sha_write(pack('>L', v))
            tmplist[i+1] += v           # accumulate - do not overwrite the next bucket's count
        # END write each fanout entry
        sha_write(pack('>L', tmplist[255]))

        # sha1 ordered
        # save calls, that is push them into c
        sha_write(''.join(t[0] for t in self._objs))

        # crc32
        for t in self._objs:
            sha_write(pack('>L', t[1] & 0xffffffff))
        # END for each crc

        tmplist = list()
        # offset 32
        for t in self._objs:
            ofs = t[2]
            if ofs > 0x7fffffff:
                # offset doesn't fit into 31 bits: store it in the extended
                # 64 bit table and write its table index with the high bit set
                tmplist.append(ofs)
                ofs = 0x80000000 + len(tmplist) - 1
            # END handle 64 bit offsets
            sha_write(pack('>L', ofs & 0xffffffff))
        # END for each offset

        # offset 64
        for ofs in tmplist:
            sha_write(pack(">Q", ofs))
        # END for each offset

        # trailer
        assert(len(pack_binsha) == 20)
        sha_write(pack_binsha)
        # final sha over the whole index is written without hashing itself
        write(sha_writer.sha(as_hex=False))
233+
126234
127235class PackIndexFile (LazyMixin ):
128236 """A pack index provides offsets into the corresponding pack, allowing to find
@@ -135,6 +243,8 @@ class PackIndexFile(LazyMixin):
135243
136244 # used in v2 indices
137245 _sha_list_offset = 8 + 1024
246+ index_v2_signature = '\377 tOc'
247+ index_version_default = 2
138248
139249 def __init__ (self , indexpath ):
140250 super (PackIndexFile , self ).__init__ ()
@@ -155,7 +265,7 @@ def _set_cache_(self, attr):
155265 # to access the fanout table or related properties
156266
157267 # CHECK VERSION
158- self ._version = (self ._data [:4 ] == ' \377 tOc' and 2 ) or 1
268+ self ._version = (self ._data [:4 ] == self . index_v2_signature and 2 ) or 1
159269 if self ._version == 2 :
160270 version_id = unpack_from (">L" , self ._data , 4 )[0 ]
161271 assert version_id == self ._version , "Unsupported index version: %i" % version_id
@@ -383,6 +493,8 @@ class PackFile(LazyMixin):
383493 case"""
384494
385495 __slots__ = ('_packpath' , '_data' , '_size' , '_version' )
496+ pack_signature = 0x5041434b # 'PACK'
497+ pack_version_default = 2
386498
387499 # offset into our data at which the first object starts
388500 first_object_offset = 3 * 4 # header bytes
@@ -396,15 +508,19 @@ def _set_cache_(self, attr):
396508 self ._data = file_contents_ro_filepath (self ._packpath )
397509
398510 # read the header information
399- type_id , self ._version , self ._size = unpack_from (">4sLL " , self ._data , 0 )
511+ type_id , self ._version , self ._size = unpack_from (">LLL " , self ._data , 0 )
400512
401513 # TODO: figure out whether we should better keep the lock, or maybe
402514 # add a .keep file instead ?
403515 else : # must be '_size' or '_version'
404516 # read header info - we do that just with a file stream
405- type_id , self ._version , self ._size = unpack (">4sLL " , open (self ._packpath ).read (12 ))
517+ type_id , self ._version , self ._size = unpack (">LLL " , open (self ._packpath ).read (12 ))
406518 # END handle header
407519
520+ if type_id != self .pack_signature :
521+ raise ParseError ("Invalid pack signature: %i" % type_id )
522+ #END assert type id
523+
408524 def _iter_objects (self , start_offset , as_stream = True ):
409525 """Handle the actual iteration of objects within this pack"""
410526 data = self ._data
@@ -759,7 +875,8 @@ def collect_streams(self, sha):
759875
760876
761877 @classmethod
762- def create (cls , object_iter , pack_write , index_write = None ):
878+ def write_pack (cls , object_iter , pack_write , index_write = None ,
879+ object_count = None , zlib_compression = zlib .Z_BEST_SPEED ):
763880 """
764881 Create a new pack by putting all objects obtained by the object_iterator
765882 into a pack which is written using the pack_write method.
@@ -769,9 +886,74 @@ def create(cls, object_iter, pack_write, index_write=None):
769886 :param pack_write: function to receive strings to write into the pack stream
770887 :param indx_write: if not None, the function writes the index file corresponding
771888 to the pack.
889+ :param object_count: if you can provide the amount of objects in your iteration,
890+ this would be the place to put it. Otherwise we have to pre-iterate and store
891+ all items into a list to get the number, which uses more memory than necessary.
892+ :param zlib_compression: the zlib compression level to use
893+ :return: binary sha over all the contents of the pack
772894 :note: The destination of the write functions is up to the user. It could
773- be a socket, or a file for instance"""
895+ be a socket, or a file for instance
896+ :note: writes only undeltified objects"""
897+ objs = object_iter
898+ if not object_count :
899+ if not isinstance (object_iter , (tuple , list )):
900+ objs = list (object_iter )
901+ #END handle list type
902+ object_count = len (objs )
903+ #END handle object
904+
905+ pack_writer = FlexibleSha1Writer (pack_write )
906+ pwrite = pack_writer .write
907+ ofs = 0 # current offset into the pack file
908+ index = None
909+ wants_index = index_write is not None
910+
911+ # write header
912+ pwrite (pack ('>LLL' , PackFile .pack_signature , PackFile .pack_version_default , object_count ))
913+ ofs += 12
914+
915+ if wants_index :
916+ index = IndexWriter ()
917+ #END handle index header
918+
919+ actual_count = 0
920+ for obj in objs :
921+ actual_count += 1
922+
923+ # object header
924+ hdr = create_pack_object_header (obj .type_id , obj .size )
925+ pwrite (hdr )
926+
927+ # data stream
928+ zstream = zlib .compressobj (zlib_compression )
929+ ostream = obj .stream
930+ br , bw , crc = write_stream_to_pack (ostream .read , pwrite , zstream , want_crc = index_write )
931+ assert (br == obj .size )
932+ if wants_index :
933+ index .append (obj .binsha , crc , ofs )
934+ #END handle index
935+
936+ ofs += len (hdr ) + bw
937+ if actual_count == object_count :
938+ break
939+ #END abort once we are done
940+ #END for each object
941+
942+ if actual_count != object_count :
943+ raise ValueError ("Expected to write %i objects into pack, but received only %i from iterators" % (object_count , actual_count ))
944+ #END count assertion
945+
946+ # write footer
947+ binsha = pack_writer .sha (as_hex = False )
948+ assert len (binsha ) == 20
949+ pack_write (binsha )
950+ ofs += len (binsha ) # just for completeness ;)
951+
952+ if wants_index :
953+ index .write (binsha , index_write )
954+ #END handle index
774955
956+ return binsha
775957
776958
777959 #} END interface
0 commit comments