1414from itertools import islice
1515from functools import reduce
1616
17- from gitdb .utils .compat import izip , buffer , xrange
17+ from gitdb .const import NULL_BYTE , BYTE_SPACE
18+ from gitdb .utils .encoding import force_text
19+ from gitdb .utils .compat import izip , buffer , xrange , PY3
1820from gitdb .typ import (
1921 str_blob_type ,
2022 str_commit_type ,
3032delta_types = (OFS_DELTA , REF_DELTA )
3133
3234type_id_to_type_map = {
33- 0 : "" , # EXT 1
35+ 0 : b'' , # EXT 1
3436 1 : str_commit_type ,
3537 2 : str_tree_type ,
3638 3 : str_blob_type ,
3739 4 : str_tag_type ,
38- 5 : "" , # EXT 2
40+ 5 : b'' , # EXT 2
3941 OFS_DELTA : "OFS_DELTA" , # OFFSET DELTA
4042 REF_DELTA : "REF_DELTA" # REFERENCE DELTA
4143}
@@ -394,11 +396,9 @@ def loose_object_header_info(m):
394396 :return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
395397 object as well as its uncompressed size in bytes.
396398 :param m: memory map from which to read the compressed object data"""
397- from gitdb .const import NULL_BYTE
398-
399399 decompress_size = 8192 # is used in cgit as well
400400 hdr = decompressobj ().decompress (m , decompress_size )
401- type_name , size = hdr [:hdr .find (NULL_BYTE )].split (" " . encode ( "ascii" ) )
401+ type_name , size = hdr [:hdr .find (NULL_BYTE )].split (BYTE_SPACE )
402402
403403 return type_name , int (size )
404404
@@ -413,12 +413,21 @@ def pack_object_header_info(data):
413413 type_id = (c >> 4 ) & 7 # numeric type
414414 size = c & 15 # starting size
415415 s = 4 # starting bit-shift size
416- while c & 0x80 :
417- c = byte_ord (data [i ])
418- i += 1
419- size += (c & 0x7f ) << s
420- s += 7
421- # END character loop
416+ if PY3 :
417+ while c & 0x80 :
418+ c = data [i ]
419+ i += 1
420+ size += (c & 0x7f ) << s
421+ s += 7
422+ # END character loop
423+ else :
424+ while c & 0x80 :
425+ c = ord (data [i ])
426+ i += 1
427+ size += (c & 0x7f ) << s
428+ s += 7
429+ # END character loop
430+ # end performance at expense of maintenance ...
422431 return (type_id , size , i )
423432
424433def create_pack_object_header (obj_type , obj_size ):
@@ -429,16 +438,29 @@ def create_pack_object_header(obj_type, obj_size):
429438 :param obj_type: pack type_id of the object
430439 :param obj_size: uncompressed size in bytes of the following object stream"""
431440 c = 0 # 1 byte
432- hdr = str () # output string
433-
434- c = (obj_type << 4 ) | (obj_size & 0xf )
435- obj_size >>= 4
436- while obj_size :
437- hdr += chr (c | 0x80 )
438- c = obj_size & 0x7f
439- obj_size >>= 7
440- #END until size is consumed
441- hdr += chr (c )
441+ if PY3 :
442+ hdr = bytearray () # output string
443+
444+ c = (obj_type << 4 ) | (obj_size & 0xf )
445+ obj_size >>= 4
446+ while obj_size :
447+ hdr .append (c | 0x80 )
448+ c = obj_size & 0x7f
449+ obj_size >>= 7
450+ #END until size is consumed
451+ hdr .append (c )
452+ else :
453+ hdr = bytes () # output string
454+
455+ c = (obj_type << 4 ) | (obj_size & 0xf )
456+ obj_size >>= 4
457+ while obj_size :
458+ hdr += chr (c | 0x80 )
459+ c = obj_size & 0x7f
460+ obj_size >>= 7
461+ #END until size is consumed
462+ hdr += chr (c )
463+ # end handle interpreter
442464 return hdr
443465
444466def msb_size (data , offset = 0 ):
@@ -449,24 +471,36 @@ def msb_size(data, offset=0):
449471 i = 0
450472 l = len (data )
451473 hit_msb = False
452- while i < l :
453- c = byte_ord (data [i + offset ])
454- size |= (c & 0x7f ) << i * 7
455- i += 1
456- if not c & 0x80 :
457- hit_msb = True
458- break
459- # END check msb bit
460- # END while in range
474+ if PY3 :
475+ while i < l :
476+ c = data [i + offset ]
477+ size |= (c & 0x7f ) << i * 7
478+ i += 1
479+ if not c & 0x80 :
480+ hit_msb = True
481+ break
482+ # END check msb bit
483+ # END while in range
484+ else :
485+ while i < l :
486+ c = ord (data [i + offset ])
487+ size |= (c & 0x7f ) << i * 7
488+ i += 1
489+ if not c & 0x80 :
490+ hit_msb = True
491+ break
492+ # END check msb bit
493+ # END while in range
494+ # end performance ...
461495 if not hit_msb :
462496 raise AssertionError ("Could not find terminating MSB byte in data stream" )
463497 return i + offset , size
464498
465499def loose_object_header (type , size ):
466500 """
467- :return: string representing the loose object header, which is immediately
501+ :return: bytes representing the loose object header, which is immediately
468502 followed by the content stream of size 'size'"""
469- return " %s %i\0 " % (type , size )
503+ return ( ' %s %i\0 ' % (force_text ( type ) , size )). encode ( 'ascii' )
470504
471505def write_object (type , size , read , write , chunk_size = chunk_size ):
472506 """
@@ -611,48 +645,93 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
611645 **Note:** transcribed to python from the similar routine in patch-delta.c"""
612646 i = 0
613647 db = delta_buf
614- while i < delta_buf_size :
615- c = ord (db [i ])
616- i += 1
617- if c & 0x80 :
618- cp_off , cp_size = 0 , 0
619- if (c & 0x01 ):
620- cp_off = ord (db [i ])
621- i += 1
622- if (c & 0x02 ):
623- cp_off |= (ord (db [i ]) << 8 )
624- i += 1
625- if (c & 0x04 ):
626- cp_off |= (ord (db [i ]) << 16 )
627- i += 1
628- if (c & 0x08 ):
629- cp_off |= (ord (db [i ]) << 24 )
630- i += 1
631- if (c & 0x10 ):
632- cp_size = ord (db [i ])
633- i += 1
634- if (c & 0x20 ):
635- cp_size |= (ord (db [i ]) << 8 )
636- i += 1
637- if (c & 0x40 ):
638- cp_size |= (ord (db [i ]) << 16 )
639- i += 1
640-
641- if not cp_size :
642- cp_size = 0x10000
643-
644- rbound = cp_off + cp_size
645- if (rbound < cp_size or
646- rbound > src_buf_size ):
647- break
648- write (buffer (src_buf , cp_off , cp_size ))
649- elif c :
650- write (db [i :i + c ])
651- i += c
652- else :
653- raise ValueError ("unexpected delta opcode 0" )
654- # END handle command byte
655- # END while processing delta data
648+ if PY3 :
649+ while i < delta_buf_size :
650+ c = db [i ]
651+ i += 1
652+ if c & 0x80 :
653+ cp_off , cp_size = 0 , 0
654+ if (c & 0x01 ):
655+ cp_off = db [i ]
656+ i += 1
657+ if (c & 0x02 ):
658+ cp_off |= (db [i ] << 8 )
659+ i += 1
660+ if (c & 0x04 ):
661+ cp_off |= (db [i ] << 16 )
662+ i += 1
663+ if (c & 0x08 ):
664+ cp_off |= (db [i ] << 24 )
665+ i += 1
666+ if (c & 0x10 ):
667+ cp_size = db [i ]
668+ i += 1
669+ if (c & 0x20 ):
670+ cp_size |= (db [i ] << 8 )
671+ i += 1
672+ if (c & 0x40 ):
673+ cp_size |= (db [i ] << 16 )
674+ i += 1
675+
676+ if not cp_size :
677+ cp_size = 0x10000
678+
679+ rbound = cp_off + cp_size
680+ if (rbound < cp_size or
681+ rbound > src_buf_size ):
682+ break
683+ write (buffer (src_buf , cp_off , cp_size ))
684+ elif c :
685+ write (db [i :i + c ])
686+ i += c
687+ else :
688+ raise ValueError ("unexpected delta opcode 0" )
689+ # END handle command byte
690+ # END while processing delta data
691+ else :
692+ while i < delta_buf_size :
693+ c = ord (db [i ])
694+ i += 1
695+ if c & 0x80 :
696+ cp_off , cp_size = 0 , 0
697+ if (c & 0x01 ):
698+ cp_off = ord (db [i ])
699+ i += 1
700+ if (c & 0x02 ):
701+ cp_off |= (ord (db [i ]) << 8 )
702+ i += 1
703+ if (c & 0x04 ):
704+ cp_off |= (ord (db [i ]) << 16 )
705+ i += 1
706+ if (c & 0x08 ):
707+ cp_off |= (ord (db [i ]) << 24 )
708+ i += 1
709+ if (c & 0x10 ):
710+ cp_size = ord (db [i ])
711+ i += 1
712+ if (c & 0x20 ):
713+ cp_size |= (ord (db [i ]) << 8 )
714+ i += 1
715+ if (c & 0x40 ):
716+ cp_size |= (ord (db [i ]) << 16 )
717+ i += 1
718+
719+ if not cp_size :
720+ cp_size = 0x10000
721+
722+ rbound = cp_off + cp_size
723+ if (rbound < cp_size or
724+ rbound > src_buf_size ):
725+ break
726+ write (buffer (src_buf , cp_off , cp_size ))
727+ elif c :
728+ write (db [i :i + c ])
729+ i += c
730+ else :
731+ raise ValueError ("unexpected delta opcode 0" )
732+ # END handle command byte
733+ # END while processing delta data
734+ # end save byte_ord call and prevent performance regression in py2
656735
657737 # yes, let's use the exact same error message that git uses :)
658737 assert i == delta_buf_size , "delta replay has gone wild"
0 commit comments