2222NULL_TREE = object ()
2323
2424
# Matches one backslash escape inside a C-style quoted path: either a
# three-digit octal byte value (\000-\377) or a single escaped character.
_QUOTED_PATH_ESCAPE_RE = re.compile(br'\\(?:(?P<octal>[0-3][0-7]{2})|(?P<char>.))', re.DOTALL)

# Single-character escapes and the byte each one stands for.
_QUOTED_PATH_ESCAPES = {
    b'a': b'\a',
    b'b': b'\b',
    b'f': b'\f',
    b'n': b'\n',
    b'r': b'\r',
    b't': b'\t',
    b'v': b'\v',
    b'"': b'"',
    b'\\': b'\\',
}


def _unescape_quoted_path(match):
    """Return the raw byte(s) represented by one escape sequence match."""
    octal = match.group('octal')
    if octal is not None:
        # bytes(bytearray(...)) builds a single byte on both Python 2 and 3.
        return bytes(bytearray((int(octal, 8),)))
    # Unknown escapes are kept verbatim rather than guessed at.
    return _QUOTED_PATH_ESCAPES.get(match.group('char'), match.group(0))


def decode_path(path, has_ab_prefix=True):
    """Turn a path taken from a patch header into the plain path bytes.

    :param path: path as bytes, optionally wrapped in double quotes with
        backslash escapes, and optionally carrying an ``a/`` or ``b/`` prefix.
    :param has_ab_prefix: if True, the (unquoted) path must start with ``a/``
        or ``b/``; that two-byte prefix is removed from the result.
    :return: the decoded path as bytes, or None if path is ``/dev/null``.
    :raise AssertionError: if has_ab_prefix is True but the prefix is missing."""
    if path == b'/dev/null':
        return None

    if len(path) >= 2 and path.startswith(b'"') and path.endswith(b'"'):
        # Decode every escape in one left-to-right pass. Chained replace()
        # calls are order-dependent: an escaped backslash followed by 'n'
        # would wrongly be turned into a newline.
        path = _QUOTED_PATH_ESCAPE_RE.sub(_unescape_quoted_path, path[1:-1])

    if has_ab_prefix:
        # Raised explicitly (instead of using an assert statement) so that the
        # check is still performed when Python runs with -O.
        if not path.startswith((b'a/', b'b/')):
            raise AssertionError("expected 'a/' or 'b/' prefix in path %r" % (path,))
        path = path[2:]

    return path
40+
41+
2542class Diffable (object ):
2643
2744 """Common interface for all object that can be diffed against another object of compatible type.
@@ -196,9 +213,9 @@ class Diff(object):
196213 be different to the version in the index or tree, and hence has been modified."""
197214
198215 # precompiled regex
199- re_header = re .compile (r """
216+ re_header = re .compile (br """
200217 ^diff[ ]--git
201- [ ](?:a/)?(? P<a_path_fallback>.+?)[ ](?:b/)?(? P<b_path_fallback>.+ ?)\n
218+ [ ](?P<a_path_fallback>"?a/ .+?"? )[ ](?P<b_path_fallback>"?b/.+?" ?)\n
202219 (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
203220 ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
204221 (?:^similarity[ ]index[ ]\d+%\n
@@ -208,9 +225,9 @@ class Diff(object):
208225 (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
209226 (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
210227 \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
211- (?:^---[ ](?:a/)?(? P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
212- (?:^\+\+\+[ ](?:b/)?(? P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
213- """ . encode ( 'ascii' ) , re .VERBOSE | re .MULTILINE )
228+ (?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
229+ (?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
230+ """ , re .VERBOSE | re .MULTILINE )
214231 # can be used for comparisons
215232 NULL_HEX_SHA = "0" * 40
216233 NULL_BIN_SHA = b"\0 " * 20
@@ -319,6 +336,19 @@ def renamed(self):
319336 """:returns: True if the blob of our diff has been renamed"""
320337 return self .rename_from != self .rename_to
321338
@classmethod
def _pick_best_path(cls, path_match, rename_match, path_fallback_match):
    """Decode and return the first non-empty path candidate.

    Candidates are tried in order: path_match, rename_match,
    path_fallback_match. The rename candidate is decoded without expecting an
    a/ or b/ prefix; the other two are decoded with the prefix expected.

    :return: whatever decode_path yields for the chosen candidate, or None if
        every candidate is empty."""
    # (candidate, whether decode_path should strip an a/ or b/ prefix)
    prioritized = (
        (path_match, True),
        (rename_match, False),
        (path_fallback_match, True),
    )

    for candidate, prefixed in prioritized:
        if candidate:
            return decode_path(candidate, has_ab_prefix=prefixed)

    return None
351+
322352 @classmethod
323353 def _index_from_patch_format (cls , repo , stream ):
324354 """Create a new DiffIndex from the given text which must be in patch format
@@ -338,14 +368,8 @@ def _index_from_patch_format(cls, repo, stream):
338368 a_path , b_path = header .groups ()
339369 new_file , deleted_file = bool (new_file_mode ), bool (deleted_file_mode )
340370
341- a_path = a_path or rename_from or a_path_fallback
342- b_path = b_path or rename_to or b_path_fallback
343-
344- if a_path == b'/dev/null' :
345- a_path = None
346-
347- if b_path == b'/dev/null' :
348- b_path = None
371+ a_path = cls ._pick_best_path (a_path , rename_from , a_path_fallback )
372+ b_path = cls ._pick_best_path (b_path , rename_to , b_path_fallback )
349373
350374 # Our only means to find the actual text is to see what has not been matched by our regex,
351375 # and then retroactively assign it to our index
0 commit comments