From: Magnus Hagander Date: Mon, 10 Apr 2017 12:16:46 +0000 (+0200) Subject: Try to decode attachment filenames when escaped X-Git-Url: http://git.postgresql.org/gitweb/static/gitweb.js?a=commitdiff_plain;h=410dd9f90d3c59b35fc1d4840496ab246c2c24d9;p=pgarchives.git Try to decode attachment filenames when escaped Some MUAs (notably Gmail, at least) can generate header-escaped filenames for attachments if non-ASCII characters are included. If this happens, decode them and try to use the decoded names rather than generating filenames with escaping in them. --- diff --git a/loader/lib/parser.py b/loader/lib/parser.py index 770db6a..d8c8cc3 100644 --- a/loader/lib/parser.py +++ b/loader/lib/parser.py @@ -242,8 +242,12 @@ class ArchivesParser(object): self.attachments_found_first_plaintext = False self.recursive_get_attachments(self.msg) + # Clean a filenames encoding and return it as a unicode string def _clean_filename_encoding(self, filename): - # Clean a filenames encoding and return it as a unicode string + # If this is a header-encoded filename, start by decoding that + if filename.startswith('=?'): + decoded, encoding = email.header.decode_header(filename)[0] + return unicode(decoded, encoding, errors='ignore') # If it's already unicode, just return it if isinstance(filename, unicode):