|
26 | 26 | # [1] https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/UnicodeData.txt |
27 | 27 | # [2] https://raw.githubusercontent.com/unicode-org/cldr/${TAG}/common/transforms/Latin-ASCII.xml |
28 | 28 |
|
29 | | -# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped |
30 | | -# The approach is to be Python3 compatible with Python2 "backports". |
31 | | -from __future__ import print_function |
32 | | -from __future__ import unicode_literals |
33 | | -# END: Python 2/3 compatibility - remove when Python 2 compatibility dropped |
34 | | - |
35 | 29 | import argparse |
36 | 30 | import codecs |
37 | 31 | import re |
38 | 32 | import sys |
39 | 33 | import xml.etree.ElementTree as ET |
40 | 34 |
|
41 | | -# BEGIN: Python 2/3 compatibility - remove when Python 2 compatibility dropped |
42 | | -if sys.version_info[0] <= 2: |
43 | | - # Encode stdout as UTF-8, so we can just print to it |
44 | | - sys.stdout = codecs.getwriter('utf8')(sys.stdout) |
45 | | - |
46 | | - # Map Python 2's chr to unichr |
47 | | - chr = unichr |
48 | | - |
49 | | - # Python 2 and 3 compatible bytes call |
50 | | - def bytes(source, encoding='ascii', errors='strict'): |
51 | | - return source.encode(encoding=encoding, errors=errors) |
52 | | -else: |
53 | | -# END: Python 2/3 compatibility - remove when Python 2 compatibility dropped |
54 | | - sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer) |
| 35 | +sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer) |
55 | 36 |
|
56 | 37 | # The ranges of Unicode characters that we consider to be "plain letters". |
57 | 38 | # For now we are being conservative by including only Latin and Greek. This |
@@ -213,12 +194,12 @@ def special_cases(): |
213 | 194 | charactersSet = set() |
214 | 195 |
|
215 | 196 | # Cyrillic |
216 | | - charactersSet.add((0x0401, u"\u0415")) # CYRILLIC CAPITAL LETTER IO |
217 | | - charactersSet.add((0x0451, u"\u0435")) # CYRILLIC SMALL LETTER IO |
| 197 | + charactersSet.add((0x0401, "\u0415")) # CYRILLIC CAPITAL LETTER IO |
| 198 | + charactersSet.add((0x0451, "\u0435")) # CYRILLIC SMALL LETTER IO |
218 | 199 |
|
219 | 200 | # Symbols of "Letterlike Symbols" Unicode Block (U+2100 to U+214F) |
220 | | - charactersSet.add((0x2103, u"\xb0C")) # DEGREE CELSIUS |
221 | | - charactersSet.add((0x2109, u"\xb0F")) # DEGREE FAHRENHEIT |
| 201 | + charactersSet.add((0x2103, "\xb0C")) # DEGREE CELSIUS |
| 202 | + charactersSet.add((0x2109, "\xb0F")) # DEGREE FAHRENHEIT |
222 | 203 | charactersSet.add((0x2117, "(P)")) # SOUND RECORDING COPYRIGHT |
223 | 204 |
|
224 | 205 | return charactersSet |
|
0 commit comments