closes bpo-39926: Update Unicode to 13.0.0. (GH-18910) · python/cpython@051b9d0 (original) (raw)

File tree

11 files changed

lines changed

11 files changed

lines changed

Original file line number Diff line number Diff line change
@@ -352,7 +352,7 @@ Notes:
352 352 The numeric literals accepted include the digits ``0`` to ``9`` or any
353 353 Unicode equivalent (code points with the ``Nd`` property).
354 354
355 - See http://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedNumericType.txt
355 + See http://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt
356 356 for a complete list of code points with the ``Nd`` property.
357 357
358 358
Original file line number Diff line number Diff line change
@@ -17,8 +17,8 @@
17 17
18 18 This module provides access to the Unicode Character Database (UCD) which
19 19 defines character properties for all Unicode characters. The data contained in
20 -this database is compiled from the `UCD version 12.1.0
21 -<http://www.unicode.org/Public/12.1.0/ucd>`_.
20 +this database is compiled from the `UCD version 13.0.0
21 +<http://www.unicode.org/Public/13.0.0/ucd>`_.
22 22
23 23 The module uses the same names and symbols as defined by Unicode
24 24 Standard Annex #44, `"Unicode Character Database"
@@ -175,6 +175,6 @@ Examples:
175 175
176 176 .. rubric:: Footnotes
177 177
178 -.. [#] http://www.unicode.org/Public/12.1.0/ucd/NameAliases.txt
178 +.. [#] http://www.unicode.org/Public/13.0.0/ucd/NameAliases.txt
179 179
180 -.. [#] http://www.unicode.org/Public/12.1.0/ucd/NamedSequences.txt
180 +.. [#] http://www.unicode.org/Public/13.0.0/ucd/NamedSequences.txt
Original file line number Diff line number Diff line change
@@ -316,7 +316,7 @@ The Unicode category codes mentioned above stand for:
316 316 * *Nd* - decimal numbers
317 317 * *Pc* - connector punctuations
318 318 * *Other_ID_Start* - explicit list of characters in `PropList.txt
319 -<http://www.unicode.org/Public/12.1.0/ucd/PropList.txt>`_ to support backwards
319 +<http://www.unicode.org/Public/13.0.0/ucd/PropList.txt>`_ to support backwards
320 320 compatibility
321 321 * *Other_ID_Continue* - likewise
322 322
Original file line number Diff line number Diff line change
@@ -372,6 +372,11 @@ types with context-specific metadata and new ``include_extras`` parameter to
372 372 :func:`typing.get_type_hints` to access the metadata at runtime. (Contributed
373 373 by Till Varoquaux and Konstantin Kashin.)
374 374
375 +unicodedata
376 +-----------
377 +
378 +The Unicode database has been updated to version 13.0.0. (:issue:`39926`).
379 +
375 380 venv
376 381 ----
377 382
Original file line number Diff line number Diff line change
@@ -99,6 +99,7 @@ def test_cjk_unified_ideographs(self):
99 99 self.checkletter("CJK UNIFIED IDEOGRAPH-2B734", "\U0002B734")
100 100 self.checkletter("CJK UNIFIED IDEOGRAPH-2B740", "\U0002B740")
101 101 self.checkletter("CJK UNIFIED IDEOGRAPH-2B81D", "\U0002B81D")
102 +self.checkletter("CJK UNIFIED IDEOGRAPH-3134A", "\U0003134A")
102 103
103 104 def test_bmp_characters(self):
104 105 for code in range(0x10000):
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1 +Update Unicode database to Unicode version 13.0.0.
Original file line number Diff line number Diff line change
@@ -1031,13 +1031,14 @@ static int
1031 1031 is_unified_ideograph(Py_UCS4 code)
1032 1032 {
1033 1033 return
1034 - (0x3400 <= code && code <= 0x4DB5) |
1035 - (0x4E00 <= code && code <= 0x9FEF) |
1036 - (0x20000 <= code && code <= 0x2A6D6) |
1034 + (0x3400 <= code && code <= 0x4DBF) |
1035 + (0x4E00 <= code && code <= 0x9FFC) |
1036 + (0x20000 <= code && code <= 0x2A6DD) |
1037 1037 (0x2A700 <= code && code <= 0x2B734) |
1038 1038 (0x2B740 <= code && code <= 0x2B81D) |
1039 1039 (0x2B820 <= code && code <= 0x2CEA1) |
1040 - (0x2CEB0 <= code && code <= 0x2EBEF); /* CJK Ideograph Extension F */
1040 + (0x2CEB0 <= code && code <= 0x2EBE0) |
1041 + (0x30000 <= code && code <= 0x3134A); /* CJK Ideograph Extension G */
1041 1042 }
1042 1043
1043 1044 /* macros used to determine if the given code point is in the PUA range that
Original file line number Diff line number Diff line change
@@ -44,7 +44,7 @@
44 44 # * Doc/library/stdtypes.rst, and
45 45 # * Doc/library/unicodedata.rst
46 46 # * Doc/reference/lexical_analysis.rst (two occurrences)
47 -UNIDATA_VERSION = "12.1.0"
47 +UNIDATA_VERSION = "13.0.0"
48 48 UNICODE_DATA = "UnicodeData%s.txt"
49 49 COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
50 50 EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
@@ -100,13 +100,14 @@
100 100
101 101 # these ranges need to match unicodedata.c:is_unified_ideograph
102 102 cjk_ranges = [
103 - ('3400', '4DB5'),
104 - ('4E00', '9FEF'),
105 - ('20000', '2A6D6'),
103 + ('3400', '4DBF'),
104 + ('4E00', '9FFC'),
105 + ('20000', '2A6DD'),
106 106 ('2A700', '2B734'),
107 107 ('2B740', '2B81D'),
108 108 ('2B820', '2CEA1'),
109 109 ('2CEB0', '2EBE0'),
110 + ('30000', '3134A'),
110 111 ]
111 112
112 113