closes bpo-39926: Update Unicode to 13.0.0. (GH-18910) · python/cpython@051b9d0 (original) (raw)
File tree
11 files changed
lines changed
- Misc/NEWS.d/next/Core and Builtins
11 files changed
lines changed
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -352,7 +352,7 @@ Notes: | ||
352 | 352 | The numeric literals accepted include the digits ``0`` to ``9`` or any |
353 | 353 | Unicode equivalent (code points with the ``Nd`` property). |
354 | 354 | |
355 | - See http://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedNumericType.txt | |
355 | + See http://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt | |
356 | 356 | for a complete list of code points with the ``Nd`` property. |
357 | 357 | |
358 | 358 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -17,8 +17,8 @@ | ||
17 | 17 | |
18 | 18 | This module provides access to the Unicode Character Database (UCD) which |
19 | 19 | defines character properties for all Unicode characters. The data contained in |
20 | -this database is compiled from the `UCD version 12.1.0 | |
21 | -<http://www.unicode.org/Public/12.1.0/ucd>`_. | |
20 | +this database is compiled from the `UCD version 13.0.0 | |
21 | +<http://www.unicode.org/Public/13.0.0/ucd>`_. | |
22 | 22 | |
23 | 23 | The module uses the same names and symbols as defined by Unicode |
24 | 24 | Standard Annex #44, `"Unicode Character Database" |
@@ -175,6 +175,6 @@ Examples: | ||
175 | 175 | |
176 | 176 | .. rubric:: Footnotes |
177 | 177 | |
178 | -.. [#] http://www.unicode.org/Public/12.1.0/ucd/NameAliases.txt | |
178 | +.. [#] http://www.unicode.org/Public/13.0.0/ucd/NameAliases.txt | |
179 | 179 | |
180 | -.. [#] http://www.unicode.org/Public/12.1.0/ucd/NamedSequences.txt | |
180 | +.. [#] http://www.unicode.org/Public/13.0.0/ucd/NamedSequences.txt |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -316,7 +316,7 @@ The Unicode category codes mentioned above stand for: | ||
316 | 316 | * *Nd* - decimal numbers |
317 | 317 | * *Pc* - connector punctuations |
318 | 318 | * *Other_ID_Start* - explicit list of characters in `PropList.txt |
319 | -<http://www.unicode.org/Public/12.1.0/ucd/PropList.txt>`_ to support backwards | |
319 | +<http://www.unicode.org/Public/13.0.0/ucd/PropList.txt>`_ to support backwards | |
320 | 320 | compatibility |
321 | 321 | * *Other_ID_Continue* - likewise |
322 | 322 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -372,6 +372,11 @@ types with context-specific metadata and new ``include_extras`` parameter to | ||
372 | 372 | :func:`typing.get_type_hints` to access the metadata at runtime. (Contributed |
373 | 373 | by Till Varoquaux and Konstantin Kashin.) |
374 | 374 | |
375 | +unicodedata | |
376 | +----------- | |
377 | + | |
378 | +The Unicode database has been updated to version 13.0.0 (:issue:`39926`). | |
379 | + | |
375 | 380 | venv |
376 | 381 | ---- |
377 | 382 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -99,6 +99,7 @@ def test_cjk_unified_ideographs(self): | ||
99 | 99 | self.checkletter("CJK UNIFIED IDEOGRAPH-2B734", "\U0002B734") |
100 | 100 | self.checkletter("CJK UNIFIED IDEOGRAPH-2B740", "\U0002B740") |
101 | 101 | self.checkletter("CJK UNIFIED IDEOGRAPH-2B81D", "\U0002B81D") |
102 | +self.checkletter("CJK UNIFIED IDEOGRAPH-3134A", "\U0003134A") | |
102 | 103 | |
103 | 104 | def test_bmp_characters(self): |
104 | 105 | for code in range(0x10000): |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
1 | +Update Unicode database to Unicode version 13.0.0. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1031,13 +1031,14 @@ static int | ||
1031 | 1031 | is_unified_ideograph(Py_UCS4 code) |
1032 | 1032 | { |
1033 | 1033 | return |
1034 | - (0x3400 <= code && code <= 0x4DB5) | | |
1035 | - (0x4E00 <= code && code <= 0x9FEF) | | |
1036 | - (0x20000 <= code && code <= 0x2A6D6) | | |
1034 | + (0x3400 <= code && code <= 0x4DBF) | | |
1035 | + (0x4E00 <= code && code <= 0x9FFC) | | |
1036 | + (0x20000 <= code && code <= 0x2A6DD) | | |
1037 | 1037 | (0x2A700 <= code && code <= 0x2B734) | |
1038 | 1038 | (0x2B740 <= code && code <= 0x2B81D) | |
1039 | 1039 | (0x2B820 <= code && code <= 0x2CEA1) | |
1040 | - (0x2CEB0 <= code && code <= 0x2EBEF); /* CJK Ideograph Extension F */ | |
1040 | + (0x2CEB0 <= code && code <= 0x2EBE0) | | |
1041 | + (0x30000 <= code && code <= 0x3134A); /* CJK Ideograph Extension G */ | |
1041 | 1042 | } |
1042 | 1043 | |
1043 | 1044 | /* macros used to determine if the given code point is in the PUA range that |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -44,7 +44,7 @@ | ||
44 | 44 | # * Doc/library/stdtypes.rst, and |
45 | 45 | # * Doc/library/unicodedata.rst |
46 | 46 | # * Doc/reference/lexical_analysis.rst (two occurrences) |
47 | -UNIDATA_VERSION = "12.1.0" | |
47 | +UNIDATA_VERSION = "13.0.0" | |
48 | 48 | UNICODE_DATA = "UnicodeData%s.txt" |
49 | 49 | COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" |
50 | 50 | EASTASIAN_WIDTH = "EastAsianWidth%s.txt" |
@@ -100,13 +100,14 @@ | ||
100 | 100 | |
101 | 101 | # these ranges need to match unicodedata.c:is_unified_ideograph |
102 | 102 | cjk_ranges = [ |
103 | - ('3400', '4DB5'), | |
104 | - ('4E00', '9FEF'), | |
105 | - ('20000', '2A6D6'), | |
103 | + ('3400', '4DBF'), | |
104 | + ('4E00', '9FFC'), | |
105 | + ('20000', '2A6DD'), | |
106 | 106 | ('2A700', '2B734'), |
107 | 107 | ('2B740', '2B81D'), |
108 | 108 | ('2B820', '2CEA1'), |
109 | 109 | ('2CEB0', '2EBE0'), |
110 | + ('30000', '3134A'), | |
110 | 111 | ] |
111 | 112 | |
112 | 113 |