closes bpo-39926: Update Unicode to 13.0.0. (GH-18910) · python/cpython@051b9d0 (original) (raw)

File tree

lines changed

lines changed

Original file line number	Diff line number	Diff line change
@@ -352,7 +352,7 @@ Notes:
352	352	The numeric literals accepted include the digits ``0`` to ``9`` or any
353	353	Unicode equivalent (code points with the ``Nd`` property).
354	354
355		- See http://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedNumericType.txt
	355	+ See http://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedNumericType.txt
356	356	for a complete list of code points with the ``Nd`` property.
357	357
358	358

Original file line number	Diff line number	Diff line change
@@ -17,8 +17,8 @@
17	17
18	18	This module provides access to the Unicode Character Database (UCD) which
19	19	defines character properties for all Unicode characters. The data contained in
20		-this database is compiled from the `UCD version 12.1.0
21		-<http://www.unicode.org/Public/12.1.0/ucd>`_.
	20	+this database is compiled from the `UCD version 13.0.0
	21	+<http://www.unicode.org/Public/13.0.0/ucd>`_.
22	22
23	23	The module uses the same names and symbols as defined by Unicode
24	24	Standard Annex #44, `"Unicode Character Database"
@@ -175,6 +175,6 @@ Examples:
175	175
176	176	.. rubric:: Footnotes
177	177
178		-.. [#] http://www.unicode.org/Public/12.1.0/ucd/NameAliases.txt
	178	+.. [#] http://www.unicode.org/Public/13.0.0/ucd/NameAliases.txt
179	179
180		-.. [#] http://www.unicode.org/Public/12.1.0/ucd/NamedSequences.txt
	180	+.. [#] http://www.unicode.org/Public/13.0.0/ucd/NamedSequences.txt

Original file line number	Diff line number	Diff line change
@@ -316,7 +316,7 @@ The Unicode category codes mentioned above stand for:
316	316	* Nd - decimal numbers
317	317	* Pc - connector punctuations
318	318	* Other_ID_Start - explicit list of characters in `PropList.txt
319		-<http://www.unicode.org/Public/12.1.0/ucd/PropList.txt>`_ to support backwards
	319	+<http://www.unicode.org/Public/13.0.0/ucd/PropList.txt>`_ to support backwards
320	320	compatibility
321	321	* Other_ID_Continue - likewise
322	322

Original file line number	Diff line number	Diff line change
@@ -372,6 +372,11 @@ types with context-specific metadata and new ``include_extras`` parameter to
372	372	:func:`typing.get_type_hints` to access the metadata at runtime. (Contributed
373	373	by Till Varoquaux and Konstantin Kashin.)
374	374
	375	+unicodedata
	376	+-----------
	377	+
	378	+The Unicode database has been updated to version 13.0.0. (:issue:`39926`).
	379	+
375	380	venv
376	381	----
377	382

Original file line number	Diff line number	Diff line change
@@ -99,6 +99,7 @@ def test_cjk_unified_ideographs(self):
99	99	self.checkletter("CJK UNIFIED IDEOGRAPH-2B734", "\U0002B734")
100	100	self.checkletter("CJK UNIFIED IDEOGRAPH-2B740", "\U0002B740")
101	101	self.checkletter("CJK UNIFIED IDEOGRAPH-2B81D", "\U0002B81D")
	102	+self.checkletter("CJK UNIFIED IDEOGRAPH-3134A", "\U0003134A")
102	103
103	104	def test_bmp_characters(self):
104	105	for code in range(0x10000):

Original file line number	Diff line number	Diff line change
@@ -0,0 +1 @@
	1	+Update Unicode database to Unicode version 13.0.0.

Original file line number	Diff line number	Diff line change
@@ -1031,13 +1031,14 @@ static int
1031	1031	is_unified_ideograph(Py_UCS4 code)
1032	1032	{
1033	1033	return
1034		- (0x3400 <= code && code <= 0x4DB5) ||
1035		- (0x4E00 <= code && code <= 0x9FEF) ||
1036		- (0x20000 <= code && code <= 0x2A6D6) ||
	1034	+ (0x3400 <= code && code <= 0x4DBF) ||
	1035	+ (0x4E00 <= code && code <= 0x9FFC) ||
	1036	+ (0x20000 <= code && code <= 0x2A6DD) ||
1037	1037	(0x2A700 <= code && code <= 0x2B734) ||
1038	1038	(0x2B740 <= code && code <= 0x2B81D) ||
1039	1039	(0x2B820 <= code && code <= 0x2CEA1) ||
1040		- (0x2CEB0 <= code && code <= 0x2EBEF); /* CJK Ideograph Extension F */
	1040	+ (0x2CEB0 <= code && code <= 0x2EBE0) ||
	1041	+ (0x30000 <= code && code <= 0x3134A); /* CJK Ideograph Extension G */
1041	1042	}
1042	1043
1043	1044	/* macros used to determine if the given code point is in the PUA range that

Original file line number	Diff line number	Diff line change
@@ -44,7 +44,7 @@
44	44	# * Doc/library/stdtypes.rst, and
45	45	# * Doc/library/unicodedata.rst
46	46	# * Doc/reference/lexical_analysis.rst (two occurrences)
47		-UNIDATA_VERSION = "12.1.0"
	47	+UNIDATA_VERSION = "13.0.0"
48	48	UNICODE_DATA = "UnicodeData%s.txt"
49	49	COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
50	50	EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
@@ -100,13 +100,14 @@
100	100
101	101	# these ranges need to match unicodedata.c:is_unified_ideograph
102	102	cjk_ranges = [
103		- ('3400', '4DB5'),
104		- ('4E00', '9FEF'),
105		- ('20000', '2A6D6'),
	103	+ ('3400', '4DBF'),
	104	+ ('4E00', '9FFC'),
	105	+ ('20000', '2A6DD'),
106	106	('2A700', '2B734'),
107	107	('2B740', '2B81D'),
108	108	('2B820', '2CEA1'),
109	109	('2CEB0', '2EBE0'),
	110	+ ('30000', '3134A'),
110	111	]
111	112
112	113