cpython: 8f95d77443da (original) (raw)

--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -107,6 +107,11 @@ Documentation

--- a/Tools/unicode/mkstringprep.py
+++ b/Tools/unicode/mkstringprep.py
@@ -1,4 +1,5 @@
-import re, unicodedata, sys
+import re, sys
+from unicodedata import ucd_3_2_0 as unicodedata
 if sys.maxunicode == 65535:
     raise RuntimeError("need UCS-4 Python")
@@ -37,16 +38,20 @@ def compact_set(l):
         tuple.append((prev,prev+span+1))
     else:
         single.append(prev)

 ############## Read the tables in the RFC #######################
-data = open("rfc3454.txt").readlines()
+with open("rfc3454.txt") as f:

tables = [] curname = None @@ -55,8 +60,7 @@ for l in data: if not l: continue # Skip RFC page breaks

# A.1 is the table of unassigned characters

# XXX Plane 15 PUA is listed as unassigned in Python.

@@ -173,15 +179,15 @@ assert name == "B.3" b3_exceptions = {} for k,v in table_b2.items():

 b3 = sorted(b3_exceptions.items())
 print("""
 b3_exceptions = {""")
-for i,(k,v) in enumerate(b3):

+for i, kv in enumerate(b3):

print("}") @@ -224,7 +230,7 @@ print(""" def map_table_b2(a): al = map_table_b3(a) b = unicodedata.normalize("NFKC", al)

@@ -240,7 +246,7 @@ assert table == {0x20:0x20} print(""" def in_table_c11(code):

""")

# C.1.2 is the rest of all space characters

@@ -249,12 +255,12 @@ del tables[0] assert name == "C.1.2"

table = set(table.keys())

-# Zs = set(gen_category(["Zs"])) - set([0x20])
+# Zs = set(gen_category(["Zs"])) - {0x20}

assert Zs == table

print(""" def in_table_c12(code):

def in_table_c11_c12(code): return unicodedata.category(code) == "Zs"