bpo-13611: C14N 2.0 implementation for ElementTree (GH-12966) · python/cpython@e1d5dd6 (original) (raw)
`@@ -12,6 +12,7 @@
`
12
12
`import itertools
`
13
13
`import locale
`
14
14
`import operator
`
``
15
`+
import os
`
15
16
`import pickle
`
16
17
`import sys
`
17
18
`import textwrap
`
`@@ -20,6 +21,7 @@
`
20
21
`import warnings
`
21
22
`import weakref
`
22
23
``
``
24
`+
from functools import partial
`
23
25
`from itertools import product, islice
`
24
26
`from test import support
`
25
27
`from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
`
`@@ -3527,6 +3529,231 @@ def test_correct_import_pyET(self):
`
3527
3529
`self.assertIsInstance(pyET.Element.__init__, types.FunctionType)
`
3528
3530
`self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType)
`
3529
3531
``
``
3532
+
``
3533
`+
# --------------------------------------------------------------------
`
``
3534
+
``
3535
`+
def c14n_roundtrip(xml, **options):
    """Return the canonical form of *xml* as produced by ``pyET.canonicalize``.

    Every keyword argument in *options* is forwarded unchanged.
    """
    canonical = pyET.canonicalize(xml, **options)
    return canonical
`
``
3537
+
``
3538
+
``
3539
`+
class C14NTest(unittest.TestCase):
`
``
3540
`+
maxDiff = None
`
``
3541
+
``
3542
`+
`
``
3543
`+
# simple roundtrip tests (from c14n.py)
`
``
3544
+
``
3545
`+
def test_simple_roundtrip(self):
`
``
3546
`+
# Basics
`
``
3547
`+
self.assertEqual(c14n_roundtrip(""), '')
`
``
3548
`+
self.assertEqual(c14n_roundtrip(""), # FIXME
`
``
3549
`+
'')
`
``
3550
`+
self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
`
``
3551
`+
'<prefix:doc xmlns:prefix="uri"></prefix:doc>')
`
``
3552
`+
self.assertEqual(c14n_roundtrip("prefix:bar/"),
`
``
3553
`+
'<prefix:bar xmlns:prefix="uri">')
`
``
3554
`+
self.assertEqual(c14n_roundtrip(""),
`
``
3555
`+
'')
`
``
3556
+
``
3557
`+
# C14N spec
`
``
3558
`+
self.assertEqual(c14n_roundtrip("Hello, world!"),
`
``
3559
`+
'Hello, world!')
`
``
3560
`+
self.assertEqual(c14n_roundtrip("2"),
`
``
3561
`+
'2')
`
``
3562
`+
self.assertEqual(c14n_roundtrip(''),
`
``
3563
`+
'value>"0" && value<"10" ?"valid":"error"')
`
``
3564
`+
self.assertEqual(c14n_roundtrip('''valid'''),
`
``
3565
`+
'valid')
`
``
3566
`+
self.assertEqual(c14n_roundtrip(""),
`
``
3567
`+
'')
`
``
3568
`+
self.assertEqual(c14n_roundtrip(""),
`
``
3569
`+
'')
`
``
3570
`+
self.assertEqual(c14n_roundtrip(""),
`
``
3571
`+
'')
`
``
3572
+
``
3573
`+
# fragments from PJ's tests
`
``
3574
`+
#self.assertEqual(c14n_roundtrip(""),
`
``
3575
`+
#'')
`
``
3576
+
``
3577
`+
def test_c14n_exclusion(self):
`
``
3578
`+
xml = textwrap.dedent("""\
`
``
3579
`+
`
``
3580
`+
`
``
3581
`+
abtext
`
``
3582
`+
`
``
3583
`+
btext
`
``
3584
`+
`
``
3585
`+
<x:d>dtext
`
``
3586
`+
`
``
3587
`+
`
``
3588
`+
""")
`
``
3589
`+
self.assertEqual(
`
``
3590
`+
c14n_roundtrip(xml, strip_text=True),
`
``
3591
`+
''
`
``
3592
`+
'abtext'
`
``
3593
`+
'btext'
`
``
3594
`+
'<x:d xmlns:x="http://example.com/x">dtext</x:d>'
`
``
3595
`+
'')
`
``
3596
`+
self.assertEqual(
`
``
3597
`+
c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),
`
``
3598
`+
''
`
``
3599
`+
'abtext'
`
``
3600
`+
'btext'
`
``
3601
`+
'<x:d xmlns:x="http://example.com/x">dtext</x:d>'
`
``
3602
`+
'')
`
``
3603
`+
self.assertEqual(
`
``
3604
`+
c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),
`
``
3605
`+
''
`
``
3606
`+
'abtext'
`
``
3607
`+
'btext'
`
``
3608
`+
''
`
``
3609
`+
'')
`
``
3610
`+
self.assertEqual(
`
``
3611
`+
c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],
`
``
3612
`+
exclude_tags=['{http://example.com/x}d']),
`
``
3613
`+
''
`
``
3614
`+
'abtext'
`
``
3615
`+
'btext'
`
``
3616
`+
''
`
``
3617
`+
'')
`
``
3618
`+
self.assertEqual(
`
``
3619
`+
c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),
`
``
3620
`+
''
`
``
3621
`+
'<x:d xmlns:x="http://example.com/x">dtext</x:d>'
`
``
3622
`+
'')
`
``
3623
`+
self.assertEqual(
`
``
3624
`+
c14n_roundtrip(xml, exclude_tags=['a', 'b']),
`
``
3625
`+
'\n'
`
``
3626
`+
' \n'
`
``
3627
`+
' \n'
`
``
3628
`+
' \n'
`
``
3629
`+
' <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
`
``
3630
`+
' \n'
`
``
3631
`+
'')
`
``
3632
`+
self.assertEqual(
`
``
3633
`+
c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),
`
``
3634
`+
''
`
``
3635
`+
`
``
3636
`+
''
`
``
3637
`+
'')
`
``
3638
`+
self.assertEqual(
`
``
3639
`+
c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),
`
``
3640
`+
'\n'
`
``
3641
`+
' \n'
`
``
3642
`+
' \n'
`
``
3643
`+
' \n'
`
``
3644
`+
' \n'
`
``
3645
`+
' \n'
`
``
3646
`+
' \n'
`
``
3647
`+
' \n'
`
``
3648
`+
'')
`
``
3649
+
``
3650
`+
`
``
3651
`+
# basic method=c14n tests from the c14n 2.0 specification.  uses
`
``
3652
`+
# test files under xmltestdata/c14n-20.
`
``
3653
+
``
3654
`+
# note that this uses generated C14N versions of the standard ET.write
`
``
3655
`+
# output, not roundtripped C14N (see above).
`
``
3656
+
``
3657
`+
def test_xml_c14n2(self):
    """Check ``ET.canonicalize()`` against the files in xmltestdata/c14n-20.

    The directory listing is split into input documents (``in*``),
    option files (``c14n*``) and expected outputs named
    ``out_<input>_<config>``.  Each expected output is compared, in its
    own subtest, with the canonicalization of the matching input using
    the options read from the matching option file.
    """
    datadir = findfile("c14n-20", subdir="xmltestdata")
    full_path = partial(os.path.join, datadir)
    c14n2_ns = '{http://www.w3.org/2010/xml-c14n2}'

    # Base names (".xml" suffix removed) of all XML files, sorted.
    files = []
    for entry in sorted(os.listdir(datadir)):
        if entry.endswith('.xml'):
            files.append(entry[:-4])
    input_files = [name for name in files if name.startswith('in')]

    # Config name -> {option local name: (stripped text, option element)}.
    configs = {}
    for name in files:
        if not name.startswith('c14n'):
            continue
        options = {}
        # Each child of the root is one option, e.g.
        # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
        for option in ET.parse(full_path(name) + ".xml").getroot():
            local_name = option.tag.split('}')[-1]
            options[local_name] = ((option.text or '').strip(), option)
        configs[name] = options

    # Input name -> list of (expected-output name, config) pairs.
    tests = {}
    for input_file in input_files:
        wanted_prefix = f'out_{input_file}_'
        cases = []
        for name in files:
            config_name = name.rsplit('_', 1)[-1]
            if name.startswith(wanted_prefix) and config_name in configs:
                cases.append((name, configs[config_name]))
        tests[input_file] = cases

    # Make sure we found all test cases.
    self.assertEqual(30, sum(len(cases) for cases in tests.values()))

    def get_option(config, option_name, default=None):
        # Only the text part of the (text, element) pair is of interest.
        value, _ = config.get(option_name, (default, ()))
        return value

    for input_file, output_files in tests.items():
        for output_file, config in output_files:
            # Not inverted on purpose (original note: "no, it's right :)").
            keep_comments = get_option(config, 'IgnoreComments') == 'true'
            strip_text = get_option(config, 'TrimTextNodes') == 'true'
            rewrite_prefixes = (
                get_option(config, 'PrefixRewrite') == 'sequential')

            if 'QNameAware' in config:
                qname_aware = config['QNameAware'][1]
            else:
                qname_aware = None

            if qname_aware is None:
                qtags = qattrs = None
            else:
                qattrs = [
                    f"{{{el.get('NS')}}}{el.get('Name')}"
                    for el in qname_aware.findall(c14n2_ns + 'QualifiedAttr')
                ]
                qtags = [
                    f"{{{el.get('NS')}}}{el.get('Name')}"
                    for el in qname_aware.findall(c14n2_ns + 'Element')
                ]

            # Build subtest description from config.
            descr_parts = []
            for name, (value, children) in sorted(config.items()):
                shown = value or ','.join(
                    child.tag.split('}')[-1] for child in children)
                descr_parts.append(f"{name}={shown}")
            config_descr = ','.join(descr_parts)

            with self.subTest(f"{output_file}({config_descr})"):
                if (input_file in ('inNsRedecl', 'inNsSuperfluous')
                        and not rewrite_prefixes):
                    self.skipTest(
                        f"Redeclared namespace handling is not supported in {output_file}")
                if (qname_aware is not None
                        and qname_aware.find(c14n2_ns + 'XPathElement') is not None):
                    self.skipTest(
                        f"QName rewriting in XPath text is not supported in {output_file}")

                source = full_path(input_file + ".xml")
                if input_file == 'inC14N5':
                    # Hack: avoid setting up external entity resolution in the parser.
                    with open(full_path('world.txt'), 'rb') as entity_file, \
                            open(source, 'rb') as xml_file:
                        source = io.BytesIO(
                            xml_file.read().replace(b'&ent2;', entity_file.read()))

                text = ET.canonicalize(
                    from_file=source,
                    with_comments=keep_comments,
                    strip_text=strip_text,
                    rewrite_prefixes=rewrite_prefixes,
                    qname_aware_tags=qtags,
                    qname_aware_attrs=qattrs)

                expected_path = full_path(output_file + ".xml")
                with open(expected_path, 'r', encoding='utf8') as expected_file:
                    expected = expected_file.read()
                if input_file == 'inC14N3':
                    # FIXME: cET resolves default attributes but ET does not!
                    expected = expected.replace(' attr="default"', '')
                    text = text.replace(' attr="default"', '')
                self.assertEqual(expected, text)
`
``
3756
+
3530
3757
`# --------------------------------------------------------------------
`
3531
3758
``
3532
3759
``
`@@ -3559,6 +3786,8 @@ def test_main(module=None):
`
3559
3786
`XMLParserTest,
`
3560
3787
`XMLPullParserTest,
`
3561
3788
`BugsTest,
`
``
3789
`+
KeywordArgsTest,
`
``
3790
`+
C14NTest,
`
3562
3791
` ]
`
3563
3792
``
3564
3793
`# These tests will only run for the pure-Python version that doesn't import
`