bpo-13611: C14N 2.0 implementation for ElementTree (GH-12966) · python/cpython@e1d5dd6 (original) (raw)

`@@ -12,6 +12,7 @@

`

12

12

`import itertools

`

13

13

`import locale

`

14

14

`import operator

`

``

15

`+

import os

`

15

16

`import pickle

`

16

17

`import sys

`

17

18

`import textwrap

`

`@@ -20,6 +21,7 @@

`

20

21

`import warnings

`

21

22

`import weakref

`

22

23

``

``

24

`+

from functools import partial

`

23

25

`from itertools import product, islice

`

24

26

`from test import support

`

25

27

`from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr

`

`@@ -3527,6 +3529,231 @@ def test_correct_import_pyET(self):

`

3527

3529

`self.assertIsInstance(pyET.Element.init, types.FunctionType)

`

3528

3530

`self.assertIsInstance(pyET.XMLParser.init, types.FunctionType)

`

3529

3531

``

``

3532

+

``

3533

`+

# --------------------------------------------------------------------

`

``

3534

+

``

3535

`+

def c14n_roundtrip(xml, **options):
    """Return the canonical form of *xml* as produced by pyET.canonicalize().

    All keyword *options* are forwarded to canonicalize() unchanged.
    """
    canonical = pyET.canonicalize(xml, **options)
    return canonical

`

``

3537

+

``

3538

+

``

3539

`+

class C14NTest(unittest.TestCase):

`

``

3540

`+

maxDiff = None

`

``

3541

+

``

3542

`+

`

``

3543

`+

# Simple roundtrip tests (from c14n.py).

`

``

3544

+

``

3545

`+

def test_simple_roundtrip(self):

`

``

3546

`+

Basics

`

``

3547

`+

self.assertEqual(c14n_roundtrip(""), '')

`

``

3548

`+

self.assertEqual(c14n_roundtrip(""), # FIXME

`

``

3549

`+

'')

`

``

3550

`+

self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),

`

``

3551

`+

'<prefix:doc xmlns:prefix="uri">')

`

``

3552

`+

self.assertEqual(c14n_roundtrip("prefix:bar/"),

`

``

3553

`+

'<prefix:bar xmlns:prefix="uri">')

`

``

3554

`+

self.assertEqual(c14n_roundtrip(""),

`

``

3555

`+

'')

`

``

3556

+

``

3557

`+

C14N spec

`

``

3558

`+

self.assertEqual(c14n_roundtrip("Hello, world!"),

`

``

3559

`+

'Hello, world!')

`

``

3560

`+

self.assertEqual(c14n_roundtrip("2"),

`

``

3561

`+

'2')

`

``

3562

`+

self.assertEqual(c14n_roundtrip(''),

`

``

3563

`+

'value>"0" && value<"10" ?"valid":"error"')

`

``

3564

`+

self.assertEqual(c14n_roundtrip('''valid'''),

`

``

3565

`+

'valid')

`

``

3566

`+

self.assertEqual(c14n_roundtrip(""),

`

``

3567

`+

'')

`

``

3568

`+

self.assertEqual(c14n_roundtrip(""),

`

``

3569

`+

'')

`

``

3570

`+

self.assertEqual(c14n_roundtrip(""),

`

``

3571

`+

'')

`

``

3572

+

``

3573

`+

fragments from PJ's tests

`

``

3574

`+

#self.assertEqual(c14n_roundtrip(""),

`

``

3575

`+

#'')

`

``

3576

+

``

3577

`+

def test_c14n_exclusion(self):

`

``

3578

`+

xml = textwrap.dedent("""\

`

``

3579

`+

`

``

3580

`+

`

``

3581

`+

abtext

`

``

3582

`+

`

``

3583

`+

btext

`

``

3584

`+

`

``

3585

`+

<x:d>dtext

`

``

3586

`+

`

``

3587

`+

`

``

3588

`+

""")

`

``

3589

`+

self.assertEqual(

`

``

3590

`+

c14n_roundtrip(xml, strip_text=True),

`

``

3591

`+

''

`

``

3592

`+

'abtext'

`

``

3593

`+

'btext'

`

``

3594

`+

'<x:d xmlns:x="dtext" title="undefined" rel="noopener noreferrer">http://example.com/x">dtext'

`

``

3595

`+

'')

`

``

3596

`+

self.assertEqual(

`

``

3597

`+

c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),

`

``

3598

`+

''

`

``

3599

`+

'abtext'

`

``

3600

`+

'btext'

`

``

3601

`+

'<x:d xmlns:x="dtext" title="undefined" rel="noopener noreferrer">http://example.com/x">dtext'

`

``

3602

`+

'')

`

``

3603

`+

self.assertEqual(

`

``

3604

`+

c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),

`

``

3605

`+

''

`

``

3606

`+

'abtext'

`

``

3607

`+

'btext'

`

``

3608

`+

''

`

``

3609

`+

'')

`

``

3610

`+

self.assertEqual(

`

``

3611

`+

c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],

`

``

3612

`+

exclude_tags=['{http://example.com/x}d']),

`

``

3613

`+

''

`

``

3614

`+

'abtext'

`

``

3615

`+

'btext'

`

``

3616

`+

''

`

``

3617

`+

'')

`

``

3618

`+

self.assertEqual(

`

``

3619

`+

c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),

`

``

3620

`+

''

`

``

3621

`+

'<x:d xmlns:x="dtext" title="undefined" rel="noopener noreferrer">http://example.com/x">dtext'

`

``

3622

`+

'')

`

``

3623

`+

self.assertEqual(

`

``

3624

`+

c14n_roundtrip(xml, exclude_tags=['a', 'b']),

`

``

3625

`+

'\n'

`

``

3626

`+

' \n'

`

``

3627

`+

' \n'

`

``

3628

`+

' \n'

`

``

3629

`+

' <x:d xmlns:x="dtext" title="undefined" rel="noopener noreferrer">http://example.com/x">dtext\n'

`

``

3630

`+

' \n'

`

``

3631

`+

'')

`

``

3632

`+

self.assertEqual(

`

``

3633

`+

c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),

`

``

3634

`+

''

`

``

3635

`+

''

`

``

3636

`+

''

`

``

3637

`+

'')

`

``

3638

`+

self.assertEqual(

`

``

3639

`+

c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),

`

``

3640

`+

'\n'

`

``

3641

`+

' \n'

`

``

3642

`+

' \n'

`

``

3643

`+

' \n'

`

``

3644

`+

' \n'

`

``

3645

`+

' \n'

`

``

3646

`+

' \n'

`

``

3647

`+

' \n'

`

``

3648

`+

'')

`

``

3649

+

``

3650

`+

`

``

3651

`+

# Basic method=c14n tests from the C14N 2.0 specification.  Uses

`

``

3652

`+

# test files under xmltestdata/c14n-20.

`

``

3653

+

``

3654

`+

# Note that this uses generated C14N versions of the standard ET.write

`

``

3655

`+

# output, not roundtripped C14N (see above).

`

``

3656

+

``

3657

`+

def test_xml_c14n2(self):
    # Data-driven check of ET.canonicalize() against the files found in
    # xmltestdata/c14n-20.  Files are told apart by base-name prefix:
    #   in*                     input documents
    #   c14n*                   option sets (one child element per option)
    #   out_<input>_<config>    expected canonical output
    # Each expected-output file whose trailing "_<config>" part names a
    # known option set is verified in its own subtest.
    # (Kept as comments rather than a docstring so that verbose unittest
    # output keeps showing the method name.)
    c14n2_ns = '{http://www.w3.org/2010/xml-c14n2}'
    datadir = findfile("c14n-20", subdir="xmltestdata")
    full_path = partial(os.path.join, datadir)

    def local_name(tag):
        # Drop the leading '{namespace}' part of an ElementTree tag.
        return tag.split('}')[-1]

    def get_option(config, option_name, default=None):
        # Config values are (text, element) pairs; return only the text.
        return config.get(option_name, (default, ()))[0]

    def qualified_names(qname_aware, child_tag):
        # '{NS}Name' strings for every *child_tag* child of a QNameAware
        # option element.
        return [
            f"{{{el.get('NS')}}}{el.get('Name')}"
            for el in qname_aware.findall(c14n2_ns + child_tag)
        ]

    # Base names (".xml" removed) of all XML files, in sorted order.
    files = [filename[:-4] for filename in sorted(os.listdir(datadir))
             if filename.endswith('.xml')]
    input_files = [name for name in files if name.startswith('in')]

    # config file base name -> {option local name: (stripped text, element)}
    # An option element looks like
    #   <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
    configs = {}
    for name in files:
        if not name.startswith('c14n'):
            continue
        root = ET.parse(full_path(name) + ".xml").getroot()
        configs[name] = {
            local_name(option.tag): ((option.text or '').strip(), option)
            for option in root
        }

    # input file base name -> [(output file base name, config), ...]
    tests = {}
    for input_file in input_files:
        prefix = f'out_{input_file}_'
        cases = []
        for name in files:
            config_name = name.rsplit('_', 1)[-1]
            if name.startswith(prefix) and config_name in configs:
                cases.append((name, configs[config_name]))
        tests[input_file] = cases

    # Make sure we found all test cases.
    self.assertEqual(30, sum(len(cases) for cases in tests.values()))

    for input_file, output_files in tests.items():
        for output_file, config in output_files:
            # Not a typo: 'IgnoreComments' == 'true' deliberately turns
            # with_comments on (marked as intentional by the original
            # author for these data files).
            keep_comments = get_option(config, 'IgnoreComments') == 'true'
            strip_text = get_option(config, 'TrimTextNodes') == 'true'
            rewrite_prefixes = (
                get_option(config, 'PrefixRewrite') == 'sequential')

            # Element of the QNameAware option, or None if not configured.
            qname_aware = config.get('QNameAware', (None, None))[1]
            if qname_aware is not None:
                qattrs = qualified_names(qname_aware, 'QualifiedAttr')
                qtags = qualified_names(qname_aware, 'Element')
            else:
                qtags = qattrs = None

            # Build subtest description from config.  Options without
            # text are described by the local names of their children.
            descr_parts = []
            for name, (value, element) in sorted(config.items()):
                shown = value or ','.join(
                    local_name(child.tag) for child in element)
                descr_parts.append(f"{name}={shown}")
            config_descr = ','.join(descr_parts)

            with self.subTest(f"{output_file}({config_descr})"):
                if input_file == 'inNsRedecl' and not rewrite_prefixes:
                    self.skipTest(
                        f"Redeclared namespace handling is not supported in {output_file}")
                if input_file == 'inNsSuperfluous' and not rewrite_prefixes:
                    self.skipTest(
                        f"Redeclared namespace handling is not supported in {output_file}")
                if (qname_aware is not None
                        and qname_aware.find(c14n2_ns + 'XPathElement')
                        is not None):
                    self.skipTest(
                        f"QName rewriting in XPath text is not supported in {output_file}")

                # Either a path or, for inC14N5, an in-memory byte stream.
                source = full_path(input_file + ".xml")
                if input_file == 'inC14N5':
                    # Hack: avoid setting up external entity resolution in
                    # the parser by splicing the entity text in by hand.
                    with open(full_path('world.txt'), 'rb') as entity_file, \
                            open(source, 'rb') as xml_file:
                        source = io.BytesIO(xml_file.read().replace(
                            b'&ent2;', entity_file.read()))

                text = ET.canonicalize(
                    from_file=source,
                    with_comments=keep_comments,
                    strip_text=strip_text,
                    rewrite_prefixes=rewrite_prefixes,
                    qname_aware_tags=qtags, qname_aware_attrs=qattrs)

                expected_path = full_path(output_file + ".xml")
                with open(expected_path, 'r', encoding='utf8') as expected_file:
                    expected = expected_file.read()
                if input_file == 'inC14N3':
                    # FIXME: cET resolves default attributes but ET does not!
                    expected = expected.replace(' attr="default"', '')
                    text = text.replace(' attr="default"', '')
                self.assertEqual(expected, text)

`

``

3756

+

3530

3757

`# --------------------------------------------------------------------

`

3531

3758

``

3532

3759

``

`@@ -3559,6 +3786,8 @@ def test_main(module=None):

`

3559

3786

`XMLParserTest,

`

3560

3787

`XMLPullParserTest,

`

3561

3788

`BugsTest,

`

``

3789

`+

KeywordArgsTest,

`

``

3790

`+

C14NTest,

`

3562

3791

` ]

`

3563

3792

``

3564

3793

`# These tests will only run for the pure-Python version that doesn't import

`