cpython: 940748853712 (original) (raw)
Mercurial > cpython
changeset 82815:940748853712
#15927: Fix csv.reader parsing of escaped \r\n with quoting off. This fix means that such values are correctly round-tripped, since csv.writer already does the correct escaping. Patch by Michael Johnson. [#15927]
R David Murray rdmurray@bitdance.com | |
---|---|
date | Tue, 19 Mar 2013 22:41:47 -0400 |
parents | 684b75600fa9 |
children | 01372117a5b4 |
files | Lib/test/test_csv.py Misc/ACKS Misc/NEWS Modules/_csv.c |
diffstat | 4 files changed, 25 insertions(+), 1 deletions(-)[+] [-] Lib/test/test_csv.py 9 Misc/ACKS 1 Misc/NEWS 3 Modules/_csv.c 13 |
line wrap: on
line diff
--- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -308,6 +308,15 @@ class Test_Csv(unittest.TestCase): for i, row in enumerate(csv.reader(fileobj)): self.assertEqual(row, rows[i])
+    def test_roundtrip_escaped_unquoted_newlines(self):
+        with TemporaryFile("w+", newline='') as fileobj:
+            writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")
+            rows = [['a\nb','b'],['c','x\r\nd']]
+            writer.writerows(rows)
+            fileobj.seek(0)
+            for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")):
+                self.assertEqual(row,rows[i])
+ class TestDialectRegistry(unittest.TestCase): def test_registry_badargs(self): self.assertRaises(TypeError, csv.list_dialects, None)
--- a/Misc/ACKS +++ b/Misc/ACKS @@ -591,6 +591,7 @@ Orjan Johansen Fredrik Johansson Gregory K. Johnson Kent Johnson +Michael Johnson Simon Johnston Matt Joiner Thomas Jollans
--- a/Misc/NEWS +++ b/Misc/NEWS @@ -289,6 +289,9 @@ Core and Builtins Library ------- +- Issue #15927: csv now correctly parses escaped newlines and carriage returns when parsing with quoting turned off.
--- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -51,7 +51,7 @@ static struct PyModuleDef _csvmodule; typedef enum { START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
} ParserState; typedef enum { @@ -644,6 +644,12 @@ parse_process_char(ReaderObj *self, Py_U break; case ESCAPED_CHAR:
if (c == '\n' | c=='\r') {[](#l4.16)
if (parse_add_char(self, c) < 0)[](#l4.17)
return -1;[](#l4.18)
self->state = AFTER_ESCAPED_CRNL;[](#l4.19)
break;[](#l4.20)
}[](#l4.21) if (c == '\0')[](#l4.22) c = '\n';[](#l4.23) if (parse_add_char(self, c) < 0)[](#l4.24)
@@ -651,6 +657,11 @@ parse_process_char(ReaderObj *self, Py_U self->state = IN_FIELD; break;
+ case IN_FIELD: /* in unquoted field */ if (c == '\n' || c == '\r' || c == '\0') {