[Python-Dev] Unicode: When Things Get Hairy (original) (raw)

M.-A. Lemburg mal@lemburg.com
Sat, 11 Mar 2000 14:57:34 +0100


This is a multi-part message in MIME format. --------------56A130F1FCAC300009B200AD Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit

I couldn't resist :-) Here's the patch...

BTW, how should we proceed with future patches? Should I bundle them together about once a week, or send them as soon as they are done?

-- Marc-Andre Lemburg


Business: http://www.lemburg.com/ Python Pages: http://www.lemburg.com/python/ --------------56A130F1FCAC300009B200AD Content-Type: text/plain; charset=us-ascii; name="Unicode-Implementation-2000-03-11.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="Unicode-Implementation-2000-03-11.patch"

diff -u -rP -x *.o -x *.pyc -x Makefile -x ~ -x .so -x add2lib -x pgen -x buildno -x config. -x libpython -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Include/unicodeobject.h Python+Unicode/Include/unicodeobject.h --- CVS-Python/Include/unicodeobject.h Fri Mar 10 23:33:05 2000 +++ Python+Unicode/Include/unicodeobject.h Sat Mar 11 14:45:59 2000 @@ -683,6 +683,17 @@ PyObject args / Argument tuple or dictionary / ); +/ Checks whether element is contained in container and return 1/0 + accordingly. + + element has to coerce to an one element Unicode string. -1 is + returned in case of an error. */ + +extern DL_IMPORT(int) PyUnicode_Contains( + PyObject container, / Container string / + PyObject element / Element string / + ); + / === Characters Type APIs =============================================== / / These should not be used directly. Use the Py_UNICODE_IS and diff -u -rP -x *.o -x *.pyc -x Makefile -x ~ -x .so -x add2lib -x pgen -x buildno -x config. -x libpython -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Lib/test/test_unicode.py Python+Unicode/Lib/test/test_unicode.py --- CVS-Python/Lib/test/test_unicode.py Sat Mar 11 00:23:20 2000 +++ Python+Unicode/Lib/test/test_unicode.py Sat Mar 11 14:52:29 2000 @@ -219,6 +219,19 @@ test('translate', u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')}) test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}) +# Contains: +print 'Testing Unicode contains method...', +assert ('a' in 'abdb') == 1 +assert ('a' in 'bdab') == 1 +assert ('a' in 'bdaba') == 1 +assert ('a' in 'bdba') == 1 +assert ('a' in u'bdba') == 1 +assert (u'a' in u'bdba') == 1 +assert (u'a' in u'bdb') == 0 +assert (u'a' in 'bdb') == 0 +assert (u'a' in 'bdba') == 1 +print 'done.' 
+ # Formatting: print 'Testing Unicode formatting strings...', assert u"%s, %s" % (u"abc", "abc") == u'abc, abc' diff -u -rP -x *.o -x *.pyc -x Makefile -x ~ -x .so -x add2lib -x pgen -x buildno -x config. -x libpython -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Misc/unicode.txt Python+Unicode/Misc/unicode.txt --- CVS-Python/Misc/unicode.txt Sat Mar 11 00:14:11 2000 +++ Python+Unicode/Misc/unicode.txt Sat Mar 11 14:53:37 2000 @@ -743,8 +743,9 @@ stream codecs as available through the codecs module should be used. -XXX There should be a short-cut open(filename,mode,encoding) available which - also assures that mode contains the 'b' character when needed. +The codecs module should provide a short-cut open(filename,mode,encoding) +available which also assures that mode contains the 'b' character when +needed. File/Stream Input: @@ -810,6 +811,10 @@ Introduction to Unicode (a little outdated by still nice to read): http://www.nada.kth.se/i18n/ucs/unicode-iso10646-oview.html +For comparison: + Introducing Unicode to ECMAScript -- + http://www-4.ibm.com/software/developer/library/internationalization-support.html + Encodings: Overview: @@ -832,7 +837,7 @@ History of this Proposal:

-1.2: +1.2: Removed POD about codecs.open() 1.1: Added note about comparisons and hash values. Added note about case mapping algorithms. Changed stream codecs .read() and .write() method to match the standard file-like object methods diff -u -rP -x *.o -x *.pyc -x Makefile -x ~ -x .so -x add2lib -x pgen -x buildno -x config. -x libpython -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Objects/stringobject.c Python+Unicode/Objects/stringobject.c --- CVS-Python/Objects/stringobject.c Sat Mar 11 10:55:09 2000 +++ Python+Unicode/Objects/stringobject.c Sat Mar 11 14:47:45 2000 @@ -389,7 +389,9 @@ { register char *s, *end; register char c; - if (!PyString_Check(el) || PyString_Size(el) != 1) { + if (!PyString_Check(el)) + return PyUnicode_Contains(a, el); + if (PyString_Size(el) != 1) { PyErr_SetString(PyExc_TypeError, "string member test needs char left operand"); return -1; diff -u -rP -x *.o -x *.pyc -x Makefile -x ~ -x .so -x add2lib -x pgen -x buildno -x config. -x libpython -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x *.bak -x *.s -x DEADJOE -x Demo -x CVS CVS-Python/Objects/unicodeobject.c Python+Unicode/Objects/unicodeobject.c --- CVS-Python/Objects/unicodeobject.c Fri Mar 10 23:53:23 2000 +++ Python+Unicode/Objects/unicodeobject.c Sat Mar 11 14:48:52 2000 @@ -2737,6 +2737,49 @@ return -1; }

+int PyUnicode_Contains(PyObject *container, + PyObject *element) +{

+} + /* Concat to string or Unicode object giving a new Unicode object. */

PyObject *PyUnicode_Concat(PyObject left, @@ -3817,6 +3860,7 @@ (intintargfunc) unicode_slice, / sq_slice / 0, / sq_ass_item / 0, / sq_ass_slice */

};

static int

--------------56A130F1FCAC300009B200AD--