[issue3672] Ill-formed surrogates not treated as errors during encoding/decoding - Code Review (original) (raw)

OLD

NEW

1 /* ------------------------------------------------------------------------

1 /* ------------------------------------------------------------------------

2

2

3 Python Codec Registry and support functions

3 Python Codec Registry and support functions

4

4

5 Written by Marc-Andre Lemburg (mal@lemburg.com).

5 Written by Marc-Andre Lemburg (mal@lemburg.com).

6

6

7 Copyright (c) Corporation for National Research Initiatives.

7 Copyright (c) Corporation for National Research Initiatives.

8

8

9 ------------------------------------------------------------------------ */

9 ------------------------------------------------------------------------ */

10

10

(...skipping 730 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...

741 Py_DECREF(res);

741 Py_DECREF(res);

742 Py_DECREF(object);

742 Py_DECREF(object);

743 return restuple;

743 return restuple;

744 }

744 }

745 else {

745 else {

746 wrong_exception_type(exc);

746 wrong_exception_type(exc);

747 return NULL;

747 return NULL;

748 }

748 }

749 }

749 }

750

750

751 PyObject *PyCodec_SurrogateErrors(PyObject *exc)

752 {

753 PyObject *restuple;

754 PyObject *object;

755 Py_ssize_t start;

756 Py_ssize_t end;

757 PyObject *res;

758 if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {

759 Py_UNICODE *p;

760 Py_UNICODE *startp;

761 char *outp;

762 if (PyUnicodeEncodeError_GetStart(exc, &start))

763 return NULL;

764 if (PyUnicodeEncodeError_GetEnd(exc, &end))

765 return NULL;

766 if (!(object = PyUnicodeEncodeError_GetObject(exc)))

767 return NULL;

768 startp = PyUnicode_AS_UNICODE(object);

769 res = PyBytes_FromStringAndSize(NULL, 3*(end-start));

770 if (!res)

771 return NULL;

772 outp = PyBytes_AsString(res);

773 for (p = startp+start; p < startp+end; p++) {

774 Py_UNICODE ch = *p;

775 if (ch < 0xd800 || ch > 0xdfff) {

776 /* Not a surrogate, fail with original exception */

777 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);

778 Py_DECREF(res);

779 Py_DECREF(object);

780 return NULL;

781 }

782 *outp++ = (char)(0xe0 | (ch >> 12));

783 *outp++ = (char)(0x80 | ((ch >> 6) & 0x3f));

784 *outp++ = (char)(0x80 | (ch & 0x3f));

785 }

786 restuple = Py_BuildValue("(On)", res, end);

787 Py_DECREF(res);

788 Py_DECREF(object);

789 return restuple;

790 }

791 else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {

792 unsigned char *p;

793 Py_UNICODE ch = 0;

794 if (PyUnicodeDecodeError_GetStart(exc, &start))

795 return NULL;

796 if (!(object = PyUnicodeDecodeError_GetObject(exc)))

797 return NULL;

798 if (!(p = (unsigned char*)PyBytes_AsString(object))) {

799 Py_DECREF(object);

800 return NULL;

801 }

802 /* Try decoding a single surrogate character. If

803 there are more, let the codec call us again. */

804 if ((p[0] & 0xf0) == 0xe0 ||·

805 (p[1] & 0xc0) == 0x80 ||

806 (p[2] & 0xc0) == 0x80) {

807 /* it's a three-byte code */

808 ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);

809 if (ch < 0xd800 || ch > 0xdfff)

810 /* it's not a surrogate - fail */

811 ch = 0;

812 }

813 Py_DECREF(object);

814 if (ch == 0) {

815 PyErr_SetObject(PyExceptionInstance_Class(exc), exc);

816 return NULL;

817 }

818 return Py_BuildValue("(u#n)", &ch, 1, start+3);

819 }

820 else {

821 wrong_exception_type(exc);

822 return NULL;

823 }

824 }

825

826 ········

751 static PyObject *strict_errors(PyObject *self, PyObject *exc)

827 static PyObject *strict_errors(PyObject *self, PyObject *exc)

752 {

828 {

753 return PyCodec_StrictErrors(exc);

829 return PyCodec_StrictErrors(exc);

754 }

830 }

755

831

756

832

757 static PyObject *ignore_errors(PyObject *self, PyObject *exc)

833 static PyObject *ignore_errors(PyObject *self, PyObject *exc)

758 {

834 {

759 return PyCodec_IgnoreErrors(exc);

835 return PyCodec_IgnoreErrors(exc);

760 }

836 }

761

837

762

838

763 static PyObject *replace_errors(PyObject *self, PyObject *exc)

839 static PyObject *replace_errors(PyObject *self, PyObject *exc)

764 {

840 {

765 return PyCodec_ReplaceErrors(exc);

841 return PyCodec_ReplaceErrors(exc);

766 }

842 }

767

843

768

844

769 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)

845 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)

770 {

846 {

771 return PyCodec_XMLCharRefReplaceErrors(exc);

847 return PyCodec_XMLCharRefReplaceErrors(exc);

772 }

848 }

773

849

774

850

775 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)

851 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)

776 {

852 {

777 return PyCodec_BackslashReplaceErrors(exc);

853 return PyCodec_BackslashReplaceErrors(exc);

778 }

854 }

779

855

856 static PyObject *surrogates_errors(PyObject *self, PyObject *exc)

857 {

858 return PyCodec_SurrogateErrors(exc);

859 }

860

780 static int _PyCodecRegistry_Init(void)

861 static int _PyCodecRegistry_Init(void)

781 {

862 {

782 static struct {

863 static struct {

783 char *name;

864 char *name;

784 PyMethodDef def;

865 PyMethodDef def;

785 } methods[] =

866 } methods[] =

786 {

867 {

787 {

868 {

788 "strict",

869 "strict",

789 {

870 {

(...skipping 26 matching lines...) Expand all Loading...

816 METH_O

897 METH_O

817 }

898 }

818 },

899 },

819 {

900 {

820 "backslashreplace",

901 "backslashreplace",

821 {

902 {

822 "backslashreplace_errors",

903 "backslashreplace_errors",

823 backslashreplace_errors,

904 backslashreplace_errors,

824 METH_O

905 METH_O

825 }

906 }

907 },

908 {

909 "surrogates",

910 {

911 "surrogates",

912 surrogates_errors,

913 METH_O

914 }

826 }

915 }

827 };

916 };

828

917

829 PyInterpreterState *interp = PyThreadState_GET()->interp;

918 PyInterpreterState *interp = PyThreadState_GET()->interp;

830 PyObject *mod;

919 PyObject *mod;

831 unsigned i;

920 unsigned i;

832

921

833 if (interp->codec_search_path != NULL)

922 if (interp->codec_search_path != NULL)

834 return 0;

923 return 0;

835

924

(...skipping 29 matching lines...) Expand all Loading...

865 configuration are still reported back to the user. */

954 configuration are still reported back to the user. */

866 PyErr_Clear();

955 PyErr_Clear();

867 return 0;

956 return 0;

868 }

957 }

869 return -1;

958 return -1;

870 }

959 }

871 Py_DECREF(mod);

960 Py_DECREF(mod);

872 interp->codecs_initialized = 1;

961 interp->codecs_initialized = 1;

873 return 0;

962 return 0;

874 }

963 }

OLD

NEW