cpython: bcecf3910162 (original) (raw)

--- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -933,6 +933,13 @@ PyAPI_FUNC(int) _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t length, Py_UCS4 maxchar); +/* Append a Unicode character.

+ /* Append a Unicode string. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int)

--- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -23,12 +23,12 @@

-#define GBK_DECODE(dc1, dc2, assi) [](#l2.7)

+#define GBK_DECODE(dc1, dc2, writer) [](#l2.13)

#define GBK_ENCODE(code, assi) [](#l2.20) if ((code) == 0x2014) (assi) = 0xa1aa; [](#l2.21) @@ -43,7 +43,7 @@ ENCODER(gb2312) { while (inleft > 0) {

if (c < 0x80) { @@ -73,17 +73,15 @@ DECODER(gb2312) while (inleft > 0) { unsigned char c = **inbuf;

- if (c < 0x80) {

REQUIRE_INBUF(2)

if (c < 0x80) { @@ -130,20 +128,18 @@ DECODER(gbk) while (inleft > 0) { unsigned char c = IN1;

- if (c < 0x80) {

REQUIRE_INBUF(2)

if (c < 0x80) { @@ -174,7 +170,7 @@ ENCODER(gb18030) return 1; #endif else if (c >= 0x10000) {

REQUIRE_OUTBUF(4) @@ -208,7 +204,7 @@ ENCODER(gb18030) utrrange++) if (utrrange->first <= c && c <= utrrange->last) {

tc = c - utrrange->first + utrrange->base; @@ -247,11 +243,9 @@ DECODER(gb18030) while (inleft > 0) { unsigned char c = IN1, c2;

- if (c < 0x80) {

@@ -261,7 +255,7 @@ DECODER(gb18030) if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ const struct _gb18030_to_unibmp_ranges *utr; unsigned char c3, c4;

REQUIRE_INBUF(4) c3 = IN3; @@ -272,34 +266,34 @@ DECODER(gb18030) c3 -= 0x81; c4 -= 0x30; if (c < 4) { /* U+0080 - U+FFFF */

if (c < 0x80) { @@ -389,8 +383,8 @@ DECODER(hz) REQUIRE_INBUF(2) if (c2 == '~') {

@@ -401,7 +395,7 @@ DECODER(hz) ; /* line-continuation */ else return 1;

@@ -409,14 +403,13 @@ DECODER(hz) return 1; if (state->i == 0) { /* ASCII mode */

--- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -39,7 +39,7 @@ static const DBCHAR big5hkscs_pairenc_ta ENCODER(big5hkscs) { while (inleft > 0) {

@@ -103,26 +103,24 @@ DECODER(big5hkscs) { while (inleft > 0) { unsigned char c = IN1;

-

if (c < 0x80) {

REQUIRE_INBUF(2) if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {

@@ -146,25 +144,25 @@ DECODER(big5hkscs) return MBERR_INTERNAL; if (hintbase[s >> 3] & (1 << (s & 7))) {

switch ((c << 8) | IN2) {

--- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -102,8 +102,8 @@ /*-*- internal data structures -*-*/ typedef int (*iso2022_init_func)(void); -typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data); -typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length); +typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data); +typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length); struct iso2022_designation { unsigned char mark; @@ -158,7 +158,7 @@ ENCODER(iso2022) while (inleft > 0) { const struct iso2022_designation *dsg; DBCHAR encoded;

if (c < 0x80) { @@ -196,9 +196,9 @@ ENCODER(iso2022) length = 2; #if Py_UNICODE_SIZE == 2 if (length == 2) {

@@ -277,7 +277,7 @@ ENCODER(iso2022) WRITE2(encoded >> 8, encoded & 0xff) NEXT_OUT(2) }

+#define ISO8859_7_DECODE(c, writer) [](#l4.59)

static Py_ssize_t iso2022processg2(const void *config, MultibyteCodec_State *state, const unsigned char **inbuf, Py_ssize_t *inleft,

{ /* not written to use encoder, decoder functions because only few * encodings use G2 designations in CJKCodecs */ if (STATE_G2 == CHARSET_ISO8859_1) { if (IN3 < 0x80)

(*inbuf) += 3; *inleft -= 3;

@@ -429,8 +427,8 @@ DECODER(iso2022) if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { /* ESC throughout mode: * for non-iso2022 escape sequences */

@@ -449,32 +447,32 @@ DECODER(iso2022) else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ REQUIRE_INBUF(3) err = iso2022processg2(config, state,

@@ -484,7 +482,7 @@ DECODER(iso2022) else { const struct iso2022_designation *dsg; unsigned char charset;

if (STATE_GETFLAG(F_SHIFTED)) charset = STATE_G1; @@ -492,8 +490,8 @@ DECODER(iso2022) charset = STATE_G0; if (charset == CHARSET_ASCII) { -bypass: WRITE1(c)

+bypass: OUTCHAR(c);

@@ -518,17 +516,15 @@ bypass: return dsg->width; if (decoded < 0x10000) {

@@ -577,18 +573,18 @@ ksx1001_init(void) return 0; } -static ucs4_t +static Py_UCS4 ksx1001_decoder(const unsigned char *data) {

} static DBCHAR -ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) +ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -613,20 +609,20 @@ jisx0208_init(void) return 0; } -static ucs4_t +static Py_UCS4 jisx0208_decoder(const unsigned char *data) {

} static DBCHAR -jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -654,18 +650,18 @@ jisx0212_init(void) return 0; } -static ucs4_t +static Py_UCS4 jisx0212_decoder(const unsigned char *data) {

} static DBCHAR -jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -705,30 +701,30 @@ jisx0213_init(void) } #define config ((void *)2000) -static ucs4_t +static Py_UCS4 jisx0213_2000_1_decoder(const unsigned char *data) {

-static ucs4_t +static Py_UCS4 jisx0213_2000_2_decoder(const unsigned char *data) {

@@ -736,28 +732,28 @@ jisx0213_2000_2_decoder(const unsigned c } #undef config -static ucs4_t +static Py_UCS4 jisx0213_2004_1_decoder(const unsigned char *data) {

-static ucs4_t +static Py_UCS4 jisx0213_2004_2_decoder(const unsigned char *data) {

@@ -765,7 +761,7 @@ jisx0213_2004_2_decoder(const unsigned c } static DBCHAR -jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) +jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config) { DBCHAR coded; @@ -819,7 +815,7 @@ jisx0213_encoder(const ucs4_t *data, Py_ } static DBCHAR -jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -831,7 +827,7 @@ jisx0213_2000_1_encoder(const ucs4_t *da } static DBCHAR -jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; Py_ssize_t ilength = *length; @@ -854,7 +850,7 @@ jisx0213_2000_1_encoder_paironly(const u } static DBCHAR -jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -866,7 +862,7 @@ jisx0213_2000_2_encoder(const ucs4_t *da } static DBCHAR -jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -878,7 +874,7 @@ jisx0213_2004_1_encoder(const ucs4_t *da } static DBCHAR -jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; Py_ssize_t ilength = *length; @@ -901,7 +897,7 @@ jisx0213_2004_1_encoder_paironly(const u } static DBCHAR -jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = 
jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -912,17 +908,17 @@ jisx0213_2004_2_encoder(const ucs4_t *da return MAP_UNMAPPABLE; } -static ucs4_t +static Py_UCS4 jisx0201_r_decoder(const unsigned char *data) {

static DBCHAR -jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_R_ENCODE(*data, coded) @@ -930,17 +926,17 @@ jisx0201_r_encoder(const ucs4_t *data, P return coded; } -static ucs4_t +static Py_UCS4 jisx0201_k_decoder(const unsigned char *data) {

static DBCHAR -jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_K_ENCODE(*data, coded) @@ -961,18 +957,18 @@ gb2312_init(void) return 0; } -static ucs4_t +static Py_UCS4 gb2312_decoder(const unsigned char *data) {

} static DBCHAR -gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) +gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -986,14 +982,14 @@ gb2312_encoder(const ucs4_t *data, Py_ss } -static ucs4_t +static Py_UCS4 dummy_decoder(const unsigned char *data) { return MAP_UNMAPPABLE; } static DBCHAR -dummy_encoder(const ucs4_t *data, Py_ssize_t *length) +dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length) { return MAP_UNMAPPABLE; }

--- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -20,7 +20,7 @@ ENCODER(cp932) { while (inleft > 0) {

@@ -66,8 +66,8 @@ ENCODER(cp932) } else if (c >= 0xe000 && c < 0xe758) { /* User-defined area */

@@ -85,31 +85,30 @@ DECODER(cp932) while (inleft > 0) { unsigned char c = IN1, c2;

REQUIRE_INBUF(2) c2 = IN2;

@@ -119,21 +118,21 @@ DECODER(cp932) c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21); c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;

@@ -235,13 +234,11 @@ DECODER(euc_jis_2004) { while (inleft > 0) { unsigned char c = IN1;

-

if (c < 0x80) {

@@ -252,8 +249,8 @@ DECODER(euc_jis_2004) REQUIRE_INBUF(2) c2 = IN2; if (c2 >= 0xa1 && c2 <= 0xdf) {

@@ -266,16 +263,16 @@ DECODER(euc_jis_2004) c3 = IN3 ^ 0x80; /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */

@@ -285,23 +282,23 @@ DECODER(euc_jis_2004) c2 = IN2 ^ 0x80; /* JIS X 0213 Plane 1 */

if (c < 0x80) { @@ -369,11 +366,9 @@ DECODER(euc_jp) while (inleft > 0) { unsigned char c = IN1;

- if (c < 0x80) {

@@ -384,8 +379,8 @@ DECODER(euc_jp) REQUIRE_INBUF(2) c2 = IN2; if (c2 >= 0xa1 && c2 <= 0xdf) {

@@ -397,8 +392,8 @@ DECODER(euc_jp) c2 = IN2; c3 = IN3; /* JIS X 0212 */

@@ -412,13 +407,13 @@ DECODER(euc_jp) #ifndef STRICT_BUILD if (c == 0xa1 && c2 == 0xc0) /* FULL-WIDTH REVERSE SOLIDUS */

#endif

@@ -488,14 +483,12 @@ DECODER(shift_jis) while (inleft > 0) { unsigned char c = IN1;

- #ifdef STRICT_BUILD

#else

#endif

@@ -512,13 +505,13 @@ DECODER(shift_jis) #ifndef STRICT_BUILD if (c1 == 0x21 && c2 == 0x40) { /* FULL-WIDTH REVERSE SOLIDUS */

#endif

@@ -527,7 +520,7 @@ DECODER(shift_jis) else return 1;

@@ -636,11 +629,10 @@ DECODER(shift_jis_2004) while (inleft > 0) { unsigned char c = IN1;

REQUIRE_INBUF(2) c2 = IN2; @@ -654,50 +646,47 @@ DECODER(shift_jis_2004) if (c1 < 0x5e) { /* Plane 1 */ c1 += 0x21;

--- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -34,7 +34,7 @@ static const unsigned char u2cgk_jongseo ENCODER(euc_kr) { while (inleft > 0) {

if (c < 0x80) { @@ -104,11 +104,9 @@ DECODER(euc_kr) while (inleft > 0) { unsigned char c = IN1;

- if (c < 0x80) {

@@ -145,11 +143,11 @@ DECODER(euc_kr) if (cho == NONE || jung == NONE || jong == NONE) return 1;

@@ -167,7 +165,7 @@ DECODER(euc_kr) ENCODER(cp949) { while (inleft > 0) {

if (c < 0x80) { @@ -197,20 +195,18 @@ DECODER(cp949) while (inleft > 0) { unsigned char c = IN1;

- if (c < 0x80) {

REQUIRE_INBUF(2)

if (c < 0x80) { @@ -350,11 +346,9 @@ DECODER(johab) while (inleft > 0) { unsigned char c = IN1, c2;

- if (c < 0x80) {

@@ -381,33 +375,33 @@ DECODER(johab) if (i_cho == FILL) { if (i_jung == FILL) { if (i_jong == FILL)

@@ -424,9 +418,9 @@ DECODER(johab) t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21; t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;

--- a/Modules/cjkcodecs/_codecs_tw.c +++ b/Modules/cjkcodecs/_codecs_tw.c @@ -14,7 +14,7 @@ ENCODER(big5) { while (inleft > 0) {

if (c < 0x80) { @@ -43,17 +43,15 @@ DECODER(big5) while (inleft > 0) { unsigned char c = IN1;

- if (c < 0x80) {

REQUIRE_INBUF(2)

if (c < 0x80) { @@ -97,21 +95,19 @@ DECODER(cp950) while (inleft > 0) { unsigned char c = IN1;

- if (c < 0x80) {

REQUIRE_INBUF(2)

--- a/Modules/cjkcodecs/alg_jisx0201.h +++ b/Modules/cjkcodecs/alg_jisx0201.h @@ -10,15 +10,24 @@ JISX0201_R_ENCODE(c, assi) [](#l8.4) else JISX0201_K_ENCODE(c, assi) -#define JISX0201_R_DECODE(c, assi) [](#l8.7) +#define JISX0201_R_DECODE_CHAR(c, assi) [](#l8.8) if ((c) < 0x5c) (assi) = (c); [](#l8.9) else if ((c) == 0x5c) (assi) = 0x00a5; [](#l8.10) else if ((c) < 0x7e) (assi) = (c); [](#l8.11) else if ((c) == 0x7e) (assi) = 0x203e; [](#l8.12) else if ((c) == 0x7f) (assi) = 0x7f; -#define JISX0201_K_DECODE(c, assi) [](#l8.14) +#define JISX0201_R_DECODE(c, writer) [](#l8.15)

+#define JISX0201_K_DECODE(c, writer) [](#l8.21) if ((c) >= 0xa1 && (c) <= 0xdf) [](#l8.22)

-#define JISX0201_DECODE(c, assi) [](#l8.24)

+#define JISX0201_K_DECODE_CHAR(c, assi) [](#l8.28)

+#define JISX0201_DECODE(c, writer) [](#l8.31)

--- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -33,7 +33,7 @@ struct dbcs_index { typedef struct dbcs_index decode_map; struct widedbcs_index {

#define DECODER_RESET(encoding) [](#l9.27) static Py_ssize_t encoding##_decode_reset( [](#l9.28) MultibyteCodec_State *state, const void *config) @@ -101,13 +101,15 @@ static const struct dbcs_map *mapping_li #endif #define NEXT_IN(i) [](#l9.33)

#define NEXT_OUT(o) [](#l9.40) (*outbuf) += (o); [](#l9.41) (outleft) -= (o); #define NEXT(i, o) [](#l9.43)

#define REQUIRE_INBUF(n) [](#l9.47) if (inleft < (n)) [](#l9.48) @@ -121,6 +123,23 @@ static const struct dbcs_map *mapping_li #define IN3 ((*inbuf)[2]) #define IN4 ((*inbuf)[3]) +#define OUTCHAR(c) [](#l9.53)

+ +#define OUTCHAR2(c1, c2) [](#l9.59)

+ #define OUT1(c) ((*outbuf)[0]) = (c); #define OUT2(c) ((*outbuf)[1]) = (c); #define OUT3(c) ((*outbuf)[2]) = (c); @@ -145,19 +164,6 @@ static const struct dbcs_map *mapping_li (*outbuf)[2] = (c3); [](#l9.74) (*outbuf)[3] = (c4); -#if Py_UNICODE_SIZE == 2 -# define WRITEUCS4(c) [](#l9.78)

-#else -# define WRITEUCS4(c) [](#l9.84)

-#endif - #define _TRYMAP_ENC(m, assi, val) [](#l9.90) ((m)->map != NULL && (val) >= (m)->bottom && [](#l9.91) (val)<= (m)->top && ((assi) = (m)->map(val) - [ @@ -167,24 +173,41 @@ static const struct dbcs_map *mapping_li #define TRYMAP_ENC(charset, assi, uni) [](#l9.94) if TRYMAP_ENC_COND(charset, assi, uni) -#define _TRYMAP_DEC(m, assi, val) [](#l9.97)

-#define TRYMAP_DEC(charset, assi, c1, c2) [](#l9.101)

+Py_LOCAL_INLINE(int) +_TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c) +{

+} -#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) [](#l9.112)

+#define _TRYMAP_DEC(m, writer, val) [](#l9.116)

+#define _TRYMAP_DEC_CHAR(m, assi, val) [](#l9.121)

+#define TRYMAP_DEC(charset, writer, c1, c2) [](#l9.126)

+#define TRYMAP_DEC_CHAR(charset, assi, c1, c2) [](#l9.128)

+ +#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) [](#l9.131)

#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) [](#l9.137) if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], [](#l9.138) assplane, asshi, asslo, (uni) & 0xff) -#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) [](#l9.140)

+#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) [](#l9.142)

#if Py_UNICODE_SIZE == 2 #define DECODE_SURROGATE(c) [](#l9.146) @@ -323,7 +346,7 @@ find_pairencmap(ucs2_t body, ucs2_t modi const struct pair_encodemap *haystack, int haystacksize) { int pos, min, max;

min = 0; max = haystacksize;

--- a/Modules/cjkcodecs/emu_jisx0213_2000.h +++ b/Modules/cjkcodecs/emu_jisx0213_2000.h @@ -38,6 +38,9 @@ ((c1) == 0x7E && (c2) == 0x7E))) [](#l10.4) return EMULATE_JISX0213_2000_DECODE_INVALID; -#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) [](#l10.7) +#define EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2) [](#l10.8)

+#define EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2) [](#l10.11) if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) [](#l10.12) (assi) = 0x9B1D;

--- a/Modules/cjkcodecs/mappings_cn.h +++ b/Modules/cjkcodecs/mappings_cn.h @@ -4049,7 +4049,7 @@ 0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0, static const struct _gb18030_to_unibmp_ranges {

--- a/Modules/cjkcodecs/mappings_jisx0213_pair.h +++ b/Modules/cjkcodecs/mappings_jisx0213_pair.h @@ -3,7 +3,7 @@ static const struct widedbcs_index *jisx0213_pair_decmap; static const struct pair_encodemap *jisx0213_pair_encmap; #else -static const ucs4_t __jisx0213_pair_decmap[49] = { +static const Py_UCS4 __jisx0213_pair_decmap[49] = { 810234010,810365082,810496154,810627226,810758298,816525466,816656538, 816787610,816918682,817049754,817574042,818163866,818426010,838283418, 15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,

--- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -17,8 +17,8 @@ typedef struct { typedef struct { const unsigned char *inbuf, *inbuf_top, *inbuf_end;

} MultibyteDecodeBuffer; PyDoc_STRVAR(MultibyteCodec_Encode__doc__, @@ -197,29 +197,6 @@ expand_encodebuffer(MultibyteEncodeBuffe goto errorexit; [](#l13.15) } -static int -expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) -{

-

-

-

-} -#define REQUIRE_DECODEBUFFER(buf, s) { [](#l13.35)

-} - /**

{ PyObject *retobj = NULL, *retuni = NULL;

@@ -403,8 +379,9 @@ multibytecodec_decerror(MultibyteCodec * } if (errors == ERROR_REPLACE) {

@@ -447,15 +424,8 @@ multibytecodec_decerror(MultibyteCodec * goto errorexit; }

newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); if (newpos < 0 && !PyErr_Occurred()) @@ -617,10 +587,10 @@ MultibyteCodec_Decode(MultibyteCodecObje { MultibyteCodec_State state; MultibyteDecodeBuffer buf;

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode", codeckwarglist, &pdata, &errors)) @@ -640,29 +610,22 @@ MultibyteCodec_Decode(MultibyteCodecObje return make_tuple(PyUnicode_New(0, 0), 0); }

if (self->codec->decinit != NULL && self->codec->decinit(&state, self->codec->config) != 0) goto errorexit; while (buf.inbuf < buf.inbuf_end) {

inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);

r = self->codec->decode(&state, self->codec->config,

@@ -670,23 +633,20 @@ MultibyteCodec_Decode(MultibyteCodecObje goto errorexit; }

-

PyBuffer_Release(&pdata); Py_XDECREF(buf.excobj); ERROR_DECREF(errorcb);

errorexit: PyBuffer_Release(&pdata); ERROR_DECREF(errorcb); Py_XDECREF(buf.excobj);

return NULL; } @@ -859,17 +819,7 @@ decoder_prepare_buffer(MultibyteDecodeBu { buf->inbuf = buf->inbuf_top = (const unsigned char *)data; buf->inbuf_end = buf->inbuf_top + size;

-

@@ -878,14 +828,13 @@ decoder_feed_buffer(MultibyteStatefulDec MultibyteDecodeBuffer *buf) { while (buf->inbuf < buf->inbuf_end) {

inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);

r = ctx->codec->decode(&ctx->state, ctx->codec->config,

@@ -1058,8 +1007,9 @@ mbidecoder_decode(MultibyteIncrementalDe MultibyteDecodeBuffer buf; char *data, *wdata = NULL; Py_buffer pdata;

if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode", incrementalkwarglist, &pdata, &final)) @@ -1067,7 +1017,8 @@ mbidecoder_decode(MultibyteIncrementalDe data = pdata.buf; size = pdata.len;

PyBuffer_Release(&pdata); if (wdata != data) PyMem_Del(wdata); Py_XDECREF(buf.excobj);

errorexit: PyBuffer_Release(&pdata); if (wdata != NULL && wdata != data) PyMem_Del(wdata); Py_XDECREF(buf.excobj);

@@ -1265,13 +1215,14 @@ mbstreamreader_iread(MultibyteStreamRead const char *method, Py_ssize_t sizehint) { MultibyteDecodeBuffer buf;

if (sizehint == 0) return PyUnicode_New(0, 0);

sizehint = 1; /* read 1 more byte and retry */ }

Py_XDECREF(cres); Py_XDECREF(buf.excobj);

errorexit: Py_XDECREF(cres); Py_XDECREF(buf.excobj);

--- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -10,12 +10,6 @@ extern "C" { #endif -#ifdef uint32_t -typedef uint32_t ucs4_t; -#else -typedef unsigned int ucs4_t; -#endif - #ifdef uint16_t typedef uint16_t ucs2_t, DBCHAR; #else @@ -27,7 +21,7 @@ typedef union { int i; unsigned char c[8]; ucs2_t u2[4];

} MultibyteCodec_State; typedef int (*mbcodec_init)(const void *config); @@ -44,7 +38,7 @@ typedef Py_ssize_t (*mbencodereset_func) typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state, const void *config, const unsigned char **inbuf, Py_ssize_t inleft,

typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state, const void *config); typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,

--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12948,6 +12948,16 @@ int } int +_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch) +{

+} + +int _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) { Py_UCS4 maxchar;