cpython: bcecf3910162 (original) (raw)
--- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -933,6 +933,13 @@ PyAPI_FUNC(int) _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t length, Py_UCS4 maxchar);
+/* Append a Unicode character.
+   Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
+    Py_UCS4 ch
+    );
+ /* Append a Unicode string. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int)
--- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -23,12 +23,12 @@
-#define GBK_DECODE(dc1, dc2, assi) \[](#l2.7)
-    if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \[](#l2.8)
-    else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \[](#l2.9)
-    else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \[](#l2.10)
-    else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \[](#l2.11)
-    else TRYMAP_DEC(gbkext, assi, dc1, dc2);
+#define GBK_DECODE(dc1, dc2, writer) \[](#l2.13)
+    if ((dc1) == 0xa1 && (dc2) == 0xaa) OUTCHAR(0x2014); \[](#l2.14)
+    else if ((dc1) == 0xa8 && (dc2) == 0x44) OUTCHAR(0x2015); \[](#l2.15)
+    else if ((dc1) == 0xa1 && (dc2) == 0xa4) OUTCHAR(0x00b7); \[](#l2.16)
+    else TRYMAP_DEC(gb2312, writer, dc1 ^ 0x80, dc2 ^ 0x80); \[](#l2.17)
+    else TRYMAP_DEC(gbkext, writer, dc1, dc2);
#define GBK_ENCODE(code, assi) [](#l2.20) if ((code) == 0x2014) (assi) = 0xa1aa; [](#l2.21) @@ -43,7 +43,7 @@ ENCODER(gb2312) { while (inleft > 0) {
Py_UNICODE c = IN1;[](#l2.26)
Py_UCS4 c = IN1;[](#l2.27) DBCHAR code;[](#l2.28)
if (c < 0x80) { @@ -73,17 +73,15 @@ DECODER(gb2312) while (inleft > 0) { unsigned char c = **inbuf;
REQUIRE_OUTBUF(1)[](#l2.35)
OUT1(c)[](#l2.38)
NEXT(1, 1)[](#l2.39)
OUTCHAR(c);[](#l2.40)
NEXT_IN(1);[](#l2.41) continue;[](#l2.42) }[](#l2.43)
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {[](#l2.46)
NEXT(2, 1)[](#l2.47)
TRYMAP_DEC(gb2312, writer, c ^ 0x80, IN2 ^ 0x80) {[](#l2.48)
} @@ -99,7 +97,7 @@ DECODER(gb2312) ENCODER(gbk) { while (inleft > 0) {NEXT_IN(2);[](#l2.49) }[](#l2.50) else return 1;[](#l2.51)
Py_UNICODE c = IN1;[](#l2.57)
Py_UCS4 c = IN1;[](#l2.58) DBCHAR code;[](#l2.59)
if (c < 0x80) { @@ -130,20 +128,18 @@ DECODER(gbk) while (inleft > 0) { unsigned char c = IN1;
REQUIRE_OUTBUF(1)[](#l2.66)
OUT1(c)[](#l2.69)
NEXT(1, 1)[](#l2.70)
OUTCHAR(c);[](#l2.71)
NEXT_IN(1);[](#l2.72) continue;[](#l2.73) }[](#l2.74)
GBK_DECODE(c, IN2, **outbuf)[](#l2.78)
GBK_DECODE(c, IN2, writer)[](#l2.79) else return 1;[](#l2.80)
NEXT(2, 1)[](#l2.82)
} return 0; @@ -157,7 +153,7 @@ DECODER(gbk) ENCODER(gb18030) { while (inleft > 0) {NEXT_IN(2);[](#l2.83)
ucs4_t c = IN1;[](#l2.91)
Py_UCS4 c = IN1;[](#l2.92) DBCHAR code;[](#l2.93)
if (c < 0x80) { @@ -174,7 +170,7 @@ ENCODER(gb18030) return 1; #endif else if (c >= 0x10000) {
ucs4_t tc = c - 0x10000;[](#l2.100)
Py_UCS4 tc = c - 0x10000;[](#l2.101)
REQUIRE_OUTBUF(4) @@ -208,7 +204,7 @@ ENCODER(gb18030) utrrange++) if (utrrange->first <= c && c <= utrrange->last) {
Py_UNICODE tc;[](#l2.109)
Py_UCS4 tc;[](#l2.110)
tc = c - utrrange->first + utrrange->base; @@ -247,11 +243,9 @@ DECODER(gb18030) while (inleft > 0) { unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)[](#l2.118)
OUT1(c)[](#l2.121)
NEXT(1, 1)[](#l2.122)
OUTCHAR(c);[](#l2.123)
NEXT_IN(1);[](#l2.124) continue;[](#l2.125) }[](#l2.126)
@@ -261,7 +255,7 @@ DECODER(gb18030) if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ const struct _gb18030_to_unibmp_ranges *utr; unsigned char c3, c4;
ucs4_t lseq;[](#l2.132)
Py_UCS4 lseq;[](#l2.133)
REQUIRE_INBUF(4) c3 = IN3; @@ -272,34 +266,34 @@ DECODER(gb18030) c3 -= 0x81; c4 -= 0x30; if (c < 4) { /* U+0080 - U+FFFF */
lseq = ((ucs4_t)c * 10 + c2) * 1260 +[](#l2.141)
(ucs4_t)c3 * 10 + c4;[](#l2.142)
lseq = ((Py_UCS4)c * 10 + c2) * 1260 +[](#l2.143)
(Py_UCS4)c3 * 10 + c4;[](#l2.144) if (lseq < 39420) {[](#l2.145) for (utr = gb18030_to_unibmp_ranges;[](#l2.146) lseq >= (utr + 1)->base;[](#l2.147) utr++) ;[](#l2.148)
OUT1(utr->first - utr->base + lseq)[](#l2.149)
NEXT(4, 1)[](#l2.150)
OUTCHAR(utr->first - utr->base + lseq);[](#l2.151)
NEXT_IN(4);[](#l2.152) continue;[](#l2.153) }[](#l2.154) }[](#l2.155) else if (c >= 15) { /* U+10000 - U+10FFFF */[](#l2.156)
lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)[](#l2.157)
* 1260 + (ucs4_t)c3 * 10 + c4;[](#l2.158)
lseq = 0x10000 + (((Py_UCS4)c-15) * 10 + c2)[](#l2.159)
* 1260 + (Py_UCS4)c3 * 10 + c4;[](#l2.160) if (lseq <= 0x10FFFF) {[](#l2.161)
WRITEUCS4(lseq);[](#l2.162)
NEXT_IN(4)[](#l2.163)
OUTCHAR(lseq);[](#l2.164)
NEXT_IN(4);[](#l2.165) continue;[](#l2.166) }[](#l2.167) }[](#l2.168) return 1;[](#l2.169) }[](#l2.170)
GBK_DECODE(c, c2, **outbuf)[](#l2.172)
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);[](#l2.173)
GBK_DECODE(c, c2, writer)[](#l2.174)
else TRYMAP_DEC(gb18030ext, writer, c, c2);[](#l2.175) else return 1;[](#l2.176)
NEXT(2, 1)[](#l2.178)
} return 0; @@ -329,7 +323,7 @@ ENCODER_RESET(hz) ENCODER(hz) { while (inleft > 0) {NEXT_IN(2);[](#l2.179)
Py_UNICODE c = IN1;[](#l2.187)
Py_UCS4 c = IN1;[](#l2.188) DBCHAR code;[](#l2.189)
if (c < 0x80) { @@ -389,8 +383,8 @@ DECODER(hz) REQUIRE_INBUF(2) if (c2 == '~') {
WRITE1('~')[](#l2.196)
NEXT(2, 1)[](#l2.197)
OUTCHAR('~');[](#l2.198)
NEXT_IN(2);[](#l2.199) continue;[](#l2.200) }[](#l2.201) else if (c2 == '{' && state->i == 0)[](#l2.202)
@@ -401,7 +395,7 @@ DECODER(hz) ; /* line-continuation */ else return 1;
NEXT(2, 0);[](#l2.207)
NEXT_IN(2);[](#l2.208) continue;[](#l2.209) }[](#l2.210)
@@ -409,14 +403,13 @@ DECODER(hz) return 1; if (state->i == 0) { /* ASCII mode */
WRITE1(c)[](#l2.216)
NEXT(1, 1)[](#l2.217)
OUTCHAR(c);[](#l2.218)
NEXT_IN(1);[](#l2.219) }[](#l2.220) else { /* GB mode */[](#l2.221) REQUIRE_INBUF(2)[](#l2.222)
REQUIRE_OUTBUF(1)[](#l2.223)
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {[](#l2.224)
NEXT(2, 1)[](#l2.225)
TRYMAP_DEC(gb2312, writer, c, IN2) {[](#l2.226)
NEXT_IN(2);[](#l2.227) }[](#l2.228) else[](#l2.229) return 1;[](#l2.230)
--- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -39,7 +39,7 @@ static const DBCHAR big5hkscs_pairenc_ta ENCODER(big5hkscs) { while (inleft > 0) {
ucs4_t c = **inbuf;[](#l3.7)
Py_UCS4 c = **inbuf;[](#l3.8) DBCHAR code;[](#l3.9) Py_ssize_t insize;[](#l3.10)
@@ -103,26 +103,24 @@ DECODER(big5hkscs) { while (inleft > 0) { unsigned char c = IN1;
ucs4_t decoded;[](#l3.16)
REQUIRE_OUTBUF(1)[](#l3.18)
Py_UCS4 decoded;[](#l3.19)
OUT1(c)[](#l3.22)
NEXT(1, 1)[](#l3.23)
OUTCHAR(c);[](#l3.24)
NEXT_IN(1);[](#l3.25) continue;[](#l3.26) }[](#l3.27)
REQUIRE_INBUF(2) if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
TRYMAP_DEC(big5, **outbuf, c, IN2) {[](#l3.32)
NEXT(2, 1)[](#l3.33)
TRYMAP_DEC(big5, writer, c, IN2) {[](#l3.34)
NEXT_IN(2);[](#l3.35) continue;[](#l3.36) }[](#l3.37) }[](#l3.38)
TRYMAP_DEC(big5hkscs, decoded, c, IN2)[](#l3.40)
TRYMAP_DEC_CHAR(big5hkscs, decoded, c, IN2)[](#l3.41) {[](#l3.42) int s = BH2S(c, IN2);[](#l3.43) const unsigned char *hintbase;[](#l3.44)
@@ -146,25 +144,25 @@ DECODER(big5hkscs) return MBERR_INTERNAL; if (hintbase[s >> 3] & (1 << (s & 7))) {
WRITEUCS4(decoded | 0x20000)[](#l3.49)
NEXT_IN(2)[](#l3.50)
OUTCHAR(decoded | 0x20000);[](#l3.51)
NEXT_IN(2);[](#l3.52) }[](#l3.53) else {[](#l3.54)
OUT1(decoded)[](#l3.55)
NEXT(2, 1)[](#l3.56)
OUTCHAR(decoded);[](#l3.57)
NEXT_IN(2);[](#l3.58) }[](#l3.59) continue;[](#l3.60) }[](#l3.61)
case 0x8862: WRITE2(0x00ca, 0x0304); break;[](#l3.64)
case 0x8864: WRITE2(0x00ca, 0x030c); break;[](#l3.65)
case 0x88a3: WRITE2(0x00ea, 0x0304); break;[](#l3.66)
case 0x88a5: WRITE2(0x00ea, 0x030c); break;[](#l3.67)
case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;[](#l3.68)
case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;[](#l3.69)
case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;[](#l3.70)
case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;[](#l3.71) default: return 1;[](#l3.72) }[](#l3.73)
NEXT(2, 2) /* all decoded codepoints are pairs, above. */[](#l3.75)
--- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -102,8 +102,8 @@ /*-*- internal data structures -*-*/ typedef int (*iso2022_init_func)(void); -typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data); -typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length); +typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data); +typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length); struct iso2022_designation { unsigned char mark; @@ -158,7 +158,7 @@ ENCODER(iso2022) while (inleft > 0) { const struct iso2022_designation *dsg; DBCHAR encoded;
ucs4_t c = **inbuf;[](#l4.18)
Py_UCS4 c = **inbuf;[](#l4.19) Py_ssize_t insize;[](#l4.20)
if (c < 0x80) { @@ -196,9 +196,9 @@ ENCODER(iso2022) length = 2; #if Py_UNICODE_SIZE == 2 if (length == 2) {
ucs4_t u4in[2];[](#l4.27)
u4in[0] = (ucs4_t)IN1;[](#l4.28)
u4in[1] = (ucs4_t)IN2;[](#l4.29)
Py_UCS4 u4in[2];[](#l4.30)
u4in[0] = (Py_UCS4)IN1;[](#l4.31)
u4in[1] = (Py_UCS4)IN2;[](#l4.32) encoded = dsg->encoder(u4in, &length);[](#l4.33) } else[](#l4.34) encoded = dsg->encoder(&c, &length);[](#l4.35)
@@ -277,7 +277,7 @@ ENCODER(iso2022) WRITE2(encoded >> 8, encoded & 0xff) NEXT_OUT(2) }
NEXT_IN(insize)[](#l4.40)
} return 0; @@ -376,45 +376,43 @@ iso2022processesc(const void *config, Mu return 0; } -#define ISO8859_7_DECODE(c, assi) [](#l4.49)NEXT_IN(insize);[](#l4.41)
- if ((c) < 0xa0) (assi) = (c); [](#l4.50)
- else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) [](#l4.51)
(assi) = (c); \[](#l4.52)
- else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || [](#l4.53)
(0xbffffd77L & (1L << ((c)-0xb4))))) \[](#l4.54)
(assi) = 0x02d0 + (c); \[](#l4.55)
- else if ((c) == 0xa1) (assi) = 0x2018; [](#l4.56)
- else if ((c) == 0xa2) (assi) = 0x2019; [](#l4.57)
- else if ((c) == 0xaf) (assi) = 0x2015;
+#define ISO8859_7_DECODE(c, writer) \[](#l4.59)
+    if ((c) < 0xa0) OUTCHAR(c); \[](#l4.60)
+    else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0)))) \[](#l4.61)
+        OUTCHAR(c); \[](#l4.62)
+    else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 || \[](#l4.63)
+             (0xbffffd77L & (1L << ((c)-0xb4))))) \[](#l4.64)
+        OUTCHAR(0x02d0 + (c)); \[](#l4.65)
+    else if ((c) == 0xa1) OUTCHAR(0x2018); \[](#l4.66)
+    else if ((c) == 0xa2) OUTCHAR(0x2019); \[](#l4.67)
+    else if ((c) == 0xaf) OUTCHAR(0x2015);
static Py_ssize_t iso2022processg2(const void *config, MultibyteCodec_State *state, const unsigned char **inbuf, Py_ssize_t *inleft,
Py_UNICODE **outbuf, Py_ssize_t *outleft)[](#l4.73)
_PyUnicodeWriter *writer)[](#l4.74)
{ /* not written to use encoder, decoder functions because only few * encodings use G2 designations in CJKCodecs */ if (STATE_G2 == CHARSET_ISO8859_1) { if (IN3 < 0x80)
OUT1(IN3 + 0x80)[](#l4.80)
} else if (STATE_G2 == CHARSET_ISO8859_7) {OUTCHAR(IN3 + 0x80);[](#l4.81) else[](#l4.82) return 3;[](#l4.83)
ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)[](#l4.86)
} else if (STATE_G2 == CHARSET_ASCII) { if (IN3 & 0x80) return 3;ISO8859_7_DECODE(IN3 ^ 0x80, writer)[](#l4.87) else return 3;[](#l4.88)
else **outbuf = IN3;[](#l4.92)
@@ -429,8 +427,8 @@ DECODER(iso2022) if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { /* ESC throughout mode: * for non-iso2022 escape sequences */
WRITE1(c) /* assume as ISO-8859-1 */[](#l4.109)
NEXT(1, 1)[](#l4.110)
OUTCHAR(c); /* assume as ISO-8859-1 */[](#l4.111)
NEXT_IN(1);[](#l4.112) if (IS_ESCEND(c)) {[](#l4.113) STATE_CLEARFLAG(F_ESCTHROUGHOUT)[](#l4.114) }[](#l4.115)
@@ -449,32 +447,32 @@ DECODER(iso2022) else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ REQUIRE_INBUF(3) err = iso2022processg2(config, state,
inbuf, &inleft, outbuf, &outleft);[](#l4.120)
inbuf, &inleft, writer);[](#l4.121) if (err != 0)[](#l4.122) return err;[](#l4.123) }[](#l4.124) else {[](#l4.125)
WRITE1(ESC)[](#l4.126)
OUTCHAR(ESC);[](#l4.127) STATE_SETFLAG(F_ESCTHROUGHOUT)[](#l4.128)
NEXT(1, 1)[](#l4.129)
NEXT_IN(1);[](#l4.130) }[](#l4.131) break;[](#l4.132) case SI:[](#l4.133) if (CONFIG_ISSET(NO_SHIFT))[](#l4.134) goto bypass;[](#l4.135) STATE_CLEARFLAG(F_SHIFTED)[](#l4.136)
NEXT_IN(1)[](#l4.137)
NEXT_IN(1);[](#l4.138) break;[](#l4.139) case SO:[](#l4.140) if (CONFIG_ISSET(NO_SHIFT))[](#l4.141) goto bypass;[](#l4.142) STATE_SETFLAG(F_SHIFTED)[](#l4.143)
NEXT_IN(1)[](#l4.144)
NEXT_IN(1);[](#l4.145) break;[](#l4.146) case LF:[](#l4.147) STATE_CLEARFLAG(F_SHIFTED)[](#l4.148)
WRITE1(LF)[](#l4.149)
NEXT(1, 1)[](#l4.150)
OUTCHAR(LF);[](#l4.151)
NEXT_IN(1);[](#l4.152) break;[](#l4.153) default:[](#l4.154) if (c < 0x20) /* C0 */[](#l4.155)
@@ -484,7 +482,7 @@ DECODER(iso2022) else { const struct iso2022_designation *dsg; unsigned char charset;
ucs4_t decoded;[](#l4.160)
Py_UCS4 decoded;[](#l4.161)
if (STATE_GETFLAG(F_SHIFTED)) charset = STATE_G1; @@ -492,8 +490,8 @@ DECODER(iso2022) charset = STATE_G0; if (charset == CHARSET_ASCII) { -bypass: WRITE1(c)
NEXT(1, 1)[](#l4.170)
NEXT_IN(1);[](#l4.172) break;[](#l4.173) }[](#l4.174)
@@ -518,17 +516,15 @@ bypass: return dsg->width; if (decoded < 0x10000) {
WRITE1(decoded)[](#l4.180)
NEXT_OUT(1)[](#l4.181)
OUTCHAR(decoded);[](#l4.182) }[](#l4.183) else if (decoded < 0x30000) {[](#l4.184)
WRITEUCS4(decoded)[](#l4.185)
OUTCHAR(decoded);[](#l4.186) }[](#l4.187) else { /* JIS X 0213 pairs */[](#l4.188)
WRITE2(decoded >> 16, decoded & 0xffff)[](#l4.189)
NEXT_OUT(2)[](#l4.190)
OUTCHAR2(decoded >> 16, decoded & 0xffff);[](#l4.191) }[](#l4.192)
NEXT_IN(dsg->width)[](#l4.193)
NEXT_IN(dsg->width);[](#l4.194) }[](#l4.195) break;[](#l4.196) }[](#l4.197)
@@ -577,18 +573,18 @@ ksx1001_init(void) return 0; } -static ucs4_t +static Py_UCS4 ksx1001_decoder(const unsigned char *data) {
} static DBCHAR -ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) +ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -613,20 +609,20 @@ jisx0208_init(void) return 0; } -static ucs4_t +static Py_UCS4 jisx0208_decoder(const unsigned char *data) {
} static DBCHAR -jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -654,18 +650,18 @@ jisx0212_init(void) return 0; } -static ucs4_t +static Py_UCS4 jisx0212_decoder(const unsigned char *data) {
} static DBCHAR -jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -705,30 +701,30 @@ jisx0213_init(void) } #define config ((void *)2000) -static ucs4_t +static Py_UCS4 jisx0213_2000_1_decoder(const unsigned char *data) {
- Py_UCS4 u; EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1]) else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ return 0xff3c;
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
- else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1]) u |= 0x20000;
-static ucs4_t +static Py_UCS4 jisx0213_2000_2_decoder(const unsigned char *data) {
- ucs4_t u;
- EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
- Py_UCS4 u;
- EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1])
- TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1]) u |= 0x20000; else return MAP_UNMAPPABLE;
@@ -736,28 +732,28 @@ jisx0213_2000_2_decoder(const unsigned c } #undef config -static ucs4_t +static Py_UCS4 jisx0213_2004_1_decoder(const unsigned char *data) {
- else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
- else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
- else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
- else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1]) u |= 0x20000;
-static ucs4_t +static Py_UCS4 jisx0213_2004_2_decoder(const unsigned char *data) {
- ucs4_t u;
- TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
- Py_UCS4 u;
- TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
- else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1]) u |= 0x20000; else return MAP_UNMAPPABLE;
@@ -765,7 +761,7 @@ jisx0213_2004_2_decoder(const unsigned c } static DBCHAR -jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config) +jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config) { DBCHAR coded; @@ -819,7 +815,7 @@ jisx0213_encoder(const ucs4_t *data, Py_ } static DBCHAR -jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -831,7 +827,7 @@ jisx0213_2000_1_encoder(const ucs4_t *da } static DBCHAR -jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; Py_ssize_t ilength = *length; @@ -854,7 +850,7 @@ jisx0213_2000_1_encoder_paironly(const u } static DBCHAR -jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -866,7 +862,7 @@ jisx0213_2000_2_encoder(const ucs4_t *da } static DBCHAR -jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -878,7 +874,7 @@ jisx0213_2004_1_encoder(const ucs4_t *da } static DBCHAR -jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; Py_ssize_t ilength = *length; @@ -901,7 +897,7 @@ jisx0213_2004_1_encoder_paironly(const u } static DBCHAR -jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded = 
jisx0213_encoder(data, length, NULL); if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) @@ -912,17 +908,17 @@ jisx0213_2004_2_encoder(const ucs4_t *da return MAP_UNMAPPABLE; } -static ucs4_t +static Py_UCS4 jisx0201_r_decoder(const unsigned char *data) {
static DBCHAR -jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_R_ENCODE(*data, coded) @@ -930,17 +926,17 @@ jisx0201_r_encoder(const ucs4_t *data, P return coded; } -static ucs4_t +static Py_UCS4 jisx0201_k_decoder(const unsigned char *data) {
static DBCHAR -jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length) +jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; JISX0201_K_ENCODE(*data, coded) @@ -961,18 +957,18 @@ gb2312_init(void) return 0; } -static ucs4_t +static Py_UCS4 gb2312_decoder(const unsigned char *data) {
} static DBCHAR -gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) +gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length) { DBCHAR coded; assert(*length == 1); @@ -986,14 +982,14 @@ gb2312_encoder(const ucs4_t *data, Py_ss } -static ucs4_t +static Py_UCS4 dummy_decoder(const unsigned char *data) { return MAP_UNMAPPABLE; } static DBCHAR -dummy_encoder(const ucs4_t *data, Py_ssize_t *length) +dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length) { return MAP_UNMAPPABLE; }
--- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -20,7 +20,7 @@ ENCODER(cp932) { while (inleft > 0) {
Py_UNICODE c = IN1;[](#l5.7)
Py_UCS4 c = IN1;[](#l5.8) DBCHAR code;[](#l5.9) unsigned char c1, c2;[](#l5.10)
@@ -66,8 +66,8 @@ ENCODER(cp932) } else if (c >= 0xe000 && c < 0xe758) { /* User-defined area */
c1 = (Py_UNICODE)(c - 0xe000) / 188;[](#l5.16)
c2 = (Py_UNICODE)(c - 0xe000) % 188;[](#l5.17)
c1 = (Py_UCS4)(c - 0xe000) / 188;[](#l5.18)
c2 = (Py_UCS4)(c - 0xe000) % 188;[](#l5.19) OUT1(c1 + 0xf0)[](#l5.20) OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)[](#l5.21) }[](#l5.22)
@@ -85,31 +85,30 @@ DECODER(cp932) while (inleft > 0) { unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)[](#l5.27) if (c <= 0x80) {[](#l5.28)
OUT1(c)[](#l5.29)
NEXT(1, 1)[](#l5.30)
OUTCHAR(c);[](#l5.31)
NEXT_IN(1);[](#l5.32) continue;[](#l5.33) }[](#l5.34) else if (c >= 0xa0 && c <= 0xdf) {[](#l5.35) if (c == 0xa0)[](#l5.36)
OUT1(0xf8f0) /* half-width katakana */[](#l5.37)
OUTCHAR(0xf8f0); /* half-width katakana */[](#l5.38) else[](#l5.39)
OUT1(0xfec0 + c)[](#l5.40)
NEXT(1, 1)[](#l5.41)
OUTCHAR(0xfec0 + c);[](#l5.42)
NEXT_IN(1);[](#l5.43) continue;[](#l5.44) }[](#l5.45) else if (c >= 0xfd/* && c <= 0xff*/) {[](#l5.46) /* Windows compatibility */[](#l5.47)
OUT1(0xf8f1 - 0xfd + c)[](#l5.48)
NEXT(1, 1)[](#l5.49)
OUTCHAR(0xf8f1 - 0xfd + c);[](#l5.50)
NEXT_IN(1);[](#l5.51) continue;[](#l5.52) }[](#l5.53)
TRYMAP_DEC(cp932ext, **outbuf, c, c2);[](#l5.58)
TRYMAP_DEC(cp932ext, writer, c, c2);[](#l5.59) else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){[](#l5.60) if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)[](#l5.61) return 1;[](#l5.62)
@@ -119,21 +118,21 @@ DECODER(cp932) c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21); c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
TRYMAP_DEC(jisx0208, **outbuf, c, c2);[](#l5.67)
TRYMAP_DEC(jisx0208, writer, c, c2);[](#l5.68) else return 1;[](#l5.69) }[](#l5.70) else if (c >= 0xf0 && c <= 0xf9) {[](#l5.71) if ((c2 >= 0x40 && c2 <= 0x7e) ||[](#l5.72) (c2 >= 0x80 && c2 <= 0xfc))[](#l5.73)
OUT1(0xe000 + 188 * (c - 0xf0) +[](#l5.74)
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))[](#l5.75)
OUTCHAR(0xe000 + 188 * (c - 0xf0) +[](#l5.76)
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));[](#l5.77) else[](#l5.78) return 1;[](#l5.79) }[](#l5.80) else[](#l5.81) return 1;[](#l5.82)
NEXT(2, 1)[](#l5.84)
} return 0; @@ -147,7 +146,7 @@ DECODER(cp932) ENCODER(euc_jis_2004) { while (inleft > 0) {NEXT_IN(2);[](#l5.85)
ucs4_t c = IN1;[](#l5.93)
Py_UCS4 c = IN1;[](#l5.94) DBCHAR code;[](#l5.95) Py_ssize_t insize;[](#l5.96)
@@ -235,13 +234,11 @@ DECODER(euc_jis_2004) { while (inleft > 0) { unsigned char c = IN1;
ucs4_t code;[](#l5.102)
REQUIRE_OUTBUF(1)[](#l5.104)
Py_UCS4 code;[](#l5.105)
OUT1(c)[](#l5.108)
NEXT(1, 1)[](#l5.109)
OUTCHAR(c);[](#l5.110)
NEXT_IN(1);[](#l5.111) continue;[](#l5.112) }[](#l5.113)
@@ -252,8 +249,8 @@ DECODER(euc_jis_2004) REQUIRE_INBUF(2) c2 = IN2; if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)[](#l5.119)
NEXT(2, 1)[](#l5.120)
OUTCHAR(0xfec0 + c2);[](#l5.121)
NEXT_IN(2);[](#l5.122) }[](#l5.123) else[](#l5.124) return 1;[](#l5.125)
@@ -266,16 +263,16 @@ DECODER(euc_jis_2004) c3 = IN3 ^ 0x80; /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)[](#l5.130)
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;[](#l5.131)
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {[](#l5.132)
WRITEUCS4(EMPBASE | code)[](#l5.133)
NEXT_IN(3)[](#l5.134)
EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)[](#l5.135)
else TRYMAP_DEC(jisx0213_2_bmp, writer, c2, c3) ;[](#l5.136)
else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c2, c3) {[](#l5.137)
OUTCHAR(EMPBASE | code);[](#l5.138)
NEXT_IN(3);[](#l5.139) continue;[](#l5.140) }[](#l5.141)
else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;[](#l5.142)
else TRYMAP_DEC(jisx0212, writer, c2, c3) ;[](#l5.143) else return 1;[](#l5.144)
NEXT(3, 1)[](#l5.145)
NEXT_IN(3);[](#l5.146) }[](#l5.147) else {[](#l5.148) unsigned char c2;[](#l5.149)
@@ -285,23 +282,23 @@ DECODER(euc_jis_2004) c2 = IN2 ^ 0x80; /* JIS X 0213 Plane 1 */
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)[](#l5.154)
else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;[](#l5.155)
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;[](#l5.156)
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);[](#l5.157)
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);[](#l5.158)
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {[](#l5.159)
WRITEUCS4(EMPBASE | code)[](#l5.160)
NEXT_IN(2)[](#l5.161)
EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)[](#l5.162)
else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c);[](#l5.163)
else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e);[](#l5.164)
else TRYMAP_DEC(jisx0208, writer, c, c2);[](#l5.165)
else TRYMAP_DEC(jisx0213_1_bmp, writer, c, c2);[](#l5.166)
else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c, c2) {[](#l5.167)
OUTCHAR(EMPBASE | code);[](#l5.168)
NEXT_IN(2);[](#l5.169) continue;[](#l5.170) }[](#l5.171)
else TRYMAP_DEC(jisx0213_pair, code, c, c2) {[](#l5.172)
WRITE2(code >> 16, code & 0xffff)[](#l5.173)
NEXT(2, 2)[](#l5.174)
else TRYMAP_DEC_CHAR(jisx0213_pair, code, c, c2) {[](#l5.175)
OUTCHAR2(code >> 16, code & 0xffff);[](#l5.176)
NEXT_IN(2);[](#l5.177) continue;[](#l5.178) }[](#l5.179) else return 1;[](#l5.180)
NEXT(2, 1)[](#l5.181)
} @@ -316,7 +313,7 @@ DECODER(euc_jis_2004) ENCODER(euc_jp) { while (inleft > 0) {NEXT_IN(2);[](#l5.182) }[](#l5.183)
Py_UNICODE c = IN1;[](#l5.190)
Py_UCS4 c = IN1;[](#l5.191) DBCHAR code;[](#l5.192)
if (c < 0x80) { @@ -369,11 +366,9 @@ DECODER(euc_jp) while (inleft > 0) { unsigned char c = IN1;
REQUIRE_OUTBUF(1)[](#l5.199)
OUT1(c)[](#l5.202)
NEXT(1, 1)[](#l5.203)
OUTCHAR(c);[](#l5.204)
NEXT_IN(1);[](#l5.205) continue;[](#l5.206) }[](#l5.207)
@@ -384,8 +379,8 @@ DECODER(euc_jp) REQUIRE_INBUF(2) c2 = IN2; if (c2 >= 0xa1 && c2 <= 0xdf) {
OUT1(0xfec0 + c2)[](#l5.213)
NEXT(2, 1)[](#l5.214)
OUTCHAR(0xfec0 + c2);[](#l5.215)
NEXT_IN(2);[](#l5.216) }[](#l5.217) else[](#l5.218) return 1;[](#l5.219)
@@ -397,8 +392,8 @@ DECODER(euc_jp) c2 = IN2; c3 = IN3; /* JIS X 0212 */
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {[](#l5.224)
NEXT(3, 1)[](#l5.225)
TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) {[](#l5.226)
NEXT_IN(3);[](#l5.227) }[](#l5.228) else[](#l5.229) return 1;[](#l5.230)
@@ -412,13 +407,13 @@ DECODER(euc_jp) #ifndef STRICT_BUILD if (c == 0xa1 && c2 == 0xc0) /* FULL-WIDTH REVERSE SOLIDUS */
**outbuf = 0xff3c;[](#l5.235)
OUTCHAR(0xff3c);[](#l5.236) else[](#l5.237)
TRYMAP_DEC(jisx0208, **outbuf,[](#l5.239)
TRYMAP_DEC(jisx0208, writer,[](#l5.240) c ^ 0x80, c2 ^ 0x80) ;[](#l5.241) else return 1;[](#l5.242)
NEXT(2, 1)[](#l5.243)
} @@ -433,7 +428,7 @@ DECODER(euc_jp) ENCODER(shift_jis) { while (inleft > 0) {NEXT_IN(2);[](#l5.244) }[](#l5.245)
Py_UNICODE c = IN1;[](#l5.252)
Py_UCS4 c = IN1;[](#l5.253) DBCHAR code;[](#l5.254) unsigned char c1, c2;[](#l5.255)
@@ -488,14 +483,12 @@ DECODER(shift_jis) while (inleft > 0) { unsigned char c = IN1;
REQUIRE_OUTBUF(1)[](#l5.261)
JISX0201_R_DECODE(c, **outbuf)[](#l5.264)
JISX0201_R_DECODE(c, writer)[](#l5.265)
if (c < 0x80) **outbuf = c;[](#l5.267)
if (c < 0x80) OUTCHAR(c);[](#l5.268)
else JISX0201_K_DECODE(c, **outbuf)[](#l5.270)
else JISX0201_K_DECODE(c, writer)[](#l5.271) else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){[](#l5.272) unsigned char c1, c2;[](#l5.273)
@@ -512,13 +505,13 @@ DECODER(shift_jis) #ifndef STRICT_BUILD if (c1 == 0x21 && c2 == 0x40) { /* FULL-WIDTH REVERSE SOLIDUS */
OUT1(0xff3c)[](#l5.279)
NEXT(2, 1)[](#l5.280)
OUTCHAR(0xff3c);[](#l5.281)
NEXT_IN(2);[](#l5.282) continue;[](#l5.283) }[](#l5.284)
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {[](#l5.286)
NEXT(2, 1)[](#l5.287)
TRYMAP_DEC(jisx0208, writer, c1, c2) {[](#l5.288)
NEXT_IN(2);[](#l5.289) continue;[](#l5.290) }[](#l5.291) else[](#l5.292)
@@ -527,7 +520,7 @@ DECODER(shift_jis) else return 1;
NEXT(1, 1) /* JIS X 0201 */[](#l5.297)
} return 0; @@ -541,7 +534,7 @@ DECODER(shift_jis) ENCODER(shift_jis_2004) { while (inleft > 0) {NEXT_IN(1); /* JIS X 0201 */[](#l5.298)
ucs4_t c = IN1;[](#l5.306)
Py_UCS4 c = IN1;[](#l5.307) DBCHAR code = NOCHAR;[](#l5.308) int c1, c2;[](#l5.309) Py_ssize_t insize;[](#l5.310)
@@ -636,11 +629,10 @@ DECODER(shift_jis_2004) while (inleft > 0) { unsigned char c = IN1;
REQUIRE_OUTBUF(1)[](#l5.315)
JISX0201_DECODE(c, **outbuf)[](#l5.316)
JISX0201_DECODE(c, writer)[](#l5.317) else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){[](#l5.318) unsigned char c1, c2;[](#l5.319)
ucs4_t code;[](#l5.320)
Py_UCS4 code;[](#l5.321)
REQUIRE_INBUF(2) c2 = IN2; @@ -654,50 +646,47 @@ DECODER(shift_jis_2004) if (c1 < 0x5e) { /* Plane 1 */ c1 += 0x21;
EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,[](#l5.329)
EMULATE_JISX0213_2000_DECODE_PLANE1(writer,[](#l5.330) c1, c2)[](#l5.331)
else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {[](#l5.332)
NEXT_OUT(1)[](#l5.333)
else TRYMAP_DEC(jisx0208, writer, c1, c2) {[](#l5.334) }[](#l5.335)
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,[](#l5.336)
else TRYMAP_DEC(jisx0213_1_bmp, writer,[](#l5.337) c1, c2) {[](#l5.338)
NEXT_OUT(1)[](#l5.339) }[](#l5.340)
else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {[](#l5.341)
WRITEUCS4(EMPBASE | code)[](#l5.342)
else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c1, c2) {[](#l5.343)
OUTCHAR(EMPBASE | code);[](#l5.344) }[](#l5.345)
else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {[](#l5.346)
WRITE2(code >> 16, code & 0xffff)[](#l5.347)
NEXT_OUT(2)[](#l5.348)
else TRYMAP_DEC_CHAR(jisx0213_pair, code, c1, c2) {[](#l5.349)
OUTCHAR2(code >> 16, code & 0xffff);[](#l5.350) }[](#l5.351) else[](#l5.352) return 1;[](#l5.353)
NEXT_IN(2)[](#l5.354)
NEXT_IN(2);[](#l5.355) }[](#l5.356) else { /* Plane 2 */[](#l5.357) if (c1 >= 0x67) c1 += 0x07;[](#l5.358) else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;[](#l5.359) else c1 -= 0x3d;[](#l5.360)
EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,[](#l5.362)
EMULATE_JISX0213_2000_DECODE_PLANE2(writer,[](#l5.363) c1, c2)[](#l5.364)
else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,[](#l5.365)
c1, c2) ;[](#l5.366)
else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {[](#l5.367)
WRITEUCS4(EMPBASE | code)[](#l5.368)
NEXT_IN(2)[](#l5.369)
else TRYMAP_DEC(jisx0213_2_bmp, writer,[](#l5.370)
c1, c2) {[](#l5.371)
} else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c1, c2) {[](#l5.372)
OUTCHAR(EMPBASE | code);[](#l5.373)
NEXT_IN(2);[](#l5.374) continue;[](#l5.375) }[](#l5.376) else[](#l5.377) return 1;[](#l5.378)
NEXT(2, 1)[](#l5.379)
NEXT_IN(2);[](#l5.380) }[](#l5.381) continue;[](#l5.382) }[](#l5.383) else[](#l5.384) return 1;[](#l5.385)
NEXT(1, 1) /* JIS X 0201 */[](#l5.387)
--- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -34,7 +34,7 @@ static const unsigned char u2cgk_jongseo ENCODER(euc_kr) { while (inleft > 0) {
Py_UNICODE c = IN1;[](#l6.7)
Py_UCS4 c = IN1;[](#l6.8) DBCHAR code;[](#l6.9)
if (c < 0x80) { @@ -104,11 +104,9 @@ DECODER(euc_kr) while (inleft > 0) { unsigned char c = IN1;
REQUIRE_OUTBUF(1)[](#l6.16)
OUT1(c)[](#l6.19)
NEXT(1, 1)[](#l6.20)
OUTCHAR(c);[](#l6.21)
NEXT_IN(1);[](#l6.22) continue;[](#l6.23) }[](#l6.24)
@@ -145,11 +143,11 @@ DECODER(euc_kr) if (cho == NONE || jung == NONE || jong == NONE) return 1;
OUT1(0xac00 + cho*588 + jung*28 + jong);[](#l6.30)
NEXT(8, 1)[](#l6.31)
OUTCHAR(0xac00 + cho*588 + jung*28 + jong);[](#l6.32)
NEXT_IN(8);[](#l6.33) }[](#l6.34)
else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {[](#l6.35)
NEXT(2, 1)[](#l6.36)
else TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80) {[](#l6.37)
NEXT_IN(2);[](#l6.38) }[](#l6.39) else[](#l6.40) return 1;[](#l6.41)
@@ -167,7 +165,7 @@ DECODER(euc_kr) ENCODER(cp949) { while (inleft > 0) {
Py_UNICODE c = IN1;[](#l6.46)
Py_UCS4 c = IN1;[](#l6.47) DBCHAR code;[](#l6.48)
if (c < 0x80) { @@ -197,20 +195,18 @@ DECODER(cp949) while (inleft > 0) { unsigned char c = IN1;
REQUIRE_OUTBUF(1)[](#l6.55)
OUT1(c)[](#l6.58)
NEXT(1, 1)[](#l6.59)
OUTCHAR(c);[](#l6.60)
NEXT_IN(1);[](#l6.61) continue;[](#l6.62) }[](#l6.63)
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);[](#l6.66)
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);[](#l6.67)
TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80);[](#l6.68)
else TRYMAP_DEC(cp949ext, writer, c, IN2);[](#l6.69) else return 1;[](#l6.70)
NEXT(2, 1)[](#l6.72)
} return 0; @@ -251,7 +247,7 @@ static const DBCHAR u2johabjamo[] = { ENCODER(johab) { while (inleft > 0) {NEXT_IN(2);[](#l6.73)
Py_UNICODE c = IN1;[](#l6.81)
Py_UCS4 c = IN1;[](#l6.82) DBCHAR code;[](#l6.83)
if (c < 0x80) { @@ -350,11 +346,9 @@ DECODER(johab) while (inleft > 0) { unsigned char c = IN1, c2;
REQUIRE_OUTBUF(1)[](#l6.90)
OUT1(c)[](#l6.93)
NEXT(1, 1)[](#l6.94)
OUTCHAR(c);[](#l6.95)
NEXT_IN(1);[](#l6.96) continue;[](#l6.97) }[](#l6.98)
@@ -381,33 +375,33 @@ DECODER(johab) if (i_cho == FILL) { if (i_jung == FILL) { if (i_jong == FILL)
OUT1(0x3000)[](#l6.104)
OUTCHAR(0x3000);[](#l6.105) else[](#l6.106)
OUT1(0x3100 |[](#l6.107)
johabjamo_jongseong[c_jong])[](#l6.108)
OUTCHAR(0x3100 |[](#l6.109)
johabjamo_jongseong[c_jong]);[](#l6.110) }[](#l6.111) else {[](#l6.112) if (i_jong == FILL)[](#l6.113)
OUT1(0x3100 |[](#l6.114)
johabjamo_jungseong[c_jung])[](#l6.115)
OUTCHAR(0x3100 |[](#l6.116)
johabjamo_jungseong[c_jung]);[](#l6.117) else[](#l6.118) return 1;[](#l6.119) }[](#l6.120) } else {[](#l6.121) if (i_jung == FILL) {[](#l6.122) if (i_jong == FILL)[](#l6.123)
OUT1(0x3100 |[](#l6.124)
johabjamo_choseong[c_cho])[](#l6.125)
OUTCHAR(0x3100 |[](#l6.126)
johabjamo_choseong[c_cho]);[](#l6.127) else[](#l6.128) return 1;[](#l6.129) }[](#l6.130) else[](#l6.131)
OUT1(0xac00 +[](#l6.132)
i_cho * 588 +[](#l6.133)
i_jung * 28 +[](#l6.134)
(i_jong == FILL ? 0 : i_jong))[](#l6.135)
OUTCHAR(0xac00 +[](#l6.136)
i_cho * 588 +[](#l6.137)
i_jung * 28 +[](#l6.138)
(i_jong == FILL ? 0 : i_jong));[](#l6.139) }[](#l6.140)
NEXT(2, 1)[](#l6.141)
NEXT_IN(2);[](#l6.142) } else {[](#l6.143) /* KS X 1001 except hangul jamos and syllables */[](#l6.144) if (c == 0xdf || c > 0xf9 ||[](#l6.145)
@@ -424,9 +418,9 @@ DECODER(johab) t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21; t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);[](#l6.150)
TRYMAP_DEC(ksx1001, writer, t1, t2);[](#l6.151) else return 1;[](#l6.152)
NEXT(2, 1)[](#l6.153)
--- a/Modules/cjkcodecs/_codecs_tw.c +++ b/Modules/cjkcodecs/_codecs_tw.c @@ -14,7 +14,7 @@ ENCODER(big5) { while (inleft > 0) {
Py_UNICODE c = **inbuf;[](#l7.7)
Py_UCS4 c = **inbuf;[](#l7.8) DBCHAR code;[](#l7.9)
if (c < 0x80) { @@ -43,17 +43,15 @@ DECODER(big5) while (inleft > 0) { unsigned char c = IN1;
REQUIRE_OUTBUF(1)[](#l7.16)
OUT1(c)[](#l7.19)
NEXT(1, 1)[](#l7.20)
OUTCHAR(c);[](#l7.21)
NEXT_IN(1);[](#l7.22) continue;[](#l7.23) }[](#l7.24)
TRYMAP_DEC(big5, **outbuf, c, IN2) {[](#l7.27)
NEXT(2, 1)[](#l7.28)
TRYMAP_DEC(big5, writer, c, IN2) {[](#l7.29)
} @@ -69,7 +67,7 @@ DECODER(big5) ENCODER(cp950) { while (inleft > 0) {NEXT_IN(2);[](#l7.30) }[](#l7.31) else return 1;[](#l7.32)
Py_UNICODE c = IN1;[](#l7.38)
Py_UCS4 c = IN1;[](#l7.39) DBCHAR code;[](#l7.40)
if (c < 0x80) { @@ -97,21 +95,19 @@ DECODER(cp950) while (inleft > 0) { unsigned char c = IN1;
REQUIRE_OUTBUF(1)[](#l7.47)
OUT1(c)[](#l7.50)
NEXT(1, 1)[](#l7.51)
OUTCHAR(c);[](#l7.52)
NEXT_IN(1);[](#l7.53) continue;[](#l7.54) }[](#l7.55)
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);[](#l7.59)
else TRYMAP_DEC(big5, **outbuf, c, IN2);[](#l7.60)
TRYMAP_DEC(cp950ext, writer, c, IN2);[](#l7.61)
else TRYMAP_DEC(big5, writer, c, IN2);[](#l7.62) else return 1;[](#l7.63)
NEXT(2, 1)[](#l7.65)
--- a/Modules/cjkcodecs/alg_jisx0201.h +++ b/Modules/cjkcodecs/alg_jisx0201.h @@ -10,15 +10,24 @@ JISX0201_R_ENCODE(c, assi) [](#l8.4) else JISX0201_K_ENCODE(c, assi) -#define JISX0201_R_DECODE(c, assi) [](#l8.7) +#define JISX0201_R_DECODE_CHAR(c, assi) [](#l8.8) if ((c) < 0x5c) (assi) = (c); [](#l8.9) else if ((c) == 0x5c) (assi) = 0x00a5; [](#l8.10) else if ((c) < 0x7e) (assi) = (c); [](#l8.11) else if ((c) == 0x7e) (assi) = 0x203e; [](#l8.12) else if ((c) == 0x7f) (assi) = 0x7f; -#define JISX0201_K_DECODE(c, assi) [](#l8.14) +#define JISX0201_R_DECODE(c, writer) [](#l8.15)
- if ((c) < 0x5c) OUTCHAR(c); [](#l8.16)
- else if ((c) == 0x5c) OUTCHAR(0x00a5); [](#l8.17)
- else if ((c) < 0x7e) OUTCHAR(c); [](#l8.18)
- else if ((c) == 0x7e) OUTCHAR(0x203e); [](#l8.19)
- else if ((c) == 0x7f) OUTCHAR(0x7f);
+#define JISX0201_K_DECODE(c, writer) [](#l8.21) if ((c) >= 0xa1 && (c) <= 0xdf) [](#l8.22)
-#define JISX0201_DECODE(c, assi) [](#l8.24)
OUTCHAR(0xfec0 + (c));[](#l8.27)
+#define JISX0201_K_DECODE_CHAR(c, assi) [](#l8.28)
- if ((c) >= 0xa1 && (c) <= 0xdf) [](#l8.29)
(assi) = 0xfec0 + (c);[](#l8.30)
+#define JISX0201_DECODE(c, writer) [](#l8.31)
--- a/Modules/cjkcodecs/cjkcodecs.h +++ b/Modules/cjkcodecs/cjkcodecs.h @@ -33,7 +33,7 @@ struct dbcs_index { typedef struct dbcs_index decode_map; struct widedbcs_index {
- const Py_UCS4 *map; unsigned char bottom, top; }; typedef struct widedbcs_index widedecode_map; @@ -56,7 +56,7 @@ struct dbcs_map { }; struct pair_encodemap {
- Py_UCS4 uniseq; DBCHAR code; }; @@ -86,7 +86,7 @@ static const struct dbcs_map *mapping_li static Py_ssize_t encoding##_decode( [](#l9.22) MultibyteCodec_State *state, const void *config, [](#l9.23) const unsigned char **inbuf, Py_ssize_t inleft, [](#l9.24)
Py_UNICODE **outbuf, Py_ssize_t outleft)[](#l9.25)
_PyUnicodeWriter *writer)[](#l9.26)
#define DECODER_RESET(encoding) [](#l9.27) static Py_ssize_t encoding##_decode_reset( [](#l9.28) MultibyteCodec_State *state, const void *config) @@ -101,13 +101,15 @@ static const struct dbcs_map *mapping_li #endif #define NEXT_IN(i) [](#l9.33)
#define NEXT_OUT(o) [](#l9.40) (*outbuf) += (o); [](#l9.41) (outleft) -= (o); #define NEXT(i, o) [](#l9.43)
#define REQUIRE_INBUF(n) [](#l9.47) if (inleft < (n)) [](#l9.48) @@ -121,6 +123,23 @@ static const struct dbcs_map *mapping_li #define IN3 ((*inbuf)[2]) #define IN4 ((*inbuf)[3]) +#define OUTCHAR(c) [](#l9.53)
- do { [](#l9.54)
if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) \[](#l9.55)
return MBERR_TOOSMALL; \[](#l9.56)
- } while (0)
/* Append the two code points c1 and c2, in that order, to the
 * _PyUnicodeWriter named `writer` in the enclosing function.
 *
 * Both arguments are captured once into locals (_c1, _c2) and only the
 * locals are used afterwards, so each argument expression is evaluated
 * exactly once.  The buffer is prepared for two characters whose maximum
 * code point is the larger of the two captured values.
 *
 * If _PyUnicodeWriter_Prepare() fails, the ENCLOSING function returns
 * MBERR_TOOSMALL; on success writer->pos is advanced by 2.
 */
#define OUTCHAR2(c1, c2)                                                    \
    do {                                                                    \
        Py_UCS4 _c1 = (c1);                                                 \
        Py_UCS4 _c2 = (c2);                                                 \
        /* Use the captured _c2 here, not the raw macro argument. */        \
        if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, _c2)) < 0)      \
            return MBERR_TOOSMALL;                                          \
        PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1);      \
        PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2);  \
        writer->pos += 2;                                                   \
    } while (0)
+ #define OUT1(c) ((*outbuf)[0]) = (c); #define OUT2(c) ((*outbuf)[1]) = (c); #define OUT3(c) ((*outbuf)[2]) = (c); @@ -145,19 +164,6 @@ static const struct dbcs_map *mapping_li (*outbuf)[2] = (c3); [](#l9.74) (*outbuf)[3] = (c4); -#if Py_UNICODE_SIZE == 2 -# define WRITEUCS4(c) [](#l9.78)
- REQUIRE_OUTBUF(2) [](#l9.79)
- (*outbuf)[0] = Py_UNICODE_HIGH_SURROGATE(c); [](#l9.80)
- (*outbuf)[1] = Py_UNICODE_LOW_SURROGATE(c); [](#l9.81)
- NEXT_OUT(2)
-#else -# define WRITEUCS4(c) [](#l9.84)
-#endif - #define _TRYMAP_ENC(m, assi, val) [](#l9.90) ((m)->map != NULL && (val) >= (m)->bottom && [](#l9.91) (val)<= (m)->top && ((assi) = (m)->map(val) - [ @@ -167,24 +173,41 @@ static const struct dbcs_map *mapping_li #define TRYMAP_ENC(charset, assi, uni) [](#l9.94) if TRYMAP_ENC_COND(charset, assi, uni) -#define _TRYMAP_DEC(m, assi, val) [](#l9.97)
- ((m)->map != NULL && (val) >= (m)->bottom && [](#l9.98)
(val)<= (m)->top && ((assi) = (m)->map[(val) - \[](#l9.99)
(m)->bottom]) != UNIINV)[](#l9.100)
-#define TRYMAP_DEC(charset, assi, c1, c2) [](#l9.101)
+Py_LOCAL_INLINE(int) +_TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c) +{
- if (c == UNIINV || _PyUnicodeWriter_WriteChar(writer, c) < 0)
return UNIINV;[](#l9.107)
- else
return c;[](#l9.109)
+} -#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) [](#l9.112)
- ((m)->map != NULL && (val) >= (m)->bottom && [](#l9.113)
(val)<= (m)->top && \[](#l9.114)
((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \[](#l9.115)
+#define _TRYMAP_DEC(m, writer, val) [](#l9.116)
- ((m)->map != NULL && [](#l9.117)
(val) >= (m)->bottom && \[](#l9.118)
(val)<= (m)->top && \[](#l9.119)
_TRYMAP_DEC_WRITE(writer, (m)->map[(val) - (m)->bottom]) != UNIINV)[](#l9.120)
+#define _TRYMAP_DEC_CHAR(m, assi, val) [](#l9.121)
- ((m)->map != NULL && [](#l9.122)
(val) >= (m)->bottom && \[](#l9.123)
(val)<= (m)->top && \[](#l9.124)
((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)[](#l9.125)
+#define TRYMAP_DEC(charset, writer, c1, c2) [](#l9.126)
+#define TRYMAP_DEC_CHAR(charset, assi, c1, c2) [](#l9.128)
+ +#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val) [](#l9.131)
- ((m)->map != NULL && (val) >= (m)->bottom && [](#l9.132)
(val)<= (m)->top && \[](#l9.133)
((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \[](#l9.134) (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \[](#l9.135) (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))[](#l9.136)
#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) [](#l9.137) if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], [](#l9.138) assplane, asshi, asslo, (uni) & 0xff) -#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2) [](#l9.140)
+#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) [](#l9.142)
#if Py_UNICODE_SIZE == 2 #define DECODE_SURROGATE(c) [](#l9.146) @@ -323,7 +346,7 @@ find_pairencmap(ucs2_t body, ucs2_t modi const struct pair_encodemap *haystack, int haystacksize) { int pos, min, max;
--- a/Modules/cjkcodecs/emu_jisx0213_2000.h +++ b/Modules/cjkcodecs/emu_jisx0213_2000.h @@ -38,6 +38,9 @@ ((c1) == 0x7E && (c2) == 0x7E))) [](#l10.4) return EMULATE_JISX0213_2000_DECODE_INVALID; -#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2) [](#l10.7) +#define EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2) [](#l10.8)
- if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) [](#l10.9)
OUTCHAR(0x9B1D);[](#l10.10)
+#define EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2) [](#l10.11) if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B) [](#l10.12) (assi) = 0x9B1D;
--- a/Modules/cjkcodecs/mappings_cn.h +++ b/Modules/cjkcodecs/mappings_cn.h @@ -4049,7 +4049,7 @@ 0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0, static const struct _gb18030_to_unibmp_ranges {
- Py_UCS4 first, last; DBCHAR base; } gb18030_to_unibmp_ranges[] = { {128,163,0},{165,166,36},{169,175,38},{178,182,45},{184,214,50},{216,223,81},{
--- a/Modules/cjkcodecs/mappings_jisx0213_pair.h +++ b/Modules/cjkcodecs/mappings_jisx0213_pair.h @@ -3,7 +3,7 @@ static const struct widedbcs_index *jisx0213_pair_decmap; static const struct pair_encodemap *jisx0213_pair_encmap; #else -static const ucs4_t __jisx0213_pair_decmap[49] = { +static const Py_UCS4 __jisx0213_pair_decmap[49] = { 810234010,810365082,810496154,810627226,810758298,816525466,816656538, 816787610,816918682,817049754,817574042,818163866,818426010,838283418, 15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
--- a/Modules/cjkcodecs/multibytecodec.c +++ b/Modules/cjkcodecs/multibytecodec.c @@ -17,8 +17,8 @@ typedef struct { typedef struct { const unsigned char *inbuf, *inbuf_top, *inbuf_end;
} MultibyteDecodeBuffer; PyDoc_STRVAR(MultibyteCodec_Encode__doc__, @@ -197,29 +197,6 @@ expand_encodebuffer(MultibyteEncodeBuffe goto errorexit; [](#l13.15) } -static int -expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize) -{
- orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
- orgsize = PyUnicode_GET_SIZE(buf->outobj);
- if (PyUnicode_Resize(&buf->outobj, orgsize + (
esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)[](#l13.26)
return -1;[](#l13.27)
- buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
- buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
+ PyUnicode_GET_SIZE(buf->outobj);[](#l13.31)
-} -#define REQUIRE_DECODEBUFFER(buf, s) { [](#l13.35)
- if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) [](#l13.36)
if (expand_decodebuffer(buf, s) == -1) \[](#l13.37)
goto errorexit; \[](#l13.38)
- MultibyteCodec object @@ -374,7 +351,7 @@ multibytecodec_decerror(MultibyteCodec * PyObject *errors, Py_ssize_t e)
{ PyObject *retobj = NULL, *retuni = NULL;
- Py_ssize_t newpos; const char *reason; Py_ssize_t esize, start, end; @@ -385,7 +362,6 @@ multibytecodec_decerror(MultibyteCodec * else { switch (e) { case MBERR_TOOSMALL:
REQUIRE_DECODEBUFFER(buf, -1);[](#l13.57) return 0; /* retry it */[](#l13.58) case MBERR_TOOFEW:[](#l13.59) reason = "incomplete multibyte sequence";[](#l13.60)
@@ -403,8 +379,9 @@ multibytecodec_decerror(MultibyteCodec * } if (errors == ERROR_REPLACE) {
REQUIRE_DECODEBUFFER(buf, 1);[](#l13.65)
*buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;[](#l13.66)
if (_PyUnicodeWriter_WriteChar(&buf->writer,[](#l13.67)
Py_UNICODE_REPLACEMENT_CHARACTER) < 0)[](#l13.68)
} if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { buf->inbuf += esize;goto errorexit;[](#l13.69)
@@ -447,15 +424,8 @@ multibytecodec_decerror(MultibyteCodec * goto errorexit; }
- retunisize = PyUnicode_GET_SIZE(retuni);
- if (retunisize > 0) {
REQUIRE_DECODEBUFFER(buf, retunisize);[](#l13.82)
memcpy((char *)buf->outbuf, PyUnicode_AS_UNICODE(retuni),[](#l13.83)
retunisize * Py_UNICODE_SIZE);[](#l13.84)
buf->outbuf += retunisize;[](#l13.85)
- }
newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); if (newpos < 0 && !PyErr_Occurred()) @@ -617,10 +587,10 @@ MultibyteCodec_Decode(MultibyteCodecObje { MultibyteCodec_State state; MultibyteDecodeBuffer buf;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode", codeckwarglist, &pdata, &errors)) @@ -640,29 +610,22 @@ MultibyteCodec_Decode(MultibyteCodecObje return make_tuple(PyUnicode_New(0, 0), 0); }
- _PyUnicodeWriter_Init(&buf.writer, datalen); buf.excobj = NULL; buf.inbuf = buf.inbuf_top = (unsigned char *)data; buf.inbuf_end = buf.inbuf_top + datalen;
- buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
- if (buf.outobj == NULL)
goto errorexit;[](#l13.113)
- buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
- if (buf.outbuf == NULL)
goto errorexit;[](#l13.116)
- buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
if (self->codec->decinit != NULL && self->codec->decinit(&state, self->codec->config) != 0) goto errorexit; while (buf.inbuf < buf.inbuf_end) {
Py_ssize_t inleft, outleft, r;[](#l13.124)
Py_ssize_t inleft, r;[](#l13.125)
inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);[](#l13.128)
r = self->codec->decode(&state, self->codec->config,
&buf.inbuf, inleft, &buf.outbuf, outleft);[](#l13.131)
&buf.inbuf, inleft, &buf.writer);[](#l13.132) if (r == 0)[](#l13.133) break;[](#l13.134) else if (multibytecodec_decerror(self->codec, &state,[](#l13.135)
@@ -670,23 +633,20 @@ MultibyteCodec_Decode(MultibyteCodecObje goto errorexit; }
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)[](#l13.144)
goto errorexit;[](#l13.145)
PyBuffer_Release(&pdata); Py_XDECREF(buf.excobj); ERROR_DECREF(errorcb);
errorexit: PyBuffer_Release(&pdata); ERROR_DECREF(errorcb); Py_XDECREF(buf.excobj);
return NULL; } @@ -859,17 +819,7 @@ decoder_prepare_buffer(MultibyteDecodeBu { buf->inbuf = buf->inbuf_top = (const unsigned char *)data; buf->inbuf_end = buf->inbuf_top + size;
- if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
buf->outobj = PyUnicode_FromUnicode(NULL, size);[](#l13.170)
if (buf->outobj == NULL)[](#l13.171)
return -1;[](#l13.172)
buf->outbuf = PyUnicode_AsUnicode(buf->outobj);[](#l13.173)
if (buf->outbuf == NULL)[](#l13.174)
return -1;[](#l13.175)
buf->outbuf_end = buf->outbuf +[](#l13.176)
PyUnicode_GET_SIZE(buf->outobj);[](#l13.177)
- }
@@ -878,14 +828,13 @@ decoder_feed_buffer(MultibyteStatefulDec MultibyteDecodeBuffer *buf) { while (buf->inbuf < buf->inbuf_end) {
Py_ssize_t inleft, outleft;[](#l13.188)
Py_ssize_t inleft;[](#l13.189) Py_ssize_t r;[](#l13.190)
inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);[](#l13.193)
r = ctx->codec->decode(&ctx->state, ctx->codec->config,
&buf->inbuf, inleft, &buf->outbuf, outleft);[](#l13.196)
&buf->inbuf, inleft, &buf->writer);[](#l13.197) if (r == 0 || r == MBERR_TOOFEW)[](#l13.198) break;[](#l13.199) else if (multibytecodec_decerror(ctx->codec, &ctx->state,[](#l13.200)
@@ -1058,8 +1007,9 @@ mbidecoder_decode(MultibyteIncrementalDe MultibyteDecodeBuffer buf; char *data, *wdata = NULL; Py_buffer pdata;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode", incrementalkwarglist, &pdata, &final)) @@ -1067,7 +1017,8 @@ mbidecoder_decode(MultibyteIncrementalDe data = pdata.buf; size = pdata.len;
- _PyUnicodeWriter_Init(&buf.writer, 1);
- buf.excobj = NULL; origpending = self->pendingsize; if (self->pendingsize == 0) { @@ -1109,23 +1060,22 @@ mbidecoder_decode(MultibyteIncrementalDe goto errorexit; }
- finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)[](#l13.228)
goto errorexit;[](#l13.229)
PyBuffer_Release(&pdata); if (wdata != data) PyMem_Del(wdata); Py_XDECREF(buf.excobj);
errorexit: PyBuffer_Release(&pdata); if (wdata != NULL && wdata != data) PyMem_Del(wdata); Py_XDECREF(buf.excobj);
@@ -1265,13 +1215,14 @@ mbstreamreader_iread(MultibyteStreamRead const char *method, Py_ssize_t sizehint) { MultibyteDecodeBuffer buf;
if (sizehint == 0) return PyUnicode_New(0, 0);
- _PyUnicodeWriter_Init(&buf.writer, 1);
- buf.excobj = NULL; cres = NULL; for (;;) { @@ -1340,29 +1291,27 @@ mbstreamreader_iread(MultibyteStreamRead goto errorexit; }
finalsize = (Py_ssize_t)(buf.outbuf -[](#l13.273)
PyUnicode_AS_UNICODE(buf.outobj));[](#l13.274) Py_DECREF(cres);[](#l13.275) cres = NULL;[](#l13.276)
if (sizehint < 0 || finalsize != 0 || rsize == 0)[](#l13.278)
if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)[](#l13.279) break;[](#l13.280)
sizehint = 1; /* read 1 more byte and retry */ }
- if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)[](#l13.286)
goto errorexit;[](#l13.287)
Py_XDECREF(cres); Py_XDECREF(buf.excobj);
errorexit: Py_XDECREF(cres); Py_XDECREF(buf.excobj);
--- a/Modules/cjkcodecs/multibytecodec.h +++ b/Modules/cjkcodecs/multibytecodec.h @@ -10,12 +10,6 @@ extern "C" { #endif -#ifdef uint32_t -typedef uint32_t ucs4_t; -#else -typedef unsigned int ucs4_t; -#endif - #ifdef uint16_t typedef uint16_t ucs2_t, DBCHAR; #else @@ -27,7 +21,7 @@ typedef union { int i; unsigned char c[8]; ucs2_t u2[4];
} MultibyteCodec_State; typedef int (*mbcodec_init)(const void *config); @@ -44,7 +38,7 @@ typedef Py_ssize_t (*mbencodereset_func) typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state, const void *config, const unsigned char **inbuf, Py_ssize_t inleft,
Py_UNICODE **outbuf, Py_ssize_t outleft);[](#l14.29)
_PyUnicodeWriter *writer);[](#l14.30)
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state, const void *config); typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
--- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12948,6 +12948,16 @@ int } int +_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch) +{
- if (_PyUnicodeWriter_Prepare(writer, 1, ch) < 0)
return -1;[](#l15.10)
- PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
- writer->pos++;
- return 0;
+} + +int _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) { Py_UCS4 maxchar;