[Python-checkins] cpython: Close #17693: Rewrite CJK decoders to use the _PyUnicodeWriter API instead of the legacy Py_UNICODE API

victor.stinner python-checkins at python.org
Thu Apr 11 22:13:18 CEST 2013


http://hg.python.org/cpython/rev/bcecf3910162
changeset:   83255:bcecf3910162
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Thu Apr 11 22:09:04 2013 +0200
summary:
  Close #17693: Rewrite CJK decoders to use the _PyUnicodeWriter API instead of
the legacy Py_UNICODE API.

Also add a new _PyUnicodeWriter_WriteChar() function.

files:
  Include/unicodeobject.h                    |    7 +
  Modules/cjkcodecs/_codecs_cn.c             |   89 ++--
  Modules/cjkcodecs/_codecs_hk.c             |   34 +-
  Modules/cjkcodecs/_codecs_iso2022.c        |  186 ++++-----
  Modules/cjkcodecs/_codecs_jp.c             |  167 ++++----
  Modules/cjkcodecs/_codecs_kr.c             |   66 +-
  Modules/cjkcodecs/_codecs_tw.c             |   26 +-
  Modules/cjkcodecs/alg_jisx0201.h           |   21 +-
  Modules/cjkcodecs/cjkcodecs.h              |   87 ++-
  Modules/cjkcodecs/emu_jisx0213_2000.h      |    5 +-
  Modules/cjkcodecs/mappings_cn.h            |    2 +-
  Modules/cjkcodecs/mappings_jisx0213_pair.h |    2 +-
  Modules/cjkcodecs/multibytecodec.c         |  129 ++----
  Modules/cjkcodecs/multibytecodec.h         |   10 +-
  Objects/unicodeobject.c                    |   10 +
  15 files changed, 401 insertions(+), 440 deletions(-)


diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -933,6 +933,13 @@
 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                  Py_ssize_t length, Py_UCS4 maxchar);
 
+/* Append a Unicode character.
+   Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
+    Py_UCS4 ch
+    );
+
 /* Append a Unicode string.
    Return 0 on success, raise an exception and return -1 on error. */
 PyAPI_FUNC(int)
diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c
--- a/Modules/cjkcodecs/_codecs_cn.c
+++ b/Modules/cjkcodecs/_codecs_cn.c
@@ -23,12 +23,12 @@
  * A844         undefined                       U+2015 HORIZONTAL BAR
  */
 
-#define GBK_DECODE(dc1, dc2, assi) \
-    if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
-    else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
-    else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
-    else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \
-    else TRYMAP_DEC(gbkext, assi, dc1, dc2);
+#define GBK_DECODE(dc1, dc2, writer) \
+    if ((dc1) == 0xa1 && (dc2) == 0xaa) OUTCHAR(0x2014); \
+    else if ((dc1) == 0xa8 && (dc2) == 0x44) OUTCHAR(0x2015); \
+    else if ((dc1) == 0xa1 && (dc2) == 0xa4) OUTCHAR(0x00b7); \
+    else TRYMAP_DEC(gb2312, writer, dc1 ^ 0x80, dc2 ^ 0x80); \
+    else TRYMAP_DEC(gbkext, writer, dc1, dc2);
 
 #define GBK_ENCODE(code, assi) \
     if ((code) == 0x2014) (assi) = 0xa1aa; \
@@ -43,7 +43,7 @@
 ENCODER(gb2312)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -73,17 +73,15 @@
     while (inleft > 0) {
         unsigned char c = **inbuf;
 
-        REQUIRE_OUTBUF(1)
-
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
         REQUIRE_INBUF(2)
-        TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
-            NEXT(2, 1)
+        TRYMAP_DEC(gb2312, writer, c ^ 0x80, IN2 ^ 0x80) {
+            NEXT_IN(2);
         }
         else return 1;
     }
@@ -99,7 +97,7 @@
 ENCODER(gbk)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -130,20 +128,18 @@
     while (inleft > 0) {
         unsigned char c = IN1;
 
-        REQUIRE_OUTBUF(1)
-
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
         REQUIRE_INBUF(2)
 
-        GBK_DECODE(c, IN2, **outbuf)
+        GBK_DECODE(c, IN2, writer)
         else return 1;
 
-        NEXT(2, 1)
+        NEXT_IN(2);
     }
 
     return 0;
@@ -157,7 +153,7 @@
 ENCODER(gb18030)
 {
     while (inleft > 0) {
-        ucs4_t c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -174,7 +170,7 @@
             return 1;
 #endif
         else if (c >= 0x10000) {
-            ucs4_t tc = c - 0x10000;
+            Py_UCS4 tc = c - 0x10000;
 
             REQUIRE_OUTBUF(4)
 
@@ -208,7 +204,7 @@
                  utrrange++)
                 if (utrrange->first <= c &&
                     c <= utrrange->last) {
-                    Py_UNICODE tc;
+                    Py_UCS4 tc;
 
                     tc = c - utrrange->first +
                          utrrange->base;
@@ -247,11 +243,9 @@
     while (inleft > 0) {
         unsigned char c = IN1, c2;
 
-        REQUIRE_OUTBUF(1)
-
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
@@ -261,7 +255,7 @@
         if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
             const struct _gb18030_to_unibmp_ranges *utr;
             unsigned char c3, c4;
-            ucs4_t lseq;
+            Py_UCS4 lseq;
 
             REQUIRE_INBUF(4)
             c3 = IN3;
@@ -272,34 +266,34 @@
             c3 -= 0x81; c4 -= 0x30;
 
             if (c < 4) { /* U+0080 - U+FFFF */
-                lseq = ((ucs4_t)c * 10 + c2) * 1260 +
-                    (ucs4_t)c3 * 10 + c4;
+                lseq = ((Py_UCS4)c * 10 + c2) * 1260 +
+                    (Py_UCS4)c3 * 10 + c4;
                 if (lseq < 39420) {
                     for (utr = gb18030_to_unibmp_ranges;
                          lseq >= (utr + 1)->base;
                          utr++) ;
-                    OUT1(utr->first - utr->base + lseq)
-                    NEXT(4, 1)
+                    OUTCHAR(utr->first - utr->base + lseq);
+                    NEXT_IN(4);
                     continue;
                 }
             }
             else if (c >= 15) { /* U+10000 - U+10FFFF */
-                lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
-                    * 1260 + (ucs4_t)c3 * 10 + c4;
+                lseq = 0x10000 + (((Py_UCS4)c-15) * 10 + c2)
+                    * 1260 + (Py_UCS4)c3 * 10 + c4;
                 if (lseq <= 0x10FFFF) {
-                    WRITEUCS4(lseq);
-                    NEXT_IN(4)
+                    OUTCHAR(lseq);
+                    NEXT_IN(4);
                     continue;
                 }
             }
             return 1;
         }
 
-        GBK_DECODE(c, c2, **outbuf)
-        else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
+        GBK_DECODE(c, c2, writer)
+        else TRYMAP_DEC(gb18030ext, writer, c, c2);
         else return 1;
 
-        NEXT(2, 1)
+        NEXT_IN(2);
     }
 
     return 0;
@@ -329,7 +323,7 @@
 ENCODER(hz)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -389,8 +383,8 @@
 
             REQUIRE_INBUF(2)
             if (c2 == '~') {
-                WRITE1('~')
-                NEXT(2, 1)
+                OUTCHAR('~');
+                NEXT_IN(2);
                 continue;
             }
             else if (c2 == '{' && state->i == 0)
@@ -401,7 +395,7 @@
                 ; /* line-continuation */
             else
                 return 1;
-            NEXT(2, 0);
+            NEXT_IN(2);
             continue;
         }
 
@@ -409,14 +403,13 @@
             return 1;
 
         if (state->i == 0) { /* ASCII mode */
-            WRITE1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
         }
         else { /* GB mode */
             REQUIRE_INBUF(2)
-            REQUIRE_OUTBUF(1)
-            TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
-                NEXT(2, 1)
+            TRYMAP_DEC(gb2312, writer, c, IN2) {
+                NEXT_IN(2);
             }
             else
                 return 1;
diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c
--- a/Modules/cjkcodecs/_codecs_hk.c
+++ b/Modules/cjkcodecs/_codecs_hk.c
@@ -39,7 +39,7 @@
 ENCODER(big5hkscs)
 {
     while (inleft > 0) {
-        ucs4_t c = **inbuf;
+        Py_UCS4 c = **inbuf;
         DBCHAR code;
         Py_ssize_t insize;
 
@@ -103,26 +103,24 @@
 {
     while (inleft > 0) {
         unsigned char c = IN1;
-        ucs4_t decoded;
-
-        REQUIRE_OUTBUF(1)
+        Py_UCS4 decoded;
 
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
         REQUIRE_INBUF(2)
 
         if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) {
-            TRYMAP_DEC(big5, **outbuf, c, IN2) {
-                NEXT(2, 1)
+            TRYMAP_DEC(big5, writer, c, IN2) {
+                NEXT_IN(2);
                 continue;
             }
         }
 
-        TRYMAP_DEC(big5hkscs, decoded, c, IN2)
+        TRYMAP_DEC_CHAR(big5hkscs, decoded, c, IN2)
         {
             int s = BH2S(c, IN2);
             const unsigned char *hintbase;
@@ -146,25 +144,25 @@
                     return MBERR_INTERNAL;
 
             if (hintbase[s >> 3] & (1 << (s & 7))) {
-                    WRITEUCS4(decoded | 0x20000)
-                    NEXT_IN(2)
+                    OUTCHAR(decoded | 0x20000);
+                    NEXT_IN(2);
             }
             else {
-                    OUT1(decoded)
-                    NEXT(2, 1)
+                    OUTCHAR(decoded);
+                    NEXT_IN(2);
             }
             continue;
         }
 
         switch ((c << 8) | IN2) {
-        case 0x8862: WRITE2(0x00ca, 0x0304); break;
-        case 0x8864: WRITE2(0x00ca, 0x030c); break;
-        case 0x88a3: WRITE2(0x00ea, 0x0304); break;
-        case 0x88a5: WRITE2(0x00ea, 0x030c); break;
+        case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
+        case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
+        case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
+        case 0x88a5: OUTCHAR2(0x00ea, 0x030c); break;
         default: return 1;
         }
 
-        NEXT(2, 2) /* all decoded codepoints are pairs, above. */
+        NEXT_IN(2); /* all decoded codepoints are pairs, above. */
     }
 
     return 0;
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -102,8 +102,8 @@
 /*-*- internal data structures -*-*/
 
 typedef int (*iso2022_init_func)(void);
-typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
-typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
+typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data);
+typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length);
 
 struct iso2022_designation {
     unsigned char mark;
@@ -158,7 +158,7 @@
     while (inleft > 0) {
         const struct iso2022_designation *dsg;
         DBCHAR encoded;
-        ucs4_t c = **inbuf;
+        Py_UCS4 c = **inbuf;
         Py_ssize_t insize;
 
         if (c < 0x80) {
@@ -196,9 +196,9 @@
                     length = 2;
 #if Py_UNICODE_SIZE == 2
                 if (length == 2) {
-                    ucs4_t u4in[2];
-                    u4in[0] = (ucs4_t)IN1;
-                    u4in[1] = (ucs4_t)IN2;
+                    Py_UCS4 u4in[2];
+                    u4in[0] = (Py_UCS4)IN1;
+                    u4in[1] = (Py_UCS4)IN2;
                     encoded = dsg->encoder(u4in, &length);
                 } else
                     encoded = dsg->encoder(&c, &length);
@@ -277,7 +277,7 @@
             WRITE2(encoded >> 8, encoded & 0xff)
             NEXT_OUT(2)
         }
-        NEXT_IN(insize)
+        NEXT_IN(insize);
     }
 
     return 0;
@@ -376,45 +376,43 @@
     return 0;
 }
 
-#define ISO8859_7_DECODE(c, assi)                                       \
-    if ((c) < 0xa0) (assi) = (c);                                       \
-    else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0))))          \
-        (assi) = (c);                                                   \
-    else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||              \
-             (0xbffffd77L & (1L << ((c)-0xb4)))))                       \
-        (assi) = 0x02d0 + (c);                                          \
-    else if ((c) == 0xa1) (assi) = 0x2018;                              \
-    else if ((c) == 0xa2) (assi) = 0x2019;                              \
-    else if ((c) == 0xaf) (assi) = 0x2015;
+#define ISO8859_7_DECODE(c, writer)                               \
+    if ((c) < 0xa0) OUTCHAR(c);                                       \
+    else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0))))    \
+        OUTCHAR(c);                                                   \
+    else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||        \
+             (0xbffffd77L & (1L << ((c)-0xb4)))))                 \
+        OUTCHAR(0x02d0 + (c));                                        \
+    else if ((c) == 0xa1) OUTCHAR(0x2018);                            \
+    else if ((c) == 0xa2) OUTCHAR(0x2019);                            \
+    else if ((c) == 0xaf) OUTCHAR(0x2015);
 
 static Py_ssize_t
 iso2022processg2(const void *config, MultibyteCodec_State *state,
                  const unsigned char **inbuf, Py_ssize_t *inleft,
-                 Py_UNICODE **outbuf, Py_ssize_t *outleft)
+                 _PyUnicodeWriter *writer)
 {
     /* not written to use encoder, decoder functions because only few
      * encodings use G2 designations in CJKCodecs */
     if (STATE_G2 == CHARSET_ISO8859_1) {
         if (IN3 < 0x80)
-            OUT1(IN3 + 0x80)
+            OUTCHAR(IN3 + 0x80);
         else
             return 3;
     }
     else if (STATE_G2 == CHARSET_ISO8859_7) {
-        ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
+        ISO8859_7_DECODE(IN3 ^ 0x80, writer)
         else return 3;
     }
     else if (STATE_G2 == CHARSET_ASCII) {
         if (IN3 & 0x80) return 3;
-        else **outbuf = IN3;
+        else OUTCHAR(IN3);
     }
     else
         return MBERR_INTERNAL;
 
     (*inbuf) += 3;
     *inleft -= 3;
-    (*outbuf) += 1;
-    *outleft -= 1;
     return 0;
 }
 
@@ -429,8 +427,8 @@
         if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
             /* ESC throughout mode:
              * for non-iso2022 escape sequences */
-            WRITE1(c) /* assume as ISO-8859-1 */
-            NEXT(1, 1)
+            OUTCHAR(c); /* assume as ISO-8859-1 */
+            NEXT_IN(1);
             if (IS_ESCEND(c)) {
                 STATE_CLEARFLAG(F_ESCTHROUGHOUT)
             }
@@ -449,32 +447,32 @@
             else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
                 REQUIRE_INBUF(3)
                 err = iso2022processg2(config, state,
-                    inbuf, &inleft, outbuf, &outleft);
+                                       inbuf, &inleft, writer);
                 if (err != 0)
                     return err;
             }
             else {
-                WRITE1(ESC)
+                OUTCHAR(ESC);
                 STATE_SETFLAG(F_ESCTHROUGHOUT)
-                NEXT(1, 1)
+                NEXT_IN(1);
             }
             break;
         case SI:
             if (CONFIG_ISSET(NO_SHIFT))
                 goto bypass;
             STATE_CLEARFLAG(F_SHIFTED)
-            NEXT_IN(1)
+            NEXT_IN(1);
             break;
         case SO:
             if (CONFIG_ISSET(NO_SHIFT))
                 goto bypass;
             STATE_SETFLAG(F_SHIFTED)
-            NEXT_IN(1)
+            NEXT_IN(1);
             break;
         case LF:
             STATE_CLEARFLAG(F_SHIFTED)
-            WRITE1(LF)
-            NEXT(1, 1)
+            OUTCHAR(LF);
+            NEXT_IN(1);
             break;
         default:
             if (c < 0x20) /* C0 */
@@ -484,7 +482,7 @@
             else {
                 const struct iso2022_designation *dsg;
                 unsigned char charset;
-                ucs4_t decoded;
+                Py_UCS4 decoded;
 
                 if (STATE_GETFLAG(F_SHIFTED))
                     charset = STATE_G1;
@@ -492,8 +490,8 @@
                     charset = STATE_G0;
 
                 if (charset == CHARSET_ASCII) {
-bypass:                                 WRITE1(c)
-                                        NEXT(1, 1)
+bypass:                                 OUTCHAR(c);
+                                        NEXT_IN(1);
                                         break;
                                 }
 
@@ -518,17 +516,15 @@
                                         return dsg->width;
 
                                 if (decoded < 0x10000) {
-                                        WRITE1(decoded)
-                                        NEXT_OUT(1)
+                                        OUTCHAR(decoded);
                                 }
                                 else if (decoded < 0x30000) {
-                                        WRITEUCS4(decoded)
+                                        OUTCHAR(decoded);
                                 }
                                 else { /* JIS X 0213 pairs */
-                    WRITE2(decoded >> 16, decoded & 0xffff)
-                    NEXT_OUT(2)
+                    OUTCHAR2(decoded >> 16, decoded & 0xffff);
                 }
-                NEXT_IN(dsg->width)
+                NEXT_IN(dsg->width);
             }
             break;
         }
@@ -577,18 +573,18 @@
     return 0;
 }
 
-static ucs4_t
+static Py_UCS4
 ksx1001_decoder(const unsigned char *data)
 {
-    ucs4_t u;
-    TRYMAP_DEC(ksx1001, u, data[0], data[1])
+    Py_UCS4 u;
+    TRYMAP_DEC_CHAR(ksx1001, u, data[0], data[1])
         return u;
     else
         return MAP_UNMAPPABLE;
 }
 
 static DBCHAR
-ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
+ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded;
     assert(*length == 1);
@@ -613,20 +609,20 @@
     return 0;
 }
 
-static ucs4_t
+static Py_UCS4
 jisx0208_decoder(const unsigned char *data)
 {
-    ucs4_t u;
+    Py_UCS4 u;
     if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
         return 0xff3c;
-    else TRYMAP_DEC(jisx0208, u, data[0], data[1])
+    else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1])
         return u;
     else
         return MAP_UNMAPPABLE;
 }
 
 static DBCHAR
-jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded;
     assert(*length == 1);
@@ -654,18 +650,18 @@
     return 0;
 }
 
-static ucs4_t
+static Py_UCS4
 jisx0212_decoder(const unsigned char *data)
 {
-    ucs4_t u;
-    TRYMAP_DEC(jisx0212, u, data[0], data[1])
+    Py_UCS4 u;
+    TRYMAP_DEC_CHAR(jisx0212, u, data[0], data[1])
         return u;
     else
         return MAP_UNMAPPABLE;
 }
 
 static DBCHAR
-jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded;
     assert(*length == 1);
@@ -705,30 +701,30 @@
 }
 
 #define config ((void *)2000)
-static ucs4_t
+static Py_UCS4
 jisx0213_2000_1_decoder(const unsigned char *data)
 {
-    ucs4_t u;
+    Py_UCS4 u;
     EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
     else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
         return 0xff3c;
-    else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
-    else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
-    else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+    else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
+    else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
+    else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
         u |= 0x20000;
-    else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+    else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
     else
         return MAP_UNMAPPABLE;
     return u;
 }
 
-static ucs4_t
+static Py_UCS4
 jisx0213_2000_2_decoder(const unsigned char *data)
 {
-    ucs4_t u;
-    EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
-    TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
-    else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+    Py_UCS4 u;
+    EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1])
+    TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
+    else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
         u |= 0x20000;
     else
         return MAP_UNMAPPABLE;
@@ -736,28 +732,28 @@
 }
 #undef config
 
-static ucs4_t
+static Py_UCS4
 jisx0213_2004_1_decoder(const unsigned char *data)
 {
-    ucs4_t u;
+    Py_UCS4 u;
     if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
         return 0xff3c;
-    else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
-    else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
-    else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+    else TRYMAP_DEC_CHAR(jisx0208, u, data[0], data[1]);
+    else TRYMAP_DEC_CHAR(jisx0213_1_bmp, u, data[0], data[1]);
+    else TRYMAP_DEC_CHAR(jisx0213_1_emp, u, data[0], data[1])
         u |= 0x20000;
-    else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+    else TRYMAP_DEC_CHAR(jisx0213_pair, u, data[0], data[1]);
     else
         return MAP_UNMAPPABLE;
     return u;
 }
 
-static ucs4_t
+static Py_UCS4
 jisx0213_2004_2_decoder(const unsigned char *data)
 {
-    ucs4_t u;
-    TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
-    else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+    Py_UCS4 u;
+    TRYMAP_DEC_CHAR(jisx0213_2_bmp, u, data[0], data[1]);
+    else TRYMAP_DEC_CHAR(jisx0213_2_emp, u, data[0], data[1])
         u |= 0x20000;
     else
         return MAP_UNMAPPABLE;
@@ -765,7 +761,7 @@
 }
 
 static DBCHAR
-jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
+jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config)
 {
     DBCHAR coded;
 
@@ -819,7 +815,7 @@
 }
 
 static DBCHAR
-jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
     if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -831,7 +827,7 @@
 }
 
 static DBCHAR
-jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded;
     Py_ssize_t ilength = *length;
@@ -854,7 +850,7 @@
 }
 
 static DBCHAR
-jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
     if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -866,7 +862,7 @@
 }
 
 static DBCHAR
-jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded = jisx0213_encoder(data, length, NULL);
     if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -878,7 +874,7 @@
 }
 
 static DBCHAR
-jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded;
     Py_ssize_t ilength = *length;
@@ -901,7 +897,7 @@
 }
 
 static DBCHAR
-jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded = jisx0213_encoder(data, length, NULL);
     if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
@@ -912,17 +908,17 @@
         return MAP_UNMAPPABLE;
 }
 
-static ucs4_t
+static Py_UCS4
 jisx0201_r_decoder(const unsigned char *data)
 {
-    ucs4_t u;
-    JISX0201_R_DECODE(*data, u)
+    Py_UCS4 u;
+    JISX0201_R_DECODE_CHAR(*data, u)
     else return MAP_UNMAPPABLE;
     return u;
 }
 
 static DBCHAR
-jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded;
     JISX0201_R_ENCODE(*data, coded)
@@ -930,17 +926,17 @@
     return coded;
 }
 
-static ucs4_t
+static Py_UCS4
 jisx0201_k_decoder(const unsigned char *data)
 {
-    ucs4_t u;
-    JISX0201_K_DECODE(*data ^ 0x80, u)
+    Py_UCS4 u;
+    JISX0201_K_DECODE_CHAR(*data ^ 0x80, u)
     else return MAP_UNMAPPABLE;
     return u;
 }
 
 static DBCHAR
-jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)
+jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded;
     JISX0201_K_ENCODE(*data, coded)
@@ -961,18 +957,18 @@
     return 0;
 }
 
-static ucs4_t
+static Py_UCS4
 gb2312_decoder(const unsigned char *data)
 {
-    ucs4_t u;
-    TRYMAP_DEC(gb2312, u, data[0], data[1])
+    Py_UCS4 u;
+    TRYMAP_DEC_CHAR(gb2312, u, data[0], data[1])
         return u;
     else
         return MAP_UNMAPPABLE;
 }
 
 static DBCHAR
-gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
+gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     DBCHAR coded;
     assert(*length == 1);
@@ -986,14 +982,14 @@
 }
 
 
-static ucs4_t
+static Py_UCS4
 dummy_decoder(const unsigned char *data)
 {
     return MAP_UNMAPPABLE;
 }
 
 static DBCHAR
-dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
+dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length)
 {
     return MAP_UNMAPPABLE;
 }
diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c
--- a/Modules/cjkcodecs/_codecs_jp.c
+++ b/Modules/cjkcodecs/_codecs_jp.c
@@ -20,7 +20,7 @@
 ENCODER(cp932)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
         unsigned char c1, c2;
 
@@ -66,8 +66,8 @@
         }
         else if (c >= 0xe000 && c < 0xe758) {
             /* User-defined area */
-            c1 = (Py_UNICODE)(c - 0xe000) / 188;
-            c2 = (Py_UNICODE)(c - 0xe000) % 188;
+            c1 = (Py_UCS4)(c - 0xe000) / 188;
+            c2 = (Py_UCS4)(c - 0xe000) % 188;
             OUT1(c1 + 0xf0)
             OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
         }
@@ -85,31 +85,30 @@
     while (inleft > 0) {
         unsigned char c = IN1, c2;
 
-        REQUIRE_OUTBUF(1)
         if (c <= 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
         else if (c >= 0xa0 && c <= 0xdf) {
             if (c == 0xa0)
-                OUT1(0xf8f0) /* half-width katakana */
+                OUTCHAR(0xf8f0); /* half-width katakana */
             else
-                OUT1(0xfec0 + c)
-            NEXT(1, 1)
+                OUTCHAR(0xfec0 + c);
+            NEXT_IN(1);
             continue;
         }
         else if (c >= 0xfd/* && c <= 0xff*/) {
             /* Windows compatibility */
-            OUT1(0xf8f1 - 0xfd + c)
-            NEXT(1, 1)
+            OUTCHAR(0xf8f1 - 0xfd + c);
+            NEXT_IN(1);
             continue;
         }
 
         REQUIRE_INBUF(2)
         c2 = IN2;
 
-        TRYMAP_DEC(cp932ext, **outbuf, c, c2);
+        TRYMAP_DEC(cp932ext, writer, c, c2);
         else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
             if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
                 return 1;
@@ -119,21 +118,21 @@
             c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
             c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
 
-            TRYMAP_DEC(jisx0208, **outbuf, c, c2);
+            TRYMAP_DEC(jisx0208, writer, c, c2);
             else return 1;
         }
         else if (c >= 0xf0 && c <= 0xf9) {
             if ((c2 >= 0x40 && c2 <= 0x7e) ||
                 (c2 >= 0x80 && c2 <= 0xfc))
-                OUT1(0xe000 + 188 * (c - 0xf0) +
-                     (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
+                OUTCHAR(0xe000 + 188 * (c - 0xf0) +
+                    (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41));
             else
                 return 1;
         }
         else
             return 1;
 
-        NEXT(2, 1)
+        NEXT_IN(2);
     }
 
     return 0;
@@ -147,7 +146,7 @@
 ENCODER(euc_jis_2004)
 {
     while (inleft > 0) {
-        ucs4_t c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
         Py_ssize_t insize;
 
@@ -235,13 +234,11 @@
 {
     while (inleft > 0) {
         unsigned char c = IN1;
-        ucs4_t code;
-
-        REQUIRE_OUTBUF(1)
+        Py_UCS4 code;
 
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
@@ -252,8 +249,8 @@
             REQUIRE_INBUF(2)
             c2 = IN2;
             if (c2 >= 0xa1 && c2 <= 0xdf) {
-                OUT1(0xfec0 + c2)
-                NEXT(2, 1)
+                OUTCHAR(0xfec0 + c2);
+                NEXT_IN(2);
             }
             else
                 return 1;
@@ -266,16 +263,16 @@
             c3 = IN3 ^ 0x80;
 
             /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
-            EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
-            else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
-            else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
-                WRITEUCS4(EMPBASE | code)
-                NEXT_IN(3)
+            EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c2, c3)
+            else TRYMAP_DEC(jisx0213_2_bmp, writer, c2, c3) ;
+            else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c2, c3) {
+                OUTCHAR(EMPBASE | code);
+                NEXT_IN(3);
                 continue;
             }
-            else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
+            else TRYMAP_DEC(jisx0212, writer, c2, c3) ;
             else return 1;
-            NEXT(3, 1)
+            NEXT_IN(3);
         }
         else {
             unsigned char c2;
@@ -285,23 +282,23 @@
             c2 = IN2 ^ 0x80;
 
             /* JIS X 0213 Plane 1 */
-            EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
-            else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
-            else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
-            else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
-            else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
-            else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
-                WRITEUCS4(EMPBASE | code)
-                NEXT_IN(2)
+            EMULATE_JISX0213_2000_DECODE_PLANE1(writer, c, c2)
+            else if (c == 0x21 && c2 == 0x40) OUTCHAR(0xff3c);
+            else if (c == 0x22 && c2 == 0x32) OUTCHAR(0xff5e);
+            else TRYMAP_DEC(jisx0208, writer, c, c2);
+            else TRYMAP_DEC(jisx0213_1_bmp, writer, c, c2);
+            else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c, c2) {
+                OUTCHAR(EMPBASE | code);
+                NEXT_IN(2);
                 continue;
             }
-            else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
-                WRITE2(code >> 16, code & 0xffff)
-                NEXT(2, 2)
+            else TRYMAP_DEC_CHAR(jisx0213_pair, code, c, c2) {
+                OUTCHAR2(code >> 16, code & 0xffff);
+                NEXT_IN(2);
                 continue;
             }
             else return 1;
-            NEXT(2, 1)
+            NEXT_IN(2);
         }
     }
 
@@ -316,7 +313,7 @@
 ENCODER(euc_jp)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -369,11 +366,9 @@
     while (inleft > 0) {
         unsigned char c = IN1;
 
-        REQUIRE_OUTBUF(1)
-
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
@@ -384,8 +379,8 @@
             REQUIRE_INBUF(2)
             c2 = IN2;
             if (c2 >= 0xa1 && c2 <= 0xdf) {
-                OUT1(0xfec0 + c2)
-                NEXT(2, 1)
+                OUTCHAR(0xfec0 + c2);
+                NEXT_IN(2);
             }
             else
                 return 1;
@@ -397,8 +392,8 @@
             c2 = IN2;
             c3 = IN3;
             /* JIS X 0212 */
-            TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
-                NEXT(3, 1)
+            TRYMAP_DEC(jisx0212, writer, c2 ^ 0x80, c3 ^ 0x80) {
+                NEXT_IN(3);
             }
             else
                 return 1;
@@ -412,13 +407,13 @@
 #ifndef STRICT_BUILD
             if (c == 0xa1 && c2 == 0xc0)
                 /* FULL-WIDTH REVERSE SOLIDUS */
-                **outbuf = 0xff3c;
+                OUTCHAR(0xff3c);
             else
 #endif
-                TRYMAP_DEC(jisx0208, **outbuf,
+                TRYMAP_DEC(jisx0208, writer,
                            c ^ 0x80, c2 ^ 0x80) ;
             else return 1;
-            NEXT(2, 1)
+            NEXT_IN(2);
         }
     }
 
@@ -433,7 +428,7 @@
 ENCODER(shift_jis)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
         unsigned char c1, c2;
 
@@ -488,14 +483,12 @@
     while (inleft > 0) {
         unsigned char c = IN1;
 
-        REQUIRE_OUTBUF(1)
-
 #ifdef STRICT_BUILD
-        JISX0201_R_DECODE(c, **outbuf)
+        JISX0201_R_DECODE(c, writer)
 #else
-        if (c < 0x80) **outbuf = c;
+        if (c < 0x80) OUTCHAR(c);
 #endif
-        else JISX0201_K_DECODE(c, **outbuf)
+        else JISX0201_K_DECODE(c, writer)
         else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
             unsigned char c1, c2;
 
@@ -512,13 +505,13 @@
 #ifndef STRICT_BUILD
             if (c1 == 0x21 && c2 == 0x40) {
                 /* FULL-WIDTH REVERSE SOLIDUS */
-                OUT1(0xff3c)
-                NEXT(2, 1)
+                OUTCHAR(0xff3c);
+                NEXT_IN(2);
                 continue;
             }
 #endif
-            TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
-                NEXT(2, 1)
+            TRYMAP_DEC(jisx0208, writer, c1, c2) {
+                NEXT_IN(2);
                 continue;
             }
             else
@@ -527,7 +520,7 @@
         else
             return 1;
 
-        NEXT(1, 1) /* JIS X 0201 */
+        NEXT_IN(1); /* JIS X 0201 */
     }
 
     return 0;
@@ -541,7 +534,7 @@
 ENCODER(shift_jis_2004)
 {
     while (inleft > 0) {
-        ucs4_t c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code = NOCHAR;
         int c1, c2;
         Py_ssize_t insize;
@@ -636,11 +629,10 @@
     while (inleft > 0) {
         unsigned char c = IN1;
 
-        REQUIRE_OUTBUF(1)
-        JISX0201_DECODE(c, **outbuf)
+        JISX0201_DECODE(c, writer)
         else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
             unsigned char c1, c2;
-            ucs4_t code;
+            Py_UCS4 code;
 
             REQUIRE_INBUF(2)
             c2 = IN2;
@@ -654,50 +646,47 @@
 
             if (c1 < 0x5e) { /* Plane 1 */
                 c1 += 0x21;
-                EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
+                EMULATE_JISX0213_2000_DECODE_PLANE1(writer,
                                 c1, c2)
-                else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
-                    NEXT_OUT(1)
+                else TRYMAP_DEC(jisx0208, writer, c1, c2) {
                 }
-                else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
+                else TRYMAP_DEC(jisx0213_1_bmp, writer,
                                 c1, c2) {
-                    NEXT_OUT(1)
                 }
-                else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
-                    WRITEUCS4(EMPBASE | code)
+                else TRYMAP_DEC_CHAR(jisx0213_1_emp, code, c1, c2) {
+                    OUTCHAR(EMPBASE | code);
                 }
-                else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
-                    WRITE2(code >> 16, code & 0xffff)
-                    NEXT_OUT(2)
+                else TRYMAP_DEC_CHAR(jisx0213_pair, code, c1, c2) {
+                    OUTCHAR2(code >> 16, code & 0xffff);
                 }
                 else
                     return 1;
-                NEXT_IN(2)
+                NEXT_IN(2);
             }
             else { /* Plane 2 */
                 if (c1 >= 0x67) c1 += 0x07;
                 else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
                 else c1 -= 0x3d;
 
-                EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
+                EMULATE_JISX0213_2000_DECODE_PLANE2(writer,
                                 c1, c2)
-                else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
-                                c1, c2) ;
-                else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
-                    WRITEUCS4(EMPBASE | code)
-                    NEXT_IN(2)
+                else TRYMAP_DEC(jisx0213_2_bmp, writer,
+                                c1, c2) {
+                } else TRYMAP_DEC_CHAR(jisx0213_2_emp, code, c1, c2) {
+                    OUTCHAR(EMPBASE | code);
+                    NEXT_IN(2);
                     continue;
                 }
                 else
                     return 1;
-                NEXT(2, 1)
+                NEXT_IN(2);
             }
             continue;
         }
         else
             return 1;
 
-        NEXT(1, 1) /* JIS X 0201 */
+        NEXT_IN(1); /* JIS X 0201 */
     }
 
     return 0;
diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c
--- a/Modules/cjkcodecs/_codecs_kr.c
+++ b/Modules/cjkcodecs/_codecs_kr.c
@@ -34,7 +34,7 @@
 ENCODER(euc_kr)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -104,11 +104,9 @@
     while (inleft > 0) {
         unsigned char c = IN1;
 
-        REQUIRE_OUTBUF(1)
-
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
@@ -145,11 +143,11 @@
             if (cho == NONE || jung == NONE || jong == NONE)
                 return 1;
 
-            OUT1(0xac00 + cho*588 + jung*28 + jong);
-            NEXT(8, 1)
+            OUTCHAR(0xac00 + cho*588 + jung*28 + jong);
+            NEXT_IN(8);
         }
-        else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
-            NEXT(2, 1)
+        else TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80) {
+            NEXT_IN(2);
         }
         else
             return 1;
@@ -167,7 +165,7 @@
 ENCODER(cp949)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -197,20 +195,18 @@
     while (inleft > 0) {
         unsigned char c = IN1;
 
-        REQUIRE_OUTBUF(1)
-
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
         REQUIRE_INBUF(2)
-        TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
-        else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
+        TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80);
+        else TRYMAP_DEC(cp949ext, writer, c, IN2);
         else return 1;
 
-        NEXT(2, 1)
+        NEXT_IN(2);
     }
 
     return 0;
@@ -251,7 +247,7 @@
 ENCODER(johab)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -350,11 +346,9 @@
     while (inleft > 0) {
         unsigned char    c = IN1, c2;
 
-        REQUIRE_OUTBUF(1)
-
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
@@ -381,33 +375,33 @@
             if (i_cho == FILL) {
                 if (i_jung == FILL) {
                     if (i_jong == FILL)
-                        OUT1(0x3000)
+                        OUTCHAR(0x3000);
                     else
-                        OUT1(0x3100 |
-                          johabjamo_jongseong[c_jong])
+                        OUTCHAR(0x3100 |
+                            johabjamo_jongseong[c_jong]);
                 }
                 else {
                     if (i_jong == FILL)
-                        OUT1(0x3100 |
-                          johabjamo_jungseong[c_jung])
+                        OUTCHAR(0x3100 |
+                            johabjamo_jungseong[c_jung]);
                     else
                         return 1;
                 }
             } else {
                 if (i_jung == FILL) {
                     if (i_jong == FILL)
-                        OUT1(0x3100 |
-                          johabjamo_choseong[c_cho])
+                        OUTCHAR(0x3100 |
+                            johabjamo_choseong[c_cho]);
                     else
                         return 1;
                 }
                 else
-                    OUT1(0xac00 +
-                         i_cho * 588 +
-                         i_jung * 28 +
-                         (i_jong == FILL ? 0 : i_jong))
+                    OUTCHAR(0xac00 +
+                        i_cho * 588 +
+                        i_jung * 28 +
+                        (i_jong == FILL ? 0 : i_jong));
             }
-            NEXT(2, 1)
+            NEXT_IN(2);
         } else {
             /* KS X 1001 except hangul jamos and syllables */
             if (c == 0xdf || c > 0xf9 ||
@@ -424,9 +418,9 @@
                 t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
                 t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
 
-                TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
+                TRYMAP_DEC(ksx1001, writer, t1, t2);
                 else return 1;
-                NEXT(2, 1)
+                NEXT_IN(2);
             }
         }
     }
diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c
--- a/Modules/cjkcodecs/_codecs_tw.c
+++ b/Modules/cjkcodecs/_codecs_tw.c
@@ -14,7 +14,7 @@
 ENCODER(big5)
 {
     while (inleft > 0) {
-        Py_UNICODE c = **inbuf;
+        Py_UCS4 c = **inbuf;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -43,17 +43,15 @@
     while (inleft > 0) {
         unsigned char c = IN1;
 
-        REQUIRE_OUTBUF(1)
-
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
         REQUIRE_INBUF(2)
-        TRYMAP_DEC(big5, **outbuf, c, IN2) {
-            NEXT(2, 1)
+        TRYMAP_DEC(big5, writer, c, IN2) {
+            NEXT_IN(2);
         }
         else return 1;
     }
@@ -69,7 +67,7 @@
 ENCODER(cp950)
 {
     while (inleft > 0) {
-        Py_UNICODE c = IN1;
+        Py_UCS4 c = IN1;
         DBCHAR code;
 
         if (c < 0x80) {
@@ -97,21 +95,19 @@
     while (inleft > 0) {
         unsigned char c = IN1;
 
-        REQUIRE_OUTBUF(1)
-
         if (c < 0x80) {
-            OUT1(c)
-            NEXT(1, 1)
+            OUTCHAR(c);
+            NEXT_IN(1);
             continue;
         }
 
         REQUIRE_INBUF(2)
 
-        TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
-        else TRYMAP_DEC(big5, **outbuf, c, IN2);
+        TRYMAP_DEC(cp950ext, writer, c, IN2);
+        else TRYMAP_DEC(big5, writer, c, IN2);
         else return 1;
 
-        NEXT(2, 1)
+        NEXT_IN(2);
     }
 
     return 0;
diff --git a/Modules/cjkcodecs/alg_jisx0201.h b/Modules/cjkcodecs/alg_jisx0201.h
--- a/Modules/cjkcodecs/alg_jisx0201.h
+++ b/Modules/cjkcodecs/alg_jisx0201.h
@@ -10,15 +10,24 @@
     JISX0201_R_ENCODE(c, assi)                          \
     else JISX0201_K_ENCODE(c, assi)
 
-#define JISX0201_R_DECODE(c, assi)                      \
+#define JISX0201_R_DECODE_CHAR(c, assi)                 \
     if ((c) < 0x5c) (assi) = (c);                       \
     else if ((c) == 0x5c) (assi) = 0x00a5;              \
     else if ((c) < 0x7e) (assi) = (c);                  \
     else if ((c) == 0x7e) (assi) = 0x203e;              \
     else if ((c) == 0x7f) (assi) = 0x7f;
-#define JISX0201_K_DECODE(c, assi)                      \
+#define JISX0201_R_DECODE(c, writer)                    \
+    if ((c) < 0x5c) OUTCHAR(c);                             \
+    else if ((c) == 0x5c) OUTCHAR(0x00a5);                  \
+    else if ((c) < 0x7e) OUTCHAR(c);                        \
+    else if ((c) == 0x7e) OUTCHAR(0x203e);                  \
+    else if ((c) == 0x7f) OUTCHAR(0x7f);
+#define JISX0201_K_DECODE(c, writer)                    \
     if ((c) >= 0xa1 && (c) <= 0xdf)                     \
-    (assi) = 0xfec0 + (c);
-#define JISX0201_DECODE(c, assi)                        \
-    JISX0201_R_DECODE(c, assi)                          \
-    else JISX0201_K_DECODE(c, assi)
+        OUTCHAR(0xfec0 + (c));
+#define JISX0201_K_DECODE_CHAR(c, assi)                 \
+    if ((c) >= 0xa1 && (c) <= 0xdf)                     \
+        (assi) = 0xfec0 + (c);
+#define JISX0201_DECODE(c, writer)                      \
+    JISX0201_R_DECODE(c, writer)                        \
+    else JISX0201_K_DECODE(c, writer)
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -33,7 +33,7 @@
 typedef struct dbcs_index decode_map;
 
 struct widedbcs_index {
-    const ucs4_t *map;
+    const Py_UCS4 *map;
     unsigned char bottom, top;
 };
 typedef struct widedbcs_index widedecode_map;
@@ -56,7 +56,7 @@
 };
 
 struct pair_encodemap {
-    ucs4_t uniseq;
+    Py_UCS4 uniseq;
     DBCHAR code;
 };
 
@@ -86,7 +86,7 @@
     static Py_ssize_t encoding##_decode(                                \
         MultibyteCodec_State *state, const void *config,                \
         const unsigned char **inbuf, Py_ssize_t inleft,                 \
-        Py_UNICODE **outbuf, Py_ssize_t outleft)
+        _PyUnicodeWriter *writer)
 #define DECODER_RESET(encoding)                                         \
     static Py_ssize_t encoding##_decode_reset(                          \
         MultibyteCodec_State *state, const void *config)
@@ -101,13 +101,15 @@
 #endif
 
 #define NEXT_IN(i)                              \
-    (*inbuf) += (i);                            \
-    (inleft) -= (i);
+    do {                                        \
+        (*inbuf) += (i);                        \
+        (inleft) -= (i);                        \
+    } while (0)
 #define NEXT_OUT(o)                             \
     (*outbuf) += (o);                           \
     (outleft) -= (o);
 #define NEXT(i, o)                              \
-    NEXT_IN(i) NEXT_OUT(o)
+    NEXT_IN(i); NEXT_OUT(o)
 
 #define REQUIRE_INBUF(n)                        \
     if (inleft < (n))                           \
@@ -121,6 +123,23 @@
 #define IN3 ((*inbuf)[2])
 #define IN4 ((*inbuf)[3])
 
+#define OUTCHAR(c)                                                         \
+    do {  /* append c; on writer failure, return MBERR_TOOSMALL */         \
+        if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0)                   \
+            return MBERR_TOOSMALL;                                         \
+    } while (0)
+
+#define OUTCHAR2(c1, c2)                                                   \
+    do {  /* append c1 then c2, reserving room for both at once */         \
+        Py_UCS4 _c1 = (c1);  /* evaluate each argument exactly once */     \
+        Py_UCS4 _c2 = (c2);                                                \
+        if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, _c2)) < 0)     \
+            return MBERR_TOOSMALL;                                         \
+        PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1);     \
+        PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \
+        writer->pos += 2;                                                  \
+    } while (0)
+
 #define OUT1(c) ((*outbuf)[0]) = (c);
 #define OUT2(c) ((*outbuf)[1]) = (c);
 #define OUT3(c) ((*outbuf)[2]) = (c);
@@ -145,19 +164,6 @@
     (*outbuf)[2] = (c3);        \
     (*outbuf)[3] = (c4);
 
-#if Py_UNICODE_SIZE == 2
-# define WRITEUCS4(c)                                           \
-    REQUIRE_OUTBUF(2)                                           \
-    (*outbuf)[0] = Py_UNICODE_HIGH_SURROGATE(c);                \
-    (*outbuf)[1] = Py_UNICODE_LOW_SURROGATE(c);                 \
-    NEXT_OUT(2)
-#else
-# define WRITEUCS4(c)                                           \
-    REQUIRE_OUTBUF(1)                                           \
-    **outbuf = (Py_UNICODE)(c);                                 \
-    NEXT_OUT(1)
-#endif
-
 #define _TRYMAP_ENC(m, assi, val)                               \
     ((m)->map != NULL && (val) >= (m)->bottom &&                \
         (val)<= (m)->top && ((assi) = (m)->map[(val) -          \
@@ -167,24 +173,41 @@
 #define TRYMAP_ENC(charset, assi, uni)                          \
     if TRYMAP_ENC_COND(charset, assi, uni)
 
-#define _TRYMAP_DEC(m, assi, val)                               \
-    ((m)->map != NULL && (val) >= (m)->bottom &&                \
-        (val)<= (m)->top && ((assi) = (m)->map[(val) -          \
-        (m)->bottom]) != UNIINV)
-#define TRYMAP_DEC(charset, assi, c1, c2)                       \
-    if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
+Py_LOCAL_INLINE(int)
+_TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
+{
+    /* UNIINV reports both "no mapping" and "the write failed". */
+    if (c != UNIINV && _PyUnicodeWriter_WriteChar(writer, c) >= 0)
+        return c;
+    return UNIINV;
+}
 
-#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val)      \
-    ((m)->map != NULL && (val) >= (m)->bottom &&                \
-        (val)<= (m)->top &&                                     \
-        ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
+#define _TRYMAP_DEC(m, writer, val)                             \
+    ((m)->map != NULL &&                                        \
+     (val) >= (m)->bottom &&                                    \
+     (val)<= (m)->top &&                                        \
+     _TRYMAP_DEC_WRITE(writer, (m)->map[(val) - (m)->bottom]) != UNIINV)
+#define _TRYMAP_DEC_CHAR(m, assi, val)                             \
+    ((m)->map != NULL &&                                        \
+     (val) >= (m)->bottom &&                                    \
+     (val)<= (m)->top &&                                        \
+     ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV)
+#define TRYMAP_DEC(charset, writer, c1, c2)                     \
+    if _TRYMAP_DEC(&charset##_decmap[c1], writer, c2)
+#define TRYMAP_DEC_CHAR(charset, assi, c1, c2)                     \
+    if _TRYMAP_DEC_CHAR(&charset##_decmap[c1], assi, c2)
+
+#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val)        \
+    ((m)->map != NULL && (val) >= (m)->bottom &&                  \
+        (val)<= (m)->top &&                                       \
+        ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 &&  \
         (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
         (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
 #define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
     if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
                        assplane, asshi, asslo, (uni) & 0xff)
-#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2)         \
-    if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
+#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2)         \
+    if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
 
 #if Py_UNICODE_SIZE == 2
 #define DECODE_SURROGATE(c)                                     \
@@ -323,7 +346,7 @@
                 const struct pair_encodemap *haystack, int haystacksize)
 {
     int pos, min, max;
-    ucs4_t value = body << 16 | modifier;
+    Py_UCS4 value = body << 16 | modifier;
 
     min = 0;
     max = haystacksize;
diff --git a/Modules/cjkcodecs/emu_jisx0213_2000.h b/Modules/cjkcodecs/emu_jisx0213_2000.h
--- a/Modules/cjkcodecs/emu_jisx0213_2000.h
+++ b/Modules/cjkcodecs/emu_jisx0213_2000.h
@@ -38,6 +38,9 @@
                      ((c1) == 0x7E && (c2) == 0x7E)))                   \
         return EMULATE_JISX0213_2000_DECODE_INVALID;
 
-#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2)               \
+#define EMULATE_JISX0213_2000_DECODE_PLANE2(writer, c1, c2)               \
+    if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B)         \
+        OUTCHAR(0x9B1D);
+#define EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(assi, c1, c2)               \
     if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B)         \
         (assi) = 0x9B1D;
diff --git a/Modules/cjkcodecs/mappings_cn.h b/Modules/cjkcodecs/mappings_cn.h
--- a/Modules/cjkcodecs/mappings_cn.h
+++ b/Modules/cjkcodecs/mappings_cn.h
@@ -4049,7 +4049,7 @@
 
 
 static const struct _gb18030_to_unibmp_ranges {
-    Py_UNICODE   first, last;
+    Py_UCS4   first, last;
     DBCHAR       base;
 } gb18030_to_unibmp_ranges[] = {
 {128,163,0},{165,166,36},{169,175,38},{178,182,45},{184,214,50},{216,223,81},{
diff --git a/Modules/cjkcodecs/mappings_jisx0213_pair.h b/Modules/cjkcodecs/mappings_jisx0213_pair.h
--- a/Modules/cjkcodecs/mappings_jisx0213_pair.h
+++ b/Modules/cjkcodecs/mappings_jisx0213_pair.h
@@ -3,7 +3,7 @@
 static const struct widedbcs_index *jisx0213_pair_decmap;
 static const struct pair_encodemap *jisx0213_pair_encmap;
 #else
-static const ucs4_t __jisx0213_pair_decmap[49] = {
+static const Py_UCS4 __jisx0213_pair_decmap[49] = {
 810234010,810365082,810496154,810627226,810758298,816525466,816656538,
 816787610,816918682,817049754,817574042,818163866,818426010,838283418,
 15074048,U,U,U,39060224,39060225,42730240,42730241,39387904,39387905,39453440,
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -17,8 +17,8 @@
 
 typedef struct {
     const unsigned char *inbuf, *inbuf_top, *inbuf_end;
-    Py_UNICODE          *outbuf, *outbuf_end;
-    PyObject            *excobj, *outobj;
+    PyObject            *excobj;
+    _PyUnicodeWriter    writer;
 } MultibyteDecodeBuffer;
 
 PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
@@ -197,29 +197,6 @@
             goto errorexit;                                             \
 }
 
-static int
-expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
-{
-    Py_ssize_t orgpos, orgsize;
-
-    orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
-    orgsize = PyUnicode_GET_SIZE(buf->outobj);
-    if (PyUnicode_Resize(&buf->outobj, orgsize + (
-        esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
-        return -1;
-
-    buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
-    buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
-                      + PyUnicode_GET_SIZE(buf->outobj);
-
-    return 0;
-}
-#define REQUIRE_DECODEBUFFER(buf, s) {                                  \
-    if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)             \
-        if (expand_decodebuffer(buf, s) == -1)                          \
-            goto errorexit;                                             \
-}
-
 
 /**
  * MultibyteCodec object
@@ -374,7 +351,7 @@
                         PyObject *errors, Py_ssize_t e)
 {
     PyObject *retobj = NULL, *retuni = NULL;
-    Py_ssize_t retunisize, newpos;
+    Py_ssize_t newpos;
     const char *reason;
     Py_ssize_t esize, start, end;
 
@@ -385,7 +362,6 @@
     else {
         switch (e) {
         case MBERR_TOOSMALL:
-            REQUIRE_DECODEBUFFER(buf, -1);
             return 0; /* retry it */
         case MBERR_TOOFEW:
             reason = "incomplete multibyte sequence";
@@ -403,8 +379,9 @@
     }
 
     if (errors == ERROR_REPLACE) {
-        REQUIRE_DECODEBUFFER(buf, 1);
-        *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
+        if (_PyUnicodeWriter_WriteChar(&buf->writer,
+                                       Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
+            goto errorexit;
     }
     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
         buf->inbuf += esize;
@@ -447,15 +424,8 @@
         goto errorexit;
     }
 
-    if (PyUnicode_AsUnicode(retuni) == NULL)
+    if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
         goto errorexit;
-    retunisize = PyUnicode_GET_SIZE(retuni);
-    if (retunisize > 0) {
-        REQUIRE_DECODEBUFFER(buf, retunisize);
-        memcpy((char *)buf->outbuf, PyUnicode_AS_UNICODE(retuni),
-                        retunisize * Py_UNICODE_SIZE);
-        buf->outbuf += retunisize;
-    }
 
     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
     if (newpos < 0 && !PyErr_Occurred())
@@ -617,10 +587,10 @@
 {
     MultibyteCodec_State state;
     MultibyteDecodeBuffer buf;
-    PyObject *errorcb;
+    PyObject *errorcb, *res;
     Py_buffer pdata;
     const char *data, *errors = NULL;
-    Py_ssize_t datalen, finalsize;
+    Py_ssize_t datalen;
 
     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|z:decode",
                             codeckwarglist, &pdata, &errors))
@@ -640,29 +610,22 @@
         return make_tuple(PyUnicode_New(0, 0), 0);
     }
 
+    _PyUnicodeWriter_Init(&buf.writer, datalen);
     buf.excobj = NULL;
     buf.inbuf = buf.inbuf_top = (unsigned char *)data;
     buf.inbuf_end = buf.inbuf_top + datalen;
-    buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
-    if (buf.outobj == NULL)
-        goto errorexit;
-    buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
-    if (buf.outbuf == NULL)
-        goto errorexit;
-    buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
 
     if (self->codec->decinit != NULL &&
         self->codec->decinit(&state, self->codec->config) != 0)
         goto errorexit;
 
     while (buf.inbuf < buf.inbuf_end) {
-        Py_ssize_t inleft, outleft, r;
+        Py_ssize_t inleft, r;
 
         inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
-        outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
 
         r = self->codec->decode(&state, self->codec->config,
-                        &buf.inbuf, inleft, &buf.outbuf, outleft);
+                        &buf.inbuf, inleft, &buf.writer);
         if (r == 0)
             break;
         else if (multibytecodec_decerror(self->codec, &state,
@@ -670,23 +633,20 @@
             goto errorexit;
     }
 
-    finalsize = (Py_ssize_t)(buf.outbuf -
-                             PyUnicode_AS_UNICODE(buf.outobj));
-
-    if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
-        if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
-            goto errorexit;
+    res = _PyUnicodeWriter_Finish(&buf.writer);
+    if (res == NULL)
+        goto errorexit;
 
     PyBuffer_Release(&pdata);
     Py_XDECREF(buf.excobj);
     ERROR_DECREF(errorcb);
-    return make_tuple(buf.outobj, datalen);
+    return make_tuple(res, datalen);
 
 errorexit:
     PyBuffer_Release(&pdata);
     ERROR_DECREF(errorcb);
     Py_XDECREF(buf.excobj);
-    Py_XDECREF(buf.outobj);
+    _PyUnicodeWriter_Dealloc(&buf.writer);
 
     return NULL;
 }
@@ -859,17 +819,7 @@
 {
     buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
     buf->inbuf_end = buf->inbuf_top + size;
-    if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
-        buf->outobj = PyUnicode_FromUnicode(NULL, size);
-        if (buf->outobj == NULL)
-            return -1;
-        buf->outbuf = PyUnicode_AsUnicode(buf->outobj);
-        if (buf->outbuf == NULL)
-            return -1;
-        buf->outbuf_end = buf->outbuf +
-                          PyUnicode_GET_SIZE(buf->outobj);
-    }
-
+    _PyUnicodeWriter_Init(&buf->writer, size);
     return 0;
 }
 
@@ -878,14 +828,13 @@
                     MultibyteDecodeBuffer *buf)
 {
     while (buf->inbuf < buf->inbuf_end) {
-        Py_ssize_t inleft, outleft;
+        Py_ssize_t inleft;
         Py_ssize_t r;
 
         inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
-        outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
 
         r = ctx->codec->decode(&ctx->state, ctx->codec->config,
-            &buf->inbuf, inleft, &buf->outbuf, outleft);
+            &buf->inbuf, inleft, &buf->writer);
         if (r == 0 || r == MBERR_TOOFEW)
             break;
         else if (multibytecodec_decerror(ctx->codec, &ctx->state,
@@ -1058,8 +1007,9 @@
     MultibyteDecodeBuffer buf;
     char *data, *wdata = NULL;
     Py_buffer pdata;
-    Py_ssize_t wsize, finalsize = 0, size, origpending;
+    Py_ssize_t wsize, size, origpending;
     int final = 0;
+    PyObject *res;
 
     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i:decode",
                     incrementalkwarglist, &pdata, &final))
@@ -1067,7 +1017,8 @@
     data = pdata.buf;
     size = pdata.len;
 
-    buf.outobj = buf.excobj = NULL;
+    _PyUnicodeWriter_Init(&buf.writer, 1);
+    buf.excobj = NULL;
     origpending = self->pendingsize;
 
     if (self->pendingsize == 0) {
@@ -1109,23 +1060,22 @@
             goto errorexit;
     }
 
-    finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
-    if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
-        if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
-            goto errorexit;
+    res = _PyUnicodeWriter_Finish(&buf.writer);
+    if (res == NULL)
+        goto errorexit;
 
     PyBuffer_Release(&pdata);
     if (wdata != data)
         PyMem_Del(wdata);
     Py_XDECREF(buf.excobj);
-    return buf.outobj;
+    return res;
 
 errorexit:
     PyBuffer_Release(&pdata);
     if (wdata != NULL && wdata != data)
         PyMem_Del(wdata);
     Py_XDECREF(buf.excobj);
-    Py_XDECREF(buf.outobj);
+    _PyUnicodeWriter_Dealloc(&buf.writer);
     return NULL;
 }
 
@@ -1265,13 +1215,14 @@
                      const char *method, Py_ssize_t sizehint)
 {
     MultibyteDecodeBuffer buf;
-    PyObject *cres;
-    Py_ssize_t rsize, finalsize = 0;
+    PyObject *cres, *res;
+    Py_ssize_t rsize;
 
     if (sizehint == 0)
         return PyUnicode_New(0, 0);
 
-    buf.outobj = buf.excobj = NULL;
+    _PyUnicodeWriter_Init(&buf.writer, 1);
+    buf.excobj = NULL;
     cres = NULL;
 
     for (;;) {
@@ -1340,29 +1291,27 @@
                 goto errorexit;
         }
 
-        finalsize = (Py_ssize_t)(buf.outbuf -
-                        PyUnicode_AS_UNICODE(buf.outobj));
         Py_DECREF(cres);
         cres = NULL;
 
-        if (sizehint < 0 || finalsize != 0 || rsize == 0)
+        if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
             break;
 
         sizehint = 1; /* read 1 more byte and retry */
     }
 
-    if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
-        if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
-            goto errorexit;
+    res = _PyUnicodeWriter_Finish(&buf.writer);
+    if (res == NULL)
+        goto errorexit;
 
     Py_XDECREF(cres);
     Py_XDECREF(buf.excobj);
-    return buf.outobj;
+    return res;
 
 errorexit:
     Py_XDECREF(cres);
     Py_XDECREF(buf.excobj);
-    Py_XDECREF(buf.outobj);
+    _PyUnicodeWriter_Dealloc(&buf.writer);
     return NULL;
 }
 
diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h
--- a/Modules/cjkcodecs/multibytecodec.h
+++ b/Modules/cjkcodecs/multibytecodec.h
@@ -10,12 +10,6 @@
 extern "C" {
 #endif
 
-#ifdef uint32_t
-typedef uint32_t ucs4_t;
-#else
-typedef unsigned int ucs4_t;
-#endif
-
 #ifdef uint16_t
 typedef uint16_t ucs2_t, DBCHAR;
 #else
@@ -27,7 +21,7 @@
     int i;
     unsigned char c[8];
     ucs2_t u2[4];
-    ucs4_t u4[2];
+    Py_UCS4 u4[2];
 } MultibyteCodec_State;
 
 typedef int (*mbcodec_init)(const void *config);
@@ -44,7 +38,7 @@
 typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
                         const void *config,
                         const unsigned char **inbuf, Py_ssize_t inleft,
-                        Py_UNICODE **outbuf, Py_ssize_t outleft);
+                        _PyUnicodeWriter *writer);
 typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
                                  const void *config);
 typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12948,6 +12948,16 @@
 }
 
 int
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
+{   /* Append ch at writer->pos; returns 0, or -1 if Prepare fails. */
+    if (_PyUnicodeWriter_Prepare(writer, 1, ch) < 0)  /* length 1, maxchar ch */
+        return -1;
+    PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch);
+    writer->pos++;  /* step past the character just stored */
+    return 0;
+}
+
+int
 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
 {
     Py_UCS4 maxchar;

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list