[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.50,2.51

M.-A. Lemburg python-dev@python.org
Mon, 17 Jul 2000 11:23:20 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory slayer.i.sourceforge.net:/tmp/cvs-serv8024/Objects

Modified Files:
	unicodeobject.c 
Log Message:
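Fixed problems with the UTF error reporting macros: UTF8_ERROR and UTF16_ERROR are replaced by an errmsg variable plus utf8Error/utf16Error labels in the decoders. Also fixed some formatting (indentation) bugs.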

Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.50
retrieving revision 2.51
diff -C2 -r2.50 -r2.51
*** unicodeobject.c	2000/07/17 09:04:43	2.50
--- unicodeobject.c	2000/07/17 18:23:13	2.51
***************
*** 634,644 ****
  }
  
- #define UTF8_ERROR(details) \
-   do {                                                      \
-       if (utf8_decoding_error(&s, &p, errors, (details)))   \
-           goto onError;                                     \
-       goto nextchar;                                        \
-   } while (0)
- 
  PyObject *PyUnicode_DecodeUTF8(const char *s,
  			       int size,
--- 634,637 ----
***************
*** 649,652 ****
--- 642,646 ----
      PyUnicodeObject *unicode;
      Py_UNICODE *p;
+     const char *errmsg = "";
  
      /* Note: size will always be longer than the resulting Unicode
***************
*** 673,706 ****
          n = utf8_code_length[ch];
  
!         if (s + n > e)
!             UTF8_ERROR("unexpected end of data");
  
          switch (n) {
  
          case 0:
!             UTF8_ERROR("unexpected code byte");
              break;
  
          case 1:
!             UTF8_ERROR("internal error");
              break;
  
          case 2:
!             if ((s[1] & 0xc0) != 0x80) 
!                 UTF8_ERROR("invalid data");
              ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
!             if (ch < 0x80)
!                 UTF8_ERROR("illegal encoding");
  	    else
! 				*p++ = (Py_UNICODE)ch;
              break;
  
          case 3:
              if ((s[1] & 0xc0) != 0x80 || 
!                 (s[2] & 0xc0) != 0x80) 
!                 UTF8_ERROR("invalid data");
              ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
!             if (ch < 0x800 || (ch >= 0xd800 && ch < 0xe000))
!                 UTF8_ERROR("illegal encoding");
  	    else
  				*p++ = (Py_UNICODE)ch;
--- 667,712 ----
          n = utf8_code_length[ch];
  
!         if (s + n > e) {
! 	    errmsg = "unexpected end of data";
! 	    goto utf8Error;
! 	}
  
          switch (n) {
  
          case 0:
!             errmsg = "unexpected code byte";
! 	    goto utf8Error;
              break;
  
          case 1:
!             errmsg = "internal error";
! 	    goto utf8Error;
              break;
  
          case 2:
!             if ((s[1] & 0xc0) != 0x80) {
!                 errmsg = "invalid data";
! 		goto utf8Error;
! 	    }
              ch = ((s[0] & 0x1f) << 6) + (s[1] & 0x3f);
!             if (ch < 0x80) {
!                 errmsg = "illegal encoding";
! 		goto utf8Error;
! 	    }
  	    else
! 		*p++ = (Py_UNICODE)ch;
              break;
  
          case 3:
              if ((s[1] & 0xc0) != 0x80 || 
!                 (s[2] & 0xc0) != 0x80) {
!                 errmsg = "invalid data";
! 		goto utf8Error;
! 	    }
              ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
!             if (ch < 0x800 || (ch >= 0xd800 && ch < 0xe000)) {
!                 errmsg = "illegal encoding";
! 		goto utf8Error;
! 	    }
  	    else
  				*p++ = (Py_UNICODE)ch;
***************
*** 710,721 ****
              if ((s[1] & 0xc0) != 0x80 ||
                  (s[2] & 0xc0) != 0x80 ||
!                 (s[3] & 0xc0) != 0x80)
!                 UTF8_ERROR("invalid data");
              ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
                   ((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
              /* validate and convert to UTF-16 */
!             if ((ch < 0x10000) ||                  /* minimum value allowed for 4 byte encoding */
!                 (ch > 0x10ffff))                   /* maximum value allowed for UTF-16 */
!                 UTF8_ERROR("illegal encoding");
              /*  compute and append the two surrogates: */
              
--- 716,733 ----
              if ((s[1] & 0xc0) != 0x80 ||
                  (s[2] & 0xc0) != 0x80 ||
!                 (s[3] & 0xc0) != 0x80) {
!                 errmsg = "invalid data";
! 		goto utf8Error;
! 	    }
              ch = ((s[0] & 0x7) << 18) + ((s[1] & 0x3f) << 12) +
                   ((s[2] & 0x3f) << 6) + (s[3] & 0x3f);
              /* validate and convert to UTF-16 */
!             if ((ch < 0x10000) ||   /* minimum value allowed for 4
!                                        byte encoding */
!                 (ch > 0x10ffff)) {  /* maximum value allowed for
!                                        UTF-16 */
!                 errmsg = "illegal encoding";
! 		goto utf8Error;
! 	    }
              /*  compute and append the two surrogates: */
              
***************
*** 732,741 ****
          default:
              /* Other sizes are only needed for UCS-4 */
!             UTF8_ERROR("unsupported Unicode code range");
          }
          s += n;
! 
!       nextchar:
!         ;
      }
  
--- 744,757 ----
          default:
              /* Other sizes are only needed for UCS-4 */
!             errmsg = "unsupported Unicode code range";
! 	    goto utf8Error;
! 	    break;
          }
          s += n;
! 	continue;
! 	
!     utf8Error:
!       if (utf8_decoding_error(&s, &p, errors, errmsg))
!           goto onError;
      }
  
***************
*** 750,757 ****
      return NULL;
  }
- 
- #undef UTF8_ERROR
  
! /* NOT USED */
  #if 0
  static
--- 766,772 ----
      return NULL;
  }
  
! /* Not used anymore, now that the encoder supports UTF-16
!    surrogates. */
  #if 0
  static
***************
*** 784,788 ****
      }
  }
! #endif /* NOT USED */
  
  PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
--- 799,803 ----
      }
  }
! #endif
  
  PyObject *PyUnicode_EncodeUTF8(const Py_UNICODE *s,
***************
*** 828,832 ****
  			    cbAllocated += 4*10;
                              if (_PyString_Resize(&v, cbAllocated))
! 		goto onError;
                          }
  
--- 843,847 ----
  			    cbAllocated += 4*10;
                              if (_PyString_Resize(&v, cbAllocated))
! 				goto onError;
                          }
  
***************
*** 939,948 ****
  }
  
- #define UTF16_ERROR(details)  do {                       \
-     if (utf16_decoding_error(&q, &p, errors, details))   \
-         goto onError;                                    \
-     continue;                                            \
- } while(0)
- 
  PyObject *PyUnicode_DecodeUTF16(const char *s,
  				int size,
--- 954,957 ----
***************
*** 954,957 ****
--- 963,967 ----
      const Py_UNICODE *q, *e;
      int bo = 0;
+     const char *errmsg = "";
  
      /* size should be an even number */
***************
*** 1013,1030 ****
  
  	/* UTF-16 code pair: */
! 	if (q >= e)
! 	    UTF16_ERROR("unexpected end of data");
  	if (0xDC00 <= *q && *q <= 0xDFFF) {
  	    q++;
! 	    if (0xD800 <= *q && *q <= 0xDBFF)
  		/* This is valid data (a UTF-16 surrogate pair), but
  		   we are not able to store this information since our
  		   Py_UNICODE type only has 16 bits... this might
  		   change someday, even though it's unlikely. */
! 		UTF16_ERROR("code pairs are not supported");
  	    else
  		continue;
  	}
! 	UTF16_ERROR("illegal encoding");
      }
  
--- 1023,1049 ----
  
  	/* UTF-16 code pair: */
! 	if (q >= e) {
! 	    errmsg = "unexpected end of data";
! 	    goto utf16Error;
! 	}
  	if (0xDC00 <= *q && *q <= 0xDFFF) {
  	    q++;
! 	    if (0xD800 <= *q && *q <= 0xDBFF) {
  		/* This is valid data (a UTF-16 surrogate pair), but
  		   we are not able to store this information since our
  		   Py_UNICODE type only has 16 bits... this might
  		   change someday, even though it's unlikely. */
! 		errmsg = "code pairs are not supported";
! 		goto utf16Error;
! 	    }
  	    else
  		continue;
  	}
! 	errmsg = "illegal encoding";
! 	/* Fall through to report the error */
! 
!     utf16Error:
! 	if (utf16_decoding_error(&q, &p, errors, errmsg))
! 	    goto onError;
      }