[Python-checkins] python/dist/src/Objects unicodectype.c, 2.14, 2.15 unicodeobject.c, 2.211, 2.212 unicodetype_db.h, 1.7, 1.8

perky at users.sourceforge.net perky at users.sourceforge.net
Wed Jun 2 12:49:20 EDT 2004


Update of /cvsroot/python/python/dist/src/Objects
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32555/Objects

Modified Files:
	unicodectype.c unicodeobject.c unicodetype_db.h 
Log Message:
- SF #962502: Add two more methods for unicode type: width() and
iswide(), for East Asian width manipulation. (Inspired by David
Goodger, reviewed by Martin v. Loewis)
- Move _PyUnicode_TypeRecord.flags to the end of the struct so that
no padding is added for UCS-4 builds. (Suggested by Martin v. Loewis)


Index: unicodectype.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodectype.c,v
retrieving revision 2.14
retrieving revision 2.15
diff -C2 -d -r2.14 -r2.15
*** unicodectype.c	29 Dec 2003 01:36:01 -0000	2.14
--- unicodectype.c	2 Jun 2004 16:49:16 -0000	2.15
***************
*** 20,26 ****
  #define TITLE_MASK 0x40
  #define UPPER_MASK 0x80
  
  typedef struct {
-     const unsigned short flags;
      const Py_UNICODE upper;
      const Py_UNICODE lower;
--- 20,26 ----
  #define TITLE_MASK 0x40
  #define UPPER_MASK 0x80
+ #define WIDE_MASK 0x100
  
  typedef struct {
      const Py_UNICODE upper;
      const Py_UNICODE lower;
***************
*** 28,31 ****
--- 28,32 ----
      const unsigned char decimal;
      const unsigned char digit;
+     const unsigned short flags;
  } _PyUnicode_TypeRecord;
  
***************
*** 323,326 ****
--- 324,336 ----
  }
  
+ /* Returns 1 when the type record looked up for ch has WIDE_MASK set in
+    its flags (Full or Wide width characters), 0 otherwise */
+ int _PyUnicode_IsWide(Py_UNICODE ch)
+ {
+     const _PyUnicode_TypeRecord *record = gettyperecord(ch);
+     const int is_wide = (record->flags & WIDE_MASK) ? 1 : 0;
+     return is_wide;
+ }
+ 
  #ifndef WANT_WCTYPE_FUNCTIONS
  

Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.211
retrieving revision 2.212
diff -C2 -d -r2.211 -r2.212
*** unicodeobject.c	6 Apr 2004 07:24:51 -0000	2.211
--- unicodeobject.c	2 Jun 2004 16:49:16 -0000	2.212
***************
*** 656,659 ****
--- 656,680 ----
  }
  
+ /* Add up per-character widths of a unicode object: 2 for each character
+    accepted by Py_UNICODE_ISWIDE, 1 for every other character.  For a
+    non-unicode argument, calls PyErr_BadArgument() and returns -1. */
+ int PyUnicode_GetWidth(PyObject *unicode)
+ {
+     const Py_UNICODE *cur, *stop;
+     int total = 0;
+ 
+     if (!PyUnicode_Check(unicode)) {
+ 	PyErr_BadArgument();
+ 	return -1;
+     }
+ 
+     cur = PyUnicode_AS_UNICODE(unicode);
+     stop = cur + PyUnicode_GET_SIZE(unicode);
+     for (; cur < stop; cur++)
+ 	total += Py_UNICODE_ISWIDE(*cur) ? 2 : 1;
+ 
+     return total;
+ }
+ 
  const char *PyUnicode_GetDefaultEncoding(void)
  {
***************
*** 5317,5320 ****
--- 5338,5370 ----
  }
  
+ PyDoc_STRVAR(iswide__doc__,
+ "S.iswide() -> bool\n\
+ \n\
+ Return True if all characters in S are wide width\n\
+ and there is at least one character in S, False otherwise.");
+ 
+ static PyObject*
+ unicode_iswide(PyUnicodeObject *self)
+ {
+     register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
+     register const Py_UNICODE *e;
+ 
+     /* Shortcut for single character strings (size is checked before *p) */
+     if (PyUnicode_GET_SIZE(self) == 1 &&
+ 	Py_UNICODE_ISWIDE(*p))
+ 	Py_RETURN_TRUE;
+ 
+     /* Special case for empty strings (self is unicode: use its size) */
+     if (PyUnicode_GET_SIZE(self) == 0)
+ 	Py_RETURN_FALSE;
+ 
+     e = p + PyUnicode_GET_SIZE(self);
+     for (; p < e; p++) {
+ 	if (!Py_UNICODE_ISWIDE(*p))
+ 	    Py_RETURN_FALSE;
+     }
+     Py_RETURN_TRUE;
+ }
+ 
  PyDoc_STRVAR(join__doc__,
  "S.join(sequence) -> unicode\n\
***************
*** 5336,5340 ****
  
  PyDoc_STRVAR(ljust__doc__,
! "S.ljust(width[, fillchar]) -> unicode\n\
  \n\
  Return S left justified in a Unicode string of length width. Padding is\n\
--- 5386,5390 ----
  
  PyDoc_STRVAR(ljust__doc__,
! "S.ljust(width[, fillchar]) -> int\n\
  \n\
  Return S left justified in a Unicode string of length width. Padding is\n\
***************
*** 5928,5931 ****
--- 5978,5996 ----
  }
  
+ PyDoc_STRVAR(width__doc__,
+ "S.width() -> int\n\
+ \n\
+ Return a fixed-width representation length of S.");
+ 
+ static PyObject*
+ unicode_width(PyObject *self)
+ {
+     int width = PyUnicode_GetWidth(self);
+     if (width == -1)	/* PyUnicode_GetWidth() reported a bad argument */
+ 	return NULL;
+     else
+ 	return PyInt_FromLong((long)width);
+ }
+ 
  PyDoc_STRVAR(zfill__doc__,
  "S.zfill(width) -> unicode\n\
***************
*** 6091,6094 ****
--- 6156,6161 ----
      {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
      {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
+     {"iswide", (PyCFunction) unicode_iswide, METH_NOARGS, iswide__doc__},
+     {"width", (PyCFunction) unicode_width, METH_NOARGS, width__doc__},
      {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
  #if 0

Index: unicodetype_db.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodetype_db.h,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** unicodetype_db.h	25 Nov 2002 09:13:36 -0000	1.7
--- unicodetype_db.h	2 Jun 2004 16:49:16 -0000	1.8
***************
*** 1,3 ****
! /* this file was generated by Tools/unicode/makeunicodedata.py 2.2 */
  
  /* a list of unique character type descriptors */
--- 1,3 ----
! /* this file was generated by Tools/unicode/makeunicodedata.py 2.3 */
  
  /* a list of unique character type descriptors */
***************
*** 5,131 ****
      {0, 0, 0, 0, 0, 0},
[...1225 lines suppressed...]
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
!     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
***************
*** 1086,1090 ****
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
!     1, 1, 1, 1, 1, 1, 0, 0, 
  };
  
--- 1147,1151 ----
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
!     1, 1, 1, 1, 1, 0, 0, 
  };
  




More information about the Python-checkins mailing list