[Python-checkins] python/dist/src/Modules unicodedata.c,2.22,2.23

loewis@users.sourceforge.net loewis@users.sourceforge.net
Sat, 23 Nov 2002 10:01:35 -0800


Update of /cvsroot/python/python/dist/src/Modules
In directory sc8-pr-cvs1:/tmp/cvs-serv18377/Modules

Modified Files:
	unicodedata.c 
Log Message:
Implement names for CJK unified ideographs. Add name to KeyError output.
Verify that the lookup for an existing name succeeds.


Index: unicodedata.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/unicodedata.c,v
retrieving revision 2.22
retrieving revision 2.23
diff -C2 -d -r2.22 -r2.23
*** unicodedata.c	23 Nov 2002 17:11:06 -0000	2.22
--- unicodedata.c	23 Nov 2002 18:01:32 -0000	2.23
***************
*** 349,352 ****
--- 349,362 ----
      }
  
+     if ((0x3400 <= code && code <= 0x4DB5) ||  /* CJK Ideograph Extension A */
+         (0x4E00 <= code && code <= 0x9FA5) ||  /* CJK Ideograph */
+         (0x20000 <= code && code <= 0x2A6D6)) {/* CJK Ideograph Extension B */
+         if (buflen < 28)
+             /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
+             return 0;
+         sprintf(buffer, "CJK UNIFIED IDEOGRAPH-%X", code);
+         return 1;
+     }
+ 
      if (code >= 0x110000)
          return 0;
***************
*** 450,453 ****
--- 460,487 ----
  	    return 1;
  	}
+         /* Otherwise, it's an illegal syllable name. */
+         return 0;
+     }
+ 
+     /* Check for unified ideographs. */
+     if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
+         /* Four or five hexdigits must follow. */
+         v = 0;
+         name += 22;
+         namelen -= 22;
+         if (namelen != 4 && namelen != 5)
+             return 0;
+         while (namelen--) {
+             v *= 16;
+             if (*name >= '0' && *name <= '9')
+                 v += *name - '0';
+             else if (*name >= 'A' && *name <= 'F')
+                 v += *name - 'A' + 10;
+             else
+                 return 0;
+             name++;
+         }
+         *code = v;
+         return 1;
      }
  
***************
*** 536,540 ****
  
      if (!_getcode(name, namelen, &code)) {
!         PyErr_SetString(PyExc_KeyError, "undefined character name");
          return NULL;
      }
--- 570,578 ----
  
      if (!_getcode(name, namelen, &code)) {
!         char fmt[] = "undefined character name '%s'";
!         char *buf = PyMem_MALLOC(sizeof(fmt) + namelen);
!         sprintf(buf, fmt, name);
!         PyErr_SetString(PyExc_KeyError, buf);
!         PyMem_FREE(buf);
          return NULL;
      }