[Python-3000-checkins] r59205 - in python/branches/py3k: Doc/library/stdtypes.rst Lib/test/test_unicode.py Objects/unicodeobject.c

georg.brandl python-3000-checkins at python.org
Wed Nov 28 00:48:06 CET 2007


Author: georg.brandl
Date: Wed Nov 28 00:48:05 2007
New Revision: 59205

Modified:
   python/branches/py3k/Doc/library/stdtypes.rst
   python/branches/py3k/Lib/test/test_unicode.py
   python/branches/py3k/Objects/unicodeobject.c
Log:
#1496: revert str.translate() to the old version, and add
str.maketrans() to make a table in a more comfortable way.


Modified: python/branches/py3k/Doc/library/stdtypes.rst
==============================================================================
--- python/branches/py3k/Doc/library/stdtypes.rst	(original)
+++ python/branches/py3k/Doc/library/stdtypes.rst	Wed Nov 28 00:48:05 2007
@@ -800,6 +800,21 @@
       'example.com'
 
 
+.. method:: str.maketrans(x[, y[, z]])
+
+   This static method returns a translation table usable for :meth:`str.translate`.
+
+   If there is only one argument, it must be a dictionary mapping Unicode
+   ordinals (integers) or characters (strings of length 1) to Unicode ordinals,
+   strings (of arbitrary lengths) or None.  Character keys will then be
+   converted to ordinals.
+
+   If there are two arguments, they must be strings of equal length, and in the
+   resulting dictionary, each character in x will be mapped to the character at
+   the same position in y.  If there is a third argument, it must be a string,
+   whose characters will be mapped to None in the result.
+
+
 .. method:: str.partition(sep)
 
    Split the string at the first occurrence of *sep*, and return a 3-tuple
@@ -934,15 +949,17 @@
 .. method:: str.translate(map)
 
    Return a copy of the *s* where all characters have been mapped through the
-   *map* which must be a dictionary of characters (strings of length 1) or
-   Unicode ordinals (integers) to Unicode ordinals, strings or ``None``.
-   Unmapped characters are left untouched.  Characters mapped to ``None`` are
-   deleted.
+   *map* which must be a dictionary of Unicode ordinals (integers) to Unicode
+   ordinals, strings or ``None``.  Unmapped characters are left untouched.
+   Characters mapped to ``None`` are deleted.
+
+   A *map* for :meth:`translate` is usually best created by
+   :meth:`str.maketrans`.
 
    .. note::
 
-      A more flexible approach is to create a custom character mapping codec
-      using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
+      An even more flexible approach is to create a custom character mapping
+      codec using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an
       example).
 
 

Modified: python/branches/py3k/Lib/test/test_unicode.py
==============================================================================
--- python/branches/py3k/Lib/test/test_unicode.py	(original)
+++ python/branches/py3k/Lib/test/test_unicode.py	Wed Nov 28 00:48:05 2007
@@ -166,18 +166,37 @@
         self.assertRaises(ValueError, 'abcdefghi'.rindex,  'ghi', 0, 8)
         self.assertRaises(ValueError, 'abcdefghi'.rindex,  'ghi', 0, -1)
 
-    def test_translate(self):
-        self.checkequalnofix('bbbc', 'abababc', 'translate', {ord('a'):None})
-        self.checkequalnofix('iiic', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
-        self.checkequalnofix('iiix', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):'x'})
-        self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', {'a':None, 'b':'<i>'})
-        self.checkequalnofix('c', 'abababc', 'translate', {ord('a'):None, ord('b'):''})
-        self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'})
+    def test_maketrans_translate(self):
+        # these work with plain translate()
+        self.checkequalnofix('bbbc', 'abababc', 'translate',
+                             {ord('a'): None})
+        self.checkequalnofix('iiic', 'abababc', 'translate',
+                             {ord('a'): None, ord('b'): ord('i')})
+        self.checkequalnofix('iiix', 'abababc', 'translate',
+                             {ord('a'): None, ord('b'): ord('i'), ord('c'): 'x'})
+        self.checkequalnofix('c', 'abababc', 'translate',
+                             {ord('a'): None, ord('b'): ''})
+        self.checkequalnofix('xyyx', 'xzx', 'translate',
+                             {ord('z'): 'yy'})
+        # this needs maketrans()
+        self.checkequalnofix('abababc', 'abababc', 'translate',
+                             {'b': '<i>'})
+        tbl = self.type2test.maketrans({'a': None, 'b': '<i>'})
+        self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', tbl)
+        # test alternative way of calling maketrans()
+        tbl = self.type2test.maketrans('abc', 'xyz', 'd')
+        self.checkequalnofix('xyzzy', 'abdcdcbdddd', 'translate', tbl)
+
+        self.assertRaises(TypeError, self.type2test.maketrans)
+        self.assertRaises(ValueError, self.type2test.maketrans, 'abc', 'defg')
+        self.assertRaises(TypeError, self.type2test.maketrans, 2, 'def')
+        self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 2)
+        self.assertRaises(TypeError, self.type2test.maketrans, 'abc', 'def', 2)
+        self.assertRaises(ValueError, self.type2test.maketrans, {'xy': 2})
+        self.assertRaises(TypeError, self.type2test.maketrans, {(1,): 2})
 
         self.assertRaises(TypeError, 'hello'.translate)
         self.assertRaises(TypeError, 'abababc'.translate, 'abc', 'xyz')
-        self.assertRaises(ValueError, 'abababc'.translate, {'xy':2})
-        self.assertRaises(TypeError, 'abababc'.translate, {(1,):2})
 
     def test_split(self):
         string_tests.CommonTest.test_split(self)

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Wed Nov 28 00:48:05 2007
@@ -7793,68 +7793,124 @@
     return fixup(self, fixswapcase);
 }
 
-PyDoc_STRVAR(translate__doc__,
-"S.translate(table) -> unicode\n\
+PyDoc_STRVAR(maketrans__doc__,
+"str.maketrans(x[, y[, z]]) -> dict (static method)\n\
 \n\
-Return a copy of the string S, where all characters have been mapped\n\
-through the given translation table, which must be a mapping of\n\
-Unicode ordinals to Unicode ordinals, Unicode strings or None.\n\
-Unmapped characters are left untouched. Characters mapped to None\n\
-are deleted.");
+Return a translation table usable for str.translate().\n\
+If there is only one argument, it must be a dictionary mapping Unicode\n\
+ordinals (integers) or characters to Unicode ordinals, strings or None.\n\
+Character keys will then be converted to ordinals.\n\
+If there are two arguments, they must be strings of equal length, and\n\
+in the resulting dictionary, each character in x will be mapped to the\n\
+character at the same position in y. If there is a third argument, it\n\
+must be a string, whose characters will be mapped to None in the result.");
 
 static PyObject*
-unicode_translate(PyUnicodeObject *self, PyObject *table)
+unicode_maketrans(PyUnicodeObject *null, PyObject *args)
 {
-    PyObject *newtable = NULL;
+    PyObject *x, *y = NULL, *z = NULL;
+    PyObject *new = NULL, *key, *value;
     Py_ssize_t i = 0;
-    PyObject *key, *value, *result;
-
-    if (!PyDict_Check(table)) {
-        PyErr_SetString(PyExc_TypeError, "translate argument must be a dict");
+    int res;
+    
+    if (!PyArg_ParseTuple(args, "O|UU:maketrans", &x, &y, &z))
         return NULL;
-    }
-    /* fixup the table -- allow size-1 string keys instead of only int keys */
-    newtable = PyDict_Copy(table);
-    if (!newtable) return NULL;
-    while (PyDict_Next(table, &i, &key, &value)) {
-        if (PyUnicode_Check(key)) {
-            /* convert string keys to integer keys */
-            PyObject *newkey;
-            int res;
-            if (PyUnicode_GET_SIZE(key) != 1) {
-                PyErr_SetString(PyExc_ValueError, "string items in translate "
-                                "table must be 1 element long");
-                goto err;
-            }
-            newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
-            if (!newkey)
+    new = PyDict_New();
+    if (!new)
+        return NULL;
+    if (y != NULL) {
+        /* x must be a string too, of equal length */
+        Py_ssize_t ylen = PyUnicode_GET_SIZE(y);
+        if (!PyUnicode_Check(x)) {
+            PyErr_SetString(PyExc_TypeError, "first maketrans argument must "
+                            "be a string if there is a second argument");
+            goto err;
+        }
+        if (PyUnicode_GET_SIZE(x) != ylen) {
+            PyErr_SetString(PyExc_ValueError, "the first two maketrans "
+                            "arguments must have equal length");
+            goto err;
+        }
+        /* create entries for translating chars in x to those in y */
+        for (i = 0; i < PyUnicode_GET_SIZE(x); i++) {
+            key = PyInt_FromLong(PyUnicode_AS_UNICODE(x)[i]);
+            value = PyInt_FromLong(PyUnicode_AS_UNICODE(y)[i]);
+            if (!key || !value)
                 goto err;
-            res = PyDict_SetItem(newtable, newkey, value);
-            Py_DECREF(newkey);
+            res = PyDict_SetItem(new, key, value);
+            Py_DECREF(key);
+            Py_DECREF(value);
             if (res < 0)
                 goto err;
-        } else if (PyInt_Check(key)) {
-            /* just keep integer keys */
-            if (PyDict_SetItem(newtable, key, value) < 0)
-                goto err;
-        } else {
-            PyErr_SetString(PyExc_TypeError, "items in translate table must be "
-                            "strings or integers");
+        }
+        /* create entries for deleting chars in z */
+        if (z != NULL) {
+            for (i = 0; i < PyUnicode_GET_SIZE(z); i++) {
+                key = PyInt_FromLong(PyUnicode_AS_UNICODE(z)[i]);
+                if (!key)
+                    goto err;
+                res = PyDict_SetItem(new, key, Py_None);
+                Py_DECREF(key);
+                if (res < 0)
+                    goto err;
+            }
+        }
+    } else {
+        /* x must be a dict */
+        if (!PyDict_Check(x)) {
+            PyErr_SetString(PyExc_TypeError, "if you give only one argument "
+                            "to maketrans it must be a dict");
             goto err;
         }
+        /* copy entries into the new dict, converting string keys to int keys */
+        while (PyDict_Next(x, &i, &key, &value)) {
+            if (PyUnicode_Check(key)) {
+                /* convert string keys to integer keys */
+                PyObject *newkey;
+                if (PyUnicode_GET_SIZE(key) != 1) {
+                    PyErr_SetString(PyExc_ValueError, "string keys in translate "
+                                    "table must be of length 1");
+                    goto err;
+                }
+                newkey = PyInt_FromLong(PyUnicode_AS_UNICODE(key)[0]);
+                if (!newkey)
+                    goto err;
+                res = PyDict_SetItem(new, newkey, value);
+                Py_DECREF(newkey);
+                if (res < 0)
+                    goto err;
+            } else if (PyInt_Check(key)) {
+                /* just keep integer keys */
+                if (PyDict_SetItem(new, key, value) < 0)
+                    goto err;
+            } else {
+                PyErr_SetString(PyExc_TypeError, "keys in translate table must "
+                                "be strings or integers");
+                goto err;
+            }
+        }
     }
-
-    result = PyUnicode_TranslateCharmap(self->str,
-                                        self->length,
-                                        newtable,
-                                        "ignore");
-    Py_DECREF(newtable);
-    return result;
+    return new;
   err:
-    Py_DECREF(newtable);
+    Py_DECREF(new);
     return NULL;
 }
 
+PyDoc_STRVAR(translate__doc__,
+"S.translate(table) -> unicode\n\
+\n\
+Return a copy of the string S, where all characters have been mapped\n\
+through the given translation table, which must be a mapping of\n\
+Unicode ordinals to Unicode ordinals, Unicode strings or None.\n\
+Unmapped characters are left untouched. Characters mapped to None\n\
+are deleted.");
+
+static PyObject*
+unicode_translate(PyUnicodeObject *self, PyObject *table)
+{
+    return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore");
+}
+
 PyDoc_STRVAR(upper__doc__,
 "S.upper() -> unicode\n\
 \n\
@@ -8076,6 +8132,8 @@
     {"__format__", (PyCFunction) unicode_unicode__format__, METH_VARARGS, p_format__doc__},
     {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
     {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
+    {"maketrans", (PyCFunction) unicode_maketrans,
+     METH_VARARGS | METH_STATIC, maketrans__doc__},
 #if 0
     {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
 #endif


More information about the Python-3000-checkins mailing list