[Python-checkins] CVS: python/dist/src/Objects stringobject.c,2.115,2.116

M.-A. Lemburg lemburg@users.sourceforge.net
Tue, 15 May 2001 04:58:08 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv24065/Objects

Modified Files:
	stringobject.c 
Log Message:
This patch changes the way the string .encode() method works slightly
and introduces a new method .decode(). 

The major change is that strg.encode() will no longer try to convert
Unicode returns from the codec into a string, but instead pass along
the Unicode object as-is. The same is now true for all other codec 
return types. The underlying C APIs were changed accordingly.

Note that even though this does have the potential of breaking
existing code, the chances are low, since conversion from Unicode
previously took place using the default encoding, which is normally
set to ASCII, rendering this auto-conversion mechanism useless for
most Unicode encodings.

The good news is that you can now use .encode() and .decode() with
much greater ease and that the door was opened for better accessibility
of the builtin codecs.

As a demonstration of the new feature, the patch includes a few new
codecs which allow string-to-string encoding and decoding (rot13,
hex, zip, uu, base64).

Written by Marc-Andre Lemburg. Copyright assigned to the PSF.



Index: stringobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v
retrieving revision 2.115
retrieving revision 2.116
diff -C2 -r2.115 -r2.116
*** stringobject.c	2001/05/10 00:32:57	2.115
--- stringobject.c	2001/05/15 11:58:06	2.116
***************
*** 153,188 ****
  			  const char *errors)
  {
!     PyObject *buffer = NULL, *str;
  
      if (encoding == NULL)
  	encoding = PyUnicode_GetDefaultEncoding();
  
      /* Decode via the codec registry */
!     buffer = PyBuffer_FromMemory((void *)s, size);
!     if (buffer == NULL)
          goto onError;
!     str = PyCodec_Decode(buffer, encoding, errors);
!     if (str == NULL)
          goto onError;
      /* Convert Unicode to a string using the default encoding */
!     if (PyUnicode_Check(str)) {
! 	PyObject *temp = str;
! 	str = PyUnicode_AsEncodedString(str, NULL, NULL);
  	Py_DECREF(temp);
! 	if (str == NULL)
  	    goto onError;
      }
!     if (!PyString_Check(str)) {
          PyErr_Format(PyExc_TypeError,
                       "decoder did not return a string object (type=%.400s)",
!                      str->ob_type->tp_name);
!         Py_DECREF(str);
          goto onError;
      }
!     Py_DECREF(buffer);
!     return str;
  
   onError:
-     Py_XDECREF(buffer);
      return NULL;
  }
--- 153,220 ----
  			  const char *errors)
  {
!     PyObject *v, *str;
! 
!     str = PyString_FromStringAndSize(s, size);
!     if (str == NULL)
! 	return NULL;
!     v = PyString_AsDecodedString(str, encoding, errors);
!     Py_DECREF(str);
!     return v;
! }
  
+ PyObject *PyString_AsDecodedObject(PyObject *str,
+ 				   const char *encoding,
+ 				   const char *errors)
+ {
+     PyObject *v;
+ 
+     if (!PyString_Check(str)) {
+         PyErr_BadArgument();
+         goto onError;
+     }
+ 
      if (encoding == NULL)
  	encoding = PyUnicode_GetDefaultEncoding();
  
      /* Decode via the codec registry */
!     v = PyCodec_Decode(str, encoding, errors);
!     if (v == NULL)
          goto onError;
! 
!     return v;
! 
!  onError:
!     return NULL;
! }
! 
! PyObject *PyString_AsDecodedString(PyObject *str,
! 				   const char *encoding,
! 				   const char *errors)
! {
!     PyObject *v;
! 
!     v = PyString_AsDecodedObject(str, encoding, errors);
!     if (v == NULL)
          goto onError;
+ 
      /* Convert Unicode to a string using the default encoding */
!     if (PyUnicode_Check(v)) {
! 	PyObject *temp = v;
! 	v = PyUnicode_AsEncodedString(v, NULL, NULL);
  	Py_DECREF(temp);
! 	if (v == NULL)
  	    goto onError;
      }
!     if (!PyString_Check(v)) {
          PyErr_Format(PyExc_TypeError,
                       "decoder did not return a string object (type=%.400s)",
!                      v->ob_type->tp_name);
!         Py_DECREF(v);
          goto onError;
      }
! 
!     return v;
  
   onError:
      return NULL;
  }
***************
*** 203,207 ****
  }
  
! PyObject *PyString_AsEncodedString(PyObject *str,
  				   const char *encoding,
  				   const char *errors)
--- 235,239 ----
  }
  
! PyObject *PyString_AsEncodedObject(PyObject *str,
  				   const char *encoding,
  				   const char *errors)
***************
*** 221,224 ****
--- 253,273 ----
      if (v == NULL)
          goto onError;
+ 
+     return v;
+ 
+  onError:
+     return NULL;
+ }
+ 
+ PyObject *PyString_AsEncodedString(PyObject *str,
+ 				   const char *encoding,
+ 				   const char *errors)
+ {
+     PyObject *v;
+ 
+     v = PyString_AsEncodedString(str, encoding, errors);
+     if (v == NULL)
+         goto onError;
+ 
      /* Convert Unicode to a string using the default encoding */
      if (PyUnicode_Check(v)) {
***************
*** 236,239 ****
--- 285,289 ----
          goto onError;
      }
+ 
      return v;
  
***************
*** 1780,1787 ****
  
  static char encode__doc__[] =
! "S.encode([encoding[,errors]]) -> string\n\
  \n\
! Return an encoded string version of S. Default encoding is the current\n\
! default string encoding. errors may be given to set a different error\n\
  handling scheme. Default is 'strict' meaning that encoding errors raise\n\
  a ValueError. Other possible values are 'ignore' and 'replace'.";
--- 1830,1837 ----
  
  static char encode__doc__[] =
! "S.encode([encoding[,errors]]) -> object\n\
  \n\
! Encodes S using the codec registered for encoding. encoding defaults\n\
! to the default encoding. errors may be given to set a different error\n\
  handling scheme. Default is 'strict' meaning that encoding errors raise\n\
  a ValueError. Other possible values are 'ignore' and 'replace'.";
***************
*** 1793,1798 ****
      char *errors = NULL;
      if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
          return NULL;
!     return PyString_AsEncodedString((PyObject *)self, encoding, errors);
  }
  
--- 1843,1867 ----
      char *errors = NULL;
      if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
+         return NULL;
+     return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
+ }
+ 
+ 
+ static char decode__doc__[] =
+ "S.decode([encoding[,errors]]) -> object\n\
+ \n\
+ Decodes S using the codec registered for encoding. encoding defaults\n\
+ to the default encoding. errors may be given to set a different error\n\
+ handling scheme. Default is 'strict' meaning that encoding errors raise\n\
+ a ValueError. Other possible values are 'ignore' and 'replace'.";
+ 
+ static PyObject *
+ string_decode(PyStringObject *self, PyObject *args)
+ {
+     char *encoding = NULL;
+     char *errors = NULL;
+     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
          return NULL;
!     return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
  }
  
***************
*** 2372,2375 ****
--- 2441,2445 ----
  	{"center",      (PyCFunction)string_center,      1, center__doc__},
  	{"encode",      (PyCFunction)string_encode,      1, encode__doc__},
+ 	{"decode",      (PyCFunction)string_decode,      1, decode__doc__},
  	{"expandtabs",  (PyCFunction)string_expandtabs,  1, expandtabs__doc__},
  	{"splitlines",  (PyCFunction)string_splitlines,  1, splitlines__doc__},