[Python-checkins] r54056 - sandbox/trunk/pep3101/README.txt sandbox/trunk/pep3101/pep3101.c sandbox/trunk/pep3101/pep3101.h sandbox/trunk/pep3101/setup.py sandbox/trunk/pep3101/stringformat.c sandbox/trunk/pep3101/test_simpleformat.py sandbox/trunk/pep3101/unicodeformat.c
patrick.maupin
python-checkins at python.org
Thu Mar 1 09:22:28 CET 2007
Author: patrick.maupin
Date: Thu Mar 1 09:22:25 2007
New Revision: 54056
Added:
sandbox/trunk/pep3101/README.txt
sandbox/trunk/pep3101/pep3101.c
sandbox/trunk/pep3101/pep3101.h
sandbox/trunk/pep3101/setup.py
sandbox/trunk/pep3101/stringformat.c
sandbox/trunk/pep3101/test_simpleformat.py
sandbox/trunk/pep3101/unicodeformat.c
Log:
Initial version of pep3101 sandbox code committed
Added: sandbox/trunk/pep3101/README.txt
==============================================================================
--- (empty file)
+++ sandbox/trunk/pep3101/README.txt Thu Mar 1 09:22:25 2007
@@ -0,0 +1,70 @@
+
+PEP 3101
+
+This directory is where sample code to support PEP 3101 and do
+related things is being developed.
+
+Current developers:
+
+ Patrick Maupin (pmaupin at gmail.com)
+ Eric V. Smith
+ Pete Shinner
+
+The code is only half-baked at present
+(development was started at PyCon 2007 and is in progress).
+
+Although the PEP3101 goal is a (unicode) string format method, since
+this is a sandbox, we might do a few more ambitious things as well,
+to see if people like them.
+
+The current plan of record is to make a pep3101 extension module.
+It will have at least the following features:
+
+ - contains a format function which takes a string and parameters
+ (but which is easily portable to a string method for Py3k)
+ - can be compiled against 2.4, 2.5, and Py3K
+ - Works with the string object as well as unicode
+
+The current code has a module which is progressing nicely, and some
+unittests for the current version.
+
+Files:
+
+ - unicodeformat.c is designed to be easily added to Python
+ as a method of the unicode object.
+ - stringformat.c is a wrapper around unicodeformat.c, which
+ "templatizes" the entire file to make it easy to add to Python
+ as a method of the string object.
+ - pep3101.h contains definitions for the functions in stringformat
+ and unicodeformat
+ - pep3101.c contains a module implementation which can be linked
+ with these method files for testing and/or use with earlier
+ Python versions.
+ - setup.py -- Use "build" option to make the extension module
+ - test_simpleformat.py -- initial unittests
+
+Todo:
+
+ - finish up format specifier handling
+ - document differences between PEP and implementation
+ - Add docstrings to module
+ - print string offset information on certain errors
+ - Add _flags options
+ - Play with possible implementations for formatting
+ strings against dictionaries as well as the format
+ (dangerous)
+ - Play with possible implementations for exposing
+ lowest level format specifier handler for use in
+ compatible template systems.
+ - Play with possible options for specifying additional
+ escape syntaxes
+
+_flags options to consider adding:
+
+ - useall=1 means all arguments should be used
+ - allow_leading_under means leading underbars allowed
+ - syntax=0,1,2,3 -- different syntaxes
+ - hook=object -- callback hook as described in PEP
+ - informational mode to dump exceptions into string
+ (as described in pep)
+ - max_recursion=xxx (default 4)
Added: sandbox/trunk/pep3101/pep3101.c
==============================================================================
--- (empty file)
+++ sandbox/trunk/pep3101/pep3101.c Thu Mar 1 09:22:25 2007
@@ -0,0 +1,64 @@
+#include "Python.h"
+
+/* XXX -- remove this include if integrated into Python.h ??? */
+#include "pep3101.h"
+
+
+/* ----------------------------------------------------- */
+
+static char pep3101_format__doc__[] =
+""
+;
+
+/*
+   Module-level format(): the first positional argument is the format
+   string (str or unicode); the remaining positional arguments and the
+   keywords are handed to the matching FormatMethod implementation.
+*/
+static PyObject *
+pep3101_format(PyObject *self, PyObject *args, PyObject *keywords)
+{
+    PyObject *target;               /* borrowed from args */
+    PyObject *rest;                 /* args[1:], owned here */
+    PyObject *formatted = NULL;
+
+    target = PyTuple_GetItem(args, 0);
+    if (!target)
+        return NULL;
+
+    rest = PyTuple_GetSlice(args, 1, PyTuple_Size(args) + 1);
+    if (!rest)
+        return NULL;
+
+    if (PyUnicode_Check(target)) {
+        formatted = PyUnicode_FormatMethod(target, rest, keywords);
+    }
+    else if (PyString_Check(target)) {
+        formatted = PyString_FormatMethod(target, rest, keywords);
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+            "First parameter to format must be string or unicode object");
+    }
+
+    Py_DECREF(rest);
+    return formatted;
+}
+
+/* List of methods defined in the module */
+
+/* Method table handed to Py_InitModule4 in initpep3101. */
+static struct PyMethodDef pep3101_methods[] = {
+ /* format is registered as taking both positional and keyword arguments */
+ {"format", (PyCFunction)pep3101_format, METH_VARARGS | METH_KEYWORDS, pep3101_format__doc__},
+
+ {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
+};
+
+
+/* Initialization function for the module (*must* be called initpep3101) */
+
+static char pep3101_module_documentation[] =
+""
+;
+
+void
+initpep3101(void)
+{
+    /* Register the module and its method table; the returned module
+       handle is not needed afterwards. */
+    (void)Py_InitModule4("pep3101", pep3101_methods,
+                         pep3101_module_documentation,
+                         (PyObject *)NULL, PYTHON_API_VERSION);
+
+    /* Any pending exception means initialization failed. */
+    if (PyErr_Occurred())
+        Py_FatalError("can't initialize module pep3101");
+}
Added: sandbox/trunk/pep3101/pep3101.h
==============================================================================
--- (empty file)
+++ sandbox/trunk/pep3101/pep3101.h Thu Mar 1 09:22:25 2007
@@ -0,0 +1,12 @@
+#ifndef Py_PEP3101_H
+#define Py_PEP3101_H
+
+/* XXX -- need #ifdefs to remove Unicode if not using it, and remove String on Py3K */
+
+/* Format method for 8-bit string objects; unicodeformat.c defines it
+   under this name (via STROBJ_FORMAT) when compiled with C_UNICODE 0. */
+PyObject *
+PyString_FormatMethod(PyObject *self, PyObject *args, PyObject *keywords);
+
+/* Format method for unicode objects; unicodeformat.c defines it under
+   this name (via STROBJ_FORMAT) when compiled with C_UNICODE 1. */
+PyObject *
+PyUnicode_FormatMethod(PyObject *self, PyObject *args, PyObject *keywords);
+
+#endif
Added: sandbox/trunk/pep3101/setup.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/pep3101/setup.py Thu Mar 1 09:22:25 2007
@@ -0,0 +1,11 @@
+from distutils.core import setup, Extension
+
+module1 = Extension('pep3101',
+ sources = ['pep3101.c', 'unicodeformat.c', 'stringformat.c'],
+ depends = ['unicodeformat.c'], # stringformat.c #includes unicodeformat.c, so force a rebuild when it changes
+ )
+
+setup (name = 'pep3101',
+ version = '1.0',
+ description = 'Extension module to implement features of PEP 3101',
+ ext_modules = [module1])
Added: sandbox/trunk/pep3101/stringformat.c
==============================================================================
--- (empty file)
+++ sandbox/trunk/pep3101/stringformat.c Thu Mar 1 09:22:25 2007
@@ -0,0 +1,4 @@
+#include "Python.h"
+#define COMPILED_FROM_INSIDE_STRINGFORMAT
+#define C_UNICODE 0
+#include "unicodeformat.c"
Added: sandbox/trunk/pep3101/test_simpleformat.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/pep3101/test_simpleformat.py Thu Mar 1 09:22:25 2007
@@ -0,0 +1,115 @@
+
+import unittest
+from test import test_support
+
+import pep3101
+
+
+# The test implementation does not allow an argument
+# index or keyword name to be used more than once. The
+# PEP doesn't make any specification on this, but it
+# seems too useful to leave as is.
+
+
+class FormatTest(unittest.TestCase):
+ # All tests run through these functions. They can be
+ # overridden to change the class of string being tested
+ # and the function being used.
+ def formatEquals(self, result, text, *args, **kwargs):
+ text = str(text)
+ result = str(result)
+ val = pep3101.format(text, *args, **kwargs)
+ self.assertEquals(val, result)
+
+ def formatRaises(self, exc, text, *args, **kwargs):
+ exc = exc or Exception #StringFormat.FormatError
+ text = str(text)
+ #prevState = StringFormat.strict_format_errors
+ #StringFormat.strict_format_errors = True
+ try:
+ self.assertRaises(exc,
+ lambda: pep3101.format(
+ text, *args, **kwargs))
+ finally:
+ pass
+ #StringFormat.strict_format_errors = prevState
+
+
+ def test_basic(self):
+ # directly from the pep intro
+ self.formatEquals(
+ "My name is Fred",
+ "My name is {0}", "Fred")
+ self.formatEquals(
+ "My name is Fred :-{}",
+ "My name is {0} :-{{}}", "Fred")
+ self.formatEquals("abc", "{0:}", "abc") # is this allowed?
+
+ def test_missingargs(self):
+ #self.formatRaises(None, "Doesn't use all {0} args", 42, 24)
+ self.formatRaises(IndexError, "There is no {4} arg", 42, 24)
+ self.formatRaises(KeyError, "There question is {when}", who=True)
+
+ def test_attributes(self):
+ class Container(object):
+ one, _two, four4 = 1, 2, 4
+ def __getattr__(self, name):
+ if name == "five": return 5
+ raise TypeError("Never do this")
+ self.formatEquals(
+ "Count with me; 1 2 4",
+ "Count with me; {0.one} {item._two} {1.four4}",
+ Container, Container, item=Container)
+ self.formatEquals(
+ "Five is 5", "Five is {c.five}", c=Container())
+ self.formatRaises(AttributeError,
+ "Missing {0.rabbit} lookup", Container)
+ self.formatRaises(TypeError,
+ "Forbidden {0.secret} lookup", Container())
+
+ def test_items(self):
+ d = dict(val="value", sum=1)
+ t = tuple(("apple", "ball", "cat"))
+ self.formatEquals(
+ "The value of apple",
+ "The {0[val]} of {t[0]}", d, t=t)
+ # Decided against negative indices for now
+ #self.formatEquals(
+ # "The shiny red ball",
+ # "The shiny red {0[-2]}", t)
+
+ def test_formatlookup(self):
+ self.formatEquals("32_0>4d", "{0:{1}}", 32, "0>4d")
+ self.formatEquals("32_*>4d", "{0:{1}{2}4{3}}", 32, "*", ">", "d")
+
+ def test_specifiers(self):
+ self.formatEquals("97_c", "{0:c}", ord("a"))
+ self.formatEquals("8_08b", "{0:08b}", 8)
+ self.formatEquals("8_ >3d", "{0: >3d}", 8)
+ self.formatEquals("0.1515_.0%", "{0:.0%}", .1515)
+
+ def test_custom_format(self):
+ class Custom(object):
+ def __format__(self, specifiers):
+ return specifiers
+ custom = Custom()
+ self.formatEquals("magic", "{0:magic}", custom)
+ self.formatEquals("custom", "{0:{1}}", custom, "custom")
+
+ def test_syntaxerror(self):
+ self.assertRaises(Exception, "}{", True)
+ self.assertRaises(Exception, "{0", True)
+ self.assertRaises(Exception, "{0.[]}", True)
+ self.assertRaises(Exception, "{0[0}", True)
+ self.assertRaises(Exception, "{0[0:foo}", True)
+ self.assertRaises(Exception, "{c]}", True)
+ self.assertRaises(Exception, "{{1}}", True, 0)
+ self.assertRaises(Exception, "{{ {{{0}}", True)
+ self.assertRaises(Exception, "{0}}", True)
+
+
+def test_main():
+ test_support.run_unittest(FormatTest)
+
+if __name__ == "__main__":
+ test_main()
Added: sandbox/trunk/pep3101/unicodeformat.c
==============================================================================
--- (empty file)
+++ sandbox/trunk/pep3101/unicodeformat.c Thu Mar 1 09:22:25 2007
@@ -0,0 +1,1091 @@
+#define DUMMY_FORMATTING 1
+
+/*
+ unicodeformat.c -- implementation of PEP 3101
+
+ PEP 3101 and example Python implementation written by Talin
+
+ This module designed and written by Patrick Maupin and Eric V Smith
+
+ This module is designed to be compiled standalone, or from inside stringformat.c,
+ to support both unicode and traditional strings.
+*/
+
+/*
+ XXX -- todo: insert a fragment of the source string into error messages
+*/
+
+#ifndef COMPILED_FROM_INSIDE_STRINGFORMAT
+#include "Python.h"
+#define C_UNICODE 1
+#endif
+
+#if C_UNICODE
+#define CH_TYPE Py_UNICODE
+#define CH_TYPE_ISDECIMAL Py_UNICODE_ISDECIMAL
+#define CH_TYPE_TODECIMAL Py_UNICODE_TODECIMAL
+#define STROBJ_AS_PTR PyUnicode_AS_UNICODE
+#define STROBJ_GET_SIZE PyUnicode_GET_SIZE
+#define STROBJ_NEW PyUnicode_FromUnicode
+#define STROBJ_RESIZE PyUnicode_Resize
+#define STROBJ_CHECK PyUnicode_Check
+#define STROBJ_FORMAT PyUnicode_FormatMethod
+#define STROBJ_STR PyObject_Unicode
+#else
+#define CH_TYPE char
+#define CH_TYPE_ISDECIMAL(x) ((x >= '0') && (x <= '9'))
+#define CH_TYPE_TODECIMAL(x) (CH_TYPE_ISDECIMAL(x) ? (x - '0') : -1)
+#define STROBJ_AS_PTR PyString_AS_STRING
+#define STROBJ_GET_SIZE PyString_GET_SIZE
+#define STROBJ_NEW PyString_FromStringAndSize
+#define STROBJ_RESIZE _PyString_Resize
+#define STROBJ_CHECK PyString_Check
+#define STROBJ_FORMAT PyString_FormatMethod
+#define STROBJ_STR PyObject_Str
+#endif
+
+/* Try to support older versions of Python*/
+#if PYTHON_API_VERSION < 1013
+typedef int Py_ssize_t;
+#define Py_LOCAL_INLINE(x) static x
+#endif
+
+/* Defines for more efficiently reallocating the string buffer */
+#define INITIAL_SIZE_INCREMENT 100
+#define SIZE_MULTIPLIER 2
+#define MAX_SIZE_INCREMENT 3200
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* XXX -- remove this include if integrated into Python.h ??? */
+#include "pep3101.h"
+
+/*
+ A SubString is a string between two unicode pointers.
+*/
+typedef struct {
+ CH_TYPE *ptr; /* first character, or the current read position */
+ CH_TYPE *end; /* one past the last character */
+} SubString;
+
+/*
+ A SubStringObj is like a SubString, but also has an associated
+ object (which may be null).
+*/
+typedef struct {
+ CH_TYPE *ptr; /* first character, or the current position */
+ CH_TYPE *end; /* one past the last character */
+ PyObject * obj; /* string object the pointers refer into, or NULL */
+} SubStringObj;
+
+/*
+ If this were written in C++, FmtState would be the class,
+ and most of the functions inside this file would be members
+ of this class.
+*/
+typedef struct {
+ /* args passed to PyString_FormatMethod or PyUnicode_FormatMethod */
+ PyObject *args;
+ /* keywords passed to PyString_FormatMethod or PyUnicode_FormatMethod */
+ PyObject *keywords;
+ /* current position and end of the 'self' string passed to FormatMethod */
+ SubString fmtstr;
+ /* Output string we are constructing, including current and end pointers*/
+ SubStringObj outstr;
+ /* Field Specifier, after the colon in {1:{2}}
+ This may or may not have a valid object (the field specifier might
+ just be a substring of the fmtstr).  If it does not have its own
+ object, the .obj struct member will be NULL */
+ SubStringObj fieldspec;
+ /* size_increment is used for optimizing string growth */
+ int size_increment;
+ /* max_recursion is used to limit the ability of a malicious string
+ to damage the stack. Default value is 4 */
+ int max_recursion;
+ /* By default, leading underscores are not allowed for security reasons */
+ int allow_leading_under;
+ /* positional_arg_set contains one bit for every positional argument
+ that we still expect to be used. This implementation only checks
+ that the first 32 positional arguments are actually used. If they
+ want more than that, they probably really need the check, but on
+ the other hand they are probably beyond help, so the check would
+ be necessary but not sufficient :) */
+ int positional_arg_set;
+ /* Keyword arguments can be checked as well */
+ PyObject *keyword_arg_set;
+ /* For some interface functions, we could have a list or tuple of
+ dictionaries to search, e.g. locals()/globals(). */
+ int keywords_is_tuple;
+} FmtState;
+
+/*
+ Our internal conversion functions have this signature.
+
+ returns the number of characters written, or -1 if error
+*/
+/* XXX obviously wrong, but just a placeholder currently */
+typedef Py_ssize_t (*ConversionFunction)(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+
+/*
+ Forward declarations for our conversion functions
+*/
+static Py_ssize_t convert_binary(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_char(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_decimal(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_exponent(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_exponentUC(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_fixed(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_fixedUC(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_general(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_generalUC(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_number(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_octal(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_repr(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_string(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_hex(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_hexUC(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+static Py_ssize_t convert_percentage(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign);
+
+
+/* Some forward declarations for recursion */
+static PyObject *
+get_field_object(FmtState *fs);
+
+static PyObject *
+recurse_format(FmtState *fs);
+
+/*
+ Most of our errors are value errors, because to Python, the
+ format string is a "value". Also, it's convenient to return
+ a NULL when we are erroring out.
+*/
+/* Raise ValueError with message s; always returns NULL so callers can
+   write "return SetError(...)". */
+static void *
+SetError(const char *s)
+{
+    void *const failure = NULL;
+
+    PyErr_SetString(PyExc_ValueError, s);
+    return failure;
+}
+
+/*
+ check_fmtstr returns True if we still have characters
+ left in the format string.
+*/
+Py_LOCAL_INLINE(int)
+check_fmtstr(FmtState *fs)
+{
+    /* Characters remain: nothing to report. */
+    if (fs->fmtstr.ptr < fs->fmtstr.end)
+        return 1;
+
+    /* Ran off the end of the format string: raise and report false. */
+    SetError("Invalid format string");
+    return 0;
+}
+
+/*
+ end_identifier returns true if a character marks
+ the end of an identifier string.
+
+ Although the PEP specifies that identifiers are
+ numbers or valid Python identifiers, we just let
+ getattr/getitem handle that, so the implementation
+ is more flexible than the PEP would indicate.
+*/
+Py_LOCAL_INLINE(int)
+end_identifier(CH_TYPE c)
+{
+    /* Any of . [ ] } : terminates an identifier. */
+    return c == '.' || c == '[' || c == ']' || c == '}' || c == ':';
+}
+
+
+/* returns true if this character is a specifier alignment token */
+Py_LOCAL_INLINE(int)
+alignment_token(CH_TYPE c)
+{
+    /* The alignment characters are < > = ^ */
+    return c == '<' || c == '>' || c == '=' || c == '^';
+}
+
+/* returns true if this character is a sign element */
+Py_LOCAL_INLINE(int)
+sign_element(CH_TYPE c)
+{
+    /* Sign characters: space, plus, minus, and an opening parenthesis. */
+    return c == ' ' || c == '+' || c == '-' || c == '(';
+}
+
+
+
+/* returns a pointer to our conversion function, or NULL if invalid */
+/* Maps a one-character type code to its converter.  Codes are case
+   sensitive; any character not listed yields NULL. */
+Py_LOCAL_INLINE(ConversionFunction)
+conversion_function(CH_TYPE c)
+{
+ switch (c) {
+ case 'b': return convert_binary; /* base-2 */
+ case 'c': return convert_char; /* as character */
+ case 'd': return convert_decimal; /* decimal integer */
+ case 'e': return convert_exponent; /* exponential notation */
+ case 'E': return convert_exponentUC; /* exponential notation with uppercase 'E' */
+ case 'f': return convert_fixed; /* fixed-point */
+ case 'F': return convert_fixedUC; /* fixed-point with uppercase */
+ case 'g': return convert_general; /* general number notation */
+ case 'G': return convert_generalUC; /* general number notation with uppercase 'E' */
+ case 'n': return convert_number; /* number in locale-specific format */
+ case 'o': return convert_octal; /* octal */
+ case 'r': return convert_repr; /* in repr() format */
+ case 's': return convert_string; /* convert using str() */
+ case 'x': return convert_hex; /* base 16 */
+ case 'X': return convert_hexUC; /* base 16 uppercase */
+ case '%': return convert_percentage; /* as percentage */
+ default:
+ return NULL;
+ }
+}
+
+/* Fill in a SubStringObj from a Python string */
+Py_LOCAL_INLINE(SubStringObj)
+make_substrobj(PyObject *obj)
+{
+    SubStringObj view;
+    CH_TYPE *first = STROBJ_AS_PTR(obj);
+
+    /* The view spans the whole string and remembers the object it
+       points into. */
+    view.ptr = first;
+    view.end = first + STROBJ_GET_SIZE(obj);
+    view.obj = obj;
+    return view;
+}
+
+#if PYTHON_API_VERSION < 1013
+static void
+PySet_Discard(PyObject *myset, PyObject *mykey)
+{
+ /* XXX --- Need to add the right code here */
+}
+#endif
+
+/* XXX -- similar function elsewhere ???? */
+/*
+ output_data dumps characters into our output string
+ buffer.
+
+ In some cases, it has to reallocate the string.
+
+ It returns a status: 0 for a failed reallocation,
+ 1 for success.
+*/
+static int
+output_data(FmtState *fs, const CH_TYPE *s, Py_ssize_t count)
+{
+    if (count > fs->outstr.end - fs->outstr.ptr) {
+        /* Not enough room: grow the output object, then re-derive the
+           pointers, since resizing may move the buffer. */
+        Py_ssize_t used = fs->outstr.ptr - STROBJ_AS_PTR(fs->outstr.obj);
+        Py_ssize_t capacity = used + count + fs->size_increment;
+        CH_TYPE *base;
+
+        if (STROBJ_RESIZE(&fs->outstr.obj, capacity) < 0)
+            return 0;
+
+        base = STROBJ_AS_PTR(fs->outstr.obj);
+        fs->outstr.ptr = base + used;
+        fs->outstr.end = base + capacity;
+
+        /* Ask for progressively more slack until the cap is reached. */
+        if (fs->size_increment < MAX_SIZE_INCREMENT)
+            fs->size_increment *= SIZE_MULTIPLIER;
+    }
+
+    memcpy(fs->outstr.ptr, s, count * sizeof(CH_TYPE));
+    fs->outstr.ptr += count;
+    return 1;
+}
+
+/*
+ get_python_identifier is a bit of a misnomer. It returns
+ a value for use with getattr or getindex. This value
+ will usually be a string value, but we allow {} inside the
+ text, so it could really be any arbitrary python object,
+ as retrieved from the method arguments.
+*/
+/*
+   Returns a new reference to the key to use for the next getattr /
+   getitem step, or NULL with an exception set.
+
+   fs          -- parser state; fs->fmtstr.ptr is advanced past the
+                  identifier (or past a nested {...} lookup).
+   isargument  -- true when the identifier names a top-level argument,
+                  in which case it is removed from fs->keyword_arg_set.
+*/
+static PyObject *
+get_python_identifier(FmtState *fs, int isargument)
+{
+    CH_TYPE *startptr;
+    PyObject *result;
+
+    if (!check_fmtstr(fs))
+        return NULL;
+    if (*fs->fmtstr.ptr == '{') {
+        /* This little bit of mutual recursion allows nested dictionary
+           lookups and computed attribute names
+        */
+        if (--fs->max_recursion < 0) {
+            fs->max_recursion++;
+            return SetError("Max string recursion exceeded");
+        }
+        /* Consume the opening brace: get_field_object expects to start
+           on the first character of the field name, and would otherwise
+           come straight back here on the same '{'. */
+        fs->fmtstr.ptr++;
+        result = get_field_object(fs);
+        fs->max_recursion++;
+        if (result == NULL)
+            return NULL;
+        /* get_field_object only returns an object while positioned on
+           '}' or ':', so the dereference below is in range. */
+        if (*fs->fmtstr.ptr++ != '}') {
+            Py_DECREF(result);      /* don't leak the looked-up object */
+            return SetError("Expected closing }");
+        }
+        return result;
+    }
+    if (end_identifier(*fs->fmtstr.ptr))
+        return SetError("Expected attribute or index");
+    if ((*fs->fmtstr.ptr == '_') && !fs->allow_leading_under)
+        return SetError("Index/attribute leading underscores disallowed");
+
+    /* Scan to the terminating character, checking the bounds before
+       each new character is examined. */
+    startptr = fs->fmtstr.ptr;
+    while (!end_identifier(*fs->fmtstr.ptr)) {
+        fs->fmtstr.ptr++;
+        if (!check_fmtstr(fs))
+            return NULL;
+    }
+    result = STROBJ_NEW(startptr, fs->fmtstr.ptr - startptr);
+    if (result == NULL)
+        return NULL;
+    if (isargument && (fs->keyword_arg_set != NULL))
+        PySet_Discard(fs->keyword_arg_set, result);
+    /*
+        We might want to add code here to check for invalid Python
+        identifiers.  All identifiers are eventually passed to getattr
+        or getitem, so there is a check when used.  However, we might
+        want to remove (or not) the ability to have strings like
+        "a/b" or " ab" or "-1" (which is not parsed as a number).
+        For now, this is left as an exercise for the first disgruntled
+        user...
+
+    if (XXX -- need check function) {
+        Py_DECREF(result);
+        PyErr_SetString(PyExc_ValueError, "Invalid embedded Python identifier");
+        return NULL;
+    }
+    */
+    return result;
+}
+
+/*
+ If keywords are supplied as a sequence of dictionaries
+ (e.g. locals/globals) then name_mapper will do multiple
+ lookups until it finds the right information. This
+ should not be called (keywords_is_tuple should not be
+ set) unless fs->keywords is a tuple.
+*/
+/*
+   Looks key up in each mapping of the keywords tuple in turn.
+
+   Returns a new reference to the first hit.  If no mapping has the key,
+   returns NULL with the exception from the last lookup left set (or a
+   KeyError if the tuple is empty).
+*/
+static PyObject *
+name_mapper(PyObject *keywords, PyObject *key)
+{
+    PyObject *result;
+    Py_ssize_t index;
+    Py_ssize_t count = PyTuple_GET_SIZE(keywords);
+
+    if (count <= 0) {
+        /* Nothing to search; item 0 must not be read. */
+        PyErr_SetObject(PyExc_KeyError, key);
+        return NULL;
+    }
+
+    for (index = 0; index < count; index++) {
+        /* Discard the miss from the previous mapping before retrying. */
+        if (index > 0)
+            PyErr_Clear();
+        result = PyObject_GetItem(PyTuple_GET_ITEM(keywords, index), key);
+        /* PyObject_GetItem already returns a new reference, so it is
+           passed on as-is; another INCREF would leak it. */
+        if (result != NULL)
+            return result;
+    }
+    return NULL;
+}
+
+/*
+    get_integer_index consumes 0 or more decimal digit characters
+    from a format string, updates *result with the corresponding
+    non-negative integer, and returns the number of digits consumed.
+
+    If the value does not fit in a Py_ssize_t, a ValueError is set,
+    *result is set to 0 and 0 is returned; callers that must tell this
+    apart from "no digits present" can test PyErr_Occurred().
+*/
+static int
+get_integer_index(FmtState *fs, Py_ssize_t *result)
+{
+    /* Largest Py_ssize_t, computed from its width so that this also
+       works where Py_ssize_t is only a local typedef. */
+    const Py_ssize_t maxval =
+        (Py_ssize_t)((((size_t)1) << (sizeof(Py_ssize_t) * 8 - 1)) - 1);
+    Py_ssize_t accumulator = 0;
+    int numdigits = 0;
+    int digitval;
+
+    *result = 0;
+    while (fs->fmtstr.ptr < fs->fmtstr.end) {
+        digitval = CH_TYPE_TODECIMAL(*fs->fmtstr.ptr);
+        if (digitval < 0)
+            break;
+        /* Reject the digit before accumulator*10 + digitval could
+           overflow, instead of detecting the overflow afterwards. */
+        if (accumulator > (maxval - digitval) / 10) {
+            SetError("field width or index value too large");
+            return 0;
+        }
+        accumulator = accumulator * 10 + digitval;
+        fs->fmtstr.ptr++;
+        numdigits++;
+    }
+    *result = accumulator;
+    return numdigits;
+}
+
+/*
+ get_specifier retrieves the part of the format string
+ between the colon and trailing }.
+*/
+/*
+   Scans from just after the ':' to the matching '}', recording the
+   span in fs->fieldspec.  If the span contains nested {...} fields it
+   is expanded through recurse_format and fs->fieldspec is pointed at
+   the resulting string object instead.
+
+   Returns 1 on success, 0 with an exception set on failure.
+*/
+static int
+get_specifier(FmtState *fs)
+{
+    CH_TYPE c;
+    int curlycount = 1;     /* the field's own opening brace */
+    int gotcurly = 0;       /* saw a nested '{' */
+
+    fs->fieldspec.ptr = fs->fmtstr.ptr;
+    for (;;) {
+        if (!check_fmtstr(fs))
+            return 0;
+        c = *fs->fmtstr.ptr++;
+        if (c == '{') {
+            gotcurly = 1;
+            curlycount++;
+        }
+        else if (c == '}') {
+            curlycount--;
+            if (curlycount <= 0)
+                break;
+        }
+    }
+    /* ptr is now one past the closing brace */
+    fs->fieldspec.end = fs->fmtstr.ptr - 1;
+    if (gotcurly) {
+        PyObject *myobject;
+        SubString savefmt = fs->fmtstr;
+
+        /* Temporarily parse the specifier as its own format string. */
+        fs->fmtstr.ptr = fs->fieldspec.ptr;
+        fs->fmtstr.end = fs->fieldspec.end;
+        myobject = recurse_format(fs);
+        /* Restore the outer position on failure as well, so the state
+           never keeps pointing into the nested span. */
+        fs->fmtstr = savefmt;
+        if (myobject == NULL)
+            return 0;
+        fs->fieldspec = make_substrobj(myobject);
+    }
+    return 1;
+}
+
+/*
+ get_field_object returns the object inside {}
+ It handles getindex and getattr lookups and consumes
+ the format string up to but not including the trailing
+ } or the optional : format specifier separator.
+*/
+/*
+   Resolves the field reference at fs->fmtstr.ptr, e.g. 0.attr[key].
+
+   The first component is looked up in fs->args when it starts with a
+   decimal digit, otherwise in fs->keywords; each following ".name" is
+   a getattr and each "[key]" a getitem.
+
+   Returns a new reference with fs->fmtstr.ptr left on the terminating
+   '}' or ':', or NULL with an exception set.
+*/
+static PyObject *
+get_field_object(FmtState *fs)
+{
+    PyObject *myobj, *subobj, *newobj;
+    CH_TYPE c;
+    Py_ssize_t index = 0;
+    int isindex, expectclose, isnumeric, isargument;
+
+    if (!check_fmtstr(fs))
+        return NULL;
+    isnumeric = (CH_TYPE_ISDECIMAL(*fs->fmtstr.ptr));
+    myobj = isnumeric ? fs->args : fs->keywords;
+    if (myobj != NULL) {
+        Py_INCREF(myobj);
+    }
+    else {
+        /* The container is absent (presumably keywords is NULL when the
+           caller supplied none -- confirm against the entry points).
+           Use an empty stand-in so the lookup fails with the normal
+           IndexError/KeyError instead of dereferencing NULL. */
+        myobj = isnumeric ? PyTuple_New(0) : PyDict_New();
+        if (myobj == NULL)
+            return NULL;
+    }
+
+    for (isindex=1, expectclose=0, isargument=1;;) {
+        if (!check_fmtstr(fs))
+            break;
+        if (!isindex) {
+            if ((subobj = get_python_identifier(fs, isargument)) == NULL)
+                break;
+            newobj = (isargument && fs->keywords_is_tuple)
+                ? name_mapper(myobj, subobj)
+                : PyObject_GetAttr(myobj, subobj);
+            Py_DECREF(subobj);
+        }
+        else {
+            isnumeric = (CH_TYPE_ISDECIMAL(*fs->fmtstr.ptr));
+            if (isnumeric) {
+                /* At least one digit is present, so a result <= 0 can
+                   only mean the value was too large (exception set). */
+                if (get_integer_index(fs, &index) <= 0)
+                    break;
+                /* Only arguments that have a bit in the set are
+                   tracked; shifting by the full width or more would be
+                   undefined. */
+                if (isargument &&
+                    index < (Py_ssize_t)(sizeof(fs->positional_arg_set) * 8))
+                    fs->positional_arg_set &= (int)~(1U << index);
+            }
+            if (isnumeric && PySequence_Check(myobj))
+                newobj = PySequence_GetItem(myobj, index);
+            else {
+                /* XXX -- do we need PyLong_FromLongLong?  Using ssizet, not int... */
+                subobj = isnumeric ?
+                    PyInt_FromLong(index) :
+                    get_python_identifier(fs, isargument);
+                if (subobj == NULL)
+                    break;
+                newobj = PyObject_GetItem(myobj, subobj);
+                Py_DECREF(subobj);
+            }
+        }
+        Py_DECREF(myobj);
+        myobj = newobj;
+        /* A failed lookup has already set its exception.  Stop here
+           rather than carrying a NULL object into the next step. */
+        if (myobj == NULL)
+            return NULL;
+        if (expectclose)
+            if ((!check_fmtstr(fs)) || (*fs->fmtstr.ptr++ != ']')) {
+                SetError("Expected ]");
+                break;
+            }
+        if (!check_fmtstr(fs))
+            break;
+        c = *fs->fmtstr.ptr;
+        if ((c == '}') || (c == ':'))
+            return myobj;
+        fs->fmtstr.ptr++;
+        isargument = 0;
+        isindex = expectclose = (c == '[');
+        if (!isindex && (c != '.')) {
+            SetError("Expected ., [, :, or }");
+            break;
+        }
+    }
+    Py_DECREF(myobj);
+    return NULL;
+}
+/*
+ get_field_and_spec calls subfunctions to retrieve the
+ field object and optional specification string.
+*/
+static PyObject *
+get_field_and_spec(FmtState *fs)
+{
+    PyObject *field;
+    CH_TYPE terminator;
+
+    /* Start with an empty specifier that has no object of its own. */
+    fs->fieldspec.obj = NULL;
+    fs->fieldspec.ptr = fs->fmtstr.ptr;
+    fs->fieldspec.end = fs->fmtstr.ptr;
+
+    field = get_field_object(fs);
+    if (field == NULL)
+        return NULL;
+
+    if (check_fmtstr(fs)) {
+        terminator = *fs->fmtstr.ptr++;
+        /* '}' closes the field directly; ':' introduces a specifier
+           that has to parse successfully. */
+        if (terminator == '}')
+            return field;
+        if (terminator == ':' && get_specifier(fs))
+            return field;
+    }
+
+    Py_DECREF(field);
+    return NULL;
+}
+
+/*
+ user_format is invoked to format an object with a defined __format__
+ attribute.
+*/
+static int
+user_format(FmtState *fs, PyObject *__format__)
+{
+    PyObject *spec = fs->fieldspec.obj;
+    PyObject *formatted;
+    int ok;
+
+    if (spec == NULL) {
+        /* The specifier is still only a span of the format string;
+           make a real string object for the call. */
+        spec = STROBJ_NEW(fs->fieldspec.ptr,
+                          fs->fieldspec.end - fs->fieldspec.ptr);
+        if (spec == NULL)
+            return 0;
+        fs->fieldspec.obj = spec; /* Owned by our caller now */
+    }
+
+    /* XXX -- possible optimization to CallFunctionWithArgs */
+    formatted = PyObject_CallFunction(__format__, "(O)", spec);
+    if (formatted == NULL)
+        return 0;
+
+    if (STROBJ_CHECK(formatted)) {
+        ok = output_data(fs, STROBJ_AS_PTR(formatted),
+                         STROBJ_GET_SIZE(formatted));
+    }
+    else {
+        SetError("__format__ method did not return correct string type");
+        ok = 0;
+    }
+    Py_DECREF(formatted);
+    return ok;
+}
+
+/* Parsed form of a default format specifier, filled in by
+   parse_default_format. */
+typedef struct {
+ CH_TYPE fill_char; /* padding character, '\0' if none was given */
+ CH_TYPE align; /* alignment token, '\0' if none was given */
+ CH_TYPE sign; /* sign element, '\0' if none was given */
+ Py_ssize_t width; /* field width, -1 if not specified */
+ Py_ssize_t precision; /* precision, -1 if not specified */
+ CH_TYPE type; /* conversion type character, '\0' if none was given */
+} DefaultFormat;
+
+/*
+ parse the default specification
+*/
+
+/*
+   Parses a specifier of the form
+
+       [[fill]align][sign][0][width][.precision][type]
+
+   from fs->fmtstr into *format.  Fields that are not present keep
+   their defaults ('\0' for characters, -1 for width and precision).
+
+   fs->fmtstr.ptr is advanced past everything except the type
+   character.  NOTE(review): this reads fs->fmtstr, not fs->fieldspec,
+   because get_integer_index does; confirm that the caller points
+   fmtstr at the specifier first.
+
+   Returns 1 on success, 0 with an exception set on failure.
+*/
+static int
+parse_default_format(FmtState *fs, DefaultFormat *format)
+{
+    SubString *spec = &fs->fmtstr;
+    Py_ssize_t spec_len = spec->end - spec->ptr;
+    Py_ssize_t index = 0;
+    Py_ssize_t remaining;
+    int ndigits;
+
+    format->fill_char = '\0';
+    format->align = '\0';
+    format->sign = '\0';
+    format->width = -1;
+    format->precision = -1;
+    format->type = '\0';
+
+    /* If the second char is an alignment token,
+       then parse the fill char */
+    if (spec_len >= 2 && alignment_token(spec->ptr[1])) {
+        format->align = spec->ptr[1];
+        format->fill_char = spec->ptr[0];
+        index = 2;
+    } else if (spec_len >= 1 && alignment_token(spec->ptr[0])) {
+        format->align = spec->ptr[0];
+        index = 1;
+    }
+
+    /* Parse the various sign options */
+    if (index < spec_len && sign_element(spec->ptr[index])) {
+        format->sign = spec->ptr[index];
+        index++;
+        if (index < spec_len && spec->ptr[index] == ')') {
+            index++;
+        }
+    }
+
+    /* The special case for 0-padding (backwards compat) */
+    if (format->fill_char == '\0' && index < spec_len && spec->ptr[index] == '0') {
+        format->fill_char = '0';
+        if (format->align == '\0') {
+            format->align = '=';
+        }
+        index++;
+    }
+
+    /* get_integer_index reads from spec->ptr, so step over the prefix
+       parsed above before asking it for the width. */
+    spec->ptr += index;
+
+    ndigits = get_integer_index(fs, &format->width);
+    if (ndigits <= 0) {
+        /* Either the value overflowed (exception set) or there were no
+           digits, in which case the width stays unspecified. */
+        if (PyErr_Occurred())
+            return 0;
+        format->width = -1;
+    }
+
+    /* Parse field precision */
+    if (spec->ptr < spec->end && *spec->ptr == '.') {
+        spec->ptr++;
+        ndigits = get_integer_index(fs, &format->precision);
+        if (ndigits <= 0) {
+            if (PyErr_Occurred())
+                return 0;
+            format->precision = -1;
+        }
+    }
+
+    /* Finally, parse the type field: at most one character may remain */
+    remaining = spec->end - spec->ptr;
+    if (remaining > 1) {
+        /* invalid conversion spec */
+        SetError("Invalid conversion specification");
+        return 0;
+    }
+
+    if (remaining == 1) {
+        format->type = *spec->ptr;
+    }
+
+    return 1;
+}
+
+
+/* conversion functions */
+/* The converters from here through convert_octal are not implemented
+   yet: each returns -1 (the error value of the ConversionFunction
+   signature) without writing output or setting an exception. */
+static Py_ssize_t
+convert_binary(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_char(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_decimal(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_exponent(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_exponentUC(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_fixed(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_fixedUC(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_general(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_generalUC(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_number(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+static Py_ssize_t
+convert_octal(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+ return -1;
+}
+
+/*
+   'r' conversion: writes repr(fieldobj), converted to the string type
+   being built, to the output buffer.
+
+   Returns the number of characters written, or -1 with an exception
+   set on failure.  The sign argument is unused.
+*/
+static Py_ssize_t
+convert_repr(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+    int ok;
+    PyObject *s;
+    PyObject *r;
+    Py_ssize_t len;
+
+    r = PyObject_Repr(fieldobj);
+    if (r == NULL)
+        return -1;      /* -1, not 0, is the error value */
+
+    s = STROBJ_STR(r);
+    Py_DECREF(r);
+    if (s == NULL)      /* the conversion can fail as well */
+        return -1;
+
+    len = STROBJ_GET_SIZE(s);
+    ok = output_data(fs, STROBJ_AS_PTR(s), len);
+    Py_DECREF(s);
+
+    return ok ? len : -1;
+}
+
+/*
+ convert_string -- write the string form of fieldobj to the output.
+
+ Converts fieldobj with STROBJ_STR and appends the characters with
+ output_data.  The sign argument is not used.
+
+ Returns the number of characters written, or -1 if the conversion
+ or the output step fails.
+*/
+static Py_ssize_t
+convert_string(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+    Py_ssize_t nchars;
+    Py_ssize_t wrote;
+    PyObject *text = STROBJ_STR(fieldobj);
+
+    if (text == NULL)
+        return -1;
+
+    nchars = STROBJ_GET_SIZE(text);
+    wrote = output_data(fs, STROBJ_AS_PTR(text), nchars);
+    Py_DECREF(text);
+
+    if (!wrote)
+        return -1;
+    return nchars;
+}
+
+/*
+ convert_hex -- placeholder; hex conversion is not implemented yet.
+ Always fails: reports the reason through SetError and returns -1,
+ the failure value default_format tests for (len < 0).
+*/
+static Py_ssize_t
+convert_hex(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+    SetError("Hex conversion not implemented");
+    return -1;
+}
+
+/*
+ convert_hexUC -- placeholder; upper-case hex conversion is not
+ implemented yet.  Always fails: reports the reason through SetError
+ and returns -1, the failure value default_format tests for (len < 0).
+*/
+static Py_ssize_t
+convert_hexUC(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+    SetError("Uppercase hex conversion not implemented");
+    return -1;
+}
+
+/*
+ convert_percentage -- placeholder; percentage conversion is not
+ implemented yet.  Always fails: reports the reason through SetError
+ and returns -1, the failure value default_format tests for (len < 0).
+*/
+static Py_ssize_t
+convert_percentage(PyObject *fieldobj, FmtState *fs, CH_TYPE *sign)
+{
+    SetError("Percentage conversion not implemented");
+    return -1;
+}
+
+/*
+ default_format -- "Then a miracle occurs"
+
+ Built-in formatting of fieldobj; renderfield calls this when the
+ lookup of a __format__ attribute on the object fails.
+
+ Returns 1 on success and 0 on failure.
+
+ Two mutually exclusive implementations are selected at compile time:
+
+ DUMMY_FORMATTING == 1: writes STROBJ_STR(fieldobj) to the output and,
+ when the field specifier is non-empty, follows it with '_' and the
+ raw specifier text.
+
+ otherwise: parses the specifier with parse_default_format, chooses a
+ default type character when none was given, looks up the matching
+ conversion function and lets it write the converted value.  The sign
+ prefix/suffix and the padding amount are then computed, but nothing
+ is done with them yet.
+*/
+static int default_format(FmtState *fs, PyObject *fieldobj)
+{
+#if DUMMY_FORMATTING == 1
+ PyObject *myobj;
+ int ok;
+
+ /* Test implementation, only at top level */
+ CH_TYPE under = '_';
+
+ myobj = STROBJ_STR(fieldobj);
+ if (myobj == NULL)
+ return 0;
+ ok = output_data(fs, STROBJ_AS_PTR(myobj),
+ STROBJ_GET_SIZE(myobj));
+ Py_DECREF(myobj);
+ if (!ok)
+ return 0;
+ if (fs->fieldspec.ptr != fs->fieldspec.end) {
+ if (!output_data(fs, &under, 1))
+ return 0;
+ if (!output_data(fs, fs->fieldspec.ptr,
+ fs->fieldspec.end - fs->fieldspec.ptr))
+ return 0;
+ }
+#else
+
+ Py_ssize_t len;
+ Py_ssize_t padding;
+ DefaultFormat format;
+ ConversionFunction conversion;
+ CH_TYPE sign = '\0';
+ CH_TYPE prefix;
+ CH_TYPE suffix;
+
+ if (!parse_default_format(fs, &format)) {
+ return 0;
+ }
+
+ /* if no type character was specified, look up the default type
+ character, based on the type of our object: 'd' for int and long,
+ 'g' for float, 's' for everything else */
+ if (format.type == '\0') {
+ if (PyInt_Check(fieldobj) || PyLong_Check(fieldobj)) {
+ format.type = 'd';
+ } else if (PyFloat_Check(fieldobj)) {
+ format.type = 'g';
+ } else {
+ format.type = 's';
+ }
+ }
+
+ /* handle conversion functions that logically map to other conversion functions? */
+
+ conversion = conversion_function(format.type);
+ if (conversion == NULL) {
+ SetError("Invalid conversion character");
+ return 0;
+ }
+
+ /* convert to a string first */
+ /* get the length written so that we can fixup inside the buffer, as
+ needed; a negative length means the conversion failed */
+ len = conversion(fieldobj, fs, &sign);
+ if (len < 0)
+ return 0;
+
+ /* we wrote "len" bytes. see what fixups need to be done */
+
+ /* Handle the sign logic: the conversion reports the sign through
+ "sign", and format.sign selects how it is shown ('(' wraps a
+ negative value in parentheses, '+' and ' ' mark a positive one) */
+ prefix = '\0';
+ suffix = '\0';
+ if (sign == '-') {
+ if (format.sign == '(') {
+ prefix = '(';
+ suffix = ')';
+ } else {
+ prefix = '-';
+ }
+ } else if (sign == '+') {
+ if (format.sign == '+') {
+ prefix = '+';
+ } else if (format.sign == ' ') {
+ prefix = ' ';
+ }
+ }
+
+ /* Handle the padding logic: the amount of padding is computed here,
+ but applying it (and the prefix/suffix chosen above) is not yet
+ implemented -- the "#if 0" region below holds pseudocode only */
+ if (format.width != -1) {
+ padding = format.width - len - (prefix == '\0' ? 0 : 1) - (suffix == '\0' ? 0 : 1);
+ if (padding > 0) {
+#if 0
+ if align == '>' or align == '^':
+ return fill_char * padding + prefix + result + suffix
+ elif align == '='
+ return prefix + fill_char * padding + result + suffix
+ else:
+ return prefix + result + suffix + fill_char * padding
+
+#endif
+ }
+ }
+
+#endif
+ return 1;
+}
+
+/*
+ renderfield determines if the field object has a defined __format__
+ method, and dispatches to the appropriate subfunction.
+
+ If the object has a __format__ attribute it is handed to user_format.
+ If the lookup fails with AttributeError, the error is cleared and
+ default_format is used instead, with fs->fmtstr temporarily pointed
+ at the field specifier.  Any other lookup error is left set and
+ reported as a failure rather than being silently discarded.
+
+ Returns the result of user_format or default_format, or 0 when the
+ attribute lookup itself failed.
+*/
+static int
+renderfield(FmtState *fs, PyObject *fieldobj)
+{
+    int result;
+    SubString savefmt;
+    PyObject *format_method;
+
+    format_method = PyObject_GetAttrString(fieldobj, "__format__");
+    if (format_method != NULL) {
+        result = user_format(fs, format_method);
+        Py_DECREF(format_method);
+        return result;
+    }
+
+    /* Only a missing attribute means "use the default formatter";
+       anything else is a real error and must propagate. */
+    if (!PyErr_ExceptionMatches(PyExc_AttributeError))
+        return 0;
+    PyErr_Clear(); /* For GetAttr __format__ */
+
+    /* default_format parses fs->fmtstr, so aim it at the field
+       specifier for the duration of the call */
+    savefmt = fs->fmtstr;
+    fs->fmtstr.ptr = fs->fieldspec.ptr;
+    fs->fmtstr.end = fs->fieldspec.end;
+    result = default_format(fs, fieldobj);
+    fs->fmtstr = savefmt;
+
+    return result;
+}
+
+/*
+ do_format is the main program loop.  It rummages through
+ the format string, looking for escapes to markup, and
+ calls other functions to move non-markup text to the output,
+ and to perform the markup to the output.
+
+ A fresh output string is allocated and installed in fs->outstr
+ (together with fs->size_increment), so callers that need the
+ previous values must save them first.
+
+ Returns the finished string object resized to the text written,
+ or NULL on failure.
+*/
+static PyObject *
+do_format(FmtState *fs)
+{
+    PyObject *myobj;
+    CH_TYPE c, *start;
+    Py_ssize_t count, total;
+    SubString fmtstr;
+    int doubled, ok;
+
+    fmtstr = fs->fmtstr;
+    count = fmtstr.end - fmtstr.ptr;
+    myobj = STROBJ_NEW(NULL, count + INITIAL_SIZE_INCREMENT);
+    if (myobj == NULL)
+        return NULL;
+    fs->outstr = make_substrobj(myobj);
+    fs->size_increment = INITIAL_SIZE_INCREMENT;
+
+    ok = 1;
+    c = '\0';  /* Avoid compiler warning */
+    while (fmtstr.ptr < fmtstr.end) {
+        /* scan literal text up to the next brace (or the end) */
+        start = fmtstr.ptr;
+        count = total = fmtstr.end - start;
+        while (count && ((c = *fmtstr.ptr) != '{') && (c != '}')) {
+            fmtstr.ptr++;
+            count--;
+        }
+        count = total - count;  /* literal characters before the brace */
+        total -= count;         /* characters left, brace included */
+
+        doubled = (total > 1) && (fmtstr.ptr[1] == c);
+        if (doubled) {
+            /* "{{" or "}}": emit the literal text plus one brace */
+            if (!output_data(fs, start, count + 1)) {
+                ok = 0;
+                break;
+            }
+            fmtstr.ptr += 2;
+            continue;
+        }
+        if (count && !output_data(fs, start, count)) {
+            ok = 0;
+            break;
+        }
+        if (total < 2) {
+            /* total == 0: clean end of input; total == 1: a lone
+               brace is the last character */
+            if (total) {
+                SetError("Invalid format string -- { or } at end");
+                ok = 0;
+            }
+            break;
+        }
+        if (c == '}') {
+            SetError("Invalid format string -- single } encountered");
+            ok = 0;
+            break;
+        }
+
+        /* a replacement field: parse it starting after the '{' */
+        fs->fmtstr.ptr = fmtstr.ptr + 1;
+        myobj = get_field_and_spec(fs);
+        ok = (myobj != NULL) && renderfield(fs, myobj);
+        Py_XDECREF(fs->fieldspec.obj);
+        Py_XDECREF(myobj);
+        if (!ok)
+            break;
+        /* continue where the field parser stopped */
+        fmtstr.ptr = fs->fmtstr.ptr;
+    }
+
+    myobj = fs->outstr.obj;
+    if (ok) {
+        /* trim the buffer to what was actually written */
+        count = fs->outstr.ptr - STROBJ_AS_PTR(myobj);
+        if (STROBJ_RESIZE(&myobj, count) >= 0)
+            return myobj;
+    }
+    Py_XDECREF(myobj);
+    return NULL;
+}
+
+/*
+ recurse_format handles nested format specifiers such as {1:{2}}.
+
+ do_format installs its own output state in fs, so the caller's
+ outstr and size_increment are saved here and put back afterwards.
+ fs->max_recursion is decremented for the duration of the nested
+ call; if it drops below zero an error is set and the nested format
+ is not attempted.
+
+ Returns whatever do_format returns, or the SetError result when
+ the recursion limit is exceeded.
+*/
+static PyObject *
+recurse_format(FmtState *fs)
+{
+    PyObject *formatted;
+    SubStringObj outer_outstr;
+    int outer_increment;
+
+    outer_outstr = fs->outstr;
+    outer_increment = fs->size_increment;
+
+    fs->max_recursion -= 1;
+    if (fs->max_recursion < 0)
+        return SetError("Max string recursion exceeded");
+
+    formatted = do_format(fs);
+
+    fs->size_increment = outer_increment;
+    fs->outstr = outer_outstr;
+    fs->max_recursion += 1;
+
+    return formatted;
+}
+
+/*
+ STROBJ_FORMAT (actually PyUnicode_FormatMethod or PyString_FormatMethod)
+ is the public interface to the module.
+
+ self is the format string; args and keywords supply the values that
+ replacement fields refer to.  A FmtState is set up on the stack and
+ handed to do_format, whose result is returned unchanged.
+
+ XXX -- do we need to check input types here, or are we guaranteed
+ they are right????
+*/
+PyObject *
+STROBJ_FORMAT(PyObject *self, PyObject *args, PyObject *keywords)
+{
+    FmtState fs;
+
+    /* the text to scan: [ptr, end) over the characters of self */
+    fs.fmtstr.ptr = STROBJ_AS_PTR(self);
+    fs.fmtstr.end = fs.fmtstr.ptr + STROBJ_GET_SIZE(self);
+
+    /* the values available to replacement fields */
+    fs.args = args;
+    fs.keywords = keywords;
+    fs.keywords_is_tuple = 0;
+
+    /* bookkeeping and limits */
+    fs.positional_arg_set = 0;
+    fs.keyword_arg_set = NULL;
+    fs.allow_leading_under = 1;
+    fs.max_recursion = 4;
+
+    return do_format(&fs);
+}
+
+#ifdef __cplusplus
+}
+#endif
More information about the Python-checkins
mailing list