[Jython-checkins] jython: Improvements to PyString.__format__ and in StringFormatter related to text.
jeff.allen
jython-checkins at python.org
Sun Jun 8 14:13:05 CEST 2014
http://hg.python.org/jython/rev/324e1138e1f3
changeset: 7284:324e1138e1f3
user: Jeff Allen <ja.py at farowl.co.uk>
date: Tue Jun 03 21:30:16 2014 +0100
summary:
Improvements to PyString.__format__ and in StringFormatter related to text.
Brings %s, %r, %c, %% into the new framework. Quite some rationalisation of
StringFormatter made possible. Small improvement in test conformity.
files:
Lib/test/test_format_jy.py | 57 +-
Lib/test/test_unicode.py | 7 +-
src/org/python/core/PyInteger.java | 2 +-
src/org/python/core/PyString.java | 482 ++++-----
src/org/python/core/stringlib/IntegerFormatter.java | 1 +
src/org/python/core/stringlib/InternalFormat.java | 132 +-
src/org/python/core/stringlib/TextFormatter.java | 105 ++
tests/java/org/python/core/StringFormatTest.java | 17 +-
8 files changed, 480 insertions(+), 323 deletions(-)
diff --git a/Lib/test/test_format_jy.py b/Lib/test/test_format_jy.py
--- a/Lib/test/test_format_jy.py
+++ b/Lib/test/test_format_jy.py
@@ -5,8 +5,9 @@
from test import test_support
import unittest
-class FormatTestCase(unittest.TestCase):
- # Tests that %d converts values for custom classes implementing __int__
+class FormatSubclass(unittest.TestCase):
+ # Custom __int__ and __float__ should be respected by %-formatting
+
def test_int_conversion_support(self):
class Foo(object):
def __init__(self, x): self.x = x
@@ -21,9 +22,59 @@
def __float__(self): return self. x
self.assertEqual('1.0', '%.1f' % Foo(1.0))
+class FormatUnicodeBase(unittest.TestCase):
+
+ # Test padding non-BMP result
+ def test_pad_string(self):
+ self.padcheck(u"architect")
+ self.padcheck(u'a\U00010001cde')
+
+class FormatUnicodeClassic(FormatUnicodeBase):
+ # Check using %-formatting
+
+ def padcheck(self, s):
+ self.assertEqual(10, len('%10.4s' % s))
+ self.assertEqual(u' '*6 + s[0:4], '%10.4s' % s)
+ self.assertEqual(u' '*6 + s[0:4], '% 10.4s' % s)
+ self.assertEqual(u' '*6 + s[0:4], '%010.4s' % s)
+ self.assertEqual(s[0:3] + u' '*5, '%-8.3s' % s)
+
+class FormatUnicodeModern(FormatUnicodeBase):
+ # Check using __format__
+
+ def padcheck(self, s):
+ self.assertEqual(10, len(format(s, '10.4s')))
+ self.assertEqual(s[0:3] + u' '*7, format(s, '10.3s'))
+ self.assertEqual(s[0:3] + u'~'*7, format(s, '~<10.3s'))
+ self.assertEqual(s[0:3] + u'~'*7, format(s, '~<10.3'))
+ self.assertEqual(u' '*6 + s[0:4], format(s, '>10.4s'))
+ self.assertEqual(u'*'*6 + s[0:4], format(s, '*>10.4s'))
+ self.assertEqual(u'*'*6 + s[0:4], format(s, '*>10.4'))
+
+
+class FormatMisc(unittest.TestCase):
+ # Odd tests Jython used to fail
+
+ def test_percent_padded(self) :
+ self.assertEqual('%hello', '%%%s' % 'hello')
+ self.assertEqual(u' %hello', '%6%%s' % u'hello')
+ self.assertEqual(u'% hello', u'%-6%%s' % 'hello')
+
+ self.assertEqual(' %', '%6%' % ())
+ self.assertEqual(' %', '%06%' % ())
+ self.assertEqual(' %', '%*%' % 4)
+ self.assertEqual('% ', '%-6%' % ())
+ self.assertEqual('% ', '%-06%' % ())
+ self.assertEqual('% ', '%*%' % -4)
+
def test_main():
- test_support.run_unittest(FormatTestCase)
+ test_support.run_unittest(
+ FormatSubclass,
+ FormatUnicodeClassic,
+ FormatUnicodeModern,
+ FormatMisc,
+ )
if __name__ == '__main__':
test_main()
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -357,13 +357,12 @@
self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000, 3.50')
self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000, 3.57')
self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
- if not sys.platform.startswith('java'):
- self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
+ self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')
- # self.assertEqual(u'%c' % 0x1234, u'\u1234')
- # self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
+ self.assertEqual(u'%c' % 0x1234, u'\u1234')
+ self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
# formatting jobs delegated from the string implementation:
self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
diff --git a/src/org/python/core/PyInteger.java b/src/org/python/core/PyInteger.java
--- a/src/org/python/core/PyInteger.java
+++ b/src/org/python/core/PyInteger.java
@@ -1074,7 +1074,7 @@
case 'c':
// Character data: specific prohibitions.
if (Spec.specified(spec.sign)) {
- throw IntegerFormatter.notAllowed("Sign", "integer", spec.type);
+ throw IntegerFormatter.signNotAllowed("integer", spec.type);
} else if (spec.alternate) {
throw IntegerFormatter.alternateFormNotAllowed("integer", spec.type);
}
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -10,10 +10,11 @@
import org.python.core.stringlib.FieldNameIterator;
import org.python.core.stringlib.FloatFormatter;
import org.python.core.stringlib.IntegerFormatter;
+import org.python.core.stringlib.InternalFormat;
+import org.python.core.stringlib.InternalFormat.Formatter;
import org.python.core.stringlib.InternalFormat.Spec;
-import org.python.core.stringlib.InternalFormatSpec;
-import org.python.core.stringlib.InternalFormatSpecParser;
import org.python.core.stringlib.MarkupIterator;
+import org.python.core.stringlib.TextFormatter;
import org.python.core.util.StringUtil;
import org.python.expose.ExposedMethod;
import org.python.expose.ExposedNew;
@@ -3898,50 +3899,68 @@
@ExposedMethod(doc = BuiltinDocs.str___format___doc)
final PyObject str___format__(PyObject formatSpec) {
- if (!(formatSpec instanceof PyString)) {
- throw Py.TypeError("__format__ requires str or unicode");
+
+ // Parse the specification
+ Spec spec = InternalFormat.fromText(formatSpec, "__format__");
+
+ // Get a formatter for the specification
+ TextFormatter f = prepareFormatter(spec);
+ if (f == null) {
+ // The type code was not recognised
+ throw Formatter.unknownFormat(spec.type, "string");
}
- PyString formatSpecStr = (PyString)formatSpec;
- String result;
- try {
- String specString = formatSpecStr.getString();
- InternalFormatSpec spec = new InternalFormatSpecParser(specString).parse();
- result = formatString(getString(), spec);
- } catch (IllegalArgumentException e) {
- throw Py.ValueError(e.getMessage());
+ // Bytes mode if neither this nor formatSpec argument is Unicode.
+ boolean unicode = this instanceof PyUnicode || formatSpec instanceof PyUnicode;
+ f.setBytes(!unicode);
+
+ // Convert as per specification.
+ f.format(getString());
+
+ // Return a result that has the same type (str or unicode) as the formatSpec argument.
+ return f.pad().getPyResult();
+ }
+
+ /**
+ * Common code for {@link PyString} and {@link PyUnicode} to prepare a {@link TextFormatter}
+ * from a parsed specification. The object returned has format method
+ * {@link TextFormatter#format(String)} that treats its argument as UTF-16 encoded unicode (not
+ * just <code>char</code>s). That method will format its argument ( <code>str</code> or
+ * <code>unicode</code>) according to the PEP 3101 formatting specification supplied here. This
+ * would be used during <code>text.__format__(".5s")</code> or
+ * <code>"{:.5s}".format(text)</code> where <code>text</code> is this Python string.
+ *
+ * @param spec a parsed PEP-3101 format specification.
+ * @return a formatter ready to use, or null if the type is not a string format type.
+ * @throws PyException(ValueError) if the specification is faulty.
+ */
+ @SuppressWarnings("fallthrough")
+ static TextFormatter prepareFormatter(Spec spec) throws PyException {
+ // Slight differences between format types
+ switch (spec.type) {
+
+ case Spec.NONE:
+ case 's':
+ // Check for disallowed parts of the specification
+ if (spec.grouping) {
+ throw Formatter.notAllowed("Grouping", "string", spec.type);
+ } else if (Spec.specified(spec.sign)) {
+ throw Formatter.signNotAllowed("string", '\0');
+ } else if (spec.alternate) {
+ throw Formatter.alternateFormNotAllowed("string");
+ } else if (spec.align == '=') {
+ throw Formatter.alignmentNotAllowed('=', "string");
+ }
+ // spec may be incomplete. The defaults are those commonly used for string formats.
+ spec = spec.withDefaults(Spec.STRING);
+ // Get a formatter for the specification
+ return new TextFormatter(spec);
+
+ default:
+ // The type code was not recognised
+ return null;
}
- return formatSpecStr.createInstance(result);
- }
-
- /**
- * Format the given text according to a parsed PEP 3101 formatting specification, as during
- * <code>text.__format__(format_spec)</code> or <code>"{:s}".format(text)</code> where
- * <code>text</code> is a Python string.
- *
- * @param text to format
- * @param spec the parsed PEP 3101 formatting specification
- * @return the result of the formatting
- */
- public static String formatString(String text, InternalFormatSpec spec) {
- if (spec.sign != '\0') {
- throw new IllegalArgumentException("Sign not allowed in string format specifier");
- }
- if (spec.alternate) {
- throw new IllegalArgumentException(
- "Alternate form (#) not allowed in string format specifier");
- }
- if (spec.align == '=') {
- throw new IllegalArgumentException(
- "'=' alignment not allowed in string format specifier");
- }
- if (spec.precision >= 0 && text.length() > spec.precision) {
- text = text.substring(0, spec.precision);
- }
- return spec.pad(text, '<', 0);
- }
-
- /* arguments' conversion helper */
+ }
@Override
public String asString(int index) throws PyObject.ConversionException {
@@ -4006,10 +4025,6 @@
String format;
/** Where the output is built. */
StringBuilder buffer;
- /** Remembers that the value currently converted is negative */
- boolean negative;
- /** Precision from format specification. */
- int precision;
/**
* Index into args of argument currently being worked, or special values indicating -1: a single
* item that has not yet been used, -2: a single item that has already been used, -3: a mapping.
@@ -4018,7 +4033,7 @@
/** Arguments supplied to {@link #format(PyObject)} method. */
PyObject args;
/** Indicate a <code>PyUnicode</code> result is expected. */
- boolean unicodeCoercion;
+ boolean needUnicode;
final char pop() {
try {
@@ -4054,7 +4069,7 @@
public StringFormatter(String format, boolean unicodeCoercion) {
index = 0;
this.format = format;
- this.unicodeCoercion = unicodeCoercion;
+ this.needUnicode = unicodeCoercion;
buffer = new StringBuilder(format.length() + 100);
}
@@ -4155,9 +4170,9 @@
}
/**
- * Return the argument as either a {@link PyFloat} according to its <code>__float__</code>
- * method. If the argument has no such method, or it raises an exception, we return the argument
- * itself. The caller must check the return type.
+ * Return the argument as a {@link PyFloat} according to its <code>__float__</code> method. If
+ * the argument has no such method, or it raises an exception, we return the argument itself.
+ * The caller must check the return type.
*
* @param arg to convert
* @return PyFloat if possible
@@ -4171,7 +4186,7 @@
} else {
// use __float__ to get a float.
if (arg.getClass() == PyFloat.class) {
- // A common case where it is safe to return arg.__int__()
+ // A common case where it is safe to return arg.__float__()
return arg.__float__();
} else {
@@ -4194,6 +4209,46 @@
}
/**
+ * Return the argument as either a {@link PyString} or a {@link PyUnicode}, and set the
+ * {@link #needUnicode} member accordingly. If we already know we are building a Unicode string
+ * (<code>needUnicode==true</code>), then any argument that is not already a
+ * <code>PyUnicode</code> will be converted by calling its <code>__unicode__</code> method.
+ * Conversely, if we are not yet building a Unicode string (<code>needUnicode==false</code> ),
+ * then a PyString will pass unchanged, a <code>PyUnicode</code> will switch us to Unicode mode
+ * (<code>needUnicode=true</code>), and any other type will be converted by calling its
+ * <code>__str__</code> method, which will return a <code>PyString</code>, or possibly a
+ * <code>PyUnicode</code>, which will switch us to Unicode mode.
+ *
+ * @param arg to convert
+ * @return PyString or PyUnicode equivalent
+ */
+ private PyString asText(PyObject arg) {
+
+ if (arg instanceof PyUnicode) {
+ // arg is already acceptable.
+ needUnicode = true;
+ return (PyUnicode)arg;
+
+ } else if (needUnicode) {
+ // The string being built is unicode, so we need that version of the arg.
+ return arg.__unicode__();
+
+ } else if (arg instanceof PyString) {
+ // The string being built is not unicode, so arg is already acceptable.
+ return (PyString)arg;
+
+ } else {
+ // The string being built is not unicode, so use __str__ to get a PyString.
+ PyString s = arg.__str__();
+ // But __str__ might return PyUnicode, and we have to notice that.
+ if (s instanceof PyUnicode) {
+ needUnicode = true;
+ }
+ return s;
+ }
+ }
+
+ /**
* Main service of this class: format one or more arguments with the format string supplied at
* construction.
*
@@ -4204,7 +4259,7 @@
public PyString format(PyObject args) {
PyObject dict = null;
this.args = args;
- boolean needUnicode = unicodeCoercion;
+
if (args instanceof PyTuple) {
// We will simply work through the tuple elements
argIndex = 0;
@@ -4220,16 +4275,6 @@
while (index < format.length()) {
- // Attributes to be parsed from the next format specifier
- boolean ljustFlag = false;
- boolean signFlag = false;
- boolean blankFlag = false;
- boolean altFlag = false;
- boolean zeroFlag = false;
-
- int width = -1;
- precision = -1;
-
// Read one character from the format string
char c = pop();
if (c != '%') {
@@ -4239,6 +4284,14 @@
// It's a %, so the beginning of a conversion specifier. Parse it.
+ // Attributes to be parsed from the next format specifier
+ boolean altFlag = false;
+ char sign = Spec.NONE;
+ char fill = ' ';
+ char align = '>';
+ int width = Spec.UNSPECIFIED;
+ int precision = Spec.UNSPECIFIED;
+
// A conversion specifier contains the following components, in this order:
// + The '%' character, which marks the start of the specifier.
// + Mapping key (optional), consisting of a parenthesised sequence of characters.
@@ -4278,19 +4331,22 @@
while (true) {
switch (c = pop()) {
case '-':
- ljustFlag = true;
+ align = '<';
continue;
case '+':
- signFlag = true;
+ sign = '+';
continue;
case ' ':
- blankFlag = true;
+ if (!Spec.specified(sign)) {
+ // Blank sign only wins if '+' not specified.
+ sign = ' ';
+ }
continue;
case '#':
altFlag = true;
continue;
case '0':
- zeroFlag = true;
+ fill = '0';
continue;
}
break;
@@ -4307,7 +4363,7 @@
width = getNumber();
if (width < 0) {
width = -width;
- ljustFlag = true;
+ align = '<';
}
/*
@@ -4330,103 +4386,105 @@
c = pop();
}
- // c is now the conversion type.
- if (c == '%') {
- // It was just a percent sign after all
- buffer.append(c);
- continue;
+ /*
+ * As a function of the conversion type (currently in c) override some of the formatting
+ * flags we read from the format specification.
+ */
+ switch (c) {
+ case 's':
+ case 'r':
+ case 'c':
+ case '%':
+ // These have string-like results: fill, if needed, is always blank.
+ fill = ' ';
+ break;
+
+ default:
+ if (fill == '0' && align == '>') {
+ // Zero-fill comes after the sign in right-justification.
+ align = '=';
+ } else {
+ // If left-justifying, the fill is always blank.
+ fill = ' ';
+ }
}
/*
+ * Encode as an InternalFormat.Spec. The values in the constructor always have specified
+ * values, except for sign, width and precision.
+ */
+ Spec spec = new Spec(fill, align, sign, altFlag, width, false, precision, c);
+
+ /*
* Process argument according to format specification decoded from the string. It is
- * important we don't read the argumnent from the list until this point because of the
+ * important we don't read the argument from the list until this point because of the
* possibility that width and precision were specified via the argument list.
*/
- PyObject arg = getarg();
- String string = null;
- negative = false;
-
- // Independent of type, decide the padding character based on decoded flags.
- char fill = ' ';
- if (zeroFlag) {
- fill = '0';
- } else {
- fill = ' ';
- }
-
- // Encode as an InternalFormat.Spec
- char fill2 = ' ';
- char align = ljustFlag ? '<' : '>';
- if (zeroFlag && !ljustFlag) {
- // We only actually fill with zero if right-justifying
- fill2 = '0';
- // And then the fill comes after the sign.
- align = '=';
- }
- char sign = signFlag ? '+' : (blankFlag ? ' ' : Spec.NONE);
- int w = width;
- Spec spec = new Spec(fill2, align, sign, altFlag, w, false, precision, c);
-
- // Signal that the padding, sign, base prefix etc. have all been taken care of
- boolean jobDone = false;
-
- // Perform the type-specific formatting
- switch (c) {
-
- case 's':
- // String (converts any Python object using str()).
- if (arg instanceof PyUnicode) {
- needUnicode = true;
- }
- // fall through ...
-
- case 'r':
- // String (converts any Python object using repr()).
- fill = ' ';
- if (c == 's') {
- if (needUnicode) {
- string = arg.__unicode__().toString();
- } else {
- string = arg.__str__().toString();
- }
- } else {
- string = arg.__repr__().toString();
- }
- if (precision >= 0 && string.length() > precision) {
- string = string.substring(0, precision);
- }
-
+
+ // Depending on the type of conversion, we use one of these formatters:
+ FloatFormatter ff;
+ IntegerFormatter fi;
+ TextFormatter ft;
+ Formatter f; // = ff, fi or ft, whichever we actually use.
+
+ switch (spec.type) {
+
+ case 's': // String: converts any object using __str__(), __unicode__() ...
+ case 'r': // ... or repr().
+ PyObject arg = getarg();
+
+ // Get hold of the actual object to display (may set needUnicode)
+ PyString argAsString = asText(spec.type == 's' ? arg : arg.__repr__());
+ // Format the str/unicode form of the argument using this Spec.
+ f = ft = new TextFormatter(spec);
+ ft.setBytes(!needUnicode);
+ ft.format(argAsString.getString());
break;
case 'd': // All integer formats (+case for X).
case 'o':
case 'x':
case 'X':
+ case 'c': // Single character (accepts integer or single character string).
case 'u': // Obsolete type identical to 'd'.
case 'i': // Compatibility with scanf().
- // Format using this Spec the double form of the argument.
- IntegerFormatter fi = new IntegerFormatter.Traditional(spec);
-
- // Note various types accepted here as long as they have an __int__ method.
- PyObject argAsNumber = asNumber(arg);
-
- // We have to check what we got back..
- if (argAsNumber instanceof PyInteger) {
- fi.format(((PyInteger)argAsNumber).getValue());
- } else if (argAsNumber instanceof PyLong) {
- fi.format(((PyLong)argAsNumber).getValue());
+ // Format the argument using this Spec.
+ f = fi = new IntegerFormatter.Traditional(spec);
+ // If not producing PyUnicode, disallow codes >255.
+ fi.setBytes(!needUnicode);
+
+ arg = getarg();
+
+ if (arg instanceof PyString && spec.type == 'c') {
+ if (arg.__len__() != 1) {
+ throw Py.TypeError("%c requires int or char");
+ } else {
+ if (!needUnicode && arg instanceof PyUnicode) {
+ // Change of mind forced by encountering unicode object.
+ needUnicode = true;
+ fi.setBytes(false);
+ }
+ fi.format(((PyString)arg).getString().codePointAt(0));
+ }
+
} else {
- // It couldn't be converted, raise the error here
- throw Py.TypeError("%" + c + " format: a number is required, not "
- + arg.getType().fastGetName());
+ // Note various types accepted here as long as they have an __int__ method.
+ PyObject argAsNumber = asNumber(arg);
+
+ // We have to check what we got back.
+ if (argAsNumber instanceof PyInteger) {
+ fi.format(((PyInteger)argAsNumber).getValue());
+ } else if (argAsNumber instanceof PyLong) {
+ fi.format(((PyLong)argAsNumber).getValue());
+ } else {
+ // It couldn't be converted, raise the error here
+ throw Py.TypeError("%" + spec.type
+ + " format: a number is required, not "
+ + arg.getType().fastGetName());
+ }
}
- fi.pad();
- string = fi.getResult();
-
- // Suppress subsequent attempts to insert a correct sign, done already.
- jobDone = true;
break;
case 'e': // All floating point formats (+case).
@@ -4437,9 +4495,11 @@
case 'G':
// Format using this Spec the double form of the argument.
- FloatFormatter ff = new FloatFormatter(spec);
+ f = ff = new FloatFormatter(spec);
+ ff.setBytes(!needUnicode);
// Note various types accepted here as long as they have a __float__ method.
+ arg = getarg();
PyObject argAsFloat = asFloat(arg);
// We have to check what we got back..
@@ -4451,128 +4511,24 @@
+ arg.getType().fastGetName());
}
- ff.pad();
- string = ff.getResult();
-
- // Suppress subsequent attempts to insert a correct sign, done already.
- // signFlag = blankFlag = negative = false;
- jobDone = true;
break;
- case 'c':
- // Single character (accepts integer or single character string).
- fill = ' ';
- if (arg instanceof PyString) {
- string = ((PyString)arg).toString();
- if (string.length() != 1) {
- throw Py.TypeError("%c requires int or char");
- }
- if (arg instanceof PyUnicode) {
- needUnicode = true;
- }
- break;
- }
-
- // arg is not a str (or unicode)
- int val;
- try {
- // Explicitly __int__ so we can look for an AttributeError (which is
- // less invasive to mask than a TypeError)
- val = arg.__int__().asInt();
- } catch (PyException e) {
- if (e.match(Py.AttributeError)) {
- throw Py.TypeError("%c requires int or char");
- }
- throw e;
- }
- // Range check, according to ultimate type of result as presentl;y known.
- if (!needUnicode) {
- if (val < 0) {
- throw Py.OverflowError("unsigned byte integer is less than minimum");
- } else if (val > 255) {
- throw Py.OverflowError("unsigned byte integer is greater than maximum");
- }
- } else if (val < 0 || val > PySystemState.maxunicode) {
- throw Py.OverflowError("%c arg not in range(0x110000) (wide Python build)");
- }
- string = new String(new int[] {val}, 0, 1);
+ case '%': // Percent symbol, but surprisingly, padded.
+
+ // We use an integer formatter.
+ f = fi = new IntegerFormatter.Traditional(spec);
+ fi.setBytes(!needUnicode);
+ fi.format('%');
break;
default:
throw Py.ValueError("unsupported format character '"
- + codecs.encode(Py.newString(c), null, "replace") + "' (0x"
- + Integer.toHexString(c) + ") at index " + (index - 1));
+ + codecs.encode(Py.newString(spec.type), null, "replace") + "' (0x"
+ + Integer.toHexString(spec.type) + ") at index " + (index - 1));
}
- /*
- * We have now dealt with the translation of the (absolute value of the) argument, in
- * variable string[]. In the next sections we deal with sign, padding and base prefix.
- */
- if (jobDone) {
- // Type-specific formatting has already taken care of all this.
- buffer.append(string);
-
- } else {
- // Legacy code still needed
- int length = string.length();
- int skip = 0;
-
- // Decide how to represent the sign according to format and actual sign of argument.
- String signString = null;
- if (negative) {
- signString = "-";
- } else {
- if (signFlag) {
- signString = "+";
- } else if (blankFlag) {
- signString = " ";
- }
- }
-
- // The width (from here on) will be the remaining width on the line.
- if (width < length) {
- width = length;
- }
-
- // Insert the sign in the buffer and adjust the width.
- if (signString != null) {
- if (fill != ' ') {
- // When the fill is not space, the sign comes before the fill.
- buffer.append(signString);
- }
- // Adjust width for sign.
- if (width > length) {
- width--;
- }
- }
-
- // Fill on the left of the item.
- if (width > length && !ljustFlag) {
- do {
- buffer.append(fill);
- } while (--width > length);
- }
-
- // If the fill is spaces, we will have deferred the sign and hex base prefix
- if (fill == ' ') {
- if (signString != null) {
- buffer.append(signString);
- }
- }
-
- // Now append the converted argument.
- if (skip > 0) {
- // The string contains a hex-prefix, but we have already inserted one.
- buffer.append(string.substring(skip));
- } else {
- buffer.append(string);
- }
-
- // If this hasn't filled the space required, add right-padding.
- while (--width >= length) {
- buffer.append(' ');
- }
- }
+ // Pad the result as required in the format and append to the overall result.
+ buffer.append(f.pad().getResult());
}
/*
diff --git a/src/org/python/core/stringlib/IntegerFormatter.java b/src/org/python/core/stringlib/IntegerFormatter.java
--- a/src/org/python/core/stringlib/IntegerFormatter.java
+++ b/src/org/python/core/stringlib/IntegerFormatter.java
@@ -294,6 +294,7 @@
break;
case 'c':
+ case '%':
// Binary.
format_c(value);
break;
diff --git a/src/org/python/core/stringlib/InternalFormat.java b/src/org/python/core/stringlib/InternalFormat.java
--- a/src/org/python/core/stringlib/InternalFormat.java
+++ b/src/org/python/core/stringlib/InternalFormat.java
@@ -334,63 +334,75 @@
* modes, the padding is around the whole buffer.) When this would not be appropriate, it is
* up to the client to disallow this (which <code>complex</code> does).
*
- * @return this object
+ * @return this Formatter object
*/
public Formatter pad() {
-
// We'll need this many pad characters (if>0). Note Spec.UNDEFINED<0.
int n = spec.width - result.length();
if (n > 0) {
+ // Note: use of leftIndex anticipates client-owned result buffer.
+ pad(0, n);
+ }
+ return this;
+ }
- char align = spec.getAlign('>'); // Right for numbers (wrong for strings)
- char fill = spec.getFill(' ');
+ /**
+ * Pad the last result (defined as the contents of {@link #result} from argument
+ * <code>leftIndex</code> to the end) using the alignment, by <code>n</code> repetitions of
+ * the fill character defined in {@link #spec}, and distributed according to
+ * <code>spec.align</code>. The value of <code>leftIndex</code> is only used if the
+ * alignment is '>' (fill on the left) or '^' (fill on both sides). The values of the critical lengths (lenWhole,
+ * lenSign, etc.) are not affected, because we assume that <code>leftIndex <= </code>
+ * {@link #start}.
+ *
+ * @param leftIndex the index in result at which to insert left-fill characters.
+ * @param n number of fill characters to insert.
+ */
+ protected void pad(int leftIndex, int n) {
+ char align = spec.getAlign('>'); // Right for numbers (strings will supply '<' align)
+ char fill = spec.getFill(' ');
- // Start by assuming padding is all leading ('>' case or '=')
- int leading = n;
+ // Start by assuming padding is all leading ('>' case or '=')
+ int leading = n;
- // Split the total padding according to the alignment
- if (align == '^') {
- // Half the padding before
- leading = n / 2;
- } else if (align == '<') {
- // All the padding after
- leading = 0;
+ // Split the total padding according to the alignment
+ if (align == '^') {
+ // Half the padding before
+ leading = n / 2;
+ } else if (align == '<') {
+ // All the padding after
+ leading = 0;
+ }
+
+ // All padding that is not leading is trailing
+ int trailing = n - leading;
+
+ // Insert the leading space
+ if (leading > 0) {
+ if (align == '=') {
+ // Incorporate into the (latest) whole part
+ leftIndex = start + lenSign;
+ lenWhole += leading;
+ } else {
+ // Default is to insert at the stated leftIndex <= start.
+ start += leading;
}
-
- // All padding that is not leading is trailing
- int trailing = n - leading;
-
- // Insert the leading space
- if (leading > 0) {
- int pos;
- if (align == '=') {
- // Incorporate into the (latest) whole part
- pos = start + lenSign;
- lenWhole += leading;
- } else {
- // Insert at the very beginning (not start) by default.
- pos = 0;
- start += leading;
- }
- makeSpaceAt(pos, leading);
- for (int i = 0; i < leading; i++) {
- result.setCharAt(pos + i, fill);
- }
- }
-
- // Append the trailing space
- for (int i = 0; i < trailing; i++) {
- result.append(fill);
- }
-
- // Check for special case
- if (align == '=' && fill == '0' && spec.grouping) {
- // We must extend the grouping separator into the padding
- zeroPadAfterSignWithGroupingFixup(3, ',');
+ makeSpaceAt(leftIndex, leading);
+ for (int i = 0; i < leading; i++) {
+ result.setCharAt(leftIndex + i, fill);
}
}
- return this;
+ // Append the trailing space
+ for (int i = 0; i < trailing; i++) {
+ result.append(fill);
+ }
+
+ // Check for special case
+ if (align == '=' && fill == '0' && spec.grouping) {
+ // We must extend the grouping separator into the padding
+ zeroPadAfterSignWithGroupingFixup(3, ',');
+ }
}
/**
@@ -512,6 +524,18 @@
/**
* Convenience method returning a {@link Py#ValueError} reporting that specifying a
+ * sign is not allowed in a format specifier for the named type.
+ *
+ * @param forType the type it was found applied to
+ * @param code the formatting code (or '\0' not to mention one)
+ * @return exception to throw
+ */
+ public static PyException signNotAllowed(String forType, char code) {
+ return notAllowed("Sign", forType, code);
+ }
+
+ /**
+ * Convenience method returning a {@link Py#ValueError} reporting that specifying a
* precision is not allowed in a format specifier for the named type.
*
* @param forType the type it was found applied to
@@ -534,6 +558,18 @@
/**
* Convenience method returning a {@link Py#ValueError} reporting that some format specifier
+ * feature is not allowed for the named data type.
+ *
+ * @param outrage committed in the present case
+ * @param forType the data type (e.g. "integer") where it is an outrage
+ * @return exception to throw
+ */
+ public static PyException notAllowed(String outrage, String forType) {
+ return notAllowed(outrage, forType, '\0');
+ }
+
+ /**
+ * Convenience method returning a {@link Py#ValueError} reporting that some format specifier
* feature is not allowed for the named format code and data type. Produces a message like:
* <p>
* <code>outrage+" not allowed with "+forType+" format specifier '"+code+"'"</code>
@@ -753,6 +789,12 @@
false, Spec.UNSPECIFIED, Spec.NONE);
/**
+ * Defaults applicable to string types. Equivalent to " <"
+ */
+ public static final Spec STRING = new Spec(' ', '<', Spec.NONE, false, Spec.UNSPECIFIED,
+ false, Spec.UNSPECIFIED, Spec.NONE);
+
+ /**
* Constructor offering just precision and type.
*
* <pre>
diff --git a/src/org/python/core/stringlib/TextFormatter.java b/src/org/python/core/stringlib/TextFormatter.java
new file mode 100644
--- /dev/null
+++ b/src/org/python/core/stringlib/TextFormatter.java
@@ -0,0 +1,105 @@
+// Copyright (c) Jython Developers
+package org.python.core.stringlib;
+
+import org.python.core.stringlib.InternalFormat.Spec;
+
+/**
+ * A class that provides the implementation of <code>str</code> and <code>unicode</code> formatting.
+ * In a limited way, it acts like a StringBuilder to which text is appended, formatted according to the format
+ * specifier supplied at construction. These are ephemeral objects that are not, on their own,
+ * thread safe.
+ */
+public class TextFormatter extends InternalFormat.Formatter {
+
+ /**
+ * Construct the formatter from a specification and guess the initial buffer capacity. A
+ * reference is held to this specification.
+ *
+ * @param spec parsed conversion specification
+ */
+ public TextFormatter(Spec spec) {
+ // No right answer here for the buffer size, especially as non-BMP Unicode possible.
+ super(spec, Math.max(spec.width, spec.getPrecision(10)) + 6);
+ }
+
+ /*
+ * Re-implement the text appends so they return the right type.
+ */
+ @Override
+ public TextFormatter append(char c) {
+ super.append(c);
+ return this;
+ }
+
+ @Override
+ public TextFormatter append(CharSequence csq) {
+ super.append(csq);
+ return this;
+ }
+
+ @Override
+ public TextFormatter append(CharSequence csq, int start, int end) //
+ throws IndexOutOfBoundsException {
+ super.append(csq, start, end);
+ return this;
+ }
+
+ /**
+ * Format the given <code>String</code> into the <code>result</code> buffer. Largely, this is a
+ * matter of copying the value of the argument, but a subtlety arises when the string contains
+ * supplementary (non-BMP) Unicode characters, which are represented as surrogate pairs. The
+ * precision specified in the format relates to a count of Unicode characters (code points), not
+ * Java <code>char</code>s. The method deals with this correctly, essentially by not counting
+ * the high-surrogates in the allowance. The final value of {@link #lenWhole} counts the UTF-16
+ * units added.
+ *
+ * @param value to format
+ * @return this <code>TextFormatter</code> object
+ */
+ public TextFormatter format(String value) {
+ this.reset();
+ int p = spec.precision, n = value.length();
+
+ if (Spec.specified(p) && p < n) {
+ /*
+ * A precision p was specified less than the length: we may have to truncate. Note we
+ * compared p with the UTF-16 length, even though it is the code point length that
+ * matters. But the code point length cannot be greater than n.
+ */
+ int count = 0;
+ while (count < p) {
+ // count is the number of UTF-16 chars.
+ char c = value.charAt(count++);
+ result.append(c);
+ // A high-surrogate will always be followed by a low, so doesn't count.
+ if (Character.isHighSurrogate(c) && p < n) {
+ // Accomplish "not counting" by bumping the limit p, within the array bounds.
+ p += 1;
+ }
+ }
+ // Record the UTF-16 count as the length in buffer
+ lenWhole = count;
+
+ } else {
+ // We definitely don't need to truncate. Append the whole string.
+ lenWhole = n;
+ result.append(value);
+ }
+
+ return this;
+ }
+
+ /**
+ * Pad the result according to the specification, dealing correctly with Unicode.
+ */
+ @Override
+ public TextFormatter pad() {
+ // We'll need this many pad characters (if>0). Note Spec.UNSPECIFIED<0.
+ int n = spec.width - result.codePointCount(0, result.length());
+ if (n > 0) {
+ pad(0, n);
+ }
+ return this;
+ }
+
+}
diff --git a/tests/java/org/python/core/StringFormatTest.java b/tests/java/org/python/core/StringFormatTest.java
--- a/tests/java/org/python/core/StringFormatTest.java
+++ b/tests/java/org/python/core/StringFormatTest.java
@@ -10,6 +10,7 @@
import org.python.core.stringlib.InternalFormatSpec;
import org.python.core.stringlib.InternalFormatSpecParser;
import org.python.core.stringlib.MarkupIterator;
+import org.python.core.stringlib.TextFormatter;
import org.python.util.PythonInterpreter;
/**
@@ -219,15 +220,17 @@
}
public void testFormatString() {
- InternalFormatSpec spec = new InternalFormatSpec();
- assertEquals("abc", PyString.formatString("abc", spec));
+ String v = "abc";
+ TextFormatter f;
+ f = PyString.prepareFormatter(InternalFormat.fromText(""));
+ assertEquals("abc", f.format(v).pad().getResult());
- spec.precision = 3;
- assertEquals("abc", PyString.formatString("abcdef", spec));
+ String v2 = "abcdef";
+ f = PyString.prepareFormatter(InternalFormat.fromText(".3"));
+ assertEquals("abc", f.format(v2).pad().getResult());
- spec.precision = -1;
- spec.width = 6;
- assertEquals("abc ", PyString.formatString("abc", spec));
+ f = PyString.prepareFormatter(InternalFormat.fromText("6"));
+ assertEquals("abc ", f.format(v).pad().getResult());
}
public void testMarkupIterator() {
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list