[Jython-checkins] jython (merge default -> default): Merge of formatting changes
jeff.allen
jython-checkins at python.org
Mon May 19 00:49:38 CEST 2014
http://hg.python.org/jython/rev/f6768976582a
changeset: 7260:f6768976582a
parent: 7256:8ceb0723b372
parent: 7259:60838278e668
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun May 18 20:11:04 2014 +0100
summary:
Merge of formatting changes
files:
Lib/test/test_float.py | 40 -
Lib/test/test_float_jy.py | 5 +-
src/org/python/core/PyComplex.java | 24 +-
src/org/python/core/PyString.java | 395 ++++++---
src/org/python/core/stringlib/FloatFormatter.java | 91 +-
src/org/python/core/stringlib/InternalFormat.java | 37 +-
src/org/python/core/stringlib/InternalFormatSpec.java | 6 +-
7 files changed, 345 insertions(+), 253 deletions(-)
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py
--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@@ -617,8 +617,6 @@
self.assertEqual('{0:f}'.format(NAN), 'nan')
self.assertEqual('{0:F}'.format(NAN), 'NAN')
- @unittest.skipIf(test_support.is_jython,
- "FIXME: not working on Jython")
@requires_IEEE_754
def test_format_testfile(self):
with open(format_testfile) as testfile:
@@ -842,8 +840,6 @@
self.assertAlmostEqual(round(1.5e22, -22), 2e22)
- @unittest.skipIf(test_support.is_jython,
- "FIXME: %-formatting specials imperfect in Jython")
@requires_IEEE_754
def test_format_specials(self):
# Test formatting of nans and infs.
@@ -878,42 +874,6 @@
test(sfmt, NAN, ' nan')
test(sfmt, -NAN, ' nan')
- @requires_IEEE_754
- def test_format_specials_jy(self):
- # Test formatting of nans and infs (suppressing %-formatting).
- # This is just a crudely restricted copy of test_format_specials.
- # Delete this test when we no longer have to skip test_format_specials.
-
- def test(fmt, value, expected):
- # Test with only format().
- #self.assertEqual(fmt % value, expected, fmt)
- if not '#' in fmt:
- # Until issue 7094 is implemented, format() for floats doesn't
- # support '#' formatting
- fmt = fmt[1:] # strip off the %
- self.assertEqual(format(value, fmt), expected, fmt)
-
- for fmt in ['%e', '%f', '%g', '%.0e', '%.6f', '%.20g',
- '%#e', '%#f', '%#g', '%#.20e', '%#.15f', '%#.3g']:
- pfmt = '%+' + fmt[1:]
- sfmt = '% ' + fmt[1:]
- test(fmt, INF, 'inf')
- test(fmt, -INF, '-inf')
- test(fmt, NAN, 'nan')
- test(fmt, -NAN, 'nan')
- # When asking for a sign, it's always provided. nans are
- # always positive.
- test(pfmt, INF, '+inf')
- test(pfmt, -INF, '-inf')
- test(pfmt, NAN, '+nan')
- test(pfmt, -NAN, '+nan')
- # When using ' ' for a sign code, only infs can be negative.
- # Others have a space.
- test(sfmt, INF, ' inf')
- test(sfmt, -INF, '-inf')
- test(sfmt, NAN, ' nan')
- test(sfmt, -NAN, ' nan')
-
# Beginning with Python 2.6 float has cross platform compatible
# ways to create and represent inf and nan
diff --git a/Lib/test/test_float_jy.py b/Lib/test/test_float_jy.py
--- a/Lib/test/test_float_jy.py
+++ b/Lib/test/test_float_jy.py
@@ -40,11 +40,8 @@
def test_float_str_formatting(self):
self.assertEqual('%.13g' % 12345678.00005, '12345678.00005')
- self.assertEqual('%.12g' % 12345678.00005,
- jython and '12345678' or '12345678.0001')
+ self.assertEqual('%.12g' % 12345678.00005, '12345678.0001')
self.assertEqual('%.11g' % 12345678.00005, '12345678')
- # XXX: The exponential formatter isn't totally correct, e.g. our
- # output here is really .13g
self.assertEqual('%.12g' % math.pi**-100, '1.92758141606e-50')
self.assertEqual('%.5g' % 123.005, '123')
self.assertEqual('%#.5g' % 123.005, '123.00')
diff --git a/src/org/python/core/PyComplex.java b/src/org/python/core/PyComplex.java
--- a/src/org/python/core/PyComplex.java
+++ b/src/org/python/core/PyComplex.java
@@ -165,9 +165,10 @@
* and <code>__repr__</code>, and none-format.
* <p>
* In general, the output is surrounded in parentheses, like <code>"(12.34+24.68j)"</code>.
- * However, if the real part is zero, only the imaginary part is printed, and without
- * parentheses like <code>"24.68j"</code>. The number format specification passed in is used
- * without padding to width, for the real and imaginary parts individually.
+ * However, if the real part is zero (but not negative zero), only the imaginary part is
+ * printed, and without parentheses like <code>"24.68j"</code>. The number format specification
+ * passed in is used for the real and imaginary parts individually, with padding to width
+ * afterwards (if the specification requires it).
*
* @param spec parsed format specification string
* @return formatted value
@@ -176,12 +177,13 @@
FloatFormatter f = new FloatFormatter(spec, 2, 3); // Two elements + "(j)".length
// Even in r-format, complex strips *all* the trailing zeros.
f.setMinFracDigits(0);
- if (real == 0.0) {
+ if (Double.doubleToLongBits(real) == 0L) {
+ // Real part is truly zero: show no real part.
f.format(imag).append('j');
} else {
- f.append('(').format(real).format(imag, "+").append("j)").pad();
+ f.append('(').format(real).format(imag, "+").append("j)");
}
- return f.getResult();
+ return f.pad().getResult();
}
@Override
@@ -823,7 +825,7 @@
try {
String specString = formatSpecStr.getString();
Spec spec = InternalFormat.fromText(specString);
- if (spec.type!=Spec.NONE && "efgEFGn%".indexOf(spec.type) < 0) {
+ if (spec.type != Spec.NONE && "efgEFGn%".indexOf(spec.type) < 0) {
throw FloatFormatter.unknownFormat(spec.type, "complex");
} else if (spec.alternate) {
throw FloatFormatter.alternateFormNotAllowed("complex");
@@ -838,12 +840,12 @@
// And then we use the __str__ mechanism to get parentheses or real 0 elision.
result = formatComplex(spec);
} else {
- // In any other format, the defaults those commonly used for numeric formats.
+ // In any other format, defaults are those commonly used for numeric formats.
spec = spec.withDefaults(Spec.NUMERIC);
FloatFormatter f = new FloatFormatter(spec, 2, 1);// 2 floats + "j"
- // Convert as both parts per specification
- f.format(real).format(imag, "+").append('j').pad();
- result = f.getResult();
+ // Convert both parts as per specification
+ f.format(real).format(imag, "+").append('j');
+ result = f.pad().getResult();
}
}
} catch (IllegalArgumentException e) {
diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java
--- a/src/org/python/core/PyString.java
+++ b/src/org/python/core/PyString.java
@@ -4,17 +4,15 @@
import java.lang.ref.Reference;
import java.lang.ref.SoftReference;
import java.math.BigInteger;
-import java.text.DecimalFormat;
-import java.text.DecimalFormatSymbols;
-
-import org.python.core.StringFormatter.DecimalFormatTemplate;
+
import org.python.core.buffer.BaseBuffer;
import org.python.core.buffer.SimpleStringBuffer;
import org.python.core.stringlib.FieldNameIterator;
+import org.python.core.stringlib.FloatFormatter;
+import org.python.core.stringlib.InternalFormat.Spec;
import org.python.core.stringlib.InternalFormatSpec;
import org.python.core.stringlib.InternalFormatSpecParser;
import org.python.core.stringlib.MarkupIterator;
-import org.python.core.util.ExtraMath;
import org.python.core.util.StringUtil;
import org.python.expose.ExposedMethod;
import org.python.expose.ExposedNew;
@@ -1296,8 +1294,8 @@
* @param stripChars characters to strip from the left end of this str/bytes, or null
* @return a new String, stripped of the specified characters/bytes
*/
- public String lstrip(String sep) {
- return _lstrip(sep);
+ public String lstrip(String stripChars) {
+ return _lstrip(stripChars);
}
/**
@@ -1310,8 +1308,8 @@
* @return a new <code>PyString</code> (or {@link PyUnicode}), stripped of the specified
* characters/bytes
*/
- public PyObject lstrip(PyObject sep) {
- return str_lstrip(sep);
+ public PyObject lstrip(PyObject stripChars) {
+ return str_lstrip(stripChars);
}
@ExposedMethod(defaults = "null", doc = BuiltinDocs.str_lstrip_doc)
@@ -1385,8 +1383,8 @@
* @param stripChars characters to strip from either end of this str/bytes, or null
* @return a new String, stripped of the specified characters/bytes
*/
- public String rstrip(String sep) {
- return _rstrip(sep);
+ public String rstrip(String stripChars) {
+ return _rstrip(stripChars);
}
/**
@@ -1399,8 +1397,8 @@
* @return a new <code>PyString</code> (or {@link PyUnicode}), stripped of the specified
* characters/bytes
*/
- public PyObject rstrip(PyObject sep) {
- return str_rstrip(sep);
+ public PyObject rstrip(PyObject stripChars) {
+ return str_rstrip(stripChars);
}
@ExposedMethod(defaults = "null", doc = BuiltinDocs.str_rstrip_doc)
@@ -1546,7 +1544,7 @@
* the last element of the list contains the what is left over after the last split.
* <p>
* Implementation note: although a str contains only bytes, this method is also called by
- * {@link PyUnicode#unicode_split(PyObject)}.
+ * {@link PyUnicode#unicode_split(PyObject, int)}.
*
* @param sep string to use as separator (or <code>null</code> if to split on whitespace)
* @param maxsplit maximum number of splits to make (there may be <code>maxsplit+1</code>
@@ -1798,7 +1796,7 @@
* left over after the last split.
* <p>
* Implementation note: although a str contains only bytes, this method is also called by
- * {@link PyUnicode#unicode_rsplit(PyObject)} .
+ * {@link PyUnicode#unicode_rsplit(PyObject, int)} .
*
* @param sep string to use as separator (or <code>null</code> if to split on whitespace)
* @param maxsplit maximum number of splits to make (there may be <code>maxsplit+1</code>
@@ -2931,11 +2929,10 @@
*
* @param oldPiece to replace where found.
* @param newPiece replacement text.
- * @param count maximum number of replacements to make, or -1 meaning all of them.
* @return PyString (or PyUnicode if any string is one), this string after replacements.
*/
- public PyString replace(PyObject oldPieceObj, PyObject newPieceObj) {
- return str_replace(oldPieceObj, newPieceObj, -1);
+ public PyString replace(PyObject oldPiece, PyObject newPiece) {
+ return str_replace(oldPiece, newPiece, -1);
}
/**
@@ -2949,8 +2946,8 @@
* @param count maximum number of replacements to make, or -1 meaning all of them.
* @return PyString (or PyUnicode if any string is one), this string after replacements.
*/
- public PyString replace(PyObject oldPieceObj, PyObject newPieceObj, int count) {
- return str_replace(oldPieceObj, newPieceObj, count);
+ public PyString replace(PyObject oldPiece, PyObject newPiece, int count) {
+ return str_replace(oldPiece, newPiece, count);
}
@ExposedMethod(defaults = "-1", doc = BuiltinDocs.str_replace_doc)
@@ -3161,8 +3158,8 @@
* @return <code>true</code> if this string slice starts with a specified prefix, otherwise
* <code>false</code>.
*/
- public boolean startswith(PyObject prefix, PyObject offset) {
- return str_startswith(prefix, offset, null);
+ public boolean startswith(PyObject prefix, PyObject start) {
+ return str_startswith(prefix, start, null);
}
/**
@@ -4002,13 +3999,24 @@
*/
final class StringFormatter {
+ /** Index into {@link #format} being interpreted. */
int index;
+ /** Format being interpreted. */
String format;
+ /** Where the output is built. */
StringBuilder buffer;
+ /** Remembers that the value currently converted is negative */
boolean negative;
+ /** Precision from format specification. */
int precision;
+ /**
+ * Index into args of argument currently being worked, or special values indicating -1: a single
+ * item that has not yet been used, -2: a single item that has already been used, -3: a mapping.
+ */
int argIndex;
+ /** Arguments supplied to {@link #format(PyObject)} method. */
PyObject args;
+ /** Indicate a <code>PyUnicode</code> result is expected. */
boolean unicodeCoercion;
final char pop() {
@@ -4027,6 +4035,11 @@
index--;
}
+ /**
+ * Initialise the interpreter with the given format string, ready for {@link #format(PyObject)}.
+ *
+ * @param format string to interpret
+ */
public StringFormatter(String format) {
this(format, false);
}
@@ -4044,6 +4057,10 @@
buffer = new StringBuilder(format.length() + 100);
}
+ /**
+ * Read the next object from the argument list, taking special values of <code>argIndex</code>
+ * into account.
+ */
PyObject getarg() {
PyObject ret = null;
switch (argIndex) {
@@ -4064,6 +4081,10 @@
return ret;
}
+ /**
+ * Parse a number from the format, except if the next thing is "*", read it from the argument
+ * list.
+ */
int getNumber() {
char c = pop();
if (c == '*') {
@@ -4093,7 +4114,24 @@
}
+ /**
+ * Format the argument interpreted as a long, using the argument's <code>__str__</code>,
+ * <code>__oct__</code>, or <code>__hex__</code> method according to <code>type</code>. If v is
+ * being treated as signed, the sign of v is transferred to {@link #negative} and the absolute
+ * value is converted. The <code>altFlag</code> argument controls the appearance of a "0x" or
+ * "0X" prefix in the hex case, or a "0" prefix in the octal case. The hexadecimal case, the
+ * case of characters and digits will match the type ('x' meaning lowercase, 'X' meaning
+ * uppercase).
+ *
+ * @param arg to convert
+ * @param type one of 'o' for octal, 'x' or 'X' for hex, anything else calls
+ * <code>arg.__str__</code>.
+ * @param altFlag if true there will be a prefix
+ * @return converted value as <code>String</code>
+ */
private String formatLong(PyObject arg, char type, boolean altFlag) {
+ // Convert using the appropriate type
+ // XXX Results in behaviour divergent from CPython when any of the methods is overridden.
PyString argAsString;
switch (type) {
case 'o':
@@ -4107,29 +4145,37 @@
argAsString = arg.__str__();
break;
}
+
checkPrecision("long");
String s = argAsString.toString();
int end = s.length();
int ptr = 0;
+ // In the hex case, the __hex__ return starts 0x
+ // XXX (we assume, perhaps falsely)
int numnondigits = 0;
if (type == 'x' || type == 'X') {
numnondigits = 2;
}
+ // Strip a "long" indicator
if (s.endsWith("L")) {
end--;
}
+ // Strip a possible sign to member negative
negative = s.charAt(0) == '-';
if (negative) {
ptr++;
}
+ // The formatted number is s[ptr:end] and starts with numnondigits non-digits.
int numdigits = end - numnondigits - ptr;
if (!altFlag) {
+ // We should have no "base tag" '0' or "0x" on the front.
switch (type) {
case 'o':
+ // Strip the '0'
if (numdigits > 1) {
++ptr;
--numdigits;
@@ -4137,27 +4183,36 @@
break;
case 'x':
case 'X':
+ // Strip the "0x"
ptr += 2;
numnondigits -= 2;
break;
}
}
+
+ // If necessary, add leading zeros to the numerical digits part.
if (precision > numdigits) {
+ // Recompose the formatted number in this buffer
StringBuilder buf = new StringBuilder();
+ // The base indicator prefix
for (int i = 0; i < numnondigits; ++i) {
buf.append(s.charAt(ptr++));
}
+ // The extra zeros
for (int i = 0; i < precision - numdigits; i++) {
buf.append('0');
}
+ // The previously known digits
for (int i = 0; i < numdigits; i++) {
buf.append(s.charAt(ptr++));
}
s = buf.toString();
} else if (end < s.length() || ptr > 0) {
+ // It's only necessary to extract the formatted number from s
s = s.substring(ptr, end);
}
+ // And finally, deal with the case, so it matches x or X.
switch (type) {
case 'X':
s = s.toUpperCase();
@@ -4167,10 +4222,16 @@
}
/**
- * Formats arg as an integer, with the specified radix
+ * Formats arg as an integer, with the specified radix. The integer value is obtained from the
+ * result of <code>arg.__int__()</code>. <code>type</code> and <code>altFlag</code> are passed
+ * to {@link #formatLong(PyObject, char, boolean)} in case the result is a PyLong.
*
- * type and altFlag are needed to be passed to {@link #formatLong(PyObject, char, boolean)} in
- * case the result of <code>arg.__int__()</code> is a PyLong.
+ * @param arg to convert
+ * @param radix in which to express <code>arg</code>
+ * @param unsigned true if required to interpret a 32-bit integer as unsigned ('u' legacy?).
+ * @param type of conversion ('d', 'o', 'x', or 'X')
+ * @param altFlag '#' present in format (causes "0x" prefix in hex, and '0' prefix in octal)
+ * @return string form of the value
*/
private String formatInteger(PyObject arg, int radix, boolean unsigned, char type,
boolean altFlag) {
@@ -4183,7 +4244,6 @@
// safe to call __int__:
argAsInt = arg.__int__();
} else {
- // Same case noted on formatFloatDecimal:
// We can't simply call arg.__int__() because PyString implements
// it without exposing it to python (i.e, str instances has no
// __int__ attribute). So, we would support strings as arguments
@@ -4192,7 +4252,7 @@
try {
argAsInt = arg.__getattr__("__int__").__call__();
} catch (PyException e) {
- // XXX: Swallow customs AttributeError throws from __float__ methods
+ // XXX: Swallow custom AttributeError throws from __int__ methods
// No better alternative for the moment
if (e.match(Py.AttributeError)) {
throw Py.TypeError("int argument required");
@@ -4202,25 +4262,44 @@
}
}
if (argAsInt instanceof PyInteger) {
+ // This call does not provide the prefix and will be lowercase.
return formatInteger(((PyInteger)argAsInt).getValue(), radix, unsigned);
} else { // must be a PyLong (as per __int__ contract)
+ // This call provides the base prefix and case-matches with 'x' or 'X'.
return formatLong(argAsInt, type, altFlag);
}
}
+ /**
+ * Convert a 32-bit integer (as from a {@link PyInteger}) to characters, signed or unsigned. The
+ * values is presented in a <code>long</code>. The string result is left-padded with zeros to
+ * the stated {@link #precision}. If v is being treated as signed, the sign of v is transferred
+ * to {@link #negative} and the absolute value is converted. Otherwise (unsigned case)
+ * <code>0x100000000L + v</code> is converted. This method does not provide the '0' or "0x"
+ * prefix, just the padded digit string.
+ *
+ * @param v value to convert
+ * @param radix of conversion
+ * @param unsigned if should be treated as unsigned
+ * @return string form
+ */
private String formatInteger(long v, int radix, boolean unsigned) {
checkPrecision("integer");
if (unsigned) {
+ // If the high bit was set, this will have been sign-extended: correct that.
if (v < 0) {
v = 0x100000000l + v;
}
} else {
+ // If the high bit was set, the sign extension was correct, but we need sign + abs(v).
if (v < 0) {
negative = true;
v = -v;
}
}
+ // Use the method in java.lang.Long (lowercase, no prefix)
String s = Long.toString(v, radix);
+ // But zero pad to the requested precision
while (s.length() < precision) {
s = "0" + s;
}
@@ -4235,88 +4314,23 @@
}
}
- static class DecimalFormatTemplate {
-
- static DecimalFormat template;
- static {
- template =
- new DecimalFormat("#,##0.#####", new DecimalFormatSymbols(java.util.Locale.US));
- DecimalFormatSymbols symbols = template.getDecimalFormatSymbols();
- symbols.setNaN("nan");
- symbols.setInfinity("inf");
- template.setDecimalFormatSymbols(symbols);
- template.setGroupingUsed(false);
- }
- }
-
- private static final DecimalFormat getDecimalFormat() {
- return (DecimalFormat)DecimalFormatTemplate.template.clone();
- }
-
- private String formatFloatDecimal(double v, boolean truncate) {
- checkPrecision("decimal");
- int prec = precision;
- if (prec == -1) {
- prec = 6;
- }
- if (v < 0) {
- v = -v;
- negative = true;
- }
-
- DecimalFormat decimalFormat = getDecimalFormat();
- decimalFormat.setMaximumFractionDigits(prec);
- decimalFormat.setMinimumFractionDigits(truncate ? 0 : prec);
-
- String ret = decimalFormat.format(v);
- return ret;
- }
-
- private String formatFloatExponential(PyObject arg, char e, boolean truncate) {
- StringBuilder buf = new StringBuilder();
- double v = asDouble(arg);
- boolean isNegative = false;
- if (v < 0) {
- v = -v;
- isNegative = true;
- }
- double power = 0.0;
- if (v > 0) {
- power = ExtraMath.closeFloor(Math.log10(v));
- }
- // System.err.println("formatExp: "+v+", "+power);
- int savePrecision = precision;
- precision = 2;
-
- String exp = formatInteger((long)power, 10, false);
- if (negative) {
- negative = false;
- exp = '-' + exp;
- } else {
- exp = '+' + exp;
- }
-
- precision = savePrecision;
-
- double base = v / Math.pow(10, power);
- buf.append(formatFloatDecimal(base, truncate));
- buf.append(e);
-
- buf.append(exp);
- negative = isNegative;
-
- return buf.toString();
- }
-
+ /**
+ * Main service of this class: format one or more arguments with the format string supplied at
+ * construction.
+ *
+ * @param args tuple or map containing objects, or a single object, to convert
+ * @return result of formatting
+ */
@SuppressWarnings("fallthrough")
public PyString format(PyObject args) {
PyObject dict = null;
this.args = args;
boolean needUnicode = unicodeCoercion;
if (args instanceof PyTuple) {
+ // We will simply work through the tuple elements
argIndex = 0;
} else {
- // special index indicating a single item rather than a tuple
+ // Not a tuple, but possibly still some kind of container: use special argIndex values.
argIndex = -1;
if (args instanceof PyDictionary || args instanceof PyStringMap
|| (!(args instanceof PySequence) && args.__findattr__("__getitem__") != null)) {
@@ -4326,6 +4340,8 @@
}
while (index < format.length()) {
+
+ // Attributes to be parsed from the next format specifier
boolean ljustFlag = false;
boolean signFlag = false;
boolean blankFlag = false;
@@ -4335,16 +4351,31 @@
int width = -1;
precision = -1;
+ // Read one character from the format string
char c = pop();
if (c != '%') {
buffer.append(c);
continue;
}
+
+ // It's a %, so the beginning of a conversion specifier. Parse it.
+
+ // A conversion specifier contains the following components, in this order:
+ // + The '%' character, which marks the start of the specifier.
+ // + Mapping key (optional), consisting of a parenthesised sequence of characters.
+ // + Conversion flags (optional), which affect the result of some conversion types.
+ // + Minimum field width (optional), or an '*' (asterisk).
+ // + Precision (optional), given as a '.' (dot) followed by the precision or '*'.
+ // + Length modifier (optional).
+ // + Conversion type.
+
c = pop();
if (c == '(') {
+ // Mapping key, consisting of a parenthesised sequence of characters.
if (dict == null) {
throw Py.TypeError("format requires a mapping");
}
+ // Scan along until a matching close parenthesis is found
int parens = 1;
int keyStart = index;
while (parens > 0) {
@@ -4355,11 +4386,16 @@
parens++;
}
}
+ // Last c=pop() is the closing ')' while indexKey is just after the opening '('
String tmp = format.substring(keyStart, index - 1);
+ // Look it up using this extent as the (right type of) key.
this.args = dict.__getitem__(needUnicode ? new PyUnicode(tmp) : new PyString(tmp));
} else {
+ // Not a mapping key: next clause will re-read c.
push();
}
+
+ // Conversion flags (optional) that affect the result of some conversion types.
while (true) {
switch (c = pop()) {
case '-':
@@ -4380,43 +4416,77 @@
}
break;
}
+ // Push back c as next clause will re-read c.
push();
+
+ /*
+ * Minimum field width (optional). If specified as an '*' (asterisk), the actual width
+ * is read from the next element of the tuple in values, and the object to convert comes
+ * after the minimum field width and optional precision. A custom getNumber() takes care
+ * of the '*' case.
+ */
width = getNumber();
if (width < 0) {
width = -width;
ljustFlag = true;
}
+
+ /*
+ * Precision (optional), given as a '.' (dot) followed by the precision. If specified as
+ * '*' (an asterisk), the actual precision is read from the next element of the tuple in
+ * values, and the value to convert comes after the precision. A custom getNumber()
+ * takes care of the '*' case.
+ */
c = pop();
if (c == '.') {
precision = getNumber();
if (precision < -1) {
precision = 0;
}
-
c = pop();
}
+
+ // Length modifier (optional). (Compatibility feature?) It has no effect.
if (c == 'h' || c == 'l' || c == 'L') {
c = pop();
}
+
+ // c is now the conversion type.
if (c == '%') {
+ // It was just a percent sign after all
buffer.append(c);
continue;
}
+
+ /*
+ * Process argument according to format specification decoded from the string. It is
+ * important we don't read the argumnent from the list until this point because of the
+ * possibility that width and precision were specified via the argument list.
+ */
PyObject arg = getarg();
- char fill = ' ';
String string = null;
negative = false;
+
+ // Independent of type, decide the padding character based on decoded flags.
+ char fill = ' ';
if (zeroFlag) {
fill = '0';
} else {
fill = ' ';
}
+
+ // Perform the type-specific formatting
switch (c) {
+
case 's':
+ // String (converts any Python object using str()).
if (arg instanceof PyUnicode) {
needUnicode = true;
}
+ // fall through ...
+
case 'r':
+ // String (converts any Python object using repr()).
fill = ' ';
if (c == 's') {
if (needUnicode) {
@@ -4432,15 +4502,19 @@
}
break;
+
case 'i':
case 'd':
+ // Signed integer decimal. Note floats accepted.
if (arg instanceof PyLong) {
string = formatLong(arg, c, altFlag);
} else {
string = formatInteger(arg, 10, false, c, altFlag);
}
break;
+
case 'u':
+ // Obsolete type – it is identical to 'd'. (Why not identical here?)
if (arg instanceof PyLong) {
string = formatLong(arg, c, altFlag);
} else if (arg instanceof PyInteger || arg instanceof PyFloat) {
@@ -4449,10 +4523,15 @@
throw Py.TypeError("int argument required");
}
break;
+
case 'o':
+ // Signed octal value. Note floats accepted.
if (arg instanceof PyLong) {
+ // This call provides the base prefix '0' if altFlag.
string = formatLong(arg, c, altFlag);
} else if (arg instanceof PyInteger || arg instanceof PyFloat) {
+ // This call does not provide the '0' prefix and will be lowercase ...
+ // ... except where arg.__int__ returns PyLong, then it's like formatLong.
string = formatInteger(arg, 8, false, c, altFlag);
if (altFlag && string.charAt(0) != '0') {
string = "0" + string;
@@ -4461,10 +4540,15 @@
throw Py.TypeError("int argument required");
}
break;
+
case 'x':
+ // Signed hexadecimal (lowercase). Note floats accepted.
if (arg instanceof PyLong) {
+ // This call provides the base prefix "0x" if altFlag and case-matches c.
string = formatLong(arg, c, altFlag);
} else if (arg instanceof PyInteger || arg instanceof PyFloat) {
+ // This call does not provide the "0x" prefix and will be lowercase.
+ // ... except where arg.__int__ returns PyLong, then it's like formatLong.
string = formatInteger(arg, 16, false, c, altFlag);
string = string.toLowerCase();
if (altFlag) {
@@ -4474,10 +4558,15 @@
throw Py.TypeError("int argument required");
}
break;
+
case 'X':
+ // Signed hexadecimal (uppercase). Note floats accepted.
if (arg instanceof PyLong) {
+ // This call provides the base prefix "0x" if altFlag and case-matches c.
string = formatLong(arg, c, altFlag);
} else if (arg instanceof PyInteger || arg instanceof PyFloat) {
+ // This call does not provide the "0x" prefix and will be lowercase.
+ // ... except where arg.__int__ returns PyLong, then it's like formatLong.
string = formatInteger(arg, 16, false, c, altFlag);
string = string.toUpperCase();
if (altFlag) {
@@ -4487,60 +4576,33 @@
throw Py.TypeError("int argument required");
}
break;
+
case 'e':
case 'E':
- string = formatFloatExponential(arg, c, false);
- if (c == 'E') {
- string = string.toUpperCase();
- }
- break;
case 'f':
case 'F':
- string = formatFloatDecimal(asDouble(arg), false);
- if (c == 'F') {
- string = string.toUpperCase();
- }
- break;
case 'g':
case 'G':
- int origPrecision = precision;
- if (precision == -1) {
- precision = 6;
- }
-
+ // All floating point formats (+case).
+
+ // Convert the flags (local variables) to the form needed in the Spec object.
+ char align = ljustFlag ? '<' : '>';
+ char sign = signFlag ? '+' : (blankFlag ? ' ' : Spec.NONE);
+ int w = Spec.UNSPECIFIED;
+ Spec spec = new Spec(fill, align, sign, altFlag, w, false, precision, c);
+
+ // Format using this Spec the double form of the argument.
+ FloatFormatter f = new FloatFormatter(spec);
double v = asDouble(arg);
- int exponent = (int)ExtraMath.closeFloor(Math.log10(Math.abs(v == 0 ? 1 : v)));
- if (v == Double.POSITIVE_INFINITY) {
- string = "inf";
- } else if (v == Double.NEGATIVE_INFINITY) {
- string = "-inf";
- } else if (exponent >= -4 && exponent < precision) {
- precision -= exponent + 1;
- string = formatFloatDecimal(v, !altFlag);
-
- // XXX: this block may be unnecessary now
- if (altFlag && string.indexOf('.') == -1) {
- int zpad = origPrecision - string.length();
- string += '.';
- if (zpad > 0) {
- char zeros[] = new char[zpad];
- for (int ci = 0; ci < zpad; zeros[ci++] = '0') {}
- string += new String(zeros);
- }
- }
- } else {
- // Exponential precision is the number of digits after the decimal
- // point, whereas 'g' precision is the number of significant digits --
- // and exponential always provides one significant digit before the
- // decimal point
- precision--;
- string = formatFloatExponential(arg, (char)(c - 2), !altFlag);
- }
- if (c == 'G') {
- string = string.toUpperCase();
- }
+ f.format(v);
+ string = f.getResult();
+
+ // Suppress subsequent attempts to insert a correct sign, done already.
+ signFlag = blankFlag = negative = false;
break;
+
case 'c':
+ // Single character (accepts integer or single character string).
fill = ' ';
if (arg instanceof PyString) {
string = ((PyString)arg).toString();
@@ -4552,6 +4614,8 @@
}
break;
}
+
+ // arg is not a str (or unicode)
int val;
try {
// Explicitly __int__ so we can look for an AttributeError (which is
@@ -4563,6 +4627,7 @@
}
throw e;
}
+ // Range check, according to ultimate type of result as presentl;y known.
if (!needUnicode) {
if (val < 0) {
throw Py.OverflowError("unsigned byte integer is less than minimum");
@@ -4580,8 +4645,15 @@
+ codecs.encode(Py.newString(c), null, "replace") + "' (0x"
+ Integer.toHexString(c) + ") at index " + (index - 1));
}
+
+ /*
+ * We have now dealt with the translation of the (absolute value of the) argument, in
+ * variable string[]. In the next sections we deal with sign, padding and base prefix.
+ */
int length = string.length();
int skip = 0;
+
+ // Decide how to represent the sign according to format and actual sign of argument.
String signString = null;
if (negative) {
signString = "-";
@@ -4593,34 +4665,47 @@
}
}
+ // The width (from here on) will be the remaining width on the line.
if (width < length) {
width = length;
}
+
+ // Insert the sign in the buffer and adjust the width.
if (signString != null) {
if (fill != ' ') {
+ // When the fill is not space, the sign comes before the fill.
buffer.append(signString);
}
+ // Adjust width for sign.
if (width > length) {
width--;
}
}
+
+ // Insert base prefix used with alternate mode for hexadecimal.
if (altFlag && (c == 'x' || c == 'X')) {
if (fill != ' ') {
+ // When the fill is not space, this base prefix comes before the fill.
buffer.append('0');
buffer.append(c);
skip += 2;
}
+ // Adjust width for base prefix.
width -= 2;
if (width < 0) {
width = 0;
}
length -= 2;
}
+
+ // Fill on the left of the item.
if (width > length && !ljustFlag) {
do {
buffer.append(fill);
} while (--width > length);
}
+
+ // If the fill is spaces, we will have deferred the sign and hex base prefix
if (fill == ' ') {
if (signString != null) {
buffer.append(signString);
@@ -4631,19 +4716,33 @@
skip += 2;
}
}
+
+ // Now append the converted argument.
if (skip > 0) {
+ // The string contains a hex-prefix, but we have already inserted one.
buffer.append(string.substring(skip));
} else {
buffer.append(string);
}
+ // If this hasn't filled the space required, add right-padding.
while (--width >= length) {
buffer.append(' ');
}
}
+
+ /*
+ * All fields in the format string have been used to convert arguments (or used the argument
+ * as a width, etc.). This had better not leave any arguments unused. Note argIndex is an
+ * index into args or has a special value. If args is a 'proper' index, It should now be out
+ * of range; if a special value, it would be wrong if it were -1, indicating a single item
+ * that has not yet been used.
+ */
if (argIndex == -1 || (argIndex >= 0 && args.__finditem__(argIndex) != null)) {
throw Py.TypeError("not all arguments converted during string formatting");
}
+
+ // Return the final buffer contents as a str or unicode as appropriate.
if (needUnicode) {
return new PyUnicode(buffer);
}
diff --git a/src/org/python/core/stringlib/FloatFormatter.java b/src/org/python/core/stringlib/FloatFormatter.java
--- a/src/org/python/core/stringlib/FloatFormatter.java
+++ b/src/org/python/core/stringlib/FloatFormatter.java
@@ -16,7 +16,7 @@
public class FloatFormatter extends InternalFormat.Formatter {
/** The rounding mode dominant in the formatter. */
- static final RoundingMode ROUND_PY = RoundingMode.HALF_UP; // Believed to be HALF_EVEN in Py3k
+ static final RoundingMode ROUND_PY = RoundingMode.HALF_EVEN;
/** If it contains no decimal point, this length is zero, and 1 otherwise. */
private int lenPoint;
@@ -325,10 +325,9 @@
exp = lenFraction - vv.scale();
}
- // Finally add zeros, as necessary, and stick on the exponent.
-
+ // If the result is not already complete, add point and zeros as necessary, and exponent.
if (lenMarker == 0) {
- appendTrailingZeros(precision);
+ ensurePointAndTrailingZeros(precision);
appendExponent(exp);
}
}
@@ -345,14 +344,7 @@
*/
private void format_f(double value, String positivePrefix, int precision) {
- if (signAndSpecialNumber(value, positivePrefix)) {
-
- if (lenMarker == 0) {
- // May be 0 or -0 so we still need to ...
- appendTrailingZeros(precision);
- }
-
- } else {
+ if (!signAndSpecialNumber(value, positivePrefix)) {
// Convert value to decimal exactly. (This can be very long.)
BigDecimal vLong = new BigDecimal(Math.abs(value));
@@ -366,9 +358,53 @@
// There is a decimal point and some digits following
lenWhole = result.length() - (start + lenSign + (lenPoint = 1) + lenFraction);
} else {
+ // There are no fractional digits and so no decimal point
lenWhole = result.length() - (start + lenSign);
}
+ }
+ // Finally, ensure we have all the fractional digits we should.
+ if (lenMarker == 0) {
+ ensurePointAndTrailingZeros(precision);
+ }
+ }
+
+ /**
+ * Append a decimal point and trailing fractional zeros if necessary for 'e' and 'f' format.
+ * This should not be called if the result is not numeric ("inf" for example). This method deals
+ * with the following complexities: on return there will be at least the number of fractional
+ * digits specified in the argument <code>n</code>, and at least {@link #minFracDigits};
+ * further, if <code>minFracDigits<0</code>, signifying the "alternate mode" of certain
+ * formats, the method will ensure there is a decimal point, even if there are no fractional
+ * digits to follow.
+ *
+ * @param n smallest number of fractional digits on return
+ */
+ private void ensurePointAndTrailingZeros(int n) {
+
+ // Set n to the number of fractional digits we should have.
+ if (n < minFracDigits) {
+ n = minFracDigits;
+ }
+
+ // Do we have a decimal point already?
+ if (lenPoint == 0) {
+ // No decimal point: add one if there will be any fractional digits or
+ if (n > 0 || minFracDigits < 0) {
+ // First need to add a decimal point.
+ result.append('.');
+ lenPoint = 1;
+ }
+ }
+
+ // Do we have enough fractional digits already?
+ int f = lenFraction;
+ if (n > f) {
+ // Make up the required number of zeros.
+ for (; f < n; f++) {
+ result.append('0');
+ }
+ lenFraction = f;
}
}
@@ -635,7 +671,7 @@
// In no-truncate mode, the fraction is full precision. Otherwise trim it.
if (minFracDigits >= 0) {
- // Note minFracDigits only applies to fixed formats.
+ // Note positive minFracDigits only applies to fixed formats.
removeTrailingZeros(0);
}
@@ -757,16 +793,16 @@
/**
* Append the trailing fractional zeros, as required by certain formats, so that the total
- * number of fractional digits is no less than specified. If <code>minFracDigits<=0</code>,
- * the method leaves the {@link #result} buffer unchanged.
+ * number of fractional digits is no less than specified. If <code>n<=0</code>, the method
+ * leaves the {@link #result} buffer unchanged.
*
- * @param minFracDigits smallest number of fractional digits on return
+ * @param n smallest number of fractional digits on return
*/
- private void appendTrailingZeros(int minFracDigits) {
+ private void appendTrailingZeros(int n) {
int f = lenFraction;
- if (minFracDigits > f) {
+ if (n > f) {
if (lenPoint == 0) {
// First need to add a decimal point. (Implies lenFraction=0.)
result.append('.');
@@ -774,7 +810,7 @@
}
// Now make up the required number of zeros.
- for (; f < minFracDigits; f++) {
+ for (; f < n; f++) {
result.append('0');
}
lenFraction = f;
@@ -787,9 +823,9 @@
* originally (and therefore no fractional part), the method will add a decimal point, even if
* it adds no zeros.
*
- * @param minFracDigits smallest number of fractional digits on return
+ * @param n smallest number of fractional digits on return
*/
- private void appendPointAndTrailingZeros(int minFracDigits) {
+ private void appendPointAndTrailingZeros(int n) {
if (lenPoint == 0) {
// First need to add a decimal point. (Implies lenFraction=0.)
@@ -799,7 +835,7 @@
// Now make up the required number of zeros.
int f;
- for (f = lenFraction; f < minFracDigits; f++) {
+ for (f = lenFraction; f < n; f++) {
result.append('0');
}
lenFraction = f;
@@ -810,18 +846,17 @@
* least the number of fractional digits specified. If the resultant number of fractional digits
* is zero, this method will also remove the trailing decimal point (if there is one).
*
- * @param minFracDigits smallest number of fractional digits on return
+ * @param n smallest number of fractional digits on return
*/
- private void removeTrailingZeros(int minFracDigits) {
-
- int f = lenFraction;
+ private void removeTrailingZeros(int n) {
if (lenPoint > 0) {
// There's a decimal point at least, and there may be some fractional digits.
- if (minFracDigits == 0 || f > minFracDigits) {
+ int f = lenFraction;
+ if (n == 0 || f > n) {
int fracStart = result.length() - f;
- for (; f > minFracDigits; --f) {
+ for (; f > n; --f) {
if (result.charAt(fracStart - 1 + f) != '0') {
// Keeping this one as it isn't a zero
break;
diff --git a/src/org/python/core/stringlib/InternalFormat.java b/src/org/python/core/stringlib/InternalFormat.java
--- a/src/org/python/core/stringlib/InternalFormat.java
+++ b/src/org/python/core/stringlib/InternalFormat.java
@@ -108,7 +108,7 @@
* method shows a '|' character between each section when it prints out the buffer. Override
* this when you define more lengths in the subclass.
*
- * @return
+ * @return the lengths of the successive sections
*/
protected int[] sectionLengths() {
return new int[] {lenSign, lenWhole};
@@ -265,7 +265,6 @@
* result you probably don't want. It is up to the client to disallow this (which
* <code>complex</code> does).
*
- * @param value to pad
* @return this object
*/
public Formatter pad() {
@@ -468,8 +467,8 @@
* during the construction of a new <code>Spec</code>, for attributes that are unspecified in a
* primary source.
* <p>
- * This structure is returned by factory method {@link #fromText(CharSequence)}, and having
- * public final members is freely accessed by formatters such as {@link FloatBuilder}, and the
+ * This structure is returned by factory method {@link #fromText(String)}, and having public
+ * final members is freely accessed by formatters such as {@link FloatFormatter}, and the
* __format__ methods of client object types.
* <p>
* The fields correspond to the elements of a format specification. The grammar of a format
@@ -482,28 +481,28 @@
* A typical idiom is:
*
* <pre>
- * private static final InternalFormatSpec FLOAT_DEFAULT = InternalFormatSpec.from(">");
+ * private static final InternalFormatSpec FLOAT_DEFAULTS = InternalFormatSpec.from(">");
* ...
- * InternalFormatSpec spec = InternalFormatSpec.from(specString, FLOAT_DEFAULT);
+ * InternalFormat.Spec spec = InternalFormat.fromText(specString);
+ * spec = spec.withDefaults(FLOAT_DEFAULTS);
* ... // Validation of spec.type, and other attributes, for this type.
- * FloatBuilder buf = new FloatBuilder(spec);
- * buf.format(value);
- * String result = buf.getResult();
+ * FloatFormatter f = new FloatFormatter(spec);
+ * String result = f.format(value).getResult();
*
* </pre>
*/
public static class Spec {
- /** The fill character specified, or '\uffff' if unspecified. */
+ /** The fill character specified, or U+FFFF if unspecified. */
public final char fill;
/**
- * Alignment indicator is one of {<code>'<', '^', '>', '='</code>, or '\uffff' if
+ * Alignment indicator is one of {<code>'<', '^', '>', '='</code>, or U+FFFF if
* unspecified.
*/
public final char align;
/**
* Sign-handling flag, one of <code>'+'</code>, <code>'-'</code>, or <code>' '</code>, or
- * '\uffff' if unspecified.
+ * U+FFFF if unspecified.
*/
public final char sign;
/** The alternative format flag '#' was given. */
@@ -514,7 +513,7 @@
public final boolean grouping;
/** Precision decoded from the format, or -1 if unspecified. */
public final int precision;
- /** Type key from the format, or '\uffff' if unspecified. */
+ /** Type key from the format, or U+FFFF if unspecified. */
public final char type;
/** Non-character code point used to represent "no value" in <code>char</code> attributes. */
@@ -604,17 +603,17 @@
}
/**
- * Return a merged <code>Spec</code> object, in which any attribute of this object, that is
- * specified (or <code>true</code>) has the same value in the result, and any attribute of
- * this object that is unspecified (or <code>false</code>) has the value that attribute
- * takes in the other object. This the second object supplies default values. (These
+ * Return a merged <code>Spec</code> object, in which any attribute of this object that is
+ * specified (or <code>true</code>), has the same value in the result, and any attribute of
+ * this object that is unspecified (or <code>false</code>), has the value that attribute
+ * takes in the other object. Thus the second object supplies default values. (These
* defaults may also be unspecified.) The use of this method is to allow a <code>Spec</code>
* constructed from text to record exactly, and only, what was in the textual specification,
* while the __format__ method of a client object supplies its type-specific defaults. Thus
* "20" means "<20s" to a <code>str</code>, ">20.12" to a <code>float</code> and ">20.12g"
* to a <code>complex</code>.
*
- * @param defaults to merge where this object does not specify the attribute.
+ * @param other defaults to merge where this object does not specify the attribute.
* @return a new Spec object.
*/
public Spec withDefaults(Spec other) {
@@ -646,7 +645,7 @@
* @param precision (e.g. decimal places)
* @param type indicator character
*/
- public Spec(int width, int precision, char type) {
+ public Spec(int precision, char type) {
this(' ', '>', Spec.NONE, false, UNSPECIFIED, false, precision, type);
}
diff --git a/src/org/python/core/stringlib/InternalFormatSpec.java b/src/org/python/core/stringlib/InternalFormatSpec.java
--- a/src/org/python/core/stringlib/InternalFormatSpec.java
+++ b/src/org/python/core/stringlib/InternalFormatSpec.java
@@ -7,8 +7,8 @@
* to a string assumed to be the result of formatting to the given precision.
* <p>
* This structure is returned by {@link InternalFormatSpecParser#parse()} and having public members
- * is freely used by {@link InternalFormatSpecParser}, {@link Formatter} and the __format__ methods
- * of client object types.
+ * is freely used by {@link InternalFormatSpecParser}, and the __format__ methods of client object
+ * types.
* <p>
* The fields correspond to the elements of a format specification. The grammar of a format
* specification is:
@@ -45,7 +45,7 @@
* @param defaultAlign to use if <code>this.align</code>=0 (one of <code>'<'</code>,
* <code>'^'</code>, <code>'>'</code>, or <code>'='</code>).
* @param leaveWidth to reduce effective <code>this.width</code> by
- * @return
+ * @return padded value
*/
public String pad(String value, char defaultAlign, int leaveWidth) {
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins
mailing list