[pypy-svn] r57200 - pypy/branch/2.5-features/lib-python/modified-2.5.1
bgola at codespeak.net
bgola at codespeak.net
Mon Aug 11 22:10:32 CEST 2008
Author: bgola
Date: Mon Aug 11 22:10:30 2008
New Revision: 57200
Added:
pypy/branch/2.5-features/lib-python/modified-2.5.1/
pypy/branch/2.5-features/lib-python/modified-2.5.1/__future__.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/binhex.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/cmd.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/copy.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/decimal.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/doctest.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/inspect.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/locale.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/opcode.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/pickle.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/pickletools.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/popen2.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/pprint.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/pydoc.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/site.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/socket.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/sre_compile.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/sre_constants.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/subprocess.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/tarfile.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/traceback.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/types.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/uu.py (contents, props changed)
pypy/branch/2.5-features/lib-python/modified-2.5.1/warnings.py (contents, props changed)
Log:
applying the changes from modified-2.4.1 to modified-2.5.1.
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/__future__.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/__future__.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,116 @@
+"""Record of phased-in incompatible language changes.
+
+Each line is of the form:
+
+ FeatureName = "_Feature(" OptionalRelease "," MandatoryRelease ","
+ CompilerFlag ")"
+
+where, normally, OptionalRelease < MandatoryRelease, and both are 5-tuples
+of the same form as sys.version_info:
+
+ (PY_MAJOR_VERSION, # the 2 in 2.1.0a3; an int
+ PY_MINOR_VERSION, # the 1; an int
+ PY_MICRO_VERSION, # the 0; an int
+ PY_RELEASE_LEVEL, # "alpha", "beta", "candidate" or "final"; string
+ PY_RELEASE_SERIAL # the 3; an int
+ )
+
+OptionalRelease records the first release in which
+
+ from __future__ import FeatureName
+
+was accepted.
+
+In the case of MandatoryReleases that have not yet occurred,
+MandatoryRelease predicts the release in which the feature will become part
+of the language.
+
+Else MandatoryRelease records when the feature became part of the language;
+in releases at or after that, modules no longer need
+
+ from __future__ import FeatureName
+
+to use the feature in question, but may continue to use such imports.
+
+MandatoryRelease may also be None, meaning that a planned feature got
+dropped.
+
+Instances of class _Feature have two corresponding methods,
+.getOptionalRelease() and .getMandatoryRelease().
+
+CompilerFlag is the (bitfield) flag that should be passed in the fourth
+argument to the builtin function compile() to enable the feature in
+dynamically compiled code. This flag is stored in the .compiler_flag
+attribute on _Future instances. These values must match the appropriate
+#defines of CO_xxx flags in Include/compile.h.
+
+No feature line is ever to be deleted from this file.
+"""
+
+all_feature_names = [
+ "nested_scopes",
+ "generators",
+ "division",
+ "absolute_import",
+ "with_statement",
+]
+
+__all__ = ["all_feature_names"] + all_feature_names
+
+# The CO_xxx symbols are defined here under the same names used by
+# compile.h, so that an editor search will find them here. However,
+# they're not exported in __all__, because they don't really belong to
+# this module.
+CO_NESTED = 0x0010 # nested_scopes
+CO_GENERATOR_ALLOWED = 0 # generators (obsolete, was 0x1000)
+CO_FUTURE_DIVISION = 0x2000 # division
+CO_FUTURE_ABSIMPORT = 0x4000 # absolute_import
+CO_FUTURE_WITH_STATEMENT = 0x8000 # with statement
+
+class _Feature:
+ def __init__(self, optionalRelease, mandatoryRelease, compiler_flag):
+ self.optional = optionalRelease
+ self.mandatory = mandatoryRelease
+ self.compiler_flag = compiler_flag
+
+ def getOptionalRelease(self):
+ """Return first release in which this feature was recognized.
+
+ This is a 5-tuple, of the same form as sys.version_info.
+ """
+
+ return self.optional
+
+ def getMandatoryRelease(self):
+ """Return release in which this feature will become mandatory.
+
+ This is a 5-tuple, of the same form as sys.version_info, or, if
+ the feature was dropped, is None.
+ """
+
+ return self.mandatory
+
+ def __repr__(self):
+ return "_Feature" + repr((self.optional,
+ self.mandatory,
+ self.compiler_flag))
+
+nested_scopes = _Feature((2, 1, 0, "beta", 1),
+ (2, 2, 0, "alpha", 0),
+ CO_NESTED)
+
+generators = _Feature((2, 2, 0, "alpha", 1),
+ (2, 3, 0, "final", 0),
+ CO_GENERATOR_ALLOWED)
+
+division = _Feature((2, 2, 0, "alpha", 2),
+ (3, 0, 0, "alpha", 0),
+ CO_FUTURE_DIVISION)
+
+absolute_import = _Feature((2, 5, 0, "alpha", 1),
+ (2, 7, 0, "alpha", 0),
+ CO_FUTURE_ABSIMPORT)
+
+with_statement = _Feature((2, 5, 0, "alpha", 1),
+ (2, 6, 0, "alpha", 0),
+ CO_FUTURE_WITH_STATEMENT)
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/binhex.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/binhex.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,527 @@
+"""Macintosh binhex compression/decompression.
+
+easy interface:
+binhex(inputfilename, outputfilename)
+hexbin(inputfilename, outputfilename)
+"""
+
+#
+# Jack Jansen, CWI, August 1995.
+#
+# The module is supposed to be as compatible as possible. Especially the
+# easy interface should work "as expected" on any platform.
+# XXXX Note: currently, textfiles appear in mac-form on all platforms.
+# We seem to lack a simple character-translate in python.
+# (we should probably use ISO-Latin-1 on all but the mac platform).
+# XXXX The simple routines are too simple: they expect to hold the complete
+# files in-core. Should be fixed.
+# XXXX It would be nice to handle AppleDouble format on unix
+# (for servers serving macs).
+# XXXX I don't understand what happens when you get 0x90 times the same byte on
+# input. The resulting code (xx 90 90) would appear to be interpreted as an
+# escaped *value* of 0x90. All coders I've seen appear to ignore this nicety...
+#
+import sys
+import os
+import struct
+import binascii
+
+__all__ = ["binhex","hexbin","Error"]
+
+class Error(Exception):
+ pass
+
+# States (what have we written)
+[_DID_HEADER, _DID_DATA, _DID_RSRC] = range(3)
+
+# Various constants
+REASONABLY_LARGE=32768 # Minimal amount we pass the rle-coder
+LINELEN=64
+RUNCHAR=chr(0x90) # run-length introducer
+
+#
+# This code is no longer byte-order dependent
+
+#
+# Workarounds for non-mac machines.
+try:
+ from Carbon.File import FSSpec, FInfo
+ from MacOS import openrf
+
+ def getfileinfo(name):
+ finfo = FSSpec(name).FSpGetFInfo()
+ dir, file = os.path.split(name)
+ # XXX Get resource/data sizes
+ fp = open(name, 'rb')
+ fp.seek(0, 2)
+ dlen = fp.tell()
+ fp = openrf(name, '*rb')
+ fp.seek(0, 2)
+ rlen = fp.tell()
+ return file, finfo, dlen, rlen
+
+ def openrsrc(name, *mode):
+ if not mode:
+ mode = '*rb'
+ else:
+ mode = '*' + mode[0]
+ return openrf(name, mode)
+
+except ImportError:
+ #
+ # Glue code for non-macintosh usage
+ #
+
+ class FInfo:
+ def __init__(self):
+ self.Type = '????'
+ self.Creator = '????'
+ self.Flags = 0
+
+ def getfileinfo(name):
+ finfo = FInfo()
+ # Quick check for textfile
+ fp = open(name)
+ data = open(name).read(256)
+ for c in data:
+ if not c.isspace() and (c<' ' or ord(c) > 0x7f):
+ break
+ else:
+ finfo.Type = 'TEXT'
+ fp.seek(0, 2)
+ dsize = fp.tell()
+ fp.close()
+ dir, file = os.path.split(name)
+ file = file.replace(':', '-', 1)
+ return file, finfo, dsize, 0
+
+ class openrsrc:
+ def __init__(self, *args):
+ pass
+
+ def read(self, *args):
+ return ''
+
+ def write(self, *args):
+ pass
+
+ def close(self):
+ pass
+
+class _Hqxcoderengine:
+ """Write data to the coder in 3-byte chunks"""
+
+ def __init__(self, ofp):
+ self.ofp = ofp
+ self.data = ''
+ self.hqxdata = ''
+ self.linelen = LINELEN-1
+
+ def write(self, data):
+ self.data = self.data + data
+ datalen = len(self.data)
+ todo = (datalen//3)*3
+ data = self.data[:todo]
+ self.data = self.data[todo:]
+ if not data:
+ return
+ self.hqxdata = self.hqxdata + binascii.b2a_hqx(data)
+ self._flush(0)
+
+ def _flush(self, force):
+ first = 0
+ while first <= len(self.hqxdata)-self.linelen:
+ last = first + self.linelen
+ self.ofp.write(self.hqxdata[first:last]+'\n')
+ self.linelen = LINELEN
+ first = last
+ self.hqxdata = self.hqxdata[first:]
+ if force:
+ self.ofp.write(self.hqxdata + ':\n')
+
+ def close(self):
+ if self.data:
+ self.hqxdata = \
+ self.hqxdata + binascii.b2a_hqx(self.data)
+ self._flush(1)
+ self.ofp.close()
+ del self.ofp
+
+class _Rlecoderengine:
+ """Write data to the RLE-coder in suitably large chunks"""
+
+ def __init__(self, ofp):
+ self.ofp = ofp
+ self.data = ''
+
+ def write(self, data):
+ self.data = self.data + data
+ if len(self.data) < REASONABLY_LARGE:
+ return
+ rledata = binascii.rlecode_hqx(self.data)
+ self.ofp.write(rledata)
+ self.data = ''
+
+ def close(self):
+ if self.data:
+ rledata = binascii.rlecode_hqx(self.data)
+ self.ofp.write(rledata)
+ self.ofp.close()
+ del self.ofp
+
+class BinHex:
+ def __init__(self, (name, finfo, dlen, rlen), ofp):
+ if type(ofp) == type(''):
+ ofname = ofp
+ ofp = open(ofname, 'w')
+ if os.name == 'mac':
+ fss = FSSpec(ofname)
+ fss.SetCreatorType('BnHq', 'TEXT')
+ ofp.write('(This file must be converted with BinHex 4.0)\n\n:')
+ hqxer = _Hqxcoderengine(ofp)
+ self.ofp = _Rlecoderengine(hqxer)
+ self.crc = 0
+ if finfo is None:
+ finfo = FInfo()
+ self.dlen = dlen
+ self.rlen = rlen
+ self._writeinfo(name, finfo)
+ self.state = _DID_HEADER
+
+ def _writeinfo(self, name, finfo):
+ nl = len(name)
+ if nl > 63:
+ raise Error, 'Filename too long'
+ d = chr(nl) + name + '\0'
+ d2 = finfo.Type + finfo.Creator
+
+ # Force all structs to be packed with big-endian
+ d3 = struct.pack('>h', finfo.Flags)
+ d4 = struct.pack('>ii', self.dlen, self.rlen)
+ info = d + d2 + d3 + d4
+ self._write(info)
+ self._writecrc()
+
+ def _write(self, data):
+ self.crc = binascii.crc_hqx(data, self.crc)
+ self.ofp.write(data)
+
+ def _writecrc(self):
+ # XXXX Should this be here??
+ # self.crc = binascii.crc_hqx('\0\0', self.crc)
+ if self.crc < 0:
+ fmt = '>h'
+ else:
+ fmt = '>H'
+ self.ofp.write(struct.pack(fmt, self.crc))
+ self.crc = 0
+
+ def write(self, data):
+ if self.state != _DID_HEADER:
+ raise Error, 'Writing data at the wrong time'
+ self.dlen = self.dlen - len(data)
+ self._write(data)
+
+ def close_data(self):
+ if self.dlen != 0:
+ raise Error, 'Incorrect data size, diff=%r' % (self.rlen,)
+ self._writecrc()
+ self.state = _DID_DATA
+
+ def write_rsrc(self, data):
+ if self.state < _DID_DATA:
+ self.close_data()
+ if self.state != _DID_DATA:
+ raise Error, 'Writing resource data at the wrong time'
+ self.rlen = self.rlen - len(data)
+ self._write(data)
+
+ def close(self):
+ if self.state < _DID_DATA:
+ self.close_data()
+ if self.state != _DID_DATA:
+ raise Error, 'Close at the wrong time'
+ if self.rlen != 0:
+ raise Error, \
+ "Incorrect resource-datasize, diff=%r" % (self.rlen,)
+ self._writecrc()
+ self.ofp.close()
+ self.state = None
+ del self.ofp
+
+def binhex(inp, out):
+ """(infilename, outfilename) - Create binhex-encoded copy of a file"""
+ finfo = getfileinfo(inp)
+ ofp = BinHex(finfo, out)
+
+ ifp = open(inp, 'rb')
+ # XXXX Do textfile translation on non-mac systems
+ while 1:
+ d = ifp.read(128000)
+ if not d: break
+ ofp.write(d)
+ ofp.close_data()
+ ifp.close()
+
+ ifp = openrsrc(inp, 'rb')
+ while 1:
+ d = ifp.read(128000)
+ if not d: break
+ ofp.write_rsrc(d)
+ ofp.close()
+ ifp.close()
+
+class _Hqxdecoderengine:
+ """Read data via the decoder in 4-byte chunks"""
+
+ def __init__(self, ifp):
+ self.ifp = ifp
+ self.eof = 0
+
+ def read(self, totalwtd):
+ """Read at least wtd bytes (or until EOF)"""
+ decdata = ''
+ wtd = totalwtd
+ #
+ # The loop here is convoluted, since we don't really now how
+ # much to decode: there may be newlines in the incoming data.
+ while wtd > 0:
+ if self.eof: return decdata
+ wtd = ((wtd+2)//3)*4
+ data = self.ifp.read(wtd)
+ #
+ # Next problem: there may not be a complete number of
+ # bytes in what we pass to a2b. Solve by yet another
+ # loop.
+ #
+ while 1:
+ try:
+ decdatacur, self.eof = \
+ binascii.a2b_hqx(data)
+ break
+ except binascii.Incomplete:
+ pass
+ newdata = self.ifp.read(1)
+ if not newdata:
+ raise Error, \
+ 'Premature EOF on binhex file'
+ data = data + newdata
+ decdata = decdata + decdatacur
+ wtd = totalwtd - len(decdata)
+ if not decdata and not self.eof:
+ raise Error, 'Premature EOF on binhex file'
+ return decdata
+
+ def close(self):
+ self.ifp.close()
+
+class _Rledecoderengine:
+ """Read data via the RLE-coder"""
+
+ def __init__(self, ifp):
+ self.ifp = ifp
+ self.pre_buffer = ''
+ self.post_buffer = ''
+ self.eof = 0
+
+ def read(self, wtd):
+ if wtd > len(self.post_buffer):
+ self._fill(wtd-len(self.post_buffer))
+ rv = self.post_buffer[:wtd]
+ self.post_buffer = self.post_buffer[wtd:]
+ return rv
+
+ def _fill(self, wtd):
+ self.pre_buffer = self.pre_buffer + self.ifp.read(wtd+4)
+ if self.ifp.eof:
+ self.post_buffer = self.post_buffer + \
+ binascii.rledecode_hqx(self.pre_buffer)
+ self.pre_buffer = ''
+ return
+
+ #
+ # Obfuscated code ahead. We have to take care that we don't
+ # end up with an orphaned RUNCHAR later on. So, we keep a couple
+ # of bytes in the buffer, depending on what the end of
+ # the buffer looks like:
+ # '\220\0\220' - Keep 3 bytes: repeated \220 (escaped as \220\0)
+ # '?\220' - Keep 2 bytes: repeated something-else
+ # '\220\0' - Escaped \220: Keep 2 bytes.
+ # '?\220?' - Complete repeat sequence: decode all
+ # otherwise: keep 1 byte.
+ #
+ mark = len(self.pre_buffer)
+ if self.pre_buffer[-3:] == RUNCHAR + '\0' + RUNCHAR:
+ mark = mark - 3
+ elif self.pre_buffer[-1] == RUNCHAR:
+ mark = mark - 2
+ elif self.pre_buffer[-2:] == RUNCHAR + '\0':
+ mark = mark - 2
+ elif self.pre_buffer[-2] == RUNCHAR:
+ pass # Decode all
+ else:
+ mark = mark - 1
+
+ self.post_buffer = self.post_buffer + \
+ binascii.rledecode_hqx(self.pre_buffer[:mark])
+ self.pre_buffer = self.pre_buffer[mark:]
+
+ def close(self):
+ self.ifp.close()
+
+class HexBin:
+ def __init__(self, ifp):
+ if type(ifp) == type(''):
+ ifp = open(ifp)
+ #
+ # Find initial colon.
+ #
+ while 1:
+ ch = ifp.read(1)
+ if not ch:
+ raise Error, "No binhex data found"
+ # Cater for \r\n terminated lines (which show up as \n\r, hence
+ # all lines start with \r)
+ if ch == '\r':
+ continue
+ if ch == ':':
+ break
+ if ch != '\n':
+ dummy = ifp.readline()
+
+ hqxifp = _Hqxdecoderengine(ifp)
+ self.ifp = _Rledecoderengine(hqxifp)
+ self.crc = 0
+ self._readheader()
+
+ def _read(self, len):
+ data = self.ifp.read(len)
+ self.crc = binascii.crc_hqx(data, self.crc)
+ return data
+
+ def _checkcrc(self):
+ filecrc = struct.unpack('>h', self.ifp.read(2))[0] & 0xffff
+ #self.crc = binascii.crc_hqx('\0\0', self.crc)
+ # XXXX Is this needed??
+ self.crc = self.crc & 0xffff
+ if filecrc != self.crc:
+ raise Error, 'CRC error, computed %x, read %x' \
+ %(self.crc, filecrc)
+ self.crc = 0
+
+ def _readheader(self):
+ len = self._read(1)
+ fname = self._read(ord(len))
+ rest = self._read(1+4+4+2+4+4)
+ self._checkcrc()
+
+ type = rest[1:5]
+ creator = rest[5:9]
+ flags = struct.unpack('>h', rest[9:11])[0]
+ self.dlen = struct.unpack('>l', rest[11:15])[0]
+ self.rlen = struct.unpack('>l', rest[15:19])[0]
+
+ self.FName = fname
+ self.FInfo = FInfo()
+ self.FInfo.Creator = creator
+ self.FInfo.Type = type
+ self.FInfo.Flags = flags
+
+ self.state = _DID_HEADER
+
+ def read(self, *n):
+ if self.state != _DID_HEADER:
+ raise Error, 'Read data at wrong time'
+ if n:
+ n = n[0]
+ n = min(n, self.dlen)
+ else:
+ n = self.dlen
+ rv = ''
+ while len(rv) < n:
+ rv = rv + self._read(n-len(rv))
+ self.dlen = self.dlen - n
+ return rv
+
+ def close_data(self):
+ if self.state != _DID_HEADER:
+ raise Error, 'close_data at wrong time'
+ if self.dlen:
+ dummy = self._read(self.dlen)
+ self._checkcrc()
+ self.state = _DID_DATA
+
+ def read_rsrc(self, *n):
+ if self.state == _DID_HEADER:
+ self.close_data()
+ if self.state != _DID_DATA:
+ raise Error, 'Read resource data at wrong time'
+ if n:
+ n = n[0]
+ n = min(n, self.rlen)
+ else:
+ n = self.rlen
+ self.rlen = self.rlen - n
+ return self._read(n)
+
+ def close(self):
+ if self.rlen:
+ dummy = self.read_rsrc(self.rlen)
+ self._checkcrc()
+ self.state = _DID_RSRC
+ self.ifp.close()
+
+def hexbin(inp, out):
+ """(infilename, outfilename) - Decode binhexed file"""
+ ifp = HexBin(inp)
+ finfo = ifp.FInfo
+ if not out:
+ out = ifp.FName
+ if os.name == 'mac':
+ ofss = FSSpec(out)
+ out = ofss.as_pathname()
+
+ ofp = open(out, 'wb')
+ # XXXX Do translation on non-mac systems
+ while 1:
+ d = ifp.read(128000)
+ if not d: break
+ ofp.write(d)
+ ofp.close()
+ ifp.close_data()
+
+ d = ifp.read_rsrc(128000)
+ if d:
+ ofp = openrsrc(out, 'wb')
+ ofp.write(d)
+ while 1:
+ d = ifp.read_rsrc(128000)
+ if not d: break
+ ofp.write(d)
+ ofp.close()
+
+ if os.name == 'mac':
+ nfinfo = ofss.GetFInfo()
+ nfinfo.Creator = finfo.Creator
+ nfinfo.Type = finfo.Type
+ nfinfo.Flags = finfo.Flags
+ ofss.SetFInfo(nfinfo)
+
+ ifp.close()
+
+def _test():
+ if os.name == 'mac':
+ import macfs
+ fss, ok = macfs.PromptGetFile('File to convert:')
+ if not ok:
+ sys.exit(0)
+ fname = fss.as_pathname()
+ else:
+ fname = sys.argv[1]
+ binhex(fname, fname+'.hqx')
+ hexbin(fname+'.hqx', fname+'.viahqx')
+ #hexbin(fname, fname+'.unpacked')
+ sys.exit(1)
+
+if __name__ == '__main__':
+ _test()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/cmd.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/cmd.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,405 @@
+"""A generic class to build line-oriented command interpreters.
+
+Interpreters constructed with this class obey the following conventions:
+
+1. End of file on input is processed as the command 'EOF'.
+2. A command is parsed out of each line by collecting the prefix composed
+ of characters in the identchars member.
+3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method
+ is passed a single argument consisting of the remainder of the line.
+4. Typing an empty line repeats the last command. (Actually, it calls the
+ method `emptyline', which may be overridden in a subclass.)
+5. There is a predefined `help' method. Given an argument `topic', it
+ calls the command `help_topic'. With no arguments, it lists all topics
+ with defined help_ functions, broken into up to three topics; documented
+ commands, miscellaneous help topics, and undocumented commands.
+6. The command '?' is a synonym for `help'. The command '!' is a synonym
+ for `shell', if a do_shell method exists.
+7. If completion is enabled, completing commands will be done automatically,
+ and completing of commands args is done by calling complete_foo() with
+ arguments text, line, begidx, endidx. text is string we are matching
+ against, all returned matches must begin with it. line is the current
+ input line (lstripped), begidx and endidx are the beginning and end
+ indexes of the text being matched, which could be used to provide
+ different completion depending upon which position the argument is in.
+
+The `default' method may be overridden to intercept commands for which there
+is no do_ method.
+
+The `completedefault' method may be overridden to intercept completions for
+commands that have no complete_ method.
+
+The data member `self.ruler' sets the character used to draw separator lines
+in the help messages. If empty, no ruler line is drawn. It defaults to "=".
+
+If the value of `self.intro' is nonempty when the cmdloop method is called,
+it is printed out on interpreter startup. This value may be overridden
+via an optional argument to the cmdloop() method.
+
+The data members `self.doc_header', `self.misc_header', and
+`self.undoc_header' set the headers used for the help function's
+listings of documented functions, miscellaneous topics, and undocumented
+functions respectively.
+
+These interpreters use raw_input; thus, if the readline module is loaded,
+they automatically support Emacs-like command history and editing features.
+"""
+
+import string
+
+__all__ = ["Cmd"]
+
+PROMPT = '(Cmd) '
+IDENTCHARS = string.ascii_letters + string.digits + '_'
+
+class Cmd:
+ """A simple framework for writing line-oriented command interpreters.
+
+ These are often useful for test harnesses, administrative tools, and
+ prototypes that will later be wrapped in a more sophisticated interface.
+
+ A Cmd instance or subclass instance is a line-oriented interpreter
+ framework. There is no good reason to instantiate Cmd itself; rather,
+ it's useful as a superclass of an interpreter class you define yourself
+ in order to inherit Cmd's methods and encapsulate action methods.
+
+ """
+ prompt = PROMPT
+ identchars = IDENTCHARS
+ ruler = '='
+ lastcmd = ''
+ intro = None
+ doc_leader = ""
+ doc_header = "Documented commands (type help <topic>):"
+ misc_header = "Miscellaneous help topics:"
+ undoc_header = "Undocumented commands:"
+ nohelp = "*** No help on %s"
+ use_rawinput = 1
+
+ def __init__(self, completekey='tab', stdin=None, stdout=None):
+ """Instantiate a line-oriented interpreter framework.
+
+ The optional argument 'completekey' is the readline name of a
+ completion key; it defaults to the Tab key. If completekey is
+ not None and the readline module is available, command completion
+ is done automatically. The optional arguments stdin and stdout
+ specify alternate input and output file objects; if not specified,
+ sys.stdin and sys.stdout are used.
+
+ """
+ import sys
+ if stdin is not None:
+ self.stdin = stdin
+ else:
+ self.stdin = sys.stdin
+ if stdout is not None:
+ self.stdout = stdout
+ else:
+ self.stdout = sys.stdout
+ self.cmdqueue = []
+ self.completekey = completekey
+
+ def cmdloop(self, intro=None):
+ """Repeatedly issue a prompt, accept input, parse an initial prefix
+ off the received input, and dispatch to action methods, passing them
+ the remainder of the line as argument.
+
+ """
+
+ self.preloop()
+ if self.use_rawinput and self.completekey:
+ try:
+ import readline
+ self.old_completer = readline.get_completer()
+ readline.set_completer(self.complete)
+ readline.parse_and_bind(self.completekey+": complete")
+ except (ImportError, AttributeError):
+ pass
+ try:
+ if intro is not None:
+ self.intro = intro
+ if self.intro:
+ self.stdout.write(str(self.intro)+"\n")
+ stop = None
+ while not stop:
+ if self.cmdqueue:
+ line = self.cmdqueue.pop(0)
+ else:
+ if self.use_rawinput:
+ try:
+ line = raw_input(self.prompt)
+ except EOFError:
+ line = 'EOF'
+ else:
+ self.stdout.write(self.prompt)
+ self.stdout.flush()
+ line = self.stdin.readline()
+ if not len(line):
+ line = 'EOF'
+ else:
+ line = line[:-1] # chop \n
+ line = self.precmd(line)
+ stop = self.onecmd(line)
+ stop = self.postcmd(stop, line)
+ self.postloop()
+ finally:
+ if self.use_rawinput and self.completekey:
+ try:
+ import readline
+ readline.set_completer(self.old_completer)
+ except (ImportError, AttributeError):
+ pass
+
+
+ def precmd(self, line):
+ """Hook method executed just before the command line is
+ interpreted, but after the input prompt is generated and issued.
+
+ """
+ return line
+
+ def postcmd(self, stop, line):
+ """Hook method executed just after a command dispatch is finished."""
+ return stop
+
+ def preloop(self):
+ """Hook method executed once when the cmdloop() method is called."""
+ pass
+
+ def postloop(self):
+ """Hook method executed once when the cmdloop() method is about to
+ return.
+
+ """
+ pass
+
+ def parseline(self, line):
+ """Parse the line into a command name and a string containing
+ the arguments. Returns a tuple containing (command, args, line).
+ 'command' and 'args' may be None if the line couldn't be parsed.
+ """
+ line = line.strip()
+ if not line:
+ return None, None, line
+ elif line[0] == '?':
+ line = 'help ' + line[1:]
+ elif line[0] == '!':
+ if hasattr(self, 'do_shell'):
+ line = 'shell ' + line[1:]
+ else:
+ return None, None, line
+ i, n = 0, len(line)
+ while i < n and line[i] in self.identchars: i = i+1
+ cmd, arg = line[:i], line[i:].strip()
+ return cmd, arg, line
+
+ def onecmd(self, line):
+ """Interpret the argument as though it had been typed in response
+ to the prompt.
+
+ This may be overridden, but should not normally need to be;
+ see the precmd() and postcmd() methods for useful execution hooks.
+ The return value is a flag indicating whether interpretation of
+ commands by the interpreter should stop.
+
+ """
+ cmd, arg, line = self.parseline(line)
+ if not line:
+ return self.emptyline()
+ if cmd is None:
+ return self.default(line)
+ self.lastcmd = line
+ if cmd == '':
+ return self.default(line)
+ else:
+ try:
+ func = getattr(self, 'do_' + cmd)
+ except AttributeError:
+ return self.default(line)
+ return func(arg)
+
+ def emptyline(self):
+ """Called when an empty line is entered in response to the prompt.
+
+ If this method is not overridden, it repeats the last nonempty
+ command entered.
+
+ """
+ if self.lastcmd:
+ return self.onecmd(self.lastcmd)
+
+ def default(self, line):
+ """Called on an input line when the command prefix is not recognized.
+
+ If this method is not overridden, it prints an error message and
+ returns.
+
+ """
+ self.stdout.write('*** Unknown syntax: %s\n'%line)
+
+ def completedefault(self, *ignored):
+ """Method called to complete an input line when no command-specific
+ complete_*() method is available.
+
+ By default, it returns an empty list.
+
+ """
+ return []
+
+ def completenames(self, text, *ignored):
+ dotext = 'do_'+text
+ return [a[3:] for a in self.get_names() if a.startswith(dotext)]
+
+ def complete(self, text, state):
+ """Return the next possible completion for 'text'.
+
+ If a command has not been entered, then complete against command list.
+ Otherwise try to call complete_<command> to get list of completions.
+ """
+ if state == 0:
+ import readline
+ origline = readline.get_line_buffer()
+ line = origline.lstrip()
+ stripped = len(origline) - len(line)
+ begidx = readline.get_begidx() - stripped
+ endidx = readline.get_endidx() - stripped
+ if begidx>0:
+ cmd, args, foo = self.parseline(line)
+ if cmd == '':
+ compfunc = self.completedefault
+ else:
+ try:
+ compfunc = getattr(self, 'complete_' + cmd)
+ except AttributeError:
+ compfunc = self.completedefault
+ else:
+ compfunc = self.completenames
+ self.completion_matches = compfunc(text, line, begidx, endidx)
+ try:
+ return self.completion_matches[state]
+ except IndexError:
+ return None
+
+ def get_names(self):
+ # Inheritance says we have to look in class and
+ # base classes; order is not important.
+ names = []
+ classes = [self.__class__]
+ while classes:
+ aclass = classes.pop(0)
+ if aclass.__bases__:
+ classes = classes + list(aclass.__bases__)
+ names = names + dir(aclass)
+ return names
+
+ def complete_help(self, *args):
+ return self.completenames(*args)
+
+ def do_help(self, arg):
+ if arg:
+ # XXX check arg syntax
+ try:
+ func = getattr(self, 'help_' + arg)
+ except AttributeError:
+ try:
+ doc=getattr(self, 'do_' + arg).__doc__
+ if doc:
+ self.stdout.write("%s\n"%str(doc))
+ return
+ except AttributeError:
+ pass
+ self.stdout.write("%s\n"%str(self.nohelp % (arg,)))
+ return
+ func()
+ else:
+ names = self.get_names()
+ cmds_doc = []
+ cmds_undoc = []
+ help = {}
+ for name in names:
+ if name[:5] == 'help_':
+ help[name[5:]]=1
+ names.sort()
+ # There can be duplicates if routines overridden
+ prevname = ''
+ for name in names:
+ if name[:3] == 'do_':
+ if name == prevname:
+ continue
+ prevname = name
+ cmd=name[3:]
+ if cmd in help:
+ cmds_doc.append(cmd)
+ del help[cmd]
+ elif getattr(self, name).__doc__:
+ cmds_doc.append(cmd)
+ else:
+ cmds_undoc.append(cmd)
+ self.stdout.write("%s\n"%str(self.doc_leader))
+ self.print_topics(self.doc_header, cmds_doc, 15,80)
+ self.print_topics(self.misc_header, help.keys(),15,80)
+ self.print_topics(self.undoc_header, cmds_undoc, 15,80)
+
+ def print_topics(self, header, cmds, cmdlen, maxcol):
+ if cmds:
+ self.stdout.write("%s\n"%str(header))
+ if self.ruler:
+ self.stdout.write("%s\n"%str(self.ruler * len(header)))
+ self.columnize(cmds, maxcol-1)
+ self.stdout.write("\n")
+
+ def columnize(self, list, displaywidth=80):
+ """Display a list of strings as a compact set of columns.
+
+ Each column is only as wide as necessary.
+ Columns are separated by two spaces (one was not legible enough).
+ """
+ if not list:
+ self.stdout.write("<empty>\n")
+ return
+ nonstrings = [i for i in range(len(list))
+ if not isinstance(list[i], str)]
+ if nonstrings:
+ raise TypeError, ("list[i] not a string for i in %s" %
+ ", ".join(map(str, nonstrings)))
+ size = len(list)
+ if size == 1:
+ self.stdout.write('%s\n'%str(list[0]))
+ return
+ # Try every row count from 1 upwards
+ for nrows in range(1, len(list)):
+ ncols = (size+nrows-1) // nrows
+ colwidths = []
+ totwidth = -2
+ for col in range(ncols):
+ colwidth = 0
+ for row in range(nrows):
+ i = row + nrows*col
+ if i >= size:
+ break
+ x = list[i]
+ colwidth = max(colwidth, len(x))
+ colwidths.append(colwidth)
+ totwidth += colwidth + 2
+ if totwidth > displaywidth:
+ break
+ if totwidth <= displaywidth:
+ break
+ else:
+ nrows = len(list)
+ ncols = 1
+ colwidths = [0]
+ for row in range(nrows):
+ texts = []
+ for col in range(ncols):
+ i = row + nrows*col
+ if i >= size:
+ x = ""
+ else:
+ x = list[i]
+ texts.append(x)
+ while texts and not texts[-1]:
+ del texts[-1]
+ for col in range(len(texts)):
+ texts[col] = texts[col].ljust(colwidths[col])
+ self.stdout.write("%s\n"%str(" ".join(texts)))
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/copy.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/copy.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,414 @@
+"""Generic (shallow and deep) copying operations.
+
+Interface summary:
+
+ import copy
+
+ x = copy.copy(y) # make a shallow copy of y
+ x = copy.deepcopy(y) # make a deep copy of y
+
+For module specific errors, copy.Error is raised.
+
+The difference between shallow and deep copying is only relevant for
+compound objects (objects that contain other objects, like lists or
+class instances).
+
+- A shallow copy constructs a new compound object and then (to the
+ extent possible) inserts *the same objects* into it that the
+ original contains.
+
+- A deep copy constructs a new compound object and then, recursively,
+ inserts *copies* into it of the objects found in the original.
+
+Two problems often exist with deep copy operations that don't exist
+with shallow copy operations:
+
+ a) recursive objects (compound objects that, directly or indirectly,
+ contain a reference to themselves) may cause a recursive loop
+
+ b) because deep copy copies *everything* it may copy too much, e.g.
+ administrative data structures that should be shared even between
+ copies
+
+Python's deep copy operation avoids these problems by:
+
+ a) keeping a table of objects already copied during the current
+ copying pass
+
+ b) letting user-defined classes override the copying operation or the
+ set of components copied
+
+This version does not copy types like module, class, function, method,
+nor stack trace, stack frame, nor file, socket, window, nor array, nor
+any similar types.
+
+Classes can use the same interfaces to control copying that they use
+to control pickling: they can define methods called __getinitargs__(),
+__getstate__() and __setstate__(). See the documentation for module
+"pickle" for information on these methods.
+"""
+
+import types
+from copy_reg import dispatch_table
+
+class Error(Exception):
+ # Module-specific exception: raised when an object cannot be
+ # (shallow- or deep-) copied.
+ pass
+error = Error # backward compatibility
+
+try:
+ from org.python.core import PyStringMap
+except ImportError:
+ PyStringMap = None
+
+__all__ = ["Error", "copy", "deepcopy"]
+
+def copy(x):
+ """Shallow copy operation on arbitrary Python objects.
+
+ See the module's __doc__ string for more info.
+ """
+
+ cls = type(x)
+
+ # Lookup order: (1) the module's own type dispatch table,
+ # (2) a __copy__ method on the class, (3) copy_reg's pickle
+ # dispatch_table, (4) __reduce_ex__(2), (5) __reduce__.
+ copier = _copy_dispatch.get(cls)
+ if copier:
+ return copier(x)
+
+ copier = getattr(cls, "__copy__", None)
+ if copier:
+ return copier(x)
+
+ reductor = dispatch_table.get(cls)
+ if reductor:
+ rv = reductor(x)
+ else:
+ reductor = getattr(x, "__reduce_ex__", None)
+ if reductor:
+ rv = reductor(2)
+ else:
+ reductor = getattr(x, "__reduce__", None)
+ if reductor:
+ rv = reductor()
+ else:
+ raise Error("un(shallow)copyable object of type %s" % cls)
+
+ # deep=0: rebuild from the reduce value without recursing.
+ return _reconstruct(x, rv, 0)
+
+
+_copy_dispatch = d = {}
+
+# Immutable (or effectively shared) types: a "copy" is the object itself.
+def _copy_immutable(x):
+ return x
+for t in (type(None), int, long, float, bool, str, tuple,
+ frozenset, type, xrange, types.ClassType,
+ types.BuiltinFunctionType,
+ types.FunctionType):
+ d[t] = _copy_immutable
+# These type names may be absent from some builds (e.g. no complex),
+# hence the getattr probe.
+for name in ("ComplexType", "UnicodeType", "CodeType"):
+ t = getattr(types, name, None)
+ if t is not None:
+ d[t] = _copy_immutable
+
+# Mutable containers whose constructor accepts an instance of the same
+# type and produces a shallow copy.
+def _copy_with_constructor(x):
+ return type(x)(x)
+for t in (list, dict, set):
+ d[t] = _copy_with_constructor
+
+def _copy_with_copy_method(x):
+ return x.copy()
+# Jython's PyStringMap (imported above, None on CPython).
+if PyStringMap is not None:
+ d[PyStringMap] = _copy_with_copy_method
+# Shallow copy for old-style (classic) class instances, honouring the
+# pickle-style hooks __copy__, __getinitargs__, __getstate__ and
+# __setstate__.
+def _copy_inst(x):
+ if hasattr(x, '__copy__'):
+ return x.__copy__()
+ if hasattr(x, '__getinitargs__'):
+ args = x.__getinitargs__()
+ y = x.__class__(*args)
+ else:
+ # Create an instance without running __init__, then fix its class.
+ y = _EmptyClass()
+ y.__class__ = x.__class__
+ if hasattr(x, '__getstate__'):
+ state = x.__getstate__()
+ else:
+ state = x.__dict__
+ if hasattr(y, '__setstate__'):
+ y.__setstate__(state)
+ else:
+ y.__dict__.update(state)
+ return y
+d[types.InstanceType] = _copy_inst
+
+# Drop the short alias; the table remains reachable as _copy_dispatch.
+del d
+
+def deepcopy(x, memo=None, _nil=[]):
+ """Deep copy operation on arbitrary Python objects.
+
+ See the module's __doc__ string for more info.
+ """
+ # `_nil` is a deliberate mutable-default: a unique sentinel object
+ # distinguishing "not in memo" from a legitimately-stored None.
+
+ if memo is None:
+ memo = {}
+
+ # The memo is keyed by object id so already-copied (possibly
+ # recursive) objects are returned directly.
+ d = id(x)
+ y = memo.get(d, _nil)
+ if y is not _nil:
+ return y
+
+ cls = type(x)
+
+ copier = _deepcopy_dispatch.get(cls)
+ if copier:
+ y = copier(x, memo)
+ else:
+ try:
+ issc = issubclass(cls, type)
+ except TypeError: # cls is not a class (old Boost; see SF #502085)
+ issc = 0
+ if issc:
+ # Classes (metaclass instances) are treated as atomic.
+ y = _deepcopy_atomic(x, memo)
+ else:
+ copier = getattr(x, "__deepcopy__", None)
+ if copier:
+ y = copier(memo)
+ else:
+ # Same reduce-protocol fallback chain as copy() above.
+ reductor = dispatch_table.get(cls)
+ if reductor:
+ rv = reductor(x)
+ else:
+ reductor = getattr(x, "__reduce_ex__", None)
+ if reductor:
+ rv = reductor(2)
+ else:
+ reductor = getattr(x, "__reduce__", None)
+ if reductor:
+ rv = reductor()
+ else:
+ raise Error(
+ "un(deep)copyable object of type %s" % cls)
+ y = _reconstruct(x, rv, 1, memo)
+
+ memo[d] = y
+ _keep_alive(x, memo) # Make sure x lives at least as long as d
+ return y
+
+_deepcopy_dispatch = d = {}
+
+# Atomic types are returned unchanged even by deepcopy.
+def _deepcopy_atomic(x, memo):
+ return x
+d[type(None)] = _deepcopy_atomic
+d[int] = _deepcopy_atomic
+d[long] = _deepcopy_atomic
+d[float] = _deepcopy_atomic
+d[bool] = _deepcopy_atomic
+# complex/unicode may be compiled out; CodeType may be missing on some
+# platforms — register them only when available.
+try:
+ d[complex] = _deepcopy_atomic
+except NameError:
+ pass
+d[str] = _deepcopy_atomic
+try:
+ d[unicode] = _deepcopy_atomic
+except NameError:
+ pass
+try:
+ d[types.CodeType] = _deepcopy_atomic
+except AttributeError:
+ pass
+d[type] = _deepcopy_atomic
+d[xrange] = _deepcopy_atomic
+d[types.ClassType] = _deepcopy_atomic
+d[types.BuiltinFunctionType] = _deepcopy_atomic
+d[types.FunctionType] = _deepcopy_atomic
+
+def _deepcopy_list(x, memo):
+ y = []
+ # Register the (still empty) copy BEFORE recursing so that
+ # self-referential lists terminate instead of looping forever.
+ memo[id(x)] = y
+ for a in x:
+ y.append(deepcopy(a, memo))
+ return y
+d[list] = _deepcopy_list
+
+def _deepcopy_tuple(x, memo):
+ # A tuple cannot be memoised before it exists, so copy the items
+ # first; recursion through a tuple must pass through some mutable
+ # object, which will have been memoised meanwhile.
+ y = []
+ for a in x:
+ y.append(deepcopy(a, memo))
+ d = id(x)
+ try:
+ return memo[d]
+ except KeyError:
+ pass
+ # If every element copied to itself, share the original tuple
+ # instead of allocating an equal one.
+ for i in range(len(x)):
+ if x[i] is not y[i]:
+ y = tuple(y)
+ break
+ else:
+ y = x
+ memo[d] = y
+ return y
+d[tuple] = _deepcopy_tuple
+
+def _deepcopy_dict(x, memo):
+ y = {}
+ # Memoise before recursing (handles dicts that reach themselves).
+ memo[id(x)] = y
+ for key, value in x.iteritems():
+ y[deepcopy(key, memo)] = deepcopy(value, memo)
+ return y
+d[dict] = _deepcopy_dict
+if PyStringMap is not None:
+ d[PyStringMap] = _deepcopy_dict
+
+def _keep_alive(x, memo):
+ """Keeps a reference to the object x in the memo.
+
+ Because we remember objects by their id, we have
+ to assure that possibly temporary objects are kept
+ alive by referencing them.
+ We store a reference at the id of the memo, which should
+ normally not be used unless someone tries to deepcopy
+ the memo itself...
+ """
+ # memo[id(memo)] is a plain list of strong references.
+ try:
+ memo[id(memo)].append(x)
+ except KeyError:
+ # aha, this is the first one :-)
+ memo[id(memo)]=[x]
+
+# Deep copy for old-style class instances; mirrors _copy_inst but
+# recursively deep-copies the init args and the state dict.
+def _deepcopy_inst(x, memo):
+ if hasattr(x, '__deepcopy__'):
+ return x.__deepcopy__(memo)
+ if hasattr(x, '__getinitargs__'):
+ args = x.__getinitargs__()
+ args = deepcopy(args, memo)
+ y = x.__class__(*args)
+ else:
+ y = _EmptyClass()
+ y.__class__ = x.__class__
+ # Memoise before copying state so recursive instances terminate.
+ memo[id(x)] = y
+ if hasattr(x, '__getstate__'):
+ state = x.__getstate__()
+ else:
+ state = x.__dict__
+ state = deepcopy(state, memo)
+ if hasattr(y, '__setstate__'):
+ y.__setstate__(state)
+ else:
+ y.__dict__.update(state)
+ return y
+d[types.InstanceType] = _deepcopy_inst
+
+# Rebuild a copy of x from a __reduce__-style value `info`:
+# either a string (x is copied by reference) or a tuple
+# (callable, args[, state[, listiter[, dictiter]]]). `deep` selects
+# deep vs shallow treatment of args, state and iterated items.
+def _reconstruct(x, info, deep, memo=None):
+ if isinstance(info, str):
+ return x
+ assert isinstance(info, tuple)
+ if memo is None:
+ memo = {}
+ n = len(info)
+ assert n in (2, 3, 4, 5)
+ callable, args = info[:2]
+ if n > 2:
+ state = info[2]
+ else:
+ state = {}
+ if n > 3:
+ listiter = info[3]
+ else:
+ listiter = None
+ if n > 4:
+ dictiter = info[4]
+ else:
+ dictiter = None
+ if deep:
+ args = deepcopy(args, memo)
+ y = callable(*args)
+ memo[id(x)] = y
+ if listiter is not None:
+ for item in listiter:
+ if deep:
+ item = deepcopy(item, memo)
+ y.append(item)
+ if dictiter is not None:
+ for key, value in dictiter:
+ if deep:
+ key = deepcopy(key, memo)
+ value = deepcopy(value, memo)
+ y[key] = value
+ if state:
+ if deep:
+ state = deepcopy(state, memo)
+ if hasattr(y, '__setstate__'):
+ y.__setstate__(state)
+ else:
+ # Protocol-2 state may be a (dict_state, slots_state) pair.
+ if isinstance(state, tuple) and len(state) == 2:
+ state, slotstate = state
+ else:
+ slotstate = None
+ if state is not None:
+ y.__dict__.update(state)
+ if slotstate is not None:
+ for key, value in slotstate.iteritems():
+ setattr(y, key, value)
+ return y
+
+# Clean up module namespace: the dispatch tables stay reachable under
+# their public names, the working aliases are dropped.
+del d
+
+del types
+
+# Helper for instance creation without calling __init__
+class _EmptyClass:
+ pass
+
+# Smoke test exercising copy()/deepcopy() over builtin types and a
+# classic class with pickle-protocol hooks; prints comparison results
+# rather than asserting.
+def _test():
+ l = [None, 1, 2L, 3.14, 'xyzzy', (1, 2L), [3.14, 'abc'],
+ {'abc': 'ABC'}, (), [], {}]
+ l1 = copy(l)
+ print l1==l
+ l1 = map(copy, l)
+ print l1==l
+ l1 = deepcopy(l)
+ print l1==l
+ class C:
+ def __init__(self, arg=None):
+ self.a = 1
+ self.arg = arg
+ if __name__ == '__main__':
+ import sys
+ file = sys.argv[0]
+ else:
+ file = __file__
+ self.fp = open(file)
+ self.fp.close()
+ def __getstate__(self):
+ return {'a': self.a, 'arg': self.arg}
+ def __setstate__(self, state):
+ for key, value in state.iteritems():
+ setattr(self, key, value)
+ def __deepcopy__(self, memo=None):
+ new = self.__class__(deepcopy(self.arg, memo))
+ new.a = self.a
+ return new
+ c = C('argument sketch')
+ l.append(c)
+ l2 = copy(l)
+ print l == l2
+ print l
+ print l2
+ l2 = deepcopy(l)
+ print l == l2
+ print l
+ print l2
+ # Append a self-referential dict to exercise the memo handling.
+ l.append({l[1]: l, 'xyz': l[2]})
+ l3 = copy(l)
+ import repr
+ print map(repr.repr, l)
+ print map(repr.repr, l1)
+ print map(repr.repr, l2)
+ print map(repr.repr, l3)
+ l3 = deepcopy(l)
+ import repr
+ print map(repr.repr, l)
+ print map(repr.repr, l1)
+ print map(repr.repr, l2)
+ print map(repr.repr, l3)
+
+if __name__ == '__main__':
+ _test()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/decimal.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/decimal.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,3138 @@
+# Copyright (c) 2004 Python Software Foundation.
+# All rights reserved.
+
+# Written by Eric Price <eprice at tjhsst.edu>
+# and Facundo Batista <facundo at taniquetil.com.ar>
+# and Raymond Hettinger <python at rcn.com>
+# and Aahz <aahz at pobox.com>
+# and Tim Peters
+
+# This module is currently Py2.3 compatible and should be kept that way
+# unless a major compelling advantage arises. IOW, 2.3 compatibility is
+# strongly preferred, but not guaranteed.
+
+# Also, this module should be kept in sync with the latest updates of
+# the IBM specification as it evolves. Those updates will be treated
+# as bug fixes (deviation from the spec is a compatibility, usability
+# bug) and will be backported. At this point the spec is stabilizing
+# and the updates are becoming fewer, smaller, and less significant.
+
+"""
+This is a Py2.3 implementation of decimal floating point arithmetic based on
+the General Decimal Arithmetic Specification:
+
+ www2.hursley.ibm.com/decimal/decarith.html
+
+and IEEE standard 854-1987:
+
+ www.cs.berkeley.edu/~ejr/projects/754/private/drafts/854-1987/dir.html
+
+Decimal floating point has finite precision with arbitrarily large bounds.
+
+The purpose of the module is to support arithmetic using familiar
+"schoolhouse" rules and to avoid some of the tricky representation
+issues associated with binary floating point. The package is especially
+useful for financial applications or for contexts where users have
+expectations that are at odds with binary floating point (for instance,
+in binary floating point, 1.00 % 0.1 gives 0.09999999999999995 instead
+of the expected Decimal("0.00") returned by decimal floating point).
+
+Here are some examples of using the decimal module:
+
+>>> from decimal import *
+>>> setcontext(ExtendedContext)
+>>> Decimal(0)
+Decimal("0")
+>>> Decimal("1")
+Decimal("1")
+>>> Decimal("-.0123")
+Decimal("-0.0123")
+>>> Decimal(123456)
+Decimal("123456")
+>>> Decimal("123.45e12345678901234567890")
+Decimal("1.2345E+12345678901234567892")
+>>> Decimal("1.33") + Decimal("1.27")
+Decimal("2.60")
+>>> Decimal("12.34") + Decimal("3.87") - Decimal("18.41")
+Decimal("-2.20")
+>>> dig = Decimal(1)
+>>> print dig / Decimal(3)
+0.333333333
+>>> getcontext().prec = 18
+>>> print dig / Decimal(3)
+0.333333333333333333
+>>> print dig.sqrt()
+1
+>>> print Decimal(3).sqrt()
+1.73205080756887729
+>>> print Decimal(3) ** 123
+4.85192780976896427E+58
+>>> inf = Decimal(1) / Decimal(0)
+>>> print inf
+Infinity
+>>> neginf = Decimal(-1) / Decimal(0)
+>>> print neginf
+-Infinity
+>>> print neginf + inf
+NaN
+>>> print neginf * inf
+-Infinity
+>>> print dig / 0
+Infinity
+>>> getcontext().traps[DivisionByZero] = 1
+>>> print dig / 0
+Traceback (most recent call last):
+ ...
+ ...
+ ...
+DivisionByZero: x / 0
+>>> c = Context()
+>>> c.traps[InvalidOperation] = 0
+>>> print c.flags[InvalidOperation]
+0
+>>> c.divide(Decimal(0), Decimal(0))
+Decimal("NaN")
+>>> c.traps[InvalidOperation] = 1
+>>> print c.flags[InvalidOperation]
+1
+>>> c.flags[InvalidOperation] = 0
+>>> print c.flags[InvalidOperation]
+0
+>>> print c.divide(Decimal(0), Decimal(0))
+Traceback (most recent call last):
+ ...
+ ...
+ ...
+InvalidOperation: 0 / 0
+>>> print c.flags[InvalidOperation]
+1
+>>> c.flags[InvalidOperation] = 0
+>>> c.traps[InvalidOperation] = 0
+>>> print c.divide(Decimal(0), Decimal(0))
+NaN
+>>> print c.flags[InvalidOperation]
+1
+>>>
+"""
+
+__all__ = [
+ # Two major classes
+ 'Decimal', 'Context',
+
+ # Contexts
+ 'DefaultContext', 'BasicContext', 'ExtendedContext',
+
+ # Exceptions
+ 'DecimalException', 'Clamped', 'InvalidOperation', 'DivisionByZero',
+ 'Inexact', 'Rounded', 'Subnormal', 'Overflow', 'Underflow',
+
+ # Constants for use in setting up contexts
+ 'ROUND_DOWN', 'ROUND_HALF_UP', 'ROUND_HALF_EVEN', 'ROUND_CEILING',
+ 'ROUND_FLOOR', 'ROUND_UP', 'ROUND_HALF_DOWN',
+
+ # Functions for manipulating contexts
+ 'setcontext', 'getcontext', 'localcontext'
+]
+
+import copy as _copy
+
+#Rounding
+# Public rounding-mode constants; contexts store and compare these
+# strings directly.
+ROUND_DOWN = 'ROUND_DOWN'
+ROUND_HALF_UP = 'ROUND_HALF_UP'
+ROUND_HALF_EVEN = 'ROUND_HALF_EVEN'
+ROUND_CEILING = 'ROUND_CEILING'
+ROUND_FLOOR = 'ROUND_FLOOR'
+ROUND_UP = 'ROUND_UP'
+ROUND_HALF_DOWN = 'ROUND_HALF_DOWN'
+
+#Rounding decision (not part of the public API)
+NEVER_ROUND = 'NEVER_ROUND' # Round in division (non-divmod), sqrt ONLY
+ALWAYS_ROUND = 'ALWAYS_ROUND' # Every operation rounds at end.
+
+#Errors
+
+class DecimalException(ArithmeticError):
+ """Base exception class.
+
+ Used exceptions derive from this.
+ If an exception derives from another exception besides this (such as
+ Underflow (Inexact, Rounded, Subnormal) that indicates that it is only
+ called if the others are present. This isn't actually used for
+ anything, though.
+
+ handle -- Called when context._raise_error is called and the
+ trap_enabler is set. First argument is self, second is the
+ context. More arguments can be given, those being after
+ the explanation in _raise_error (For example,
+ context._raise_error(NewError, '(-x)!', self._sign) would
+ call NewError().handle(context, self._sign).)
+
+ To define a new exception, it should be sufficient to have it derive
+ from DecimalException.
+ """
+ def handle(self, context, *args):
+ # Default: no substitute result (returns None); subclasses
+ # override to produce the value used when the trap is disabled.
+ pass
+
+
+class Clamped(DecimalException):
+ """Exponent of a 0 changed to fit bounds.
+
+ This occurs and signals clamped if the exponent of a result has been
+ altered in order to fit the constraints of a specific concrete
+ representation. This may occur when the exponent of a zero result would
+ be outside the bounds of a representation, or when a large normal
+ number would have an encoded exponent that cannot be represented. In
+ this latter case, the exponent is reduced to fit and the corresponding
+ number of zero digits are appended to the coefficient ("fold-down").
+ """
+ # No handle() override: the operation's result is used unchanged.
+
+
+class InvalidOperation(DecimalException):
+ """An invalid operation was performed.
+
+ Various bad things cause this:
+
+ Something creates a signaling NaN
+ -INF + INF
+ 0 * (+-)INF
+ (+-)INF / (+-)INF
+ x % 0
+ (+-)INF % x
+ x._rescale( non-integer )
+ sqrt(-x) , x > 0
+ 0 ** 0
+ x ** (non-integer)
+ x ** (+-)INF
+ An operand is invalid
+ """
+ def handle(self, context, *args):
+ # NaN here is a module-level Decimal constant (defined later in
+ # this file, outside this excerpt).
+ if args:
+ if args[0] == 1: #sNaN, must drop 's' but keep diagnostics
+ return Decimal( (args[1]._sign, args[1]._int, 'n') )
+ return NaN
+
+class ConversionSyntax(InvalidOperation):
+ """Trying to convert badly formed string.
+
+ This occurs and signals invalid-operation if an string is being
+ converted to a number and it does not conform to the numeric string
+ syntax. The result is [0,qNaN].
+ """
+
+ def handle(self, context, *args):
+ # Returns a raw (sign, digits, exp) triple, not a Decimal: the
+ # caller assigns it directly to _sign/_int/_exp.
+ return (0, (0,), 'n') #Passed to something which uses a tuple.
+
<parameter name="new_str">+class DivisionByZero(DecimalException, ZeroDivisionError):
+ """Division by 0.
+
+ This occurs and signals division-by-zero if division of a finite number
+ by zero was attempted (during a divide-integer or divide operation, or a
+ power operation with negative right-hand operand), and the dividend was
+ not zero.
+
+ The result of the operation is [sign,inf], where sign is the exclusive
+ or of the signs of the operands for divide, or is 1 for an odd power of
+ -0, for power.
+ """
+
+ def handle(self, context, sign, double = None, *args):
+ # Infsign is a module-level (+Inf, -Inf) pair defined later in
+ # this file. `double` (when not None) requests a divmod-style
+ # pair of results.
+ if double is not None:
+ return (Infsign[sign],)*2
+ return Infsign[sign]
+
+class DivisionImpossible(InvalidOperation):
+ """Cannot perform the division adequately.
+
+ This occurs and signals invalid-operation if the integer result of a
+ divide-integer or remainder operation had too many digits (would be
+ longer than precision). The result is [0,qNaN].
+ """
+
+ def handle(self, context, *args):
+ # Pair because this is raised from divmod-style operations.
+ return (NaN, NaN)
+
+class DivisionUndefined(InvalidOperation, ZeroDivisionError):
+ """Undefined result of division.
+
+ This occurs and signals invalid-operation if division by zero was
+ attempted (during a divide-integer, divide, or remainder operation), and
+ the dividend is also zero. The result is [0,qNaN].
+ """
+
+ def handle(self, context, tup=None, *args):
+ # `tup` (when not None) marks a divmod-style call needing a pair.
+ if tup is not None:
+ return (NaN, NaN) #for 0 %0, 0 // 0
+ return NaN
+
+class Inexact(DecimalException):
+ """Had to round, losing information.
+
+ This occurs and signals inexact whenever the result of an operation is
+ not exact (that is, it needed to be rounded and any discarded digits
+ were non-zero), or if an overflow or underflow condition occurs. The
+ result in all cases is unchanged.
+
+ The inexact signal may be tested (or trapped) to determine if a given
+ operation (or sequence of operations) was inexact.
+ """
+ # Informational signal only: inherits the no-op handle().
+ pass
+
+class InvalidContext(InvalidOperation):
+ """Invalid context. Unknown rounding, for example.
+
+ This occurs and signals invalid-operation if an invalid context was
+ detected during an operation. This can occur if contexts are not checked
+ on creation and either the precision exceeds the capability of the
+ underlying concrete representation or an unknown or unsupported rounding
+ was specified. These aspects of the context need only be checked when
+ the values are required to be used. The result is [0,qNaN].
+ """
+
+ def handle(self, context, *args):
+ # Substitute result is quiet NaN.
+ return NaN
+
+class Rounded(DecimalException):
+ """Number got rounded (not necessarily changed during rounding).
+
+ This occurs and signals rounded whenever the result of an operation is
+ rounded (that is, some zero or non-zero digits were discarded from the
+ coefficient), or if an overflow or underflow condition occurs. The
+ result in all cases is unchanged.
+
+ The rounded signal may be tested (or trapped) to determine if a given
+ operation (or sequence of operations) caused a loss of precision.
+ """
+ # Informational signal only: inherits the no-op handle().
+ pass
+
+class Subnormal(DecimalException):
+ """Exponent < Emin before rounding.
+
+ This occurs and signals subnormal whenever the result of a conversion or
+ operation is subnormal (that is, its adjusted exponent is less than
+ Emin, before any rounding). The result in all cases is unchanged.
+
+ The subnormal signal may be tested (or trapped) to determine if a given
+ or operation (or sequence of operations) yielded a subnormal result.
+ """
+ # Informational signal only: inherits the no-op handle().
+ pass
+
+class Overflow(Inexact, Rounded):
+ """Numerical overflow.
+
+ This occurs and signals overflow if the adjusted exponent of a result
+ (from a conversion or from an operation that is not an attempt to divide
+ by zero), after rounding, would be greater than the largest value that
+ can be handled by the implementation (the value Emax).
+
+ The result depends on the rounding mode:
+
+ For round-half-up and round-half-even (and for round-half-down and
+ round-up, if implemented), the result of the operation is [sign,inf],
+ where sign is the sign of the intermediate result. For round-down, the
+ result is the largest finite number that can be represented in the
+ current precision, with the sign of the intermediate result. For
+ round-ceiling, the result is the same as for round-down if the sign of
+ the intermediate result is 1, or is [0,inf] otherwise. For round-floor,
+ the result is the same as for round-down if the sign of the intermediate
+ result is 0, or is [1,inf] otherwise. In all cases, Inexact and Rounded
+ will also be raised.
+ """
+
+ def handle(self, context, sign, *args):
+ # Either infinity with the intermediate sign, or the largest
+ # representable finite number (all 9s at precision `prec`),
+ # exactly as the docstring above specifies per rounding mode.
+ if context.rounding in (ROUND_HALF_UP, ROUND_HALF_EVEN,
+ ROUND_HALF_DOWN, ROUND_UP):
+ return Infsign[sign]
+ if sign == 0:
+ if context.rounding == ROUND_CEILING:
+ return Infsign[sign]
+ return Decimal((sign, (9,)*context.prec,
+ context.Emax-context.prec+1))
+ if sign == 1:
+ if context.rounding == ROUND_FLOOR:
+ return Infsign[sign]
+ return Decimal( (sign, (9,)*context.prec,
+ context.Emax-context.prec+1))
+
+
+class Underflow(Inexact, Rounded, Subnormal):
+ """Numerical underflow with result rounded to 0.
+
+ This occurs and signals underflow if a result is inexact and the
+ adjusted exponent of the result would be smaller (more negative) than
+ the smallest value that can be handled by the implementation (the value
+ Emin). That is, the result is both inexact and subnormal.
+
+ The result after an underflow will be a subnormal number rounded, if
+ necessary, so that its exponent is not less than Etiny. This may result
+ in 0 with the sign of the intermediate result and an exponent of Etiny.
+
+ In all cases, Inexact, Rounded, and Subnormal will also be raised.
+ """
+ # No handle() override: the rounding machinery computes the result.
+
+# List of public traps and flags
+# (these populate Context.traps and Context.flags dictionaries)
+_signals = [Clamped, DivisionByZero, Inexact, Overflow, Rounded,
+ Underflow, InvalidOperation, Subnormal]
+
+# Map conditions (per the spec) to signals
+# Internal-only conditions are reported to users as their mapped signal.
+_condition_map = {ConversionSyntax:InvalidOperation,
+ DivisionImpossible:InvalidOperation,
+ DivisionUndefined:InvalidOperation,
+ InvalidContext:InvalidOperation}
+
+##### Context Functions #######################################
+
+# The getcontext() and setcontext() function manage access to a thread-local
+# current context. Py2.4 offers direct support for thread locals. If that
+# is not available, use threading.currentThread() which is slower but will
+# work for older Pythons. If threads are not part of the build, create a
+# mock threading object with threading.local() returning the module namespace.
+
+# Select a thread-local storage strategy, then define getcontext() /
+# setcontext() in terms of it (see the explanatory comment above).
+try:
+ import threading
+except ImportError:
+ # Python was compiled without threads; create a mock object instead
+ import sys
+ class MockThreading:
+ def local(self, sys=sys):
+ return sys.modules[__name__]
+ threading = MockThreading()
+ del sys, MockThreading
+
+try:
+ threading.local
+
+except AttributeError:
+
+ # Pre-2.4 fallback: store the context on the thread object itself.
+ #To fix reloading, force it to create a new context
+ #Old contexts have different exceptions in their dicts, making problems.
+ if hasattr(threading.currentThread(), '__decimal_context__'):
+ del threading.currentThread().__decimal_context__
+
+ def setcontext(context):
+ """Set this thread's context to context."""
+ # The three template contexts are never installed directly;
+ # a cleared copy is used so their flags stay pristine.
+ if context in (DefaultContext, BasicContext, ExtendedContext):
+ context = context.copy()
+ context.clear_flags()
+ threading.currentThread().__decimal_context__ = context
+
+ def getcontext():
+ """Returns this thread's context.
+
+ If this thread does not yet have a context, returns
+ a new context and sets this thread's context.
+ New contexts are copies of DefaultContext.
+ """
+ try:
+ return threading.currentThread().__decimal_context__
+ except AttributeError:
+ context = Context()
+ threading.currentThread().__decimal_context__ = context
+ return context
+
+else:
+
+ # Py2.4+: use threading.local storage directly.
+ local = threading.local()
+ if hasattr(local, '__decimal_context__'):
+ del local.__decimal_context__
+
+ def getcontext(_local=local):
+ """Returns this thread's context.
+
+ If this thread does not yet have a context, returns
+ a new context and sets this thread's context.
+ New contexts are copies of DefaultContext.
+ """
+ try:
+ return _local.__decimal_context__
+ except AttributeError:
+ context = Context()
+ _local.__decimal_context__ = context
+ return context
+
+ def setcontext(context, _local=local):
+ """Set this thread's context to context."""
+ if context in (DefaultContext, BasicContext, ExtendedContext):
+ context = context.copy()
+ context.clear_flags()
+ _local.__decimal_context__ = context
+
+ del threading, local # Don't contaminate the namespace
+
+def localcontext(ctx=None):
+ """Return a context manager for a copy of the supplied context
+
+ Uses a copy of the current context if no context is specified
+ The returned context manager creates a local decimal context
+ in a with statement:
+ def sin(x):
+ with localcontext() as ctx:
+ ctx.prec += 2
+ # Rest of sin calculation algorithm
+ # uses a precision 2 greater than normal
+ return +s # Convert result to normal precision
+
+ def sin(x):
+ with localcontext(ExtendedContext):
+ # Rest of sin calculation algorithm
+ # uses the Extended Context from the
+ # General Decimal Arithmetic Specification
+ return +s # Convert result to normal context
+
+ """
+ # The string below can't be included in the docstring until Python 2.6
+ # as the doctest module doesn't understand __future__ statements
+ # (it is a bare expression statement — evaluated and discarded).
+ """
+ >>> from __future__ import with_statement
+ >>> print getcontext().prec
+ 28
+ >>> with localcontext():
+ ... ctx = getcontext()
+ ... ctx.prec += 2
+ ... print ctx.prec
+ ...
+ 30
+ >>> with localcontext(ExtendedContext):
+ ... print getcontext().prec
+ ...
+ 9
+ >>> print getcontext().prec
+ 28
+ """
+ # _ContextManager is defined further down in this module.
+ if ctx is None: ctx = getcontext()
+ return _ContextManager(ctx)
+
+
+##### Decimal class ###########################################
+
+class Decimal(object):
+ """Floating point class for decimal arithmetic."""
+
+ __slots__ = ('_exp','_int','_sign', '_is_special')
+ # Generally, the value of the Decimal instance is given by
+ # (-1)**_sign * _int * 10**_exp
+ # Special values are signified by _is_special == True
+
+ # We're immutable, so use __new__ not __init__
+ def __new__(cls, value="0", context=None):
+ """Create a decimal point instance.
+
+ >>> Decimal('3.14') # string input
+ Decimal("3.14")
+ >>> Decimal((0, (3, 1, 4), -2)) # tuple input (sign, digit_tuple, exponent)
+ Decimal("3.14")
+ >>> Decimal(314) # int or long
+ Decimal("314")
+ >>> Decimal(Decimal(314)) # another decimal instance
+ Decimal("314")
+ """
+ # _WorkRep, _isinfinity, _isnan and _string2exact are module-level
+ # helpers defined elsewhere in this file.
+
+ self = object.__new__(cls)
+ self._is_special = False
+
+ # From an internal working value
+ if isinstance(value, _WorkRep):
+ self._sign = value.sign
+ self._int = tuple(map(int, str(value.int)))
+ self._exp = int(value.exp)
+ return self
+
+ # From another decimal
+ if isinstance(value, Decimal):
+ self._exp = value._exp
+ self._sign = value._sign
+ self._int = value._int
+ self._is_special = value._is_special
+ return self
+
+ # From an integer
+ if isinstance(value, (int,long)):
+ if value >= 0:
+ self._sign = 0
+ else:
+ self._sign = 1
+ self._exp = 0
+ self._int = tuple(map(int, str(abs(value))))
+ return self
+
+ # tuple/list conversion (possibly from as_tuple())
+ if isinstance(value, (list,tuple)):
+ if len(value) != 3:
+ raise ValueError, 'Invalid arguments'
+ if value[0] not in (0,1):
+ raise ValueError, 'Invalid sign'
+ for digit in value[1]:
+ if not isinstance(digit, (int,long)) or digit < 0:
+ raise ValueError, "The second value in the tuple must be composed of non negative integer elements."
+
+ self._sign = value[0]
+ self._int = tuple(value[1])
+ # 'F' (infinity), 'n' (qNaN), 'N' (sNaN) mark special values.
+ if value[2] in ('F','n','N'):
+ self._exp = value[2]
+ self._is_special = True
+ else:
+ self._exp = int(value[2])
+ return self
+
+ if isinstance(value, float):
+ raise TypeError("Cannot convert float to Decimal. " +
+ "First convert the float to a string")
+
+ # Other argument types may require the context during interpretation
+ if context is None:
+ context = getcontext()
+
+ # From a string
+ # REs insist on real strings, so we can too.
+ if isinstance(value, basestring):
+ if _isinfinity(value):
+ self._exp = 'F'
+ self._int = (0,)
+ self._is_special = True
+ if _isinfinity(value) == 1:
+ self._sign = 0
+ else:
+ self._sign = 1
+ return self
+ if _isnan(value):
+ sig, sign, diag = _isnan(value)
+ self._is_special = True
+ if len(diag) > context.prec: #Diagnostic info too long
+ self._sign, self._int, self._exp = \
+ context._raise_error(ConversionSyntax)
+ return self
+ if sig == 1:
+ self._exp = 'n' #qNaN
+ else: #sig == 2
+ self._exp = 'N' #sNaN
+ self._sign = sign
+ self._int = tuple(map(int, diag)) #Diagnostic info
+ return self
+ try:
+ self._sign, self._int, self._exp = _string2exact(value)
+ except ValueError:
+ self._is_special = True
+ self._sign, self._int, self._exp = context._raise_error(ConversionSyntax)
+ return self
+
+ raise TypeError("Cannot convert %r to Decimal" % value)
+
+ def _isnan(self):
+ """Returns whether the number is not actually one.
+
+ 0 if a number
+ 1 if NaN
+ 2 if sNaN
+ """
+ # Special values encode NaN-ness in _exp: 'n' quiet, 'N' signaling.
+ if self._is_special:
+ exp = self._exp
+ if exp == 'n':
+ return 1
+ elif exp == 'N':
+ return 2
+ return 0
+
+ def _isinfinity(self):
+ """Returns whether the number is infinite
+
+ 0 if finite or not a number
+ 1 if +INF
+ -1 if -INF
+ """
+ # _exp == 'F' marks an infinity; the stored sign picks +/-.
+ if self._exp == 'F':
+ if self._sign:
+ return -1
+ return 1
+ return 0
+
+ def _check_nans(self, other = None, context=None):
+ """Returns whether the number is not actually one.
+
+ if self, other are sNaN, signal
+ if self, other are NaN return nan
+ return 0
+
+ Done before operations.
+ """
+
+ self_is_nan = self._isnan()
+ if other is None:
+ other_is_nan = False
+ else:
+ other_is_nan = other._isnan()
+
+ if self_is_nan or other_is_nan:
+ if context is None:
+ context = getcontext()
+
+ # sNaN (code 2) raises InvalidOperation; a quiet NaN operand
+ # simply propagates (self takes precedence over other).
+ if self_is_nan == 2:
+ return context._raise_error(InvalidOperation, 'sNaN',
+ 1, self)
+ if other_is_nan == 2:
+ return context._raise_error(InvalidOperation, 'sNaN',
+ 1, other)
+ if self_is_nan:
+ return self
+
+ return other
+ return 0
+
+ def __nonzero__(self):
+ """Is the number non-zero?
+
+ 0 if self == 0
+ 1 if self != 0
+ """
+ # Specials (Inf/NaN) are always truthy; otherwise a value is zero
+ # exactly when every coefficient digit is 0.
+ if self._is_special:
+ return 1
+ return sum(self._int) != 0
+
+ def __cmp__(self, other, context=None):
+ # Three-way comparison; tries a series of cheap structural tests
+ # before falling back to an actual (rounded-away-from-zero)
+ # subtraction.
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+
+ if self._is_special or other._is_special:
+ ans = self._check_nans(other, context)
+ if ans:
+ return 1 # Comparison involving NaN's always reports self > other
+
+ # INF = INF
+ return cmp(self._isinfinity(), other._isinfinity())
+
+ if not self and not other:
+ return 0 #If both 0, sign comparison isn't certain.
+
+ #If different signs, neg one is less
+ if other._sign < self._sign:
+ return -1
+ if self._sign < other._sign:
+ return 1
+
+ # Same sign: compare magnitudes via adjusted exponents, padding
+ # the shorter coefficient with zeros to align exponents.
+ self_adjusted = self.adjusted()
+ other_adjusted = other.adjusted()
+ if self_adjusted == other_adjusted and \
+ self._int + (0,)*(self._exp - other._exp) == \
+ other._int + (0,)*(other._exp - self._exp):
+ return 0 #equal, except in precision. ([0]*(-x) = [])
+ elif self_adjusted > other_adjusted and self._int[0] != 0:
+ return (-1)**self._sign
+ elif self_adjusted < other_adjusted and other._int[0] != 0:
+ return -((-1)**self._sign)
+
+ # Need to round, so make sure we have a valid context
+ if context is None:
+ context = getcontext()
+
+ # Subtract under ROUND_UP with all flags suppressed so the
+ # comparison itself never signals; only the sign of the
+ # difference matters.
+ context = context._shallow_copy()
+ rounding = context._set_rounding(ROUND_UP) #round away from 0
+
+ flags = context._ignore_all_flags()
+ res = self.__sub__(other, context=context)
+
+ context._regard_flags(*flags)
+
+ context.rounding = rounding
+
+ if not res:
+ return 0
+ elif res._sign:
+ return -1
+ return 1
+
+ def __eq__(self, other):
+ if not isinstance(other, (Decimal, int, long)):
+ return NotImplemented
+ return self.__cmp__(other) == 0
+
+ def __ne__(self, other):
+ if not isinstance(other, (Decimal, int, long)):
+ return NotImplemented
+ return self.__cmp__(other) != 0
+
+ def compare(self, other, context=None):
+ """Compares one to another.
+
+ -1 => a < b
+ 0 => a = b
+ 1 => a > b
+ NaN => one is NaN
+ Like __cmp__, but returns Decimal instances.
+ """
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+
+ #compare(NaN, NaN) = NaN
+ if (self._is_special or other and other._is_special):
+ ans = self._check_nans(other, context)
+ if ans:
+ return ans
+
+ return Decimal(self.__cmp__(other, context))
+
+ def __hash__(self):
+ """x.__hash__() <==> hash(x)"""
+ # Decimal integers must hash the same as the ints
+ # Non-integer decimals are normalized and hashed as strings
+ # Normalization assures that hash(100E-1) == hash(10)
+ if self._is_special:
+ if self._isnan():
+ raise TypeError('Cannot hash a NaN value.')
+ return hash(str(self))
+ i = int(self)
+ if self == Decimal(i):
+ return hash(i)
+ assert self.__nonzero__() # '-0' handled by integer case
+ return hash(str(self.normalize()))
+
+ def as_tuple(self):
+ """Represents the number as a triple tuple.
+
+ To show the internals exactly as they are.
+ """
+ return (self._sign, self._int, self._exp)
+
+ def __repr__(self):
+ """Represents the number as an instance of Decimal."""
+ # Invariant: eval(repr(d)) == d
+ return 'Decimal("%s")' % str(self)
+
    def __str__(self, eng = 0, context=None):
        """Return string representation of the number in scientific notation.

        Captures all of the information in the underlying representation.
        With eng=1, engineering notation is produced instead (exponent a
        multiple of 3); context supplies the capitals setting for 'e'/'E'.
        """

        if self._is_special:
            if self._isnan():
                minus = '-'*self._sign
                # A NaN may carry diagnostic digits in its coefficient.
                if self._int == (0,):
                    info = ''
                else:
                    info = ''.join(map(str, self._int))
                if self._isnan() == 2:
                    # _isnan() == 2 marks a signaling NaN.
                    return minus + 'sNaN' + info
                return minus + 'NaN' + info
            if self._isinfinity():
                minus = '-'*self._sign
                return minus + 'Infinity'

        if context is None:
            context = getcontext()

        tmp = map(str, self._int)
        numdigits = len(self._int)
        leftdigits = self._exp + numdigits
        if eng and not self: #self = 0eX wants 0[.0[0]]eY, not [[0]0]0eY
            if self._exp < 0 and self._exp >= -6: #short, no need for e/E
                s = '-'*self._sign + '0.' + '0'*(abs(self._exp))
                return s
            #exp is closest mult. of 3 >= self._exp
            exp = ((self._exp - 1)// 3 + 1) * 3
            if exp != self._exp:
                s = '0.'+'0'*(exp - self._exp)
            else:
                s = '0'
            if exp != 0:
                if context.capitals:
                    s += 'E'
                else:
                    s += 'e'
                if exp > 0:
                    s += '+' #0.0e+3, not 0.0e3
                s += str(exp)
            s = '-'*self._sign + s
            return s
        if eng:
            # Engineering notation: 1-3 digits before the point, exponent
            # a multiple of 3.
            dotplace = (leftdigits-1)%3+1
            adjexp = leftdigits -1 - (leftdigits-1)%3
        else:
            adjexp = leftdigits-1
            dotplace = 1
        # Splice the decimal point (and any leading zeros) into the digit list.
        if self._exp == 0:
            pass
        elif self._exp < 0 and adjexp >= 0:
            tmp.insert(leftdigits, '.')
        elif self._exp < 0 and adjexp >= -6:
            tmp[0:0] = ['0'] * int(-leftdigits)
            tmp.insert(0, '0.')
        else:
            if numdigits > dotplace:
                tmp.insert(dotplace, '.')
            elif numdigits < dotplace:
                tmp.extend(['0']*(dotplace-numdigits))
        if adjexp:
            if not context.capitals:
                tmp.append('e')
            else:
                tmp.append('E')
            if adjexp > 0:
                tmp.append('+')
            tmp.append(str(adjexp))
        if eng:
            # Strip leading zeros, then restore one if nothing is left
            # before the point / exponent marker.
            while tmp[0:1] == ['0']:
                tmp[0:1] = []
            if len(tmp) == 0 or tmp[0] == '.' or tmp[0].lower() == 'e':
                tmp[0:0] = ['0']
        if self._sign:
            tmp.insert(0, '-')

        return ''.join(tmp)
+
    def to_eng_string(self, context=None):
        """Convert to engineering-type string.

        Engineering notation has an exponent which is a multiple of 3, so there
        are up to 3 digits left of the decimal place.

        Same rules for when in exponential and when as a value as in __str__.
        """
        # Simply __str__ with the engineering flag turned on.
        return self.__str__(eng=1, context=context)
+
+ def __neg__(self, context=None):
+ """Returns a copy with the sign switched.
+
+ Rounds, if it has reason.
+ """
+ if self._is_special:
+ ans = self._check_nans(context=context)
+ if ans:
+ return ans
+
+ if not self:
+ # -Decimal('0') is Decimal('0'), not Decimal('-0')
+ sign = 0
+ elif self._sign:
+ sign = 0
+ else:
+ sign = 1
+
+ if context is None:
+ context = getcontext()
+ if context._rounding_decision == ALWAYS_ROUND:
+ return Decimal((sign, self._int, self._exp))._fix(context)
+ return Decimal( (sign, self._int, self._exp))
+
+ def __pos__(self, context=None):
+ """Returns a copy, unless it is a sNaN.
+
+ Rounds the number (if more then precision digits)
+ """
+ if self._is_special:
+ ans = self._check_nans(context=context)
+ if ans:
+ return ans
+
+ sign = self._sign
+ if not self:
+ # + (-0) = 0
+ sign = 0
+
+ if context is None:
+ context = getcontext()
+
+ if context._rounding_decision == ALWAYS_ROUND:
+ ans = self._fix(context)
+ else:
+ ans = Decimal(self)
+ ans._sign = sign
+ return ans
+
+ def __abs__(self, round=1, context=None):
+ """Returns the absolute value of self.
+
+ If the second argument is 0, do not round.
+ """
+ if self._is_special:
+ ans = self._check_nans(context=context)
+ if ans:
+ return ans
+
+ if not round:
+ if context is None:
+ context = getcontext()
+ context = context._shallow_copy()
+ context._set_rounding_decision(NEVER_ROUND)
+
+ if self._sign:
+ ans = self.__neg__(context=context)
+ else:
+ ans = self.__pos__(context=context)
+
+ return ans
+
    def __add__(self, other, context=None):
        """Returns self + other.

        -INF + INF (or the reverse) cause InvalidOperation errors.
        The result is rounded/fixed when the context's rounding decision
        is ALWAYS_ROUND.
        """
        other = _convert_other(other)
        if other is NotImplemented:
            return other

        if context is None:
            context = getcontext()

        if self._is_special or other._is_special:
            ans = self._check_nans(other, context)
            if ans:
                return ans

            if self._isinfinity():
                #If both INF, same sign => same as both, opposite => error.
                if self._sign != other._sign and other._isinfinity():
                    return context._raise_error(InvalidOperation, '-INF + INF')
                return Decimal(self)
            if other._isinfinity():
                return Decimal(other) #Can't both be infinity here

        shouldround = context._rounding_decision == ALWAYS_ROUND

        exp = min(self._exp, other._exp)
        negativezero = 0
        if context.rounding == ROUND_FLOOR and self._sign != other._sign:
            #If the answer is 0, the sign should be negative, in this case.
            negativezero = 1

        # Zero operands short-circuit: rescale the other operand instead of
        # running the full aligned addition below.
        if not self and not other:
            sign = min(self._sign, other._sign)
            if negativezero:
                sign = 1
            return Decimal( (sign, (0,), exp))
        if not self:
            exp = max(exp, other._exp - context.prec-1)
            ans = other._rescale(exp, watchexp=0, context=context)
            if shouldround:
                ans = ans._fix(context)
            return ans
        if not other:
            exp = max(exp, self._exp - context.prec-1)
            ans = self._rescale(exp, watchexp=0, context=context)
            if shouldround:
                ans = ans._fix(context)
            return ans

        # Align both operands to a common exponent as integer work reps.
        op1 = _WorkRep(self)
        op2 = _WorkRep(other)
        op1, op2 = _normalize(op1, op2, shouldround, context.prec)

        result = _WorkRep()
        if op1.sign != op2.sign:
            # Equal and opposite
            if op1.int == op2.int:
                if exp < context.Etiny():
                    exp = context.Etiny()
                    context._raise_error(Clamped)
                return Decimal((negativezero, (0,), exp))
            # Arrange op1 to have the larger magnitude before subtracting.
            if op1.int < op2.int:
                op1, op2 = op2, op1
                #OK, now abs(op1) > abs(op2)
            if op1.sign == 1:
                result.sign = 1
                op1.sign, op2.sign = op2.sign, op1.sign
            else:
                result.sign = 0
                #So we know the sign, and op1 > 0.
        elif op1.sign == 1:
            result.sign = 1
            op1.sign, op2.sign = (0, 0)
        else:
            result.sign = 0
            #Now, op1 > abs(op2) > 0

        if op2.sign == 0:
            result.int = op1.int + op2.int
        else:
            result.int = op1.int - op2.int

        result.exp = op1.exp
        ans = Decimal(result)
        if shouldround:
            ans = ans._fix(context)
        return ans

    __radd__ = __add__
+
+ def __sub__(self, other, context=None):
+ """Return self + (-other)"""
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+
+ if self._is_special or other._is_special:
+ ans = self._check_nans(other, context=context)
+ if ans:
+ return ans
+
+ # -Decimal(0) = Decimal(0), which we don't want since
+ # (-0 - 0 = -0 + (-0) = -0, but -0 + 0 = 0.)
+ # so we change the sign directly to a copy
+ tmp = Decimal(other)
+ tmp._sign = 1-tmp._sign
+
+ return self.__add__(tmp, context=context)
+
+ def __rsub__(self, other, context=None):
+ """Return other + (-self)"""
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+
+ tmp = Decimal(self)
+ tmp._sign = 1 - tmp._sign
+ return other.__add__(tmp, context=context)
+
    def _increment(self, round=1, context=None):
        """Special case of add, adding 1eExponent

        Since it is common, (rounding, for example) this adds
        (sign)*one E self._exp to the number more efficiently than add.

        For example:
        Decimal('5.624e10')._increment() == Decimal('5.625e10')
        """
        if self._is_special:
            ans = self._check_nans(context=context)
            if ans:
                return ans

            # Not a NaN, so it must be infinite; incrementing changes nothing.
            return Decimal(self) # Must be infinite, and incrementing makes no difference

        # Add one to the least significant digit and propagate the carry
        # toward the most significant end.
        L = list(self._int)
        L[-1] += 1
        spot = len(L)-1
        while L[spot] == 10:
            L[spot] = 0
            if spot == 0:
                # Carry out of the top digit: the coefficient grows a digit.
                L[0:0] = [1]
                break
            L[spot-1] += 1
            spot -= 1
        ans = Decimal((self._sign, L, self._exp))

        if context is None:
            context = getcontext()
        if round and context._rounding_decision == ALWAYS_ROUND:
            ans = ans._fix(context)
        return ans
+
    def __mul__(self, other, context=None):
        """Return self * other.

        (+-) INF * 0 (or its reverse) raise InvalidOperation.
        """
        other = _convert_other(other)
        if other is NotImplemented:
            return other

        if context is None:
            context = getcontext()

        # XOR of the signs gives the result sign.
        resultsign = self._sign ^ other._sign

        if self._is_special or other._is_special:
            ans = self._check_nans(other, context)
            if ans:
                return ans

            if self._isinfinity():
                if not other:
                    return context._raise_error(InvalidOperation, '(+-)INF * 0')
                return Infsign[resultsign]

            if other._isinfinity():
                if not self:
                    return context._raise_error(InvalidOperation, '0 * (+-)INF')
                return Infsign[resultsign]

        resultexp = self._exp + other._exp
        shouldround = context._rounding_decision == ALWAYS_ROUND

        # Special case for multiplying by zero
        if not self or not other:
            ans = Decimal((resultsign, (0,), resultexp))
            if shouldround:
                #Fixing in case the exponent is out of bounds
                ans = ans._fix(context)
            return ans

        # Special case for multiplying by power of 10
        if self._int == (1,):
            ans = Decimal((resultsign, other._int, resultexp))
            if shouldround:
                ans = ans._fix(context)
            return ans
        if other._int == (1,):
            ans = Decimal((resultsign, self._int, resultexp))
            if shouldround:
                ans = ans._fix(context)
            return ans

        # General case: multiply the coefficients as plain integers, then
        # convert the product's digits back into a coefficient tuple.
        op1 = _WorkRep(self)
        op2 = _WorkRep(other)

        ans = Decimal( (resultsign, map(int, str(op1.int * op2.int)), resultexp))
        if shouldround:
            ans = ans._fix(context)

        return ans
    __rmul__ = __mul__
+
    def __div__(self, other, context=None):
        """Return self / other (true division); see _divide for the details."""
        return self._divide(other, context=context)
    __truediv__ = __div__
+
    def _divide(self, other, divmod = 0, context=None):
        """Return a / b, to context.prec precision.

        divmod:
        0 => true division
        1 => (a //b, a%b)
        2 => a //b
        3 => a%b

        Actually, if divmod is 2 or 3 a tuple is returned, but errors for
        computing the other value are not raised.
        """
        other = _convert_other(other)
        if other is NotImplemented:
            # Tuple-returning modes must return a pair even on failure.
            if divmod in (0, 1):
                return NotImplemented
            return (NotImplemented, NotImplemented)

        if context is None:
            context = getcontext()

        sign = self._sign ^ other._sign

        if self._is_special or other._is_special:
            ans = self._check_nans(other, context)
            if ans:
                if divmod:
                    return (ans, ans)
                return ans

            if self._isinfinity() and other._isinfinity():
                if divmod:
                    return (context._raise_error(InvalidOperation,
                                            '(+-)INF // (+-)INF'),
                            context._raise_error(InvalidOperation,
                                            '(+-)INF % (+-)INF'))
                return context._raise_error(InvalidOperation, '(+-)INF/(+-)INF')

            if self._isinfinity():
                if divmod == 1:
                    return (Infsign[sign],
                            context._raise_error(InvalidOperation, 'INF % x'))
                elif divmod == 2:
                    return (Infsign[sign], NaN)
                elif divmod == 3:
                    return (Infsign[sign],
                            context._raise_error(InvalidOperation, 'INF % x'))
                return Infsign[sign]

            if other._isinfinity():
                if divmod:
                    return (Decimal((sign, (0,), 0)), Decimal(self))
                # x / INF underflows to a signed zero at Etiny.
                context._raise_error(Clamped, 'Division by infinity')
                return Decimal((sign, (0,), context.Etiny()))

        # Special cases for zeroes
        if not self and not other:
            if divmod:
                return context._raise_error(DivisionUndefined, '0 / 0', 1)
            return context._raise_error(DivisionUndefined, '0 / 0')

        if not self:
            if divmod:
                otherside = Decimal(self)
                otherside._exp = min(self._exp, other._exp)
                return (Decimal((sign, (0,), 0)), otherside)
            # 0 / y is a zero whose exponent is clamped into range.
            exp = self._exp - other._exp
            if exp < context.Etiny():
                exp = context.Etiny()
                context._raise_error(Clamped, '0e-x / y')
            if exp > context.Emax:
                exp = context.Emax
                context._raise_error(Clamped, '0e+x / y')
            return Decimal( (sign, (0,), exp) )

        if not other:
            if divmod:
                return context._raise_error(DivisionByZero, 'divmod(x,0)',
                                           sign, 1)
            return context._raise_error(DivisionByZero, 'x / 0', sign)

        #OK, so neither = 0, INF or NaN

        shouldround = context._rounding_decision == ALWAYS_ROUND

        #If we're dividing into ints, and self < other, stop.
        #self.__abs__(0) does not round.
        if divmod and (self.__abs__(0, context) < other.__abs__(0, context)):

            if divmod == 1 or divmod == 3:
                exp = min(self._exp, other._exp)
                ans2 = self._rescale(exp, context=context, watchexp=0)
                if shouldround:
                    ans2 = ans2._fix(context)
                return (Decimal( (sign, (0,), 0) ),
                        ans2)

            elif divmod == 2:
                #Don't round the mod part, if we don't need it.
                return (Decimal( (sign, (0,), 0) ), Decimal(self))

        # Long division on the integer work representations.
        op1 = _WorkRep(self)
        op2 = _WorkRep(other)
        op1, op2, adjust = _adjust_coefficients(op1, op2)
        res = _WorkRep( (sign, 0, (op1.exp - op2.exp)) )
        if divmod and res.exp > context.prec + 1:
            return context._raise_error(DivisionImpossible)

        prec_limit = 10 ** context.prec
        while 1:
            # Repeated subtraction determines the next quotient digit(s).
            while op2.int <= op1.int:
                res.int += 1
                op1.int -= op2.int
            if res.exp == 0 and divmod:
                if res.int >= prec_limit and shouldround:
                    return context._raise_error(DivisionImpossible)
                otherside = Decimal(op1)
                frozen = context._ignore_all_flags()

                exp = min(self._exp, other._exp)
                otherside = otherside._rescale(exp, context=context, watchexp=0)
                context._regard_flags(*frozen)
                if shouldround:
                    otherside = otherside._fix(context)
                return (Decimal(res), otherside)

            if op1.int == 0 and adjust >= 0 and not divmod:
                break
            if res.int >= prec_limit and shouldround:
                if divmod:
                    return context._raise_error(DivisionImpossible)
                shouldround=1
                # Really, the answer is a bit higher, so adding a one to
                # the end will make sure the rounding is right.
                if op1.int != 0:
                    res.int *= 10
                    res.int += 1
                    res.exp -= 1

                break
            # Shift one more decimal place and continue.
            res.int *= 10
            res.exp -= 1
            adjust += 1
            op1.int *= 10
            op1.exp -= 1

        if res.exp == 0 and divmod and op2.int > op1.int:
            #Solves an error in precision. Same as a previous block.

            if res.int >= prec_limit and shouldround:
                return context._raise_error(DivisionImpossible)
            otherside = Decimal(op1)
            frozen = context._ignore_all_flags()

            exp = min(self._exp, other._exp)
            otherside = otherside._rescale(exp, context=context)

            context._regard_flags(*frozen)

            return (Decimal(res), otherside)

        ans = Decimal(res)
        if shouldround:
            ans = ans._fix(context)
        return ans
+
+ def __rdiv__(self, other, context=None):
+ """Swaps self/other and returns __div__."""
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+ return other.__div__(self, context=context)
+ __rtruediv__ = __rdiv__
+
+ def __divmod__(self, other, context=None):
+ """
+ (self // other, self % other)
+ """
+ return self._divide(other, 1, context)
+
+ def __rdivmod__(self, other, context=None):
+ """Swaps self/other and returns __divmod__."""
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+ return other.__divmod__(self, context=context)
+
+ def __mod__(self, other, context=None):
+ """
+ self % other
+ """
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+
+ if self._is_special or other._is_special:
+ ans = self._check_nans(other, context)
+ if ans:
+ return ans
+
+ if self and not other:
+ return context._raise_error(InvalidOperation, 'x % 0')
+
+ return self._divide(other, 3, context)[1]
+
+ def __rmod__(self, other, context=None):
+ """Swaps self/other and returns __mod__."""
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+ return other.__mod__(self, context=context)
+
+ def remainder_near(self, other, context=None):
+ """
+ Remainder nearest to 0- abs(remainder-near) <= other/2
+ """
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+
+ if self._is_special or other._is_special:
+ ans = self._check_nans(other, context)
+ if ans:
+ return ans
+ if self and not other:
+ return context._raise_error(InvalidOperation, 'x % 0')
+
+ if context is None:
+ context = getcontext()
+ # If DivisionImpossible causes an error, do not leave Rounded/Inexact
+ # ignored in the calling function.
+ context = context._shallow_copy()
+ flags = context._ignore_flags(Rounded, Inexact)
+ #keep DivisionImpossible flags
+ (side, r) = self.__divmod__(other, context=context)
+
+ if r._isnan():
+ context._regard_flags(*flags)
+ return r
+
+ context = context._shallow_copy()
+ rounding = context._set_rounding_decision(NEVER_ROUND)
+
+ if other._sign:
+ comparison = other.__div__(Decimal(-2), context=context)
+ else:
+ comparison = other.__div__(Decimal(2), context=context)
+
+ context._set_rounding_decision(rounding)
+ context._regard_flags(*flags)
+
+ s1, s2 = r._sign, comparison._sign
+ r._sign, comparison._sign = 0, 0
+
+ if r < comparison:
+ r._sign, comparison._sign = s1, s2
+ #Get flags now
+ self.__divmod__(other, context=context)
+ return r._fix(context)
+ r._sign, comparison._sign = s1, s2
+
+ rounding = context._set_rounding_decision(NEVER_ROUND)
+
+ (side, r) = self.__divmod__(other, context=context)
+ context._set_rounding_decision(rounding)
+ if r._isnan():
+ return r
+
+ decrease = not side._iseven()
+ rounding = context._set_rounding_decision(NEVER_ROUND)
+ side = side.__abs__(context=context)
+ context._set_rounding_decision(rounding)
+
+ s1, s2 = r._sign, comparison._sign
+ r._sign, comparison._sign = 0, 0
+ if r > comparison or decrease and r == comparison:
+ r._sign, comparison._sign = s1, s2
+ context.prec += 1
+ if len(side.__add__(Decimal(1), context=context)._int) >= context.prec:
+ context.prec -= 1
+ return context._raise_error(DivisionImpossible)[1]
+ context.prec -= 1
+ if self._sign == other._sign:
+ r = r.__sub__(other, context=context)
+ else:
+ r = r.__add__(other, context=context)
+ else:
+ r._sign, comparison._sign = s1, s2
+
+ return r._fix(context)
+
    def __floordiv__(self, other, context=None):
        """self // other

        Integer division: element [0] of _divide's (quotient, remainder) pair.
        """
        return self._divide(other, 2, context)[0]
+
+ def __rfloordiv__(self, other, context=None):
+ """Swaps self/other and returns __floordiv__."""
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+ return other.__floordiv__(self, context=context)
+
+ def __float__(self):
+ """Float representation."""
+ return float(str(self))
+
+ def __int__(self):
+ """Converts self to an int, truncating if necessary."""
+ if self._is_special:
+ if self._isnan():
+ context = getcontext()
+ return context._raise_error(InvalidContext)
+ elif self._isinfinity():
+ raise OverflowError, "Cannot convert infinity to long"
+ if self._exp >= 0:
+ s = ''.join(map(str, self._int)) + '0'*self._exp
+ else:
+ s = ''.join(map(str, self._int))[:self._exp]
+ if s == '':
+ s = '0'
+ sign = '-'*self._sign
+ return int(sign + s)
+
    def __long__(self):
        """Converts to a long.

        Equivalent to long(int(self))
        """
        # Truncation toward zero is handled entirely by __int__.
        return long(self.__int__())
+
+ def _fix(self, context):
+ """Round if it is necessary to keep self within prec precision.
+
+ Rounds and fixes the exponent. Does not raise on a sNaN.
+
+ Arguments:
+ self - Decimal instance
+ context - context used.
+ """
+ if self._is_special:
+ return self
+ if context is None:
+ context = getcontext()
+ prec = context.prec
+ ans = self._fixexponents(context)
+ if len(ans._int) > prec:
+ ans = ans._round(prec, context=context)
+ ans = ans._fixexponents(context)
+ return ans
+
    def _fixexponents(self, context):
        """Fix the exponents and return a copy with the exponent in bounds.
        Only call if known to not be a special value.

        Raises Clamped/Subnormal/Underflow/Overflow on the context as
        appropriate; may return the result of Overflow's handler.
        """
        folddown = context._clamp
        Emin = context.Emin
        ans = self
        ans_adjusted = ans.adjusted()
        if ans_adjusted < Emin:
            # Below the normal range: clamp or go subnormal.
            Etiny = context.Etiny()
            if ans._exp < Etiny:
                if not ans:
                    # A zero is simply clamped up to Etiny.
                    ans = Decimal(self)
                    ans._exp = Etiny
                    context._raise_error(Clamped)
                    return ans
                ans = ans._rescale(Etiny, context=context)
                #It isn't zero, and exp < Emin => subnormal
                context._raise_error(Subnormal)
                if context.flags[Inexact]:
                    context._raise_error(Underflow)
            else:
                if ans:
                    #Only raise subnormal if non-zero.
                    context._raise_error(Subnormal)
        else:
            Etop = context.Etop()
            if folddown and ans._exp > Etop:
                # _clamp mode: fold an over-large exponent down to Etop.
                context._raise_error(Clamped)
                ans = ans._rescale(Etop, context=context)
            else:
                Emax = context.Emax
                if ans_adjusted > Emax:
                    if not ans:
                        # A zero is clamped down to Emax.
                        ans = Decimal(self)
                        ans._exp = Emax
                        context._raise_error(Clamped)
                        return ans
                    context._raise_error(Inexact)
                    context._raise_error(Rounded)
                    return context._raise_error(Overflow, 'above Emax', ans._sign)
        return ans
+
    def _round(self, prec=None, rounding=None, context=None):
        """Returns a rounded version of self.

        You can specify the precision or rounding method.  Otherwise, the
        context determines it.  Raises Rounded (and Inexact when digits
        are actually lost) on the context.
        """

        if self._is_special:
            ans = self._check_nans(context=context)
            if ans:
                return ans

            if self._isinfinity():
                return Decimal(self)

        if context is None:
            context = getcontext()

        if rounding is None:
            rounding = context.rounding
        if prec is None:
            prec = context.prec

        if not self:
            # Rounding a zero only adjusts its exponent.
            if prec <= 0:
                dig = (0,)
                exp = len(self._int) - prec + self._exp
            else:
                dig = (0,) * prec
                exp = len(self._int) + self._exp - prec
            ans = Decimal((self._sign, dig, exp))
            context._raise_error(Rounded)
            return ans

        # Normalize prec <= 0 cases to a 1-digit working value.
        if prec == 0:
            temp = Decimal(self)
            temp._int = (0,)+temp._int
            prec = 1
        elif prec < 0:
            exp = self._exp + len(self._int) - prec - 1
            temp = Decimal( (self._sign, (0, 1), exp))
            prec = 1
        else:
            temp = Decimal(self)

        numdigits = len(temp._int)
        if prec == numdigits:
            return temp

        # See if we need to extend precision
        expdiff = prec - numdigits
        if expdiff > 0:
            tmp = list(temp._int)
            tmp.extend([0] * expdiff)
            ans = Decimal( (temp._sign, tmp, temp._exp - expdiff))
            return ans

        #OK, but maybe all the lost digits are 0.
        lostdigits = self._int[expdiff:]
        if lostdigits == (0,) * len(lostdigits):
            ans = Decimal( (temp._sign, temp._int[:prec], temp._exp - expdiff))
            #Rounded, but not Inexact
            context._raise_error(Rounded)
            return ans

        # Okay, let's round and lose data

        # Dispatch on the rounding mode via the class-level table filled
        # in after the class body (maps mode -> bound-method name).
        this_function = getattr(temp, self._pick_rounding_function[rounding])
        #Now we've got the rounding function

        if prec != context.prec:
            context = context._shallow_copy()
            context.prec = prec
        ans = this_function(prec, expdiff, context)
        context._raise_error(Rounded)
        context._raise_error(Inexact, 'Changed in rounding')

        return ans

    # Populated after the class definition: rounding mode -> method name.
    _pick_rounding_function = {}
+
+ def _round_down(self, prec, expdiff, context):
+ """Also known as round-towards-0, truncate."""
+ return Decimal( (self._sign, self._int[:prec], self._exp - expdiff) )
+
+ def _round_half_up(self, prec, expdiff, context, tmp = None):
+ """Rounds 5 up (away from 0)"""
+
+ if tmp is None:
+ tmp = Decimal( (self._sign,self._int[:prec], self._exp - expdiff))
+ if self._int[prec] >= 5:
+ tmp = tmp._increment(round=0, context=context)
+ if len(tmp._int) > prec:
+ return Decimal( (tmp._sign, tmp._int[:-1], tmp._exp + 1))
+ return tmp
+
+ def _round_half_even(self, prec, expdiff, context):
+ """Round 5 to even, rest to nearest."""
+
+ tmp = Decimal( (self._sign, self._int[:prec], self._exp - expdiff))
+ half = (self._int[prec] == 5)
+ if half:
+ for digit in self._int[prec+1:]:
+ if digit != 0:
+ half = 0
+ break
+ if half:
+ if self._int[prec-1] & 1 == 0:
+ return tmp
+ return self._round_half_up(prec, expdiff, context, tmp)
+
+ def _round_half_down(self, prec, expdiff, context):
+ """Round 5 down"""
+
+ tmp = Decimal( (self._sign, self._int[:prec], self._exp - expdiff))
+ half = (self._int[prec] == 5)
+ if half:
+ for digit in self._int[prec+1:]:
+ if digit != 0:
+ half = 0
+ break
+ if half:
+ return tmp
+ return self._round_half_up(prec, expdiff, context, tmp)
+
+ def _round_up(self, prec, expdiff, context):
+ """Rounds away from 0."""
+ tmp = Decimal( (self._sign, self._int[:prec], self._exp - expdiff) )
+ for digit in self._int[prec:]:
+ if digit != 0:
+ tmp = tmp._increment(round=1, context=context)
+ if len(tmp._int) > prec:
+ return Decimal( (tmp._sign, tmp._int[:-1], tmp._exp + 1))
+ else:
+ return tmp
+ return tmp
+
+ def _round_ceiling(self, prec, expdiff, context):
+ """Rounds up (not away from 0 if negative.)"""
+ if self._sign:
+ return self._round_down(prec, expdiff, context)
+ else:
+ return self._round_up(prec, expdiff, context)
+
+ def _round_floor(self, prec, expdiff, context):
+ """Rounds down (not towards 0 if negative)"""
+ if not self._sign:
+ return self._round_down(prec, expdiff, context)
+ else:
+ return self._round_up(prec, expdiff, context)
+
    def __pow__(self, n, modulo = None, context=None):
        """Return self ** n (mod modulo)

        If modulo is None (default), don't take it mod modulo.
        n must be integral; computed by binary (repeated-squaring)
        exponentiation at a widened working precision.
        """
        n = _convert_other(n)
        if n is NotImplemented:
            return n

        if context is None:
            context = getcontext()

        if self._is_special or n._is_special or n.adjusted() > 8:
            #Because the spot << doesn't work with really big exponents
            if n._isinfinity() or n.adjusted() > 8:
                return context._raise_error(InvalidOperation, 'x ** INF')

            ans = self._check_nans(n, context)
            if ans:
                return ans

        if not n._isinteger():
            return context._raise_error(InvalidOperation, 'x ** (non-integer)')

        if not self and not n:
            return context._raise_error(InvalidOperation, '0 ** 0')

        if not n:
            return Decimal(1)

        if self == Decimal(1):
            return Decimal(1)

        # Result is negative only for a negative base and an odd exponent.
        sign = self._sign and not n._iseven()
        n = int(n)

        if self._isinfinity():
            if modulo:
                return context._raise_error(InvalidOperation, 'INF % x')
            if n > 0:
                return Infsign[sign]
            return Decimal( (sign, (0,), 0) )

        #with ludicrously large exponent, just raise an overflow and return inf.
        if not modulo and n > 0 and (self._exp + len(self._int) - 1) * n > context.Emax \
           and self:

            tmp = Decimal('inf')
            tmp._sign = sign
            context._raise_error(Rounded)
            context._raise_error(Inexact)
            context._raise_error(Overflow, 'Big power', sign)
            return tmp

        elength = len(str(abs(n)))
        firstprec = context.prec

        if not modulo and firstprec + elength + 1 > DefaultContext.Emax:
            return context._raise_error(Overflow, 'Too much precision.', sign)

        # Work at an enlarged precision so intermediate rounding cannot
        # corrupt the final rounded result.
        mul = Decimal(self)
        val = Decimal(1)
        context = context._shallow_copy()
        context.prec = firstprec + elength + 1
        if n < 0:
            #n is a long now, not Decimal instance
            n = -n
            mul = Decimal(1).__div__(mul, context=context)

        spot = 1
        while spot <= n:
            spot <<= 1

        spot >>= 1
        #Spot is the highest power of 2 less than n
        while spot:
            val = val.__mul__(val, context=context)
            if val._isinfinity():
                val = Infsign[sign]
                break
            if spot & n:
                val = val.__mul__(mul, context=context)
            if modulo is not None:
                val = val.__mod__(modulo, context=context)
            spot >>= 1
        context.prec = firstprec

        if context._rounding_decision == ALWAYS_ROUND:
            return val._fix(context)
        return val
+
+ def __rpow__(self, other, context=None):
+ """Swaps self/other and returns __pow__."""
+ other = _convert_other(other)
+ if other is NotImplemented:
+ return other
+ return other.__pow__(self, context=context)
+
+ def normalize(self, context=None):
+ """Normalize- strip trailing 0s, change anything equal to 0 to 0e0"""
+
+ if self._is_special:
+ ans = self._check_nans(context=context)
+ if ans:
+ return ans
+
+ dup = self._fix(context)
+ if dup._isinfinity():
+ return dup
+
+ if not dup:
+ return Decimal( (dup._sign, (0,), 0) )
+ end = len(dup._int)
+ exp = dup._exp
+ while dup._int[end-1] == 0:
+ exp += 1
+ end -= 1
+ return Decimal( (dup._sign, dup._int[:end], exp) )
+
+
+ def quantize(self, exp, rounding=None, context=None, watchexp=1):
+ """Quantize self so its exponent is the same as that of exp.
+
+ Similar to self._rescale(exp._exp) but with error checking.
+ """
+ if self._is_special or exp._is_special:
+ ans = self._check_nans(exp, context)
+ if ans:
+ return ans
+
+ if exp._isinfinity() or self._isinfinity():
+ if exp._isinfinity() and self._isinfinity():
+ return self #if both are inf, it is OK
+ if context is None:
+ context = getcontext()
+ return context._raise_error(InvalidOperation,
+ 'quantize with one INF')
+ return self._rescale(exp._exp, rounding, context, watchexp)
+
+ def same_quantum(self, other):
+ """Test whether self and other have the same exponent.
+
+ same as self._exp == other._exp, except NaN == sNaN
+ """
+ if self._is_special or other._is_special:
+ if self._isnan() or other._isnan():
+ return self._isnan() and other._isnan() and True
+ if self._isinfinity() or other._isinfinity():
+ return self._isinfinity() and other._isinfinity() and True
+ return self._exp == other._exp
+
    def _rescale(self, exp, rounding=None, context=None, watchexp=1):
        """Rescales so that the exponent is exp.

        exp = exp to scale to (an integer)
        rounding = rounding version
        watchexp: if set (default) an error is returned if exp is greater
        than Emax or less than Etiny.
        """
        if context is None:
            context = getcontext()

        if self._is_special:
            if self._isinfinity():
                return context._raise_error(InvalidOperation, 'rescale with an INF')

            ans = self._check_nans(context=context)
            if ans:
                return ans

        if watchexp and (context.Emax < exp or context.Etiny() > exp):
            return context._raise_error(InvalidOperation, 'rescale(a, INF)')

        if not self:
            # A zero just takes the requested exponent.
            ans = Decimal(self)
            ans._int = (0,)
            ans._exp = exp
            return ans

        diff = self._exp - exp
        digits = len(self._int) + diff

        if watchexp and digits > context.prec:
            return context._raise_error(InvalidOperation, 'Rescale > prec')

        # Prepend a guard zero so rounding has room for a carry digit.
        tmp = Decimal(self)
        tmp._int = (0,) + tmp._int
        digits += 1

        if digits < 0:
            # Every digit is lost: round a tiny stand-in (0,1) instead.
            tmp._exp = -digits + tmp._exp
            tmp._int = (0,1)
            digits = 1
        tmp = tmp._round(digits, rounding, context=context)

        # Remove the guard zero if no carry consumed it.
        if tmp._int[0] == 0 and len(tmp._int) > 1:
            tmp._int = tmp._int[1:]
        tmp._exp = exp

        tmp_adjusted = tmp.adjusted()
        if tmp and tmp_adjusted < context.Emin:
            context._raise_error(Subnormal)
        elif tmp and tmp_adjusted > context.Emax:
            return context._raise_error(InvalidOperation, 'rescale(a, INF)')
        return tmp
+
+ def to_integral(self, rounding=None, context=None):
+ """Rounds to the nearest integer, without raising inexact, rounded."""
+ if self._is_special:
+ ans = self._check_nans(context=context)
+ if ans:
+ return ans
+ return self
+ if self._exp >= 0:
+ return self
+ if context is None:
+ context = getcontext()
+ flags = context._ignore_flags(Rounded, Inexact)
+ ans = self._rescale(0, rounding, context=context)
+ context._regard_flags(flags)
+ return ans
+
+    def sqrt(self, context=None):
+        """Return the square root of self.
+
+        Uses a converging algorithm (Xn+1 = 0.5*(Xn + self / Xn))
+        Should quadratically approach the right answer.
+        """
+        if self._is_special:
+            ans = self._check_nans(context=context)
+            if ans:
+                return ans
+
+            # sqrt(+Inf) is +Inf; negative infinity falls through to the
+            # sign check below and signals InvalidOperation.
+            if self._isinfinity() and self._sign == 0:
+                return Decimal(self)
+
+        if not self:
+            #exponent = self._exp / 2, using round_down.
+            #if self._exp < 0:
+            #    exp = (self._exp+1) // 2
+            #else:
+            exp = (self._exp) // 2
+            if self._sign == 1:
+                #sqrt(-0) = -0
+                return Decimal( (1, (0,), exp))
+            else:
+                return Decimal( (0, (0,), exp))
+
+        if context is None:
+            context = getcontext()
+
+        if self._sign == 1:
+            return context._raise_error(InvalidOperation, 'sqrt(-x), x > 0')
+
+        tmp = Decimal(self)
+
+        # Work with an even exponent: half of it (expadd) is split off now
+        # and added back onto the result at the end.  An odd exponent gets
+        # a trailing zero digit so the coefficient absorbs the odd factor.
+        expadd = tmp._exp // 2
+        if tmp._exp & 1:
+            tmp._int += (0,)
+            tmp._exp = 0
+        else:
+            tmp._exp = 0
+
+        # All intermediate work happens on a private context copy with all
+        # signals ignored; flags are re-regarded before returning.
+        context = context._shallow_copy()
+        flags = context._ignore_all_flags()
+        firstprec = context.prec
+        context.prec = 3
+        # Seed the iteration with a 3-digit linear approximation; the two
+        # branches pick coefficients depending on the parity of the
+        # adjusted exponent.
+        if tmp.adjusted() & 1 == 0:
+            ans = Decimal( (0, (8,1,9), tmp.adjusted() - 2) )
+            ans = ans.__add__(tmp.__mul__(Decimal((0, (2,5,9), -2)),
+                                          context=context), context=context)
+            ans._exp -= 1 + tmp.adjusted() // 2
+        else:
+            ans = Decimal( (0, (2,5,9), tmp._exp + len(tmp._int)- 3) )
+            ans = ans.__add__(tmp.__mul__(Decimal((0, (8,1,9), -3)),
+                                          context=context), context=context)
+            ans._exp -= 1 + tmp.adjusted() // 2
+
+        #ans is now a linear approximation.
+
+        # Widen the exponent range so intermediate steps cannot overflow;
+        # the caller's limits are restored before the final _fix.
+        Emax, Emin = context.Emax, context.Emin
+        context.Emax, context.Emin = DefaultContext.Emax, DefaultContext.Emin
+
+        half = Decimal('0.5')
+
+        # Newton iteration, roughly doubling the working precision each
+        # pass until it reaches firstprec + 2 guard digits.
+        maxp = firstprec + 2
+        rounding = context._set_rounding(ROUND_HALF_EVEN)
+        while 1:
+            context.prec = min(2*context.prec - 2, maxp)
+            ans = half.__mul__(ans.__add__(tmp.__div__(ans, context=context),
+                                           context=context), context=context)
+            if context.prec == maxp:
+                break
+
+        #round to the answer's precision-- the only error can be 1 ulp.
+        context.prec = firstprec
+        prevexp = ans.adjusted()
+        ans = ans._round(context=context)
+
+        #Now, check if the other last digits are better.
+        context.prec = firstprec + 1
+        # In case we rounded up another digit and we should actually go lower.
+        if prevexp != ans.adjusted():
+            ans._int += (0,)
+            ans._exp -= 1
+
+
+        # Decide between ans and its 1-ulp neighbours by squaring the
+        # half-ulp boundaries and comparing against tmp.
+        lower = ans.__sub__(Decimal((0, (5,), ans._exp-1)), context=context)
+        context._set_rounding(ROUND_UP)
+        if lower.__mul__(lower, context=context) > (tmp):
+            ans = ans.__sub__(Decimal((0, (1,), ans._exp)), context=context)
+
+        else:
+            upper = ans.__add__(Decimal((0, (5,), ans._exp-1)),context=context)
+            context._set_rounding(ROUND_DOWN)
+            if upper.__mul__(upper, context=context) < tmp:
+                ans = ans.__add__(Decimal((0, (1,), ans._exp)),context=context)
+
+        # Reattach the half-exponent split off at the start.
+        ans._exp += expadd
+
+        context.prec = firstprec
+        context.rounding = rounding
+        ans = ans._fix(context)
+
+        rounding = context._set_rounding_decision(NEVER_ROUND)
+        if not ans.__mul__(ans, context=context) == self:
+            # Only rounded/inexact if here.
+            context._regard_flags(flags)
+            context._raise_error(Rounded)
+            context._raise_error(Inexact)
+        else:
+            #Exact answer, so let's set the exponent right.
+            #if self._exp < 0:
+            #    exp = (self._exp +1)// 2
+            #else:
+            exp = self._exp // 2
+            context.prec += ans._exp - exp
+            ans = ans._rescale(exp, context=context)
+            context.prec = firstprec
+            context._regard_flags(flags)
+        context.Emax, context.Emin = Emax, Emin
+
+        return ans._fix(context)
+
+    def max(self, other, context=None):
+        """Returns the larger value.
+
+        like max(self, other) except if one is not a number, returns
+        NaN (and signals if one is sNaN).  Also rounds.
+        """
+        other = _convert_other(other)
+        if other is NotImplemented:
+            return other
+
+        if self._is_special or other._is_special:
+            # If one operand is a quiet NaN and the other is a number, the
+            # number is always returned.  _isnan() codes: 1 = quiet NaN,
+            # 2 = signalling NaN, so a quiet NaN loses unless the other
+            # side is an sNaN (which must signal via _check_nans).
+            sn = self._isnan()
+            on = other._isnan()
+            if sn or on:
+                if on == 1 and sn != 2:
+                    return self
+                if sn == 1 and on != 2:
+                    return other
+                return self._check_nans(other, context)
+
+        ans = self
+        c = self.__cmp__(other)
+        if c == 0:
+            # if both operands are finite and equal in numerical value
+            # then an ordering is applied:
+            #
+            # if the signs differ then max returns the operand with the
+            # positive sign and min returns the operand with the negative sign
+            #
+            # if the signs are the same then the exponent is used to select
+            # the result.
+            if self._sign != other._sign:
+                if self._sign:
+                    ans = other
+            elif self._exp < other._exp and not self._sign:
+                ans = other
+            elif self._exp > other._exp and self._sign:
+                ans = other
+        elif c == -1:
+            ans = other
+
+        if context is None:
+            context = getcontext()
+        # Honour the context's rounding decision before returning.
+        if context._rounding_decision == ALWAYS_ROUND:
+            return ans._fix(context)
+        return ans
+
+    def min(self, other, context=None):
+        """Returns the smaller value.
+
+        like min(self, other) except if one is not a number, returns
+        NaN (and signals if one is sNaN).  Also rounds.
+        """
+        other = _convert_other(other)
+        if other is NotImplemented:
+            return other
+
+        if self._is_special or other._is_special:
+            # If one operand is a quiet NaN and the other is a number, the
+            # number is always returned.  _isnan() codes: 1 = quiet NaN,
+            # 2 = signalling NaN, so a quiet NaN loses unless the other
+            # side is an sNaN (which must signal via _check_nans).
+            sn = self._isnan()
+            on = other._isnan()
+            if sn or on:
+                if on == 1 and sn != 2:
+                    return self
+                if sn == 1 and on != 2:
+                    return other
+                return self._check_nans(other, context)
+
+        ans = self
+        c = self.__cmp__(other)
+        if c == 0:
+            # if both operands are finite and equal in numerical value
+            # then an ordering is applied:
+            #
+            # if the signs differ then max returns the operand with the
+            # positive sign and min returns the operand with the negative sign
+            #
+            # if the signs are the same then the exponent is used to select
+            # the result.
+            if self._sign != other._sign:
+                if other._sign:
+                    ans = other
+            elif self._exp > other._exp and not self._sign:
+                ans = other
+            elif self._exp < other._exp and self._sign:
+                ans = other
+        elif c == 1:
+            ans = other
+
+        if context is None:
+            context = getcontext()
+        # Honour the context's rounding decision before returning.
+        if context._rounding_decision == ALWAYS_ROUND:
+            return ans._fix(context)
+        return ans
+
+ def _isinteger(self):
+ """Returns whether self is an integer"""
+ if self._exp >= 0:
+ return True
+ rest = self._int[self._exp:]
+ return rest == (0,)*len(rest)
+
+    def _iseven(self):
+        """Returns 1 if self is even.  Assumes self is an integer."""
+        # A positive exponent means trailing zeros, hence divisible by 10.
+        if self._exp > 0:
+            return 1
+        # Parity comes from the units digit, located -1+self._exp from the
+        # end of the coefficient tuple (self._exp <= 0 here).
+        return self._int[-1+self._exp] & 1 == 0
+
+    def adjusted(self):
+        """Return the adjusted exponent of self"""
+        # adjusted exponent = exponent of the most significant digit.
+        try:
+            return self._exp + len(self._int) - 1
+        #If NaN or Infinity, self._exp is string
+        except TypeError:
+            return 0
+
+    # support for pickling, copy, and deepcopy
+    def __reduce__(self):
+        # Pickle via the string form: round-trips exactly and is
+        # version-independent.
+        return (self.__class__, (str(self),))
+
+    def __copy__(self):
+        if type(self) == Decimal:
+            return self     # I'm immutable; therefore I am my own clone
+        # Subclasses may be mutable, so rebuild from the string form.
+        return self.__class__(str(self))
+
+    def __deepcopy__(self, memo):
+        if type(self) == Decimal:
+            return self     # My components are also immutable
+        # Subclasses may be mutable, so rebuild from the string form.
+        return self.__class__(str(self))
+
+##### Context class ###########################################
+
+
+# get rounding method function:
+# Map each ROUND_* constant to the name of the Decimal._round_* method
+# implementing it, so rounding can be dispatched through
+# Decimal._pick_rounding_function.
+rounding_functions = [name for name in Decimal.__dict__.keys() if name.startswith('_round_')]
+for name in rounding_functions:
+    #name is like _round_half_even, goes to the global ROUND_HALF_EVEN value.
+    globalname = name[1:].upper()
+    val = globals()[globalname]
+    Decimal._pick_rounding_function[val] = name
+
+# Don't leak the loop helpers into the module namespace.
+del name, val, globalname, rounding_functions
+
+class _ContextManager(object):
+    """Context manager class to support localcontext().
+
+    Sets a copy of the supplied context in __enter__() and restores
+    the previous decimal context in __exit__()
+    """
+    def __init__(self, new_context):
+        # Work on a copy so the caller's context object is never mutated.
+        self.new_context = new_context.copy()
+    def __enter__(self):
+        # Remember the active context so __exit__ can restore it.
+        self.saved_context = getcontext()
+        setcontext(self.new_context)
+        return self.new_context
+    def __exit__(self, t, v, tb):
+        # Restore unconditionally; exceptions (t, v, tb) are not suppressed.
+        setcontext(self.saved_context)
+
+class Context(object):
+ """Contains the context for a Decimal instance.
+
+ Contains:
+ prec - precision (for use in rounding, division, square roots..)
+ rounding - rounding type. (how you round)
+ _rounding_decision - ALWAYS_ROUND, NEVER_ROUND -- do you round?
+ traps - If traps[exception] = 1, then the exception is
+ raised when it is caused. Otherwise, a value is
+ substituted in.
+ flags - When an exception is caused, flags[exception] is incremented.
+ (Whether or not the trap_enabler is set)
+ Should be reset by user of Decimal instance.
+ Emin - Minimum exponent
+ Emax - Maximum exponent
+ capitals - If 1, 1*10^1 is printed as 1E+1.
+ If 0, printed as 1e1
+ _clamp - If 1, change exponents if too high (Default 0)
+ """
+
+    def __init__(self, prec=None, rounding=None,
+                 traps=None, flags=None,
+                 _rounding_decision=None,
+                 Emin=None, Emax=None,
+                 capitals=None, _clamp=0,
+                 _ignored_flags=None):
+        # Any argument left as None is filled in from DefaultContext below.
+        if flags is None:
+            flags = []
+        if _ignored_flags is None:
+            _ignored_flags = []
+        # flags/traps may be given as a sequence of signal classes; convert
+        # to a {signal: bool} dict over all known signals.
+        if not isinstance(flags, dict):
+            flags = dict([(s,s in flags) for s in _signals])
+            # 's' leaks from the list comprehension in Python 2; clean it up.
+            del s
+        if traps is not None and not isinstance(traps, dict):
+            traps = dict([(s,s in traps) for s in _signals])
+            del s
+        # Copy every parameter onto the instance; None means "inherit the
+        # default from DefaultContext" (copied, since defaults are mutable).
+        for name, val in locals().items():
+            if val is None:
+                setattr(self, name, _copy.copy(getattr(DefaultContext, name)))
+            else:
+                setattr(self, name, val)
+        # locals() included 'self' itself; drop the accidental attribute.
+        del self.self
+
+    def __repr__(self):
+        """Show the current context."""
+        s = []
+        s.append('Context(prec=%(prec)d, rounding=%(rounding)s, Emin=%(Emin)d, Emax=%(Emax)d, capitals=%(capitals)d' % vars(self))
+        # Only signals that are currently set/enabled are listed.
+        s.append('flags=[' + ', '.join([f.__name__ for f, v in self.flags.items() if v]) + ']')
+        s.append('traps=[' + ', '.join([t.__name__ for t, v in self.traps.items() if v]) + ']')
+        return ', '.join(s) + ')'
+
+    def clear_flags(self):
+        """Reset all flags to zero"""
+        # Keys are kept; only the counters are zeroed.
+        for flag in self.flags:
+            self.flags[flag] = 0
+
+    def _shallow_copy(self):
+        """Returns a shallow copy from self."""
+        # The traps/flags dicts are shared with self (contrast with copy()).
+        nc = Context(self.prec, self.rounding, self.traps, self.flags,
+                         self._rounding_decision, self.Emin, self.Emax,
+                         self.capitals, self._clamp, self._ignored_flags)
+        return nc
+
+    def copy(self):
+        """Returns a deep copy from self."""
+        # traps and flags get their own dicts so mutating the copy cannot
+        # affect the original context.
+        nc = Context(self.prec, self.rounding, self.traps.copy(), self.flags.copy(),
+                         self._rounding_decision, self.Emin, self.Emax,
+                         self.capitals, self._clamp, self._ignored_flags)
+        return nc
+    __copy__ = copy
+
+    def _raise_error(self, condition, explanation = None, *args):
+        """Handles an error
+
+        If the flag is in _ignored_flags, returns the default response.
+        Otherwise, it increments the flag, then, if the corresponding
+        trap_enabler is set, it raises the exception.  Otherwise, it returns
+        the default value after incrementing the flag.
+        """
+        # Conditions (e.g. entries of _condition_map) may be flagged and
+        # trapped under a different signal class than the one raised here.
+        error = _condition_map.get(condition, condition)
+        if error in self._ignored_flags:
+            #Don't touch the flag
+            return error().handle(self, *args)
+
+        self.flags[error] += 1
+        if not self.traps[error]:
+            #The errors define how to handle themselves.
+            return condition().handle(self, *args)
+
+        # Errors should only be risked on copies of the context
+        #self._ignored_flags = []
+        raise error, explanation
+
+    def _ignore_all_flags(self):
+        """Ignore all flags, if they are raised"""
+        # Convenience wrapper over _ignore_flags for every known signal.
+        return self._ignore_flags(*_signals)
+
+    def _ignore_flags(self, *flags):
+        """Ignore the flags, if they are raised"""
+        # Do not mutate-- This way, copies of a context leave the original
+        # alone.
+        self._ignored_flags = (self._ignored_flags + list(flags))
+        # The returned list is what callers pass back to _regard_flags.
+        return list(flags)
+
+    def _regard_flags(self, *flags):
+        """Stop ignoring the flags, if they are raised"""
+        # Accept either _regard_flags(a, b) or _regard_flags([a, b]) — the
+        # latter is what _ignore_flags returns.
+        if flags and isinstance(flags[0], (tuple,list)):
+            flags = flags[0]
+        for flag in flags:
+            self._ignored_flags.remove(flag)
+
+    def __hash__(self):
+        """A Context cannot be hashed."""
+        # We inherit object.__hash__, so we must deny this explicitly
+        raise TypeError, "Cannot hash a Context."
+
+    def Etiny(self):
+        """Returns Etiny (= Emin - prec + 1)"""
+        # int() normalizes the result in case Emin/prec are longs.
+        return int(self.Emin - self.prec + 1)
+
+    def Etop(self):
+        """Returns maximum exponent (= Emax - prec + 1)"""
+        # int() normalizes the result in case Emax/prec are longs.
+        return int(self.Emax - self.prec + 1)
+
+    def _set_rounding_decision(self, type):
+        """Sets the rounding decision.
+
+        Sets the rounding decision, and returns the current (previous)
+        rounding decision.  Often used like:
+
+        context = context._shallow_copy()
+        # That so you don't change the calling context
+        # if an error occurs in the middle (say DivisionImpossible is raised).
+
+        rounding = context._set_rounding_decision(NEVER_ROUND)
+        instance = instance / Decimal(2)
+        context._set_rounding_decision(rounding)
+
+        This will make it not round for that operation.
+        """
+        rounding = self._rounding_decision
+        self._rounding_decision = type
+        return rounding
+
+    def _set_rounding(self, type):
+        """Sets the rounding type.
+
+        Sets the rounding type, and returns the current (previous)
+        rounding type.  Often used like:
+
+        context = context.copy()
+        # so you don't change the calling context
+        # if an error occurs in the middle.
+        rounding = context._set_rounding(ROUND_UP)
+        val = self.__sub__(other, context=context)
+        context._set_rounding(rounding)
+
+        This will make it round up for that operation.
+        """
+        rounding = self.rounding
+        self.rounding = type
+        return rounding
+
+    def create_decimal(self, num='0'):
+        """Creates a new Decimal instance but using self as context."""
+        d = Decimal(num, context=self)
+        # Apply this context's precision and rounding to the fresh value.
+        return d._fix(self)
+
+ #Methods
+ def abs(self, a):
+ """Returns the absolute value of the operand.
+
+ If the operand is negative, the result is the same as using the minus
+ operation on the operand. Otherwise, the result is the same as using
+ the plus operation on the operand.
+
+ >>> ExtendedContext.abs(Decimal('2.1'))
+ Decimal("2.1")
+ >>> ExtendedContext.abs(Decimal('-100'))
+ Decimal("100")
+ >>> ExtendedContext.abs(Decimal('101.5'))
+ Decimal("101.5")
+ >>> ExtendedContext.abs(Decimal('-101.5'))
+ Decimal("101.5")
+ """
+ return a.__abs__(context=self)
+
+ def add(self, a, b):
+ """Return the sum of the two operands.
+
+ >>> ExtendedContext.add(Decimal('12'), Decimal('7.00'))
+ Decimal("19.00")
+ >>> ExtendedContext.add(Decimal('1E+2'), Decimal('1.01E+4'))
+ Decimal("1.02E+4")
+ """
+ return a.__add__(b, context=self)
+
+ def _apply(self, a):
+ return str(a._fix(self))
+
+ def compare(self, a, b):
+ """Compares values numerically.
+
+ If the signs of the operands differ, a value representing each operand
+ ('-1' if the operand is less than zero, '0' if the operand is zero or
+ negative zero, or '1' if the operand is greater than zero) is used in
+ place of that operand for the comparison instead of the actual
+ operand.
+
+ The comparison is then effected by subtracting the second operand from
+ the first and then returning a value according to the result of the
+ subtraction: '-1' if the result is less than zero, '0' if the result is
+ zero or negative zero, or '1' if the result is greater than zero.
+
+ >>> ExtendedContext.compare(Decimal('2.1'), Decimal('3'))
+ Decimal("-1")
+ >>> ExtendedContext.compare(Decimal('2.1'), Decimal('2.1'))
+ Decimal("0")
+ >>> ExtendedContext.compare(Decimal('2.1'), Decimal('2.10'))
+ Decimal("0")
+ >>> ExtendedContext.compare(Decimal('3'), Decimal('2.1'))
+ Decimal("1")
+ >>> ExtendedContext.compare(Decimal('2.1'), Decimal('-3'))
+ Decimal("1")
+ >>> ExtendedContext.compare(Decimal('-3'), Decimal('2.1'))
+ Decimal("-1")
+ """
+ return a.compare(b, context=self)
+
+ def divide(self, a, b):
+ """Decimal division in a specified context.
+
+ >>> ExtendedContext.divide(Decimal('1'), Decimal('3'))
+ Decimal("0.333333333")
+ >>> ExtendedContext.divide(Decimal('2'), Decimal('3'))
+ Decimal("0.666666667")
+ >>> ExtendedContext.divide(Decimal('5'), Decimal('2'))
+ Decimal("2.5")
+ >>> ExtendedContext.divide(Decimal('1'), Decimal('10'))
+ Decimal("0.1")
+ >>> ExtendedContext.divide(Decimal('12'), Decimal('12'))
+ Decimal("1")
+ >>> ExtendedContext.divide(Decimal('8.00'), Decimal('2'))
+ Decimal("4.00")
+ >>> ExtendedContext.divide(Decimal('2.400'), Decimal('2.0'))
+ Decimal("1.20")
+ >>> ExtendedContext.divide(Decimal('1000'), Decimal('100'))
+ Decimal("10")
+ >>> ExtendedContext.divide(Decimal('1000'), Decimal('1'))
+ Decimal("1000")
+ >>> ExtendedContext.divide(Decimal('2.40E+6'), Decimal('2'))
+ Decimal("1.20E+6")
+ """
+ return a.__div__(b, context=self)
+
+ def divide_int(self, a, b):
+ """Divides two numbers and returns the integer part of the result.
+
+ >>> ExtendedContext.divide_int(Decimal('2'), Decimal('3'))
+ Decimal("0")
+ >>> ExtendedContext.divide_int(Decimal('10'), Decimal('3'))
+ Decimal("3")
+ >>> ExtendedContext.divide_int(Decimal('1'), Decimal('0.3'))
+ Decimal("3")
+ """
+ return a.__floordiv__(b, context=self)
+
+ def divmod(self, a, b):
+ return a.__divmod__(b, context=self)
+
+ def max(self, a,b):
+ """max compares two values numerically and returns the maximum.
+
+ If either operand is a NaN then the general rules apply.
+        Otherwise, the operands are compared as though by the compare
+ operation. If they are numerically equal then the left-hand operand
+ is chosen as the result. Otherwise the maximum (closer to positive
+ infinity) of the two operands is chosen as the result.
+
+ >>> ExtendedContext.max(Decimal('3'), Decimal('2'))
+ Decimal("3")
+ >>> ExtendedContext.max(Decimal('-10'), Decimal('3'))
+ Decimal("3")
+ >>> ExtendedContext.max(Decimal('1.0'), Decimal('1'))
+ Decimal("1")
+ >>> ExtendedContext.max(Decimal('7'), Decimal('NaN'))
+ Decimal("7")
+ """
+ return a.max(b, context=self)
+
+ def min(self, a,b):
+ """min compares two values numerically and returns the minimum.
+
+ If either operand is a NaN then the general rules apply.
+        Otherwise, the operands are compared as though by the compare
+ operation. If they are numerically equal then the left-hand operand
+ is chosen as the result. Otherwise the minimum (closer to negative
+ infinity) of the two operands is chosen as the result.
+
+ >>> ExtendedContext.min(Decimal('3'), Decimal('2'))
+ Decimal("2")
+ >>> ExtendedContext.min(Decimal('-10'), Decimal('3'))
+ Decimal("-10")
+ >>> ExtendedContext.min(Decimal('1.0'), Decimal('1'))
+ Decimal("1.0")
+ >>> ExtendedContext.min(Decimal('7'), Decimal('NaN'))
+ Decimal("7")
+ """
+ return a.min(b, context=self)
+
+ def minus(self, a):
+ """Minus corresponds to unary prefix minus in Python.
+
+ The operation is evaluated using the same rules as subtract; the
+ operation minus(a) is calculated as subtract('0', a) where the '0'
+ has the same exponent as the operand.
+
+ >>> ExtendedContext.minus(Decimal('1.3'))
+ Decimal("-1.3")
+ >>> ExtendedContext.minus(Decimal('-1.3'))
+ Decimal("1.3")
+ """
+ return a.__neg__(context=self)
+
+ def multiply(self, a, b):
+ """multiply multiplies two operands.
+
+ If either operand is a special value then the general rules apply.
+ Otherwise, the operands are multiplied together ('long multiplication'),
+ resulting in a number which may be as long as the sum of the lengths
+ of the two operands.
+
+ >>> ExtendedContext.multiply(Decimal('1.20'), Decimal('3'))
+ Decimal("3.60")
+ >>> ExtendedContext.multiply(Decimal('7'), Decimal('3'))
+ Decimal("21")
+ >>> ExtendedContext.multiply(Decimal('0.9'), Decimal('0.8'))
+ Decimal("0.72")
+ >>> ExtendedContext.multiply(Decimal('0.9'), Decimal('-0'))
+ Decimal("-0.0")
+ >>> ExtendedContext.multiply(Decimal('654321'), Decimal('654321'))
+ Decimal("4.28135971E+11")
+ """
+ return a.__mul__(b, context=self)
+
+ def normalize(self, a):
+ """normalize reduces an operand to its simplest form.
+
+ Essentially a plus operation with all trailing zeros removed from the
+ result.
+
+ >>> ExtendedContext.normalize(Decimal('2.1'))
+ Decimal("2.1")
+ >>> ExtendedContext.normalize(Decimal('-2.0'))
+ Decimal("-2")
+ >>> ExtendedContext.normalize(Decimal('1.200'))
+ Decimal("1.2")
+ >>> ExtendedContext.normalize(Decimal('-120'))
+ Decimal("-1.2E+2")
+ >>> ExtendedContext.normalize(Decimal('120.00'))
+ Decimal("1.2E+2")
+ >>> ExtendedContext.normalize(Decimal('0.00'))
+ Decimal("0")
+ """
+ return a.normalize(context=self)
+
+ def plus(self, a):
+ """Plus corresponds to unary prefix plus in Python.
+
+ The operation is evaluated using the same rules as add; the
+ operation plus(a) is calculated as add('0', a) where the '0'
+ has the same exponent as the operand.
+
+ >>> ExtendedContext.plus(Decimal('1.3'))
+ Decimal("1.3")
+ >>> ExtendedContext.plus(Decimal('-1.3'))
+ Decimal("-1.3")
+ """
+ return a.__pos__(context=self)
+
+ def power(self, a, b, modulo=None):
+ """Raises a to the power of b, to modulo if given.
+
+ The right-hand operand must be a whole number whose integer part (after
+ any exponent has been applied) has no more than 9 digits and whose
+ fractional part (if any) is all zeros before any rounding. The operand
+ may be positive, negative, or zero; if negative, the absolute value of
+ the power is used, and the left-hand operand is inverted (divided into
+ 1) before use.
+
+ If the increased precision needed for the intermediate calculations
+ exceeds the capabilities of the implementation then an Invalid operation
+ condition is raised.
+
+ If, when raising to a negative power, an underflow occurs during the
+ division into 1, the operation is not halted at that point but
+ continues.
+
+ >>> ExtendedContext.power(Decimal('2'), Decimal('3'))
+ Decimal("8")
+ >>> ExtendedContext.power(Decimal('2'), Decimal('-3'))
+ Decimal("0.125")
+ >>> ExtendedContext.power(Decimal('1.7'), Decimal('8'))
+ Decimal("69.7575744")
+ >>> ExtendedContext.power(Decimal('Infinity'), Decimal('-2'))
+ Decimal("0")
+ >>> ExtendedContext.power(Decimal('Infinity'), Decimal('-1'))
+ Decimal("0")
+ >>> ExtendedContext.power(Decimal('Infinity'), Decimal('0'))
+ Decimal("1")
+ >>> ExtendedContext.power(Decimal('Infinity'), Decimal('1'))
+ Decimal("Infinity")
+ >>> ExtendedContext.power(Decimal('Infinity'), Decimal('2'))
+ Decimal("Infinity")
+ >>> ExtendedContext.power(Decimal('-Infinity'), Decimal('-2'))
+ Decimal("0")
+ >>> ExtendedContext.power(Decimal('-Infinity'), Decimal('-1'))
+ Decimal("-0")
+ >>> ExtendedContext.power(Decimal('-Infinity'), Decimal('0'))
+ Decimal("1")
+ >>> ExtendedContext.power(Decimal('-Infinity'), Decimal('1'))
+ Decimal("-Infinity")
+ >>> ExtendedContext.power(Decimal('-Infinity'), Decimal('2'))
+ Decimal("Infinity")
+ >>> ExtendedContext.power(Decimal('0'), Decimal('0'))
+ Decimal("NaN")
+ """
+ return a.__pow__(b, modulo, context=self)
+
+ def quantize(self, a, b):
+ """Returns a value equal to 'a' (rounded) and having the exponent of 'b'.
+
+ The coefficient of the result is derived from that of the left-hand
+ operand. It may be rounded using the current rounding setting (if the
+ exponent is being increased), multiplied by a positive power of ten (if
+ the exponent is being decreased), or is unchanged (if the exponent is
+ already equal to that of the right-hand operand).
+
+ Unlike other operations, if the length of the coefficient after the
+ quantize operation would be greater than precision then an Invalid
+ operation condition is raised. This guarantees that, unless there is an
+ error condition, the exponent of the result of a quantize is always
+ equal to that of the right-hand operand.
+
+ Also unlike other operations, quantize will never raise Underflow, even
+ if the result is subnormal and inexact.
+
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.001'))
+ Decimal("2.170")
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.01'))
+ Decimal("2.17")
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.1'))
+ Decimal("2.2")
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('1e+0'))
+ Decimal("2")
+ >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('1e+1'))
+ Decimal("0E+1")
+ >>> ExtendedContext.quantize(Decimal('-Inf'), Decimal('Infinity'))
+ Decimal("-Infinity")
+ >>> ExtendedContext.quantize(Decimal('2'), Decimal('Infinity'))
+ Decimal("NaN")
+ >>> ExtendedContext.quantize(Decimal('-0.1'), Decimal('1'))
+ Decimal("-0")
+ >>> ExtendedContext.quantize(Decimal('-0'), Decimal('1e+5'))
+ Decimal("-0E+5")
+ >>> ExtendedContext.quantize(Decimal('+35236450.6'), Decimal('1e-2'))
+ Decimal("NaN")
+ >>> ExtendedContext.quantize(Decimal('-35236450.6'), Decimal('1e-2'))
+ Decimal("NaN")
+ >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e-1'))
+ Decimal("217.0")
+ >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e-0'))
+ Decimal("217")
+ >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e+1'))
+ Decimal("2.2E+2")
+ >>> ExtendedContext.quantize(Decimal('217'), Decimal('1e+2'))
+ Decimal("2E+2")
+ """
+ return a.quantize(b, context=self)
+
+ def remainder(self, a, b):
+ """Returns the remainder from integer division.
+
+ The result is the residue of the dividend after the operation of
+ calculating integer division as described for divide-integer, rounded to
+ precision digits if necessary. The sign of the result, if non-zero, is
+ the same as that of the original dividend.
+
+ This operation will fail under the same conditions as integer division
+ (that is, if integer division on the same two operands would fail, the
+ remainder cannot be calculated).
+
+ >>> ExtendedContext.remainder(Decimal('2.1'), Decimal('3'))
+ Decimal("2.1")
+ >>> ExtendedContext.remainder(Decimal('10'), Decimal('3'))
+ Decimal("1")
+ >>> ExtendedContext.remainder(Decimal('-10'), Decimal('3'))
+ Decimal("-1")
+ >>> ExtendedContext.remainder(Decimal('10.2'), Decimal('1'))
+ Decimal("0.2")
+ >>> ExtendedContext.remainder(Decimal('10'), Decimal('0.3'))
+ Decimal("0.1")
+ >>> ExtendedContext.remainder(Decimal('3.6'), Decimal('1.3'))
+ Decimal("1.0")
+ """
+ return a.__mod__(b, context=self)
+
+ def remainder_near(self, a, b):
+        """Returns "a - b * n", where n is the integer nearest the exact
+        value of "a / b" (if two integers are equally near then the even one
+ is chosen). If the result is equal to 0 then its sign will be the
+ sign of a.
+
+ This operation will fail under the same conditions as integer division
+ (that is, if integer division on the same two operands would fail, the
+ remainder cannot be calculated).
+
+ >>> ExtendedContext.remainder_near(Decimal('2.1'), Decimal('3'))
+ Decimal("-0.9")
+ >>> ExtendedContext.remainder_near(Decimal('10'), Decimal('6'))
+ Decimal("-2")
+ >>> ExtendedContext.remainder_near(Decimal('10'), Decimal('3'))
+ Decimal("1")
+ >>> ExtendedContext.remainder_near(Decimal('-10'), Decimal('3'))
+ Decimal("-1")
+ >>> ExtendedContext.remainder_near(Decimal('10.2'), Decimal('1'))
+ Decimal("0.2")
+ >>> ExtendedContext.remainder_near(Decimal('10'), Decimal('0.3'))
+ Decimal("0.1")
+ >>> ExtendedContext.remainder_near(Decimal('3.6'), Decimal('1.3'))
+ Decimal("-0.3")
+ """
+ return a.remainder_near(b, context=self)
+
+ def same_quantum(self, a, b):
+ """Returns True if the two operands have the same exponent.
+
+ The result is never affected by either the sign or the coefficient of
+ either operand.
+
+ >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('0.001'))
+ False
+ >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('0.01'))
+ True
+ >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('1'))
+ False
+ >>> ExtendedContext.same_quantum(Decimal('Inf'), Decimal('-Inf'))
+ True
+ """
+ return a.same_quantum(b)
+
+ def sqrt(self, a):
+ """Returns the square root of a non-negative number to context precision.
+
+ If the result must be inexact, it is rounded using the round-half-even
+ algorithm.
+
+ >>> ExtendedContext.sqrt(Decimal('0'))
+ Decimal("0")
+ >>> ExtendedContext.sqrt(Decimal('-0'))
+ Decimal("-0")
+ >>> ExtendedContext.sqrt(Decimal('0.39'))
+ Decimal("0.624499800")
+ >>> ExtendedContext.sqrt(Decimal('100'))
+ Decimal("10")
+ >>> ExtendedContext.sqrt(Decimal('1'))
+ Decimal("1")
+ >>> ExtendedContext.sqrt(Decimal('1.0'))
+ Decimal("1.0")
+ >>> ExtendedContext.sqrt(Decimal('1.00'))
+ Decimal("1.0")
+ >>> ExtendedContext.sqrt(Decimal('7'))
+ Decimal("2.64575131")
+ >>> ExtendedContext.sqrt(Decimal('10'))
+ Decimal("3.16227766")
+ >>> ExtendedContext.prec
+ 9
+ """
+ return a.sqrt(context=self)
+
+ def subtract(self, a, b):
+ """Return the difference between the two operands.
+
+ >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('1.07'))
+ Decimal("0.23")
+ >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('1.30'))
+ Decimal("0.00")
+ >>> ExtendedContext.subtract(Decimal('1.3'), Decimal('2.07'))
+ Decimal("-0.77")
+ """
+ return a.__sub__(b, context=self)
+
+ def to_eng_string(self, a):
+        """Converts a number to a string, using engineering notation.
+
+ The operation is not affected by the context.
+ """
+ return a.to_eng_string(context=self)
+
+ def to_sci_string(self, a):
+ """Converts a number to a string, using scientific notation.
+
+ The operation is not affected by the context.
+ """
+ return a.__str__(context=self)
+
+ def to_integral(self, a):
+ """Rounds to an integer.
+
+ When the operand has a negative exponent, the result is the same
+ as using the quantize() operation using the given operand as the
+ left-hand-operand, 1E+0 as the right-hand-operand, and the precision
+ of the operand as the precision setting, except that no flags will
+ be set. The rounding mode is taken from the context.
+
+ >>> ExtendedContext.to_integral(Decimal('2.1'))
+ Decimal("2")
+ >>> ExtendedContext.to_integral(Decimal('100'))
+ Decimal("100")
+ >>> ExtendedContext.to_integral(Decimal('100.0'))
+ Decimal("100")
+ >>> ExtendedContext.to_integral(Decimal('101.5'))
+ Decimal("102")
+ >>> ExtendedContext.to_integral(Decimal('-101.5'))
+ Decimal("-102")
+ >>> ExtendedContext.to_integral(Decimal('10E+5'))
+ Decimal("1.0E+6")
+ >>> ExtendedContext.to_integral(Decimal('7.89E+77'))
+ Decimal("7.89E+77")
+ >>> ExtendedContext.to_integral(Decimal('-Inf'))
+ Decimal("-Infinity")
+ """
+ return a.to_integral(context=self)
+
+class _WorkRep(object):
+    """Mutable scratch representation of a Decimal used during arithmetic.
+
+    Holds the sign, the coefficient as a single Python int/long (rather
+    than a digit tuple), and the exponent, so intermediate computation
+    can use ordinary integer arithmetic.
+    """
+    __slots__ = ('sign','int','exp')
+    # sign: 0 or 1
+    # int:  int or long
+    # exp:  None, int, or string
+
+    def __init__(self, value=None):
+        if value is None:
+            self.sign = None
+            self.int = 0
+            self.exp = None
+        elif isinstance(value, Decimal):
+            self.sign = value._sign
+            # Collapse the digit tuple into one integer coefficient.
+            cum = 0
+            for digit in value._int:
+                cum = cum * 10 + digit
+            self.int = cum
+            self.exp = value._exp
+        else:
+            # assert isinstance(value, tuple)
+            self.sign = value[0]
+            self.int = value[1]
+            self.exp = value[2]
+
+    def __repr__(self):
+        return "(%r, %r, %r)" % (self.sign, self.int, self.exp)
+
+    __str__ = __repr__
+
+
+
+def _normalize(op1, op2, shouldround = 0, prec = 0):
+    """Normalizes op1, op2 to have the same exp and length of coefficient.
+
+    Done during addition.  Mutates the _WorkRep operands in place and
+    returns them.
+    """
+    # Yes, the exponent is a long, but the difference between exponents
+    # must be an int-- otherwise you'd get a big memory problem.
+    numdigits = int(op1.exp - op2.exp)
+    if numdigits < 0:
+        numdigits = -numdigits
+        tmp = op2
+        other = op1
+    else:
+        tmp = op1
+        other = op2
+
+    # tmp is the operand with the larger exponent; it is the one scaled.
+    if shouldround and numdigits > prec + 1:
+        # Big difference in exponents - check the adjusted exponents
+        tmp_len = len(str(tmp.int))
+        other_len = len(str(other.int))
+        if numdigits > (other_len + prec + 1 - tmp_len):
+            # If the difference in adjusted exps is > prec+1, we know
+            # other is insignificant, so might as well put a 1 after the precision.
+            # (since this is only for addition.)  Also stops use of massive longs.
+
+            extend = prec + 2 - tmp_len
+            if extend <= 0:
+                extend = 1
+            tmp.int *= 10 ** extend
+            tmp.exp -= extend
+            other.int = 1
+            other.exp = tmp.exp
+            return op1, op2
+
+    # Normal case: shift tmp down so both operands share the same exponent.
+    tmp.int *= 10 ** numdigits
+    tmp.exp -= numdigits
+    return op1, op2
+
+def _adjust_coefficients(op1, op2):
+ """Adjust op1, op2 so that op2.int * 10 > op1.int >= op2.int.
+
+ Returns the adjusted op1, op2 as well as the change in op1.exp-op2.exp.
+
+ Used on _WorkRep instances during division.
+ """
+ adjust = 0
+ #If op1 is smaller, make it larger
+ while op2.int > op1.int:
+ op1.int *= 10
+ op1.exp -= 1
+ adjust += 1
+
+ #If op2 is too small, make it larger
+ while op1.int >= (10 * op2.int):
+ op2.int *= 10
+ op2.exp -= 1
+ adjust -= 1
+
+ return op1, op2, adjust
+
+##### Helper Functions ########################################
+
+def _convert_other(other):
+ """Convert other to Decimal.
+
+ Verifies that it's ok to use in an implicit construction.
+ """
+ if isinstance(other, Decimal):
+ return other
+ if isinstance(other, (int, long)):
+ return Decimal(other)
+ return NotImplemented
+
+_infinity_map = {
+ 'inf' : 1,
+ 'infinity' : 1,
+ '+inf' : 1,
+ '+infinity' : 1,
+ '-inf' : -1,
+ '-infinity' : -1
+}
+
+def _isinfinity(num):
+    """Determines whether a string or float is infinity.
+
+    -1 for negative infinity; 0 for finite; +1 for positive infinity
+    """
+    # Lower-casing lets _infinity_map match 'INF', 'Infinity', etc.
+    num = str(num).lower()
+    return _infinity_map.get(num, 0)
+
+def _isnan(num):
+    """Determines whether a string or float is NaN
+
+    (1, sign, diagnostic info as string) => NaN
+    (2, sign, diagnostic info as string) => sNaN
+    0 => not a NaN
+    """
+    num = str(num).lower()
+    if not num:
+        return 0
+
+    #get the sign, get rid of trailing [+-]
+    sign = 0
+    if num[0] == '+':
+        num = num[1:]
+    elif num[0] == '-':  #elif avoids '+-nan'
+        num = num[1:]
+        sign = 1
+
+    # A NaN may carry a purely numeric diagnostic payload, e.g. 'nan123';
+    # anything non-digit after the keyword means it is not a NaN at all.
+    if num.startswith('nan'):
+        if len(num) > 3 and not num[3:].isdigit():  #diagnostic info
+            return 0
+        return (1, sign, num[3:].lstrip('0'))
+    if num.startswith('snan'):
+        if len(num) > 4 and not num[4:].isdigit():
+            return 0
+        return (2, sign, num[4:].lstrip('0'))
+    return 0
+
+
+##### Setup Specific Contexts ################################
+
+# The default context prototype used by Context()
+# Is mutable, so that new contexts can have different default values
+
+DefaultContext = Context(
+ prec=28, rounding=ROUND_HALF_EVEN,
+ traps=[DivisionByZero, Overflow, InvalidOperation],
+ flags=[],
+ _rounding_decision=ALWAYS_ROUND,
+ Emax=999999999,
+ Emin=-999999999,
+ capitals=1
+)
+
+# Pre-made alternate contexts offered by the specification
+# Don't change these; the user should be able to select these
+# contexts and be able to reproduce results from other implementations
+# of the spec.
+
+BasicContext = Context(
+ prec=9, rounding=ROUND_HALF_UP,
+ traps=[DivisionByZero, Overflow, InvalidOperation, Clamped, Underflow],
+ flags=[],
+)
+
+ExtendedContext = Context(
+ prec=9, rounding=ROUND_HALF_EVEN,
+ traps=[],
+ flags=[],
+)
+
+
+##### Useful Constants (internal use only) ####################
+
+#Reusable defaults
+Inf = Decimal('Inf')
+negInf = Decimal('-Inf')
+
+#Infsign[sign] is infinity w/ that sign
+Infsign = (Inf, negInf)
+
+NaN = Decimal('NaN')
+
+
+##### crud for parsing strings #################################
+import re
+
+# There's an optional sign at the start, and an optional exponent
+# at the end. The exponent has an optional sign and at least one
+# digit. In between, must have either at least one digit followed
+# by an optional fraction, or a decimal point followed by at least
+# one digit. Yuck.
+
+_parser = re.compile(r"""
+# \s*
+ (?P<sign>[-+])?
+ (
+ (?P<int>\d+) (\. (?P<frac>\d*))?
+ |
+ \. (?P<onlyfrac>\d+)
+ )
+ ([eE](?P<exp>[-+]? \d+))?
+# \s*
+ $
+""", re.VERBOSE).match #Uncomment the \s* to allow leading or trailing spaces.
+
+del re
+
+# return sign, n, p s.t. float string value == -1**sign * n * 10**p exactly
+
+def _string2exact(s):
+    """Parse a numeric literal into a (sign, digit-tuple, exponent) triple.
+
+    Raises ValueError if s does not match the _parser regex.
+    """
+    m = _parser(s)
+    if m is None:
+        raise ValueError("invalid literal for Decimal: %r" % s)
+
+    if m.group('sign') == "-":
+        sign = 1
+    else:
+        sign = 0
+
+    exp = m.group('exp')
+    if exp is None:
+        exp = 0
+    else:
+        exp = int(exp)
+
+    # Either 'int' (with optional 'frac') matched, or 'onlyfrac' did.
+    intpart = m.group('int')
+    if intpart is None:
+        intpart = ""
+        fracpart = m.group('onlyfrac')
+    else:
+        fracpart = m.group('frac')
+        if fracpart is None:
+            fracpart = ""
+
+    # Fold the fractional digits into the exponent.
+    exp -= len(fracpart)
+
+    mantissa = intpart + fracpart
+    tmp = map(int, mantissa)
+    # NOTE(review): 'backup' aliases 'tmp' (same list object), so the
+    # del below empties both and the tuple(backup) branch can never see a
+    # non-empty backup — presumably 'tmp[:]' was intended; confirm before
+    # changing, since zeros currently normalize to a (0,) coefficient.
+    backup = tmp
+    while tmp and tmp[0] == 0:
+        del tmp[0]
+
+    # It's a zero
+    if not tmp:
+        if backup:
+            return (sign, tuple(backup), exp)
+        return (sign, (0,), exp)
+    mantissa = tuple(tmp)
+
+    return (sign, mantissa, exp)
+
+
+if __name__ == '__main__':
+ import doctest, sys
+ doctest.testmod(sys.modules[__name__])
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/doctest.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/doctest.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,2637 @@
+# Module doctest.
+# Released to the public domain 16-Jan-2001, by Tim Peters (tim at python.org).
+# Major enhancements and refactoring by:
+# Jim Fulton
+# Edward Loper
+
+# Provided as-is; use at your own risk; no warranty; no promises; enjoy!
+
+r"""Module doctest -- a framework for running examples in docstrings.
+
+In simplest use, end each module M to be tested with:
+
+def _test():
+ import doctest
+ doctest.testmod()
+
+if __name__ == "__main__":
+ _test()
+
+Then running the module as a script will cause the examples in the
+docstrings to get executed and verified:
+
+python M.py
+
+This won't display anything unless an example fails, in which case the
+failing example(s) and the cause(s) of the failure(s) are printed to stdout
+(why not stderr? because stderr is a lame hack <0.2 wink>), and the final
+line of output is "Test failed.".
+
+Run it with the -v switch instead:
+
+python M.py -v
+
+and a detailed report of all examples tried is printed to stdout, along
+with assorted summaries at the end.
+
+You can force verbose mode by passing "verbose=True" to testmod, or prohibit
+it by passing "verbose=False". In either of those cases, sys.argv is not
+examined by testmod.
+
+There are a variety of other ways to run doctests, including integration
+with the unittest framework, and support for running non-Python text
+files containing doctests. There are also many ways to override parts
+of doctest's default behaviors. See the Library Reference Manual for
+details.
+"""
+
+__docformat__ = 'reStructuredText en'
+
# Public API, grouped to mirror the table of contents below.
__all__ = [
    # 0. Option Flags
    'register_optionflag',
    'DONT_ACCEPT_TRUE_FOR_1',
    'DONT_ACCEPT_BLANKLINE',
    'NORMALIZE_WHITESPACE',
    'ELLIPSIS',
    'SKIP',
    'IGNORE_EXCEPTION_DETAIL',
    'COMPARISON_FLAGS',
    'REPORT_UDIFF',
    'REPORT_CDIFF',
    'REPORT_NDIFF',
    'REPORT_ONLY_FIRST_FAILURE',
    'REPORTING_FLAGS',
    # 1. Utility Functions
    # 2. Example & DocTest
    'Example',
    'DocTest',
    # 3. Doctest Parser
    'DocTestParser',
    # 4. Doctest Finder
    'DocTestFinder',
    # 5. Doctest Runner
    'DocTestRunner',
    'OutputChecker',
    'DocTestFailure',
    'UnexpectedException',
    'DebugRunner',
    # 6. Test Functions
    'testmod',
    'testfile',
    'run_docstring_examples',
    # 7. Tester
    'Tester',
    # 8. Unittest Support
    'DocTestSuite',
    'DocFileSuite',
    'set_unittest_reportflags',
    # 9. Debugging Support
    'script_from_examples',
    'testsource',
    'debug_src',
    'debug',
]
+
+import __future__
+
+import sys, traceback, inspect, linecache, os, re
+import unittest, difflib, pdb, tempfile
+import warnings
+from StringIO import StringIO
+
+# There are 4 basic classes:
+# - Example: a <source, want> pair, plus an intra-docstring line number.
+# - DocTest: a collection of examples, parsed from a docstring, plus
+# info about where the docstring came from (name, filename, lineno).
+# - DocTestFinder: extracts DocTests from a given object's docstring and
+# its contained objects' docstrings.
+# - DocTestRunner: runs DocTest cases, and accumulates statistics.
+#
+# So the basic picture is:
+#
+# list of:
+# +------+ +---------+ +-------+
+# |object| --DocTestFinder-> | DocTest | --DocTestRunner-> |results|
+# +------+ +---------+ +-------+
+# | Example |
+# | ... |
+# | Example |
+# +---------+
+
# Option constants.

OPTIONFLAGS_BY_NAME = {}
def register_optionflag(name):
    """Return the flag bit for `name`, registering it first if needed.

    Each distinct name is assigned the next unused power of two, so
    flags can be OR'ed together; re-registering a known name returns
    its existing bit.
    """
    if name not in OPTIONFLAGS_BY_NAME:
        OPTIONFLAGS_BY_NAME[name] = 1 << len(OPTIONFLAGS_BY_NAME)
    return OPTIONFLAGS_BY_NAME[name]
+
DONT_ACCEPT_TRUE_FOR_1 = register_optionflag('DONT_ACCEPT_TRUE_FOR_1')
DONT_ACCEPT_BLANKLINE = register_optionflag('DONT_ACCEPT_BLANKLINE')
NORMALIZE_WHITESPACE = register_optionflag('NORMALIZE_WHITESPACE')
ELLIPSIS = register_optionflag('ELLIPSIS')
SKIP = register_optionflag('SKIP')
IGNORE_EXCEPTION_DETAIL = register_optionflag('IGNORE_EXCEPTION_DETAIL')

# All flags that affect how expected and actual output are compared.
COMPARISON_FLAGS = (DONT_ACCEPT_TRUE_FOR_1 |
                    DONT_ACCEPT_BLANKLINE |
                    NORMALIZE_WHITESPACE |
                    ELLIPSIS |
                    SKIP |
                    IGNORE_EXCEPTION_DETAIL)

REPORT_UDIFF = register_optionflag('REPORT_UDIFF')
REPORT_CDIFF = register_optionflag('REPORT_CDIFF')
REPORT_NDIFF = register_optionflag('REPORT_NDIFF')
REPORT_ONLY_FIRST_FAILURE = register_optionflag('REPORT_ONLY_FIRST_FAILURE')

# All flags that affect how failures are reported.
REPORTING_FLAGS = (REPORT_UDIFF |
                   REPORT_CDIFF |
                   REPORT_NDIFF |
                   REPORT_ONLY_FIRST_FAILURE)

# Special string markers for use in `want` strings:
BLANKLINE_MARKER = '<BLANKLINE>'
ELLIPSIS_MARKER = '...'
+
+######################################################################
+## Table of Contents
+######################################################################
+# 1. Utility Functions
+# 2. Example & DocTest -- store test cases
+# 3. DocTest Parser -- extracts examples from strings
+# 4. DocTest Finder -- extracts test cases from objects
+# 5. DocTest Runner -- runs test cases
+# 6. Test Functions -- convenient wrappers for testing
+# 7. Tester Class -- for backwards compatibility
+# 8. Unittest Support
+# 9. Debugging Support
+# 10. Example Usage
+
+######################################################################
+## 1. Utility Functions
+######################################################################
+
+def _extract_future_flags(globs):
+ """
+ Return the compiler-flags associated with the future features that
+ have been imported into the given namespace (globs).
+ """
+ flags = 0
+ for fname in __future__.all_feature_names:
+ feature = globs.get(fname, None)
+ if feature is getattr(__future__, fname):
+ flags |= feature.compiler_flag
+ return flags
+
def _normalize_module(module, depth=2):
    """
    Return the module specified by `module`.  In particular:
      - If `module` is a module, then return module.
      - If `module` is a string, then import and return the
        module with that name.
      - If `module` is None, then return the calling module.
        The calling module is assumed to be the module of
        the stack frame at the given depth in the call stack.

    Raises TypeError for any other argument type.
    """
    if inspect.ismodule(module):
        return module
    elif isinstance(module, (str, unicode)):
        return __import__(module, globals(), locals(), ["*"])
    elif module is None:
        # With the default depth=2, sys._getframe skips this frame and
        # the public API function that called us, landing on its caller.
        return sys.modules[sys._getframe(depth).f_globals['__name__']]
    else:
        raise TypeError("Expected a module, string, or None")
+
+def _load_testfile(filename, package, module_relative):
+ if module_relative:
+ package = _normalize_module(package, 3)
+ filename = _module_relative_path(package, filename)
+ if hasattr(package, '__loader__'):
+ if hasattr(package.__loader__, 'get_data'):
+ return package.__loader__.get_data(filename), filename
+ return open(filename).read(), filename
+
+def _indent(s, indent=4):
+ """
+ Add the given number of space characters to the beginning every
+ non-blank line in `s`, and return the result.
+ """
+ # This regexp matches the start of non-blank lines:
+ return re.sub('(?m)^(?!$)', indent*' ', s)
+
+def _exception_traceback(exc_info):
+ """
+ Return a string containing a traceback message for the given
+ exc_info tuple (as returned by sys.exc_info()).
+ """
+ # Get a traceback message.
+ excout = StringIO()
+ exc_type, exc_val, exc_tb = exc_info
+ traceback.print_exception(exc_type, exc_val, exc_tb, file=excout)
+ return excout.getvalue()
+
+# Override some StringIO methods.
+class _SpoofOut(StringIO):
+ def getvalue(self):
+ result = StringIO.getvalue(self)
+ # If anything at all was written, make sure there's a trailing
+ # newline. There's no way for the expected output to indicate
+ # that a trailing newline is missing.
+ if result and not result.endswith("\n"):
+ result += "\n"
+ # Prevent softspace from screwing up the next test case, in
+ # case they used print with a trailing comma in an example.
+ if hasattr(self, "softspace"):
+ del self.softspace
+ return result
+
+ def truncate(self, size=None):
+ StringIO.truncate(self, size)
+ if hasattr(self, "softspace"):
+ del self.softspace
+
# Worst-case linear-time ellipsis matching.
def _ellipsis_match(want, got):
    """
    Report whether `got` matches `want`, where each occurrence of
    ELLIPSIS_MARKER in `want` may stand for any substring of `got`.

    Essentially the only subtle case:
    >>> _ellipsis_match('aa...aa', 'aaa')
    False
    """
    if ELLIPSIS_MARKER not in want:
        return want == got

    pieces = want.split(ELLIPSIS_MARKER)
    assert len(pieces) >= 2

    # `lo`..`hi` delimits the window of `got` still available.
    lo, hi = 0, len(got)

    # A non-empty first piece must match `got` literally at the start.
    if pieces[0]:
        if not got.startswith(pieces[0]):
            return False
        lo = len(pieces[0])
        del pieces[0]

    # Likewise a non-empty last piece must match literally at the end.
    if pieces[-1]:
        if not got.endswith(pieces[-1]):
            return False
        hi -= len(pieces[-1])
        del pieces[-1]

    if lo > hi:
        # The exact prefix and suffix together need more characters
        # than `got` has, as in _ellipsis_match('aa...aa', 'aaa').
        return False

    # Each remaining piece only has to appear somewhere in order, so
    # the leftmost non-overlapping match for each one suffices; if no
    # overall match exists that way, none exists at all.  Empty pieces
    # (from consecutive or edge ellipses) match without advancing.
    for piece in pieces:
        lo = got.find(piece, lo, hi)
        if lo < 0:
            return False
        lo += len(piece)

    return True
+
+def _comment_line(line):
+ "Return a commented form of the given line"
+ line = line.rstrip()
+ if line:
+ return '# '+line
+ else:
+ return '#'
+
class _OutputRedirectingPdb(pdb.Pdb):
    """
    A specialized version of the python debugger that redirects stdout
    to a given stream when interacting with the user.  Stdout is *not*
    redirected when traced code is executed.
    """
    def __init__(self, out):
        # `out` is the stream debugger UI output is sent to; kept for
        # trace_dispatch below as well as passed to Pdb itself.
        self.__out = out
        pdb.Pdb.__init__(self, stdout=out)

    def trace_dispatch(self, *args):
        """Dispatch a trace event with stdout temporarily redirected,
        restoring the real stdout before traced code resumes."""
        # Redirect stdout to the given stream.
        save_stdout = sys.stdout
        sys.stdout = self.__out
        # Call Pdb's trace dispatch method.
        try:
            return pdb.Pdb.trace_dispatch(self, *args)
        finally:
            sys.stdout = save_stdout
+
+# [XX] Normalize with respect to os.path.pardir?
+def _module_relative_path(module, path):
+ if not inspect.ismodule(module):
+ raise TypeError, 'Expected a module: %r' % module
+ if path.startswith('/'):
+ raise ValueError, 'Module-relative files may not have absolute paths'
+
+ # Find the base directory for the path.
+ if hasattr(module, '__file__'):
+ # A normal module/package
+ basedir = os.path.split(module.__file__)[0]
+ elif module.__name__ == '__main__':
+ # An interactive session.
+ if len(sys.argv)>0 and sys.argv[0] != '':
+ basedir = os.path.split(sys.argv[0])[0]
+ else:
+ basedir = os.curdir
+ else:
+ # A module w/o __file__ (this includes builtins)
+ raise ValueError("Can't resolve paths relative to the module " +
+ module + " (it has no __file__)")
+
+ # Combine the base directory and the path.
+ return os.path.join(basedir, *(path.split('/')))
+
+######################################################################
+## 2. Example & DocTest
+######################################################################
+## - An "example" is a <source, want> pair, where "source" is a
+## fragment of source code, and "want" is the expected output for
+## "source." The Example class also includes information about
+## where the example was extracted from.
+##
+## - A "doctest" is a collection of examples, typically extracted from
+## a string (such as an object's docstring). The DocTest class also
+## includes information about where the string was extracted from.
+
class Example:
    """
    A single interactive example: a piece of source code together with
    the output it is expected to produce.  Attributes:

      - source: the example's Python source, always newline-terminated
        (the constructor appends a newline if needed).

      - want: the expected output (stdout, or a traceback if an
        exception is expected); newline-terminated unless empty.

      - exc_msg: the expected exception message, in the form produced
        by traceback.format_exception_only(), newline-terminated; or
        None if no exception is expected.

      - lineno: zero-based line offset of this example within the
        DocTest string it came from.

      - indent: number of space characters before the example's first
        prompt in the DocTest string.

      - options: per-example option overrides, mapping option flags to
        True/False; flags absent from this dict keep the defaults
        given by the DocTestRunner's optionflags.  Empty by default.
    """
    def __init__(self, source, want, exc_msg=None, lineno=0, indent=0,
                 options=None):
        # Enforce the newline-termination invariants described above.
        if not source.endswith('\n'):
            source += '\n'
        if want and not want.endswith('\n'):
            want += '\n'
        if exc_msg is not None and not exc_msg.endswith('\n'):
            exc_msg += '\n'
        # Record everything on the instance.
        self.source = source
        self.want = want
        self.lineno = lineno
        self.indent = indent
        if options is None:
            options = {}
        self.options = options
        self.exc_msg = exc_msg
+
class DocTest:
    """
    A collection of doctest examples that should be run in a single
    namespace.  Each `DocTest` defines the following attributes:

      - examples: the list of examples.

      - globs: The namespace (aka globals) that the examples should
        be run in.

      - name: A name identifying the DocTest (typically, the name of
        the object whose docstring this DocTest was extracted from).

      - filename: The name of the file that this DocTest was extracted
        from, or `None` if the filename is unknown.

      - lineno: The line number within filename where this DocTest
        begins, or `None` if the line number is unavailable.  This
        line number is zero-based, with respect to the beginning of
        the file.

      - docstring: The string that the examples were extracted from,
        or `None` if the string is unavailable.
    """
    def __init__(self, examples, globs, name, filename, lineno, docstring):
        """
        Create a new DocTest containing the given examples.  The
        DocTest's globals are initialized with a copy of `globs`.
        """
        assert not isinstance(examples, basestring), \
               "DocTest no longer accepts str; use DocTestParser instead"
        self.examples = examples
        self.docstring = docstring
        # Copy, so names the examples define don't leak back into the
        # caller's namespace.
        self.globs = globs.copy()
        self.name = name
        self.filename = filename
        self.lineno = lineno

    def __repr__(self):
        if len(self.examples) == 0:
            examples = 'no examples'
        elif len(self.examples) == 1:
            examples = '1 example'
        else:
            examples = '%d examples' % len(self.examples)
        return ('<DocTest %s from %s:%s (%s)>' %
                (self.name, self.filename, self.lineno, examples))

    # This lets us sort tests by name:
    def __cmp__(self, other):
        if not isinstance(other, DocTest):
            return -1
        # id() acts as a tie-breaker so distinct tests with the same
        # (name, filename, lineno) never compare equal.
        return cmp((self.name, self.filename, self.lineno, id(self)),
                   (other.name, other.filename, other.lineno, id(other)))
+
+######################################################################
+## 3. DocTestParser
+######################################################################
+
class DocTestParser:
    """
    A class used to parse strings containing doctest examples.
    """
    # This regular expression is used to find doctest examples in a
    # string.  It defines three groups: `source` is the source code
    # (including leading indentation and prompts); `indent` is the
    # indentation of the first (PS1) line of the source code; and
    # `want` is the expected output (including leading indentation).
    _EXAMPLE_RE = re.compile(r'''
        # Source consists of a PS1 line followed by zero or more PS2 lines.
        (?P<source>
            (?:^(?P<indent> [ ]*) >>>    .*)    # PS1 line
            (?:\n           [ ]*  \.\.\. .*)*)  # PS2 lines
        \n?
        # Want consists of any non-blank lines that do not start with PS1.
        (?P<want> (?:(?![ ]*$)    # Not a blank line
                     (?![ ]*>>>)  # Not a line starting with PS1
                     .*$\n?       # But any other line
                  )*)
        ''', re.MULTILINE | re.VERBOSE)

    # A regular expression for handling `want` strings that contain
    # expected exceptions.  It divides `want` into three pieces:
    #    - the traceback header line (`hdr`)
    #    - the traceback stack (`stack`)
    #    - the exception message (`msg`), as generated by
    #      traceback.format_exception_only()
    # `msg` may have multiple lines.  We assume/require that the
    # exception message is the first non-indented line starting with a word
    # character following the traceback header line.
    _EXCEPTION_RE = re.compile(r"""
        # Grab the traceback header.  Different versions of Python have
        # said different things on the first traceback line.
        ^(?P<hdr> Traceback\ \(
            (?: most\ recent\ call\ last
            |   innermost\ last
            ) \) :
        )
        \s* $                # toss trailing whitespace on the header.
        (?P<stack> .*?)      # don't blink: absorb stuff until...
        ^ (?P<msg> \w+ .*)   #     a line *starts* with alphanum.
        """, re.VERBOSE | re.MULTILINE | re.DOTALL)

    # A callable returning a true value iff its argument is a blank line
    # or contains a single comment.
    _IS_BLANK_OR_COMMENT = re.compile(r'^[ ]*(#.*)?$').match

    def parse(self, string, name='<string>'):
        """
        Divide the given string into examples and intervening text,
        and return them as a list of alternating Examples and strings.
        Line numbers for the Examples are 0-based.  The optional
        argument `name` is a name identifying this string, and is only
        used for error messages.
        """
        string = string.expandtabs()
        # If all lines begin with the same indentation, then strip it.
        min_indent = self._min_indent(string)
        if min_indent > 0:
            string = '\n'.join([l[min_indent:] for l in string.split('\n')])

        output = []
        # `charno` tracks our position in `string`; `lineno` the
        # corresponding 0-based line number.
        charno, lineno = 0, 0
        # Find all doctest examples in the string:
        for m in self._EXAMPLE_RE.finditer(string):
            # Add the pre-example text to `output`.
            output.append(string[charno:m.start()])
            # Update lineno (lines before this example)
            lineno += string.count('\n', charno, m.start())
            # Extract info from the regexp match.
            (source, options, want, exc_msg) = \
                     self._parse_example(m, name, lineno)
            # Create an Example, and add it to the list.
            if not self._IS_BLANK_OR_COMMENT(source):
                output.append( Example(source, want, exc_msg,
                                    lineno=lineno,
                                    indent=min_indent+len(m.group('indent')),
                                    options=options) )
            # Update lineno (lines inside this example)
            lineno += string.count('\n', m.start(), m.end())
            # Update charno.
            charno = m.end()
        # Add any remaining post-example text to `output`.
        output.append(string[charno:])
        return output

    def get_doctest(self, string, globs, name, filename, lineno):
        """
        Extract all doctest examples from the given string, and
        collect them into a `DocTest` object.

        `globs`, `name`, `filename`, and `lineno` are attributes for
        the new `DocTest` object.  See the documentation for `DocTest`
        for more information.
        """
        return DocTest(self.get_examples(string, name), globs,
                       name, filename, lineno, string)

    def get_examples(self, string, name='<string>'):
        """
        Extract all doctest examples from the given string, and return
        them as a list of `Example` objects.  Line numbers are
        0-based, because it's most common in doctests that nothing
        interesting appears on the same line as opening triple-quote,
        and so the first interesting line is called \"line 1\" then.

        The optional argument `name` is a name identifying this
        string, and is only used for error messages.
        """
        return [x for x in self.parse(string, name)
                if isinstance(x, Example)]

    def _parse_example(self, m, name, lineno):
        """
        Given a regular expression match from `_EXAMPLE_RE` (`m`),
        return a pair `(source, want)`, where `source` is the matched
        example's source code (with prompts and indentation stripped);
        and `want` is the example's expected output (with indentation
        stripped).

        `name` is the string's name, and `lineno` is the line number
        where the example starts; both are used for error messages.
        """
        # Get the example's indentation level.
        indent = len(m.group('indent'))

        # Divide source into lines; check that they're properly
        # indented; and then strip their indentation & prompts.
        source_lines = m.group('source').split('\n')
        self._check_prompt_blank(source_lines, indent, name, lineno)
        self._check_prefix(source_lines[1:], ' '*indent + '.', name, lineno)
        # indent+4 skips the indentation plus the 4-character prompt
        # ('>>> ' or '... ').
        source = '\n'.join([sl[indent+4:] for sl in source_lines])

        # Divide want into lines; check that it's properly indented; and
        # then strip the indentation.  Spaces before the last newline should
        # be preserved, so plain rstrip() isn't good enough.
        want = m.group('want')
        want_lines = want.split('\n')
        if len(want_lines) > 1 and re.match(r' *$', want_lines[-1]):
            del want_lines[-1]  # forget final newline & spaces after it
        self._check_prefix(want_lines, ' '*indent, name,
                           lineno + len(source_lines))
        want = '\n'.join([wl[indent:] for wl in want_lines])

        # If `want` contains a traceback message, then extract it.
        m = self._EXCEPTION_RE.match(want)
        if m:
            exc_msg = m.group('msg')
        else:
            exc_msg = None

        # Extract options from the source.
        options = self._find_options(source, name, lineno)

        return source, options, want, exc_msg

    # This regular expression looks for option directives in the
    # source code of an example.  Option directives are comments
    # starting with "doctest:".  Warning: this may give false
    # positives for string-literals that contain the string
    # "#doctest:".  Eliminating these false positives would require
    # actually parsing the string; but we limit them by ignoring any
    # line containing "#doctest:" that is *followed* by a quote mark.
    _OPTION_DIRECTIVE_RE = re.compile(r'#\s*doctest:\s*([^\n\'"]*)$',
                                      re.MULTILINE)

    def _find_options(self, source, name, lineno):
        """
        Return a dictionary containing option overrides extracted from
        option directives in the given source string.

        `name` is the string's name, and `lineno` is the line number
        where the example starts; both are used for error messages.

        Raises ValueError for an unknown option name or for a directive
        on a line that contains no example source.
        """
        options = {}
        # (note: with the current regexp, this will match at most once:)
        for m in self._OPTION_DIRECTIVE_RE.finditer(source):
            option_strings = m.group(1).replace(',', ' ').split()
            for option in option_strings:
                if (option[0] not in '+-' or
                    option[1:] not in OPTIONFLAGS_BY_NAME):
                    raise ValueError('line %r of the doctest for %s '
                                     'has an invalid option: %r' %
                                     (lineno+1, name, option))
                flag = OPTIONFLAGS_BY_NAME[option[1:]]
                options[flag] = (option[0] == '+')
        if options and self._IS_BLANK_OR_COMMENT(source):
            raise ValueError('line %r of the doctest for %s has an option '
                             'directive on a line with no example: %r' %
                             (lineno, name, source))
        return options

    # This regular expression finds the indentation of every non-blank
    # line in a string.
    _INDENT_RE = re.compile('^([ ]*)(?=\S)', re.MULTILINE)

    def _min_indent(self, s):
        "Return the minimum indentation of any non-blank line in `s`"
        indents = [len(indent) for indent in self._INDENT_RE.findall(s)]
        if len(indents) > 0:
            return min(indents)
        else:
            return 0

    def _check_prompt_blank(self, lines, indent, name, lineno):
        """
        Given the lines of a source string (including prompts and
        leading indentation), check to make sure that every prompt is
        followed by a space character.  If any line is not followed by
        a space character, then raise ValueError.
        """
        for i, line in enumerate(lines):
            # line[indent:indent+3] is the prompt ('>>>' or '...');
            # the character right after it must be a blank.
            if len(line) >= indent+4 and line[indent+3] != ' ':
                raise ValueError('line %r of the docstring for %s '
                                 'lacks blank after %s: %r' %
                                 (lineno+i+1, name,
                                  line[indent:indent+3], line))

    def _check_prefix(self, lines, prefix, name, lineno):
        """
        Check that every line in the given list starts with the given
        prefix; if any line does not, then raise a ValueError.
        """
        for i, line in enumerate(lines):
            if line and not line.startswith(prefix):
                raise ValueError('line %r of the docstring for %s has '
                                 'inconsistent leading whitespace: %r' %
                                 (lineno+i+1, name, line))
+
+######################################################################
+## 4. DocTest Finder
+######################################################################
+
+class DocTestFinder:
+ """
+ A class used to extract the DocTests that are relevant to a given
+ object, from its docstring and the docstrings of its contained
+ objects. Doctests can currently be extracted from the following
+ object types: modules, functions, classes, methods, staticmethods,
+ classmethods, and properties.
+ """
+
+ def __init__(self, verbose=False, parser=DocTestParser(),
+ recurse=True, exclude_empty=True):
+ """
+ Create a new doctest finder.
+
+ The optional argument `parser` specifies a class or
+ function that should be used to create new DocTest objects (or
+ objects that implement the same interface as DocTest). The
+ signature for this factory function should match the signature
+ of the DocTest constructor.
+
+ If the optional argument `recurse` is false, then `find` will
+ only examine the given object, and not any contained objects.
+
+ If the optional argument `exclude_empty` is false, then `find`
+ will include tests for objects with empty docstrings.
+ """
+ self._parser = parser
+ self._verbose = verbose
+ self._recurse = recurse
+ self._exclude_empty = exclude_empty
+
+ def find(self, obj, name=None, module=None, globs=None, extraglobs=None):
+ """
+ Return a list of the DocTests that are defined by the given
+ object's docstring, or by any of its contained objects'
+ docstrings.
+
+ The optional parameter `module` is the module that contains
+ the given object. If the module is not specified or is None, then
+ the test finder will attempt to automatically determine the
+ correct module. The object's module is used:
+
+ - As a default namespace, if `globs` is not specified.
+ - To prevent the DocTestFinder from extracting DocTests
+ from objects that are imported from other modules.
+ - To find the name of the file containing the object.
+ - To help find the line number of the object within its
+ file.
+
+ Contained objects whose module does not match `module` are ignored.
+
+ If `module` is False, no attempt to find the module will be made.
+ This is obscure, of use mostly in tests: if `module` is False, or
+ is None but cannot be found automatically, then all objects are
+ considered to belong to the (non-existent) module, so all contained
+ objects will (recursively) be searched for doctests.
+
+ The globals for each DocTest is formed by combining `globs`
+ and `extraglobs` (bindings in `extraglobs` override bindings
+ in `globs`). A new copy of the globals dictionary is created
+ for each DocTest. If `globs` is not specified, then it
+ defaults to the module's `__dict__`, if specified, or {}
+ otherwise. If `extraglobs` is not specified, then it defaults
+ to {}.
+
+ """
+ # If name was not specified, then extract it from the object.
+ if name is None:
+ name = getattr(obj, '__name__', None)
+ if name is None:
+ raise ValueError("DocTestFinder.find: name must be given "
+ "when obj.__name__ doesn't exist: %r" %
+ (type(obj),))
+
+ # Find the module that contains the given object (if obj is
+ # a module, then module=obj.). Note: this may fail, in which
+ # case module will be None.
+ if module is False:
+ module = None
+ elif module is None:
+ module = inspect.getmodule(obj)
+
+ # Read the module's source code. This is used by
+ # DocTestFinder._find_lineno to find the line number for a
+ # given object's docstring.
+ try:
+ file = inspect.getsourcefile(obj) or inspect.getfile(obj)
+ source_lines = linecache.getlines(file)
+ if not source_lines:
+ source_lines = None
+ except TypeError:
+ source_lines = None
+
+ # Initialize globals, and merge in extraglobs.
+ if globs is None:
+ if module is None:
+ globs = {}
+ else:
+ globs = module.__dict__.copy()
+ else:
+ globs = globs.copy()
+ if extraglobs is not None:
+ globs.update(extraglobs)
+
+ # Recursively expore `obj`, extracting DocTests.
+ tests = []
+ self._find(tests, obj, name, module, source_lines, globs, {})
+ # Sort the tests by alpha order of names, for consistency in
+ # verbose-mode output. This was a feature of doctest in Pythons
+ # <= 2.3 that got lost by accident in 2.4. It was repaired in
+ # 2.4.4 and 2.5.
+ tests.sort()
+ return tests
+
+ def _from_module(self, module, object):
+ """
+ Return true if the given object is defined in the given
+ module.
+ """
+ if module is None:
+ return True
+ elif inspect.isfunction(object):
+ return module.__dict__ is object.func_globals
+ elif inspect.isclass(object):
+ return module.__name__ == object.__module__
+ elif isinstance(object, property):
+ return True # [XX] no way not be sure.
+ elif inspect.getmodule(object) is not None:
+ return module is inspect.getmodule(object)
+ elif hasattr(object, '__module__'):
+ return module.__name__ == object.__module__
+ else:
+ raise ValueError("object must be a class or function")
+
+ def _find(self, tests, obj, name, module, source_lines, globs, seen):
+ """
+ Find tests for the given object and any contained objects, and
+ add them to `tests`.
+ """
+ if self._verbose:
+ print 'Finding tests in %s' % name
+
+ # If we've already processed this object, then ignore it.
+ if id(obj) in seen:
+ return
+ seen[id(obj)] = 1
+
+ # Find a test for this object, and add it to the list of tests.
+ test = self._get_test(obj, name, module, globs, source_lines)
+ if test is not None:
+ tests.append(test)
+
+ # Look for tests in a module's contained objects.
+ if inspect.ismodule(obj) and self._recurse:
+ for valname, val in obj.__dict__.items():
+ valname = '%s.%s' % (name, valname)
+ # Recurse to functions & classes.
+ if ((inspect.isfunction(val) or inspect.isclass(val)) and
+ self._from_module(module, val)):
+ self._find(tests, val, valname, module, source_lines,
+ globs, seen)
+
+ # Look for tests in a module's __test__ dictionary.
+ if inspect.ismodule(obj) and self._recurse:
+ for valname, val in getattr(obj, '__test__', {}).items():
+ if not isinstance(valname, basestring):
+ raise ValueError("DocTestFinder.find: __test__ keys "
+ "must be strings: %r" %
+ (type(valname),))
+ if not (inspect.isfunction(val) or inspect.isclass(val) or
+ inspect.ismethod(val) or inspect.ismodule(val) or
+ isinstance(val, basestring)):
+ raise ValueError("DocTestFinder.find: __test__ values "
+ "must be strings, functions, methods, "
+ "classes, or modules: %r" %
+ (type(val),))
+ valname = '%s.__test__.%s' % (name, valname)
+ self._find(tests, val, valname, module, source_lines,
+ globs, seen)
+
+ # Look for tests in a class's contained objects.
+ if inspect.isclass(obj) and self._recurse:
+ for valname, val in obj.__dict__.items():
+ # Special handling for staticmethod/classmethod.
+ if isinstance(val, staticmethod):
+ val = getattr(obj, valname)
+ if isinstance(val, classmethod):
+ val = getattr(obj, valname).im_func
+
+ # Recurse to methods, properties, and nested classes.
+ if ((inspect.isfunction(val) or inspect.isclass(val) or
+ isinstance(val, property)) and
+ self._from_module(module, val)):
+ valname = '%s.%s' % (name, valname)
+ self._find(tests, val, valname, module, source_lines,
+ globs, seen)
+
    def _get_test(self, obj, name, module, globs, source_lines):
        """
        Return a DocTest for the given object, if it defines a docstring;
        otherwise, return None.
        """
        # Extract the object's docstring.  If it doesn't have one,
        # then return None (no test for this object).
        if isinstance(obj, basestring):
            # Strings (e.g. values from a __test__ dict) are treated as
            # docstrings themselves.
            docstring = obj
        else:
            try:
                if obj.__doc__ is None:
                    docstring = ''
                else:
                    docstring = obj.__doc__
                    # Coerce non-string docstrings (e.g. objects with a
                    # custom __doc__) to a plain string.
                    if not isinstance(docstring, basestring):
                        docstring = str(docstring)
            except (TypeError, AttributeError):
                docstring = ''

        # Find the docstring's location in the file.
        lineno = self._find_lineno(obj, source_lines)

        # Don't bother if the docstring is empty.
        if self._exclude_empty and not docstring:
            return None

        # Return a DocTest for this object.
        if module is None:
            filename = None
        else:
            # Fall back on the module name when the module has no
            # __file__ attribute (e.g. some C extension modules).
            filename = getattr(module, '__file__', module.__name__)
            # Point at the .py source rather than the compiled file.
            if filename[-4:] in (".pyc", ".pyo"):
                filename = filename[:-1]
        return self._parser.get_doctest(docstring, globs, name,
                                        filename, lineno)
+
    def _find_lineno(self, obj, source_lines):
        """
        Return a line number of the given object's docstring.  Note:
        this method assumes that the object has a docstring.
        """
        lineno = None

        # Find the line number for modules.
        if inspect.ismodule(obj):
            lineno = 0

        # Find the line number for classes.
        # Note: this could be fooled if a class is defined multiple
        # times in a single file.
        if inspect.isclass(obj):
            if source_lines is None:
                return None
            pat = re.compile(r'^\s*class\s*%s\b' %
                             getattr(obj, '__name__', '-'))
            for i, line in enumerate(source_lines):
                if pat.match(line):
                    lineno = i
                    break

        # Find the line number for functions & methods: unwrap down to
        # the underlying code object, whose co_firstlineno is 1-based.
        if inspect.ismethod(obj): obj = obj.im_func
        if inspect.isfunction(obj): obj = obj.func_code
        if inspect.istraceback(obj): obj = obj.tb_frame
        if inspect.isframe(obj): obj = obj.f_code
        if inspect.iscode(obj):
            lineno = getattr(obj, 'co_firstlineno', None)-1

        # Find the line number where the docstring starts.  Assume
        # that it's the first line that begins with a quote mark.
        # Note: this could be fooled by a multiline function
        # signature, where a continuation line begins with a quote
        # mark.
        if lineno is not None:
            if source_lines is None:
                return lineno+1
            pat = re.compile('(^|.*:)\s*\w*("|\')')
            for lineno in range(lineno, len(source_lines)):
                if pat.match(source_lines[lineno]):
                    return lineno

        # We couldn't find the line number.
        return None
+
+######################################################################
+## 5. DocTest Runner
+######################################################################
+
class DocTestRunner:
    """
    A class used to run DocTest test cases, and accumulate statistics.
    The `run` method is used to process a single DocTest case.  It
    returns a tuple `(f, t)`, where `t` is the number of test cases
    tried, and `f` is the number of test cases that failed.

        >>> tests = DocTestFinder().find(_TestClass)
        >>> runner = DocTestRunner(verbose=False)
        >>> tests.sort(key = lambda test: test.name)
        >>> for test in tests:
        ...     print test.name, '->', runner.run(test)
        _TestClass -> (0, 2)
        _TestClass.__init__ -> (0, 2)
        _TestClass.get -> (0, 2)
        _TestClass.square -> (0, 1)

    The `summarize` method prints a summary of all the test cases that
    have been run by the runner, and returns an aggregated `(f, t)`
    tuple:

        >>> runner.summarize(verbose=1)
        4 items passed all tests:
           2 tests in _TestClass
           2 tests in _TestClass.__init__
           2 tests in _TestClass.get
           1 tests in _TestClass.square
        7 tests in 4 items.
        7 passed and 0 failed.
        Test passed.
        (0, 7)

    The aggregated number of tried examples and failed examples is
    also available via the `tries` and `failures` attributes:

        >>> runner.tries
        7
        >>> runner.failures
        0

    The comparison between expected outputs and actual outputs is done
    by an `OutputChecker`.  This comparison may be customized with a
    number of option flags; see the documentation for `testmod` for
    more information.  If the option flags are insufficient, then the
    comparison may also be customized by passing a subclass of
    `OutputChecker` to the constructor.

    The test runner's display output can be controlled in two ways.
    First, an output function (`out`) can be passed to
    `DocTestRunner.run`; this function will be called with strings that
    should be displayed.  It defaults to `sys.stdout.write`.  If
    capturing the output is not sufficient, then the display output
    can be also customized by subclassing DocTestRunner, and
    overriding the methods `report_start`, `report_success`,
    `report_unexpected_exception`, and `report_failure`.
    """
    # This divider string is used to separate failure messages, and to
    # separate sections of the summary.
    DIVIDER = "*" * 70
+
    def __init__(self, checker=None, verbose=None, optionflags=0):
        """
        Create a new test runner.

        Optional keyword arg `checker` is the `OutputChecker` that
        should be used to compare the expected outputs and actual
        outputs of doctest examples.

        Optional keyword arg 'verbose' prints lots of stuff if true,
        only failures if false; by default, it's true iff '-v' is in
        sys.argv.

        Optional argument `optionflags` can be used to control how the
        test runner compares expected output to actual output, and how
        it displays failures.  See the documentation for `testmod` for
        more information.
        """
        self._checker = checker or OutputChecker()
        if verbose is None:
            verbose = '-v' in sys.argv
        self._verbose = verbose
        # `optionflags` is mutated per-example by option directives;
        # `original_optionflags` keeps the constructor value.
        self.optionflags = optionflags
        self.original_optionflags = optionflags

        # Keep track of the examples we've run.
        self.tries = 0            # total number of examples tried
        self.failures = 0         # total number of examples that failed
        self._name2ft = {}        # maps test name -> (failures, tries)

        # Create a fake output target for capturing doctest output.
        self._fakeout = _SpoofOut()
+
+ #/////////////////////////////////////////////////////////////////
+ # Reporting methods
+ #/////////////////////////////////////////////////////////////////
+
+ def report_start(self, out, test, example):
+ """
+ Report that the test runner is about to process the given
+ example. (Only displays a message if verbose=True)
+ """
+ if self._verbose:
+ if example.want:
+ out('Trying:\n' + _indent(example.source) +
+ 'Expecting:\n' + _indent(example.want))
+ else:
+ out('Trying:\n' + _indent(example.source) +
+ 'Expecting nothing\n')
+
+ def report_success(self, out, test, example, got):
+ """
+ Report that the given example ran successfully. (Only
+ displays a message if verbose=True)
+ """
+ if self._verbose:
+ out("ok\n")
+
+ def report_failure(self, out, test, example, got):
+ """
+ Report that the given example failed.
+ """
+ out(self._failure_header(test, example) +
+ self._checker.output_difference(example, got, self.optionflags))
+
+ def report_unexpected_exception(self, out, test, example, exc_info):
+ """
+ Report that the given example raised an unexpected exception.
+ """
+ out(self._failure_header(test, example) +
+ 'Exception raised:\n' + _indent(_exception_traceback(exc_info)))
+
+ def _failure_header(self, test, example):
+ out = [self.DIVIDER]
+ if test.filename:
+ if test.lineno is not None and example.lineno is not None:
+ lineno = test.lineno + example.lineno + 1
+ else:
+ lineno = '?'
+ out.append('File "%s", line %s, in %s' %
+ (test.filename, lineno, test.name))
+ else:
+ out.append('Line %s, in %s' % (example.lineno+1, test.name))
+ out.append('Failed example:')
+ source = example.source
+ out.append(_indent(source))
+ return '\n'.join(out)
+
+ #/////////////////////////////////////////////////////////////////
+ # DocTest Running
+ #/////////////////////////////////////////////////////////////////
+
    def __run(self, test, compileflags, out):
        """
        Run the examples in `test`.  Write the outcome of each example
        with one of the `DocTestRunner.report_*` methods, using the
        writer function `out`.  `compileflags` is the set of compiler
        flags that should be used to execute examples.  Return a tuple
        `(f, t)`, where `t` is the number of examples tried, and `f`
        is the number of examples that failed.  The examples are run
        in the namespace `test.globs`.
        """
        # Keep track of the number of failures and tries.
        failures = tries = 0

        # Save the option flags (since option directives can be used
        # to modify them).
        original_optionflags = self.optionflags

        SUCCESS, FAILURE, BOOM = range(3) # `outcome` state

        check = self._checker.check_output

        # Process each example.
        for examplenum, example in enumerate(test.examples):

            # If REPORT_ONLY_FIRST_FAILURE is set, then suppress
            # reporting after the first failure.
            quiet = (self.optionflags & REPORT_ONLY_FIRST_FAILURE and
                     failures > 0)

            # Merge in the example's options.
            self.optionflags = original_optionflags
            if example.options:
                for (optionflag, val) in example.options.items():
                    if val:
                        self.optionflags |= optionflag
                    else:
                        self.optionflags &= ~optionflag

            # If 'SKIP' is set, then skip this example.
            if self.optionflags & SKIP:
                continue

            # Record that we started this example.
            tries += 1
            if not quiet:
                self.report_start(out, test, example)

            # Use a special filename for compile(), so we can retrieve
            # the source code during interactive debugging (see
            # __patched_linecache_getlines).
            filename = '<doctest %s[%d]>' % (test.name, examplenum)

            # Run the example in the given context (globs), and record
            # any exception that gets raised.  (But don't intercept
            # keyboard interrupts.)
            try:
                # Don't blink!  This is where the user's code gets run.
                exec compile(example.source, filename, "single",
                             compileflags, 1) in test.globs
                self.debugger.set_continue() # ==== Example Finished ====
                exception = None
            except KeyboardInterrupt:
                raise
            except:
                exception = sys.exc_info()
                self.debugger.set_continue() # ==== Example Finished ====

            got = self._fakeout.getvalue()  # the actual output
            self._fakeout.truncate(0)
            outcome = FAILURE   # guilty until proved innocent or insane

            # If the example executed without raising any exceptions,
            # verify its output.
            if exception is None:
                if check(example.want, got, self.optionflags):
                    outcome = SUCCESS

            # The example raised an exception:  check if it was expected.
            else:
                # NOTE(review): sys.exc_info() is re-queried here rather
                # than reusing `exception`; matches upstream CPython.
                exc_info = sys.exc_info()
                exc_msg = traceback.format_exception_only(*exc_info[:2])[-1]
                if not quiet:
                    got += _exception_traceback(exc_info)

                # If `example.exc_msg` is None, then we weren't expecting
                # an exception.
                if example.exc_msg is None:
                    outcome = BOOM

                # We expected an exception:  see whether it matches.
                elif check(example.exc_msg, exc_msg, self.optionflags):
                    outcome = SUCCESS

                # Another chance if they didn't care about the detail.
                elif self.optionflags & IGNORE_EXCEPTION_DETAIL:
                    # Compare only the exception class part (up to the
                    # first colon) of the messages.
                    m1 = re.match(r'[^:]*:', example.exc_msg)
                    m2 = re.match(r'[^:]*:', exc_msg)
                    if m1 and m2 and check(m1.group(0), m2.group(0),
                                           self.optionflags):
                        outcome = SUCCESS

            # Report the outcome.
            if outcome is SUCCESS:
                if not quiet:
                    self.report_success(out, test, example, got)
            elif outcome is FAILURE:
                if not quiet:
                    self.report_failure(out, test, example, got)
                failures += 1
            elif outcome is BOOM:
                if not quiet:
                    self.report_unexpected_exception(out, test, example,
                                                     exc_info)
                failures += 1
            else:
                assert False, ("unknown outcome", outcome)

        # Restore the option flags (in case they were modified)
        self.optionflags = original_optionflags

        # Record and return the number of failures and tries.
        self.__record_outcome(test, failures, tries)
        return failures, tries
+
+ def __record_outcome(self, test, f, t):
+ """
+ Record the fact that the given DocTest (`test`) generated `f`
+ failures out of `t` tried examples.
+ """
+ f2, t2 = self._name2ft.get(test.name, (0,0))
+ self._name2ft[test.name] = (f+f2, t+t2)
+ self.failures += f
+ self.tries += t
+
+ __LINECACHE_FILENAME_RE = re.compile(r'<doctest '
+ r'(?P<name>[\w\.]+)'
+ r'\[(?P<examplenum>\d+)\]>$')
+ def __patched_linecache_getlines(self, filename, module_globals=None):
+ m = self.__LINECACHE_FILENAME_RE.match(filename)
+ if m and m.group('name') == self.test.name:
+ example = self.test.examples[int(m.group('examplenum'))]
+ return example.source.splitlines(True)
+ else:
+ return self.save_linecache_getlines(filename, module_globals)
+
    def run(self, test, compileflags=None, out=None, clear_globs=True):
        """
        Run the examples in `test`, and display the results using the
        writer function `out`.

        The examples are run in the namespace `test.globs`.  If
        `clear_globs` is true (the default), then this namespace will
        be cleared after the test runs, to help with garbage
        collection.  If you would like to examine the namespace after
        the test completes, then use `clear_globs=False`.

        `compileflags` gives the set of flags that should be used by
        the Python compiler when running the examples.  If not
        specified, then it will default to the set of future-import
        flags that apply to `globs`.

        The output of each example is checked using
        `DocTestRunner.check_output`, and the results are formatted by
        the `DocTestRunner.report_*` methods.
        """
        self.test = test

        if compileflags is None:
            compileflags = _extract_future_flags(test.globs)

        # Redirect stdout to our capture buffer; `out` (the report
        # writer) keeps writing to the real stdout by default.
        save_stdout = sys.stdout
        if out is None:
            out = save_stdout.write
        sys.stdout = self._fakeout

        # Patch pdb.set_trace to restore sys.stdout during interactive
        # debugging (so it's not still redirected to self._fakeout).
        # Note that the interactive output will go to *our*
        # save_stdout, even if that's not the real sys.stdout; this
        # allows us to write test cases for the set_trace behavior.
        save_set_trace = pdb.set_trace
        self.debugger = _OutputRedirectingPdb(save_stdout)
        self.debugger.reset()
        pdb.set_trace = self.debugger.set_trace

        # Patch linecache.getlines, so we can see the example's source
        # when we're inside the debugger.
        self.save_linecache_getlines = linecache.getlines
        linecache.getlines = self.__patched_linecache_getlines

        try:
            return self.__run(test, compileflags, out)
        finally:
            # Undo the patches in reverse, even if __run raised.
            sys.stdout = save_stdout
            pdb.set_trace = save_set_trace
            linecache.getlines = self.save_linecache_getlines
            if clear_globs:
                test.globs.clear()
+
+ #/////////////////////////////////////////////////////////////////
+ # Summarization
+ #/////////////////////////////////////////////////////////////////
    def summarize(self, verbose=None):
        """
        Print a summary of all the test cases that have been run by
        this DocTestRunner, and return a tuple `(f, t)`, where `f` is
        the total number of failed examples, and `t` is the total
        number of tried examples.

        The optional `verbose` argument controls how detailed the
        summary is.  If the verbosity is not specified, then the
        DocTestRunner's verbosity is used.
        """
        if verbose is None:
            verbose = self._verbose
        # Partition the recorded tests by outcome.
        notests = []
        passed = []
        failed = []
        totalt = totalf = 0
        for x in self._name2ft.items():
            name, (f, t) = x
            assert f <= t
            totalt += t
            totalf += f
            if t == 0:
                notests.append(name)
            elif f == 0:
                passed.append( (name, t) )
            else:
                failed.append(x)
        if verbose:
            if notests:
                print len(notests), "items had no tests:"
                notests.sort()
                for thing in notests:
                    print " ", thing
            if passed:
                print len(passed), "items passed all tests:"
                passed.sort()
                for thing, count in passed:
                    print " %3d tests in %s" % (count, thing)
        # Failures are always reported, even when not verbose.
        if failed:
            print self.DIVIDER
            print len(failed), "items had failures:"
            failed.sort()
            for thing, (f, t) in failed:
                print " %3d of %3d in %s" % (f, t, thing)
        if verbose:
            print totalt, "tests in", len(self._name2ft), "items."
            print totalt - totalf, "passed and", totalf, "failed."
        if totalf:
            print "***Test Failed***", totalf, "failures."
        elif verbose:
            print "Test passed."
        return totalf, totalt
+
+ #/////////////////////////////////////////////////////////////////
+ # Backward compatibility cruft to maintain doctest.master.
+ #/////////////////////////////////////////////////////////////////
    def merge(self, other):
        """
        Merge another DocTestRunner's per-test (failures, tries) counts
        into this one, summing the counts for any test name present in
        both runners.
        """
        d = self._name2ft
        for name, (f, t) in other._name2ft.items():
            if name in d:
                print "*** DocTestRunner.merge: '" + name + "' in both" \
                      " testers; summing outcomes."
                f2, t2 = d[name]
                f = f + f2
                t = t + t2
            d[name] = f, t
+
class OutputChecker:
    """
    A class used to check whether the actual output from a doctest
    example matches the expected output.  `OutputChecker` defines two
    methods: `check_output`, which compares a given pair of outputs,
    and returns true if they match; and `output_difference`, which
    returns a string describing the differences between two outputs.
    """
    def check_output(self, want, got, optionflags):
        """
        Return True iff the actual output from an example (`got`)
        matches the expected output (`want`).  These strings are
        always considered to match if they are identical; but
        depending on what option flags the test runner is using,
        several non-exact match types are also possible.  See the
        documentation for `TestRunner` for more information about
        option flags.
        """
        # Handle the common case first, for efficiency:
        # if they're string-identical, always return true.
        if got == want:
            return True

        # The values True and False replaced 1 and 0 as the return
        # value for boolean comparisons in Python 2.3.
        if not (optionflags & DONT_ACCEPT_TRUE_FOR_1):
            if (got,want) == ("True\n", "1\n"):
                return True
            if (got,want) == ("False\n", "0\n"):
                return True

        # <BLANKLINE> can be used as a special sequence to signify a
        # blank line, unless the DONT_ACCEPT_BLANKLINE flag is used.
        if not (optionflags & DONT_ACCEPT_BLANKLINE):
            # Replace <BLANKLINE> in want with a blank line.
            want = re.sub('(?m)^%s\s*?$' % re.escape(BLANKLINE_MARKER),
                          '', want)
            # If a line in got contains only spaces, then remove the
            # spaces.
            got = re.sub('(?m)^\s*?$', '', got)
            if got == want:
                return True

        # This flag causes doctest to ignore any differences in the
        # contents of whitespace strings.  Note that this can be used
        # in conjunction with the ELLIPSIS flag.
        if optionflags & NORMALIZE_WHITESPACE:
            got = ' '.join(got.split())
            want = ' '.join(want.split())
            if got == want:
                return True

        # The ELLIPSIS flag says to let the sequence "..." in `want`
        # match any substring in `got`.
        if optionflags & ELLIPSIS:
            if _ellipsis_match(want, got):
                return True

        # We didn't find any match; return false.
        return False

    # Should we do a fancy diff?
    def _do_a_fancy_diff(self, want, got, optionflags):
        """Return True if a difflib-based diff should be used to show
        the difference between `want` and `got`."""
        # Not unless they asked for a fancy diff.
        if not optionflags & (REPORT_UDIFF |
                              REPORT_CDIFF |
                              REPORT_NDIFF):
            return False

        # If expected output uses ellipsis, a meaningful fancy diff is
        # too hard ... or maybe not.  In two real-life failures Tim saw,
        # a diff was a major help anyway, so this is commented out.
        # [todo] _ellipsis_match() knows which pieces do and don't match,
        # and could be the basis for a kick-ass diff in this case.
        ##if optionflags & ELLIPSIS and ELLIPSIS_MARKER in want:
        ##    return False

        # ndiff does intraline difference marking, so can be useful even
        # for 1-line differences.
        if optionflags & REPORT_NDIFF:
            return True

        # The other diff types need at least a few lines to be helpful.
        return want.count('\n') > 2 and got.count('\n') > 2

    def output_difference(self, example, got, optionflags):
        """
        Return a string describing the differences between the
        expected output for a given example (`example`) and the actual
        output (`got`).  `optionflags` is the set of option flags used
        to compare `want` and `got`.
        """
        want = example.want
        # If <BLANKLINE>s are being used, then replace blank lines
        # with <BLANKLINE> in the actual output string.
        if not (optionflags & DONT_ACCEPT_BLANKLINE):
            got = re.sub('(?m)^[ ]*(?=\n)', BLANKLINE_MARKER, got)

        # Check if we should use diff.
        if self._do_a_fancy_diff(want, got, optionflags):
            # Split want & got into lines.
            want_lines = want.splitlines(True)  # True == keep line ends
            got_lines = got.splitlines(True)
            # Use difflib to find their differences.
            if optionflags & REPORT_UDIFF:
                diff = difflib.unified_diff(want_lines, got_lines, n=2)
                diff = list(diff)[2:] # strip the diff header
                kind = 'unified diff with -expected +actual'
            elif optionflags & REPORT_CDIFF:
                diff = difflib.context_diff(want_lines, got_lines, n=2)
                diff = list(diff)[2:] # strip the diff header
                kind = 'context diff with expected followed by actual'
            elif optionflags & REPORT_NDIFF:
                engine = difflib.Differ(charjunk=difflib.IS_CHARACTER_JUNK)
                diff = list(engine.compare(want_lines, got_lines))
                kind = 'ndiff with -expected +actual'
            else:
                assert 0, 'Bad diff option'
            # Remove trailing whitespace on diff output.
            diff = [line.rstrip() + '\n' for line in diff]
            return 'Differences (%s):\n' % kind + _indent(''.join(diff))

        # If we're not using diff, then simply list the expected
        # output followed by the actual output.
        if want and got:
            return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got))
        elif want:
            return 'Expected:\n%sGot nothing\n' % _indent(want)
        elif got:
            return 'Expected nothing\nGot:\n%s' % _indent(got)
        else:
            return 'Expected nothing\nGot nothing\n'
+
class DocTestFailure(Exception):
    """Raised (in debugging mode) when a doctest example's actual output
    does not match its expected output.

    Instance attributes:

      - test:    the DocTest object being run
      - example: the Example object that failed
      - got:     the actual output
    """
    def __init__(self, test, example, got):
        # Keep the full failing context available to the caller.
        self.test, self.example, self.got = test, example, got

    def __str__(self):
        # Identify the failure by the DocTest it came from.
        return str(self.test)
+
class UnexpectedException(Exception):
    """Raised (in debugging mode) when a doctest example raises an
    exception that was not expected.

    Instance attributes:

      - test:     the DocTest object being run
      - example:  the Example object that failed
      - exc_info: the exception info
    """
    def __init__(self, test, example, exc_info):
        # Keep the full failing context available to the caller.
        self.test, self.example, self.exc_info = test, example, exc_info

    def __str__(self):
        # Identify the failure by the DocTest it came from.
        return str(self.test)
+
class DebugRunner(DocTestRunner):
    r"""Run doc tests but raise an exception as soon as there is a failure.

    If an unexpected exception occurs, an UnexpectedException is raised.
    It contains the test, the example, and the original exception:

      >>> runner = DebugRunner(verbose=False)
      >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42',
      ...                                    {}, 'foo', 'foo.py', 0)
      >>> try:
      ...     runner.run(test)
      ... except UnexpectedException, failure:
      ...     pass

      >>> failure.test is test
      True

      >>> failure.example.want
      '42\n'

      >>> exc_info = failure.exc_info
      >>> raise exc_info[0], exc_info[1], exc_info[2]
      Traceback (most recent call last):
      ...
      KeyError

    We wrap the original exception to give the calling application
    access to the test and example information.

    If the output doesn't match, then a DocTestFailure is raised:

      >>> test = DocTestParser().get_doctest('''
      ...     >>> x = 1
      ...     >>> x
      ...     2
      ...     ''', {}, 'foo', 'foo.py', 0)

      >>> try:
      ...     runner.run(test)
      ... except DocTestFailure, failure:
      ...     pass

    DocTestFailure objects provide access to the test:

      >>> failure.test is test
      True

    As well as to the example:

      >>> failure.example.want
      '2\n'

    and the actual output:

      >>> failure.got
      '1\n'

    If a failure or error occurs, the globals are left intact:

      >>> del test.globs['__builtins__']
      >>> test.globs
      {'x': 1}

      >>> test = DocTestParser().get_doctest('''
      ...     >>> x = 2
      ...     >>> raise KeyError
      ...     ''', {}, 'foo', 'foo.py', 0)

      >>> runner.run(test)
      Traceback (most recent call last):
      ...
      UnexpectedException: <DocTest foo from foo.py:0 (2 examples)>

      >>> del test.globs['__builtins__']
      >>> test.globs
      {'x': 2}

    But the globals are cleared if there is no error:

      >>> test = DocTestParser().get_doctest('''
      ...     >>> x = 2
      ...     ''', {}, 'foo', 'foo.py', 0)

      >>> runner.run(test)
      (0, 1)

      >>> test.globs
      {}

    """

    def run(self, test, compileflags=None, out=None, clear_globs=True):
        # Run with clear_globs=False so the globals survive a raised
        # DocTestFailure/UnexpectedException; clear them ourselves only
        # when the run completed without raising.
        r = DocTestRunner.run(self, test, compileflags, out, False)
        if clear_globs:
            test.globs.clear()
        return r

    def report_unexpected_exception(self, out, test, example, exc_info):
        # Abort the run immediately instead of recording the error.
        raise UnexpectedException(test, example, exc_info)

    def report_failure(self, out, test, example, got):
        # Abort the run immediately instead of recording the failure.
        raise DocTestFailure(test, example, got)
+
+######################################################################
+## 6. Test Functions
+######################################################################
+# These should be backwards compatible.
+
+# For backward compatibility, a global instance of a DocTestRunner
+# class, updated by testmod.
+master = None
+
def testmod(m=None, name=None, globs=None, verbose=None,
            report=True, optionflags=0, extraglobs=None,
            raise_on_error=False, exclude_empty=False):
    """m=None, name=None, globs=None, verbose=None, report=True,
       optionflags=0, extraglobs=None, raise_on_error=False,
       exclude_empty=False

    Test examples in docstrings in functions and classes reachable
    from module m (or the current module if m is not supplied), starting
    with m.__doc__.

    Also test examples reachable from dict m.__test__ if it exists and is
    not None.  m.__test__ maps names to functions, classes and strings;
    function and class docstrings are tested even if the name is private;
    strings are tested directly, as if they were docstrings.

    Return (#failures, #tests).

    See doctest.__doc__ for an overview.

    Optional keyword arg "name" gives the name of the module; by default
    use m.__name__.

    Optional keyword arg "globs" gives a dict to be used as the globals
    when executing examples; by default, use m.__dict__.  A copy of this
    dict is actually used for each docstring, so that each docstring's
    examples start with a clean slate.

    Optional keyword arg "extraglobs" gives a dictionary that should be
    merged into the globals that are used to execute examples.  By
    default, no extra globals are used.  This is new in 2.4.

    Optional keyword arg "verbose" prints lots of stuff if true, prints
    only failures if false; by default, it's true iff "-v" is in sys.argv.

    Optional keyword arg "report" prints a summary at the end when true,
    else prints nothing at the end.  In verbose mode, the summary is
    detailed, else very brief (in fact, empty if all tests passed).

    Optional keyword arg "optionflags" or's together module constants,
    and defaults to 0.  This is new in 2.3.  Possible values (see the
    docs for details):

        DONT_ACCEPT_TRUE_FOR_1
        DONT_ACCEPT_BLANKLINE
        NORMALIZE_WHITESPACE
        ELLIPSIS
        SKIP
        IGNORE_EXCEPTION_DETAIL
        REPORT_UDIFF
        REPORT_CDIFF
        REPORT_NDIFF
        REPORT_ONLY_FIRST_FAILURE

    Optional keyword arg "raise_on_error" raises an exception on the
    first unexpected exception or failure.  This allows failures to be
    post-mortem debugged.

    Advanced tomfoolery:  testmod runs methods of a local instance of
    class doctest.Tester, then merges the results into (or creates)
    global Tester instance doctest.master.  Methods of doctest.master
    can be called directly too, if you want to do something unusual.
    Passing report=0 to testmod is especially useful then, to delay
    displaying a summary.  Invoke doctest.master.summarize(verbose)
    when you're done fiddling.
    """
    global master

    # If no module was given, then use __main__.
    if m is None:
        # DWA - m will still be None if this wasn't invoked from the command
        # line, in which case the following TypeError is about as good an error
        # as we should expect
        m = sys.modules.get('__main__')

    # Check that we were actually given a module.
    if not inspect.ismodule(m):
        raise TypeError("testmod: module required; %r" % (m,))

    # If no name was given, then use the module's name.
    if name is None:
        name = m.__name__

    # Find, parse, and run all tests in the given module.
    finder = DocTestFinder(exclude_empty=exclude_empty)

    # raise_on_error selects the runner that aborts on the first problem.
    if raise_on_error:
        runner = DebugRunner(verbose=verbose, optionflags=optionflags)
    else:
        runner = DocTestRunner(verbose=verbose, optionflags=optionflags)

    for test in finder.find(m, name, globs=globs, extraglobs=extraglobs):
        runner.run(test)

    if report:
        runner.summarize()

    # Maintain the backward-compatible global accumulator.
    if master is None:
        master = runner
    else:
        master.merge(runner)

    return runner.failures, runner.tries
+
def testfile(filename, module_relative=True, name=None, package=None,
             globs=None, verbose=None, report=True, optionflags=0,
             extraglobs=None, raise_on_error=False, parser=DocTestParser(),
             encoding=None):
    # NOTE(review): the default `parser` instance is created once at
    # definition time and shared across calls (matches upstream CPython).
    """
    Test examples in the given file.  Return (#failures, #tests).

    Optional keyword arg "module_relative" specifies how filenames
    should be interpreted:

      - If "module_relative" is True (the default), then "filename"
        specifies a module-relative path.  By default, this path is
        relative to the calling module's directory; but if the
        "package" argument is specified, then it is relative to that
        package.  To ensure os-independence, "filename" should use
        "/" characters to separate path segments, and should not
        be an absolute path (i.e., it may not begin with "/").

      - If "module_relative" is False, then "filename" specifies an
        os-specific path.  The path may be absolute or relative (to
        the current working directory).

    Optional keyword arg "name" gives the name of the test; by default
    use the file's basename.

    Optional keyword argument "package" is a Python package or the
    name of a Python package whose directory should be used as the
    base directory for a module relative filename.  If no package is
    specified, then the calling module's directory is used as the base
    directory for module relative filenames.  It is an error to
    specify "package" if "module_relative" is False.

    Optional keyword arg "globs" gives a dict to be used as the globals
    when executing examples; by default, use {}.  A copy of this dict
    is actually used for each docstring, so that each docstring's
    examples start with a clean slate.

    Optional keyword arg "extraglobs" gives a dictionary that should be
    merged into the globals that are used to execute examples.  By
    default, no extra globals are used.

    Optional keyword arg "verbose" prints lots of stuff if true, prints
    only failures if false; by default, it's true iff "-v" is in sys.argv.

    Optional keyword arg "report" prints a summary at the end when true,
    else prints nothing at the end.  In verbose mode, the summary is
    detailed, else very brief (in fact, empty if all tests passed).

    Optional keyword arg "optionflags" or's together module constants,
    and defaults to 0.  Possible values (see the docs for details):

        DONT_ACCEPT_TRUE_FOR_1
        DONT_ACCEPT_BLANKLINE
        NORMALIZE_WHITESPACE
        ELLIPSIS
        SKIP
        IGNORE_EXCEPTION_DETAIL
        REPORT_UDIFF
        REPORT_CDIFF
        REPORT_NDIFF
        REPORT_ONLY_FIRST_FAILURE

    Optional keyword arg "raise_on_error" raises an exception on the
    first unexpected exception or failure.  This allows failures to be
    post-mortem debugged.

    Optional keyword arg "parser" specifies a DocTestParser (or
    subclass) that should be used to extract tests from the files.

    Optional keyword arg "encoding" specifies an encoding that should
    be used to convert the file to unicode.

    Advanced tomfoolery:  testmod runs methods of a local instance of
    class doctest.Tester, then merges the results into (or creates)
    global Tester instance doctest.master.  Methods of doctest.master
    can be called directly too, if you want to do something unusual.
    Passing report=0 to testmod is especially useful then, to delay
    displaying a summary.  Invoke doctest.master.summarize(verbose)
    when you're done fiddling.
    """
    global master

    if package and not module_relative:
        raise ValueError("Package may only be specified for module-"
                         "relative paths.")

    # Relativize the path
    text, filename = _load_testfile(filename, package, module_relative)

    # If no name was given, then use the file's name.
    if name is None:
        name = os.path.basename(filename)

    # Assemble the globals.
    if globs is None:
        globs = {}
    else:
        # Copy so the caller's dict is not mutated by the test run.
        globs = globs.copy()
    if extraglobs is not None:
        globs.update(extraglobs)

    # raise_on_error selects the runner that aborts on the first problem.
    if raise_on_error:
        runner = DebugRunner(verbose=verbose, optionflags=optionflags)
    else:
        runner = DocTestRunner(verbose=verbose, optionflags=optionflags)

    if encoding is not None:
        text = text.decode(encoding)

    # Read the file, convert it to a test, and run it.
    test = parser.get_doctest(text, globs, name, filename, 0)
    runner.run(test)

    if report:
        runner.summarize()

    # Maintain the backward-compatible global accumulator.
    if master is None:
        master = runner
    else:
        master.merge(runner)

    return runner.failures, runner.tries
+
def run_docstring_examples(f, globs, verbose=False, name="NoName",
                           compileflags=None, optionflags=0):
    """Run the doctest examples found in the docstring of `f`.

    `globs` supplies the globals the examples execute in.  `name` is
    used in failure messages, and a true `verbose` produces output even
    when every example passes.

    `compileflags` gives the flags used by the Python compiler when
    running the examples; when None, the future-import flags that apply
    to `globs` are used instead.

    `optionflags` selects testing/output options, as for `testmod`.
    """
    # Look only at `f` itself -- do not recurse into contained objects.
    doctest_finder = DocTestFinder(verbose=verbose, recurse=False)
    doctest_runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
    for one_test in doctest_finder.find(f, name, globs=globs):
        doctest_runner.run(one_test, compileflags=compileflags)
+
+######################################################################
+## 7. Tester
+######################################################################
+# This is provided only for backwards compatibility. It's not
+# actually used in any way.
+
class Tester:
    # Backwards-compatibility shim only; nothing in this module uses it.
    # It forwards everything to DocTestFinder/DocTestRunner (presumably
    # defined earlier in this file) and warns on instantiation.

    def __init__(self, mod=None, globs=None, verbose=None, optionflags=0):

        warnings.warn("class Tester is deprecated; "
                      "use class doctest.DocTestRunner instead",
                      DeprecationWarning, stacklevel=2)
        if mod is None and globs is None:
            raise TypeError("Tester.__init__: must specify mod or globs")
        if mod is not None and not inspect.ismodule(mod):
            raise TypeError("Tester.__init__: mod must be a module; %r" %
                            (mod,))
        if globs is None:
            globs = mod.__dict__
        self.globs = globs

        self.verbose = verbose
        self.optionflags = optionflags
        self.testfinder = DocTestFinder()
        self.testrunner = DocTestRunner(verbose=verbose,
                                        optionflags=optionflags)

    def runstring(self, s, name):
        # Parse `s` as a doctest and run it; returns (#failures, #tries).
        test = DocTestParser().get_doctest(s, self.globs, name, None, None)
        if self.verbose:
            print "Running string", name
        (f,t) = self.testrunner.run(test)
        if self.verbose:
            print f, "of", t, "examples failed in string", name
        return (f,t)

    def rundoc(self, object, name=None, module=None):
        # Run every doctest found in `object`; returns the summed
        # (#failures, #tries) over all of them.
        f = t = 0
        tests = self.testfinder.find(object, name, module=module,
                                     globs=self.globs)
        for test in tests:
            (f2, t2) = self.testrunner.run(test)
            (f,t) = (f+f2, t+t2)
        return (f,t)

    def rundict(self, d, name, module=None):
        import new
        # Wrap the dict in a synthetic module so the finder can scan it.
        m = new.module(name)
        m.__dict__.update(d)
        # module=False asks the finder not to filter objects by owning
        # module (None would mean "guess the module from the stack").
        if module is None:
            module = False
        return self.rundoc(m, name, module)

    def run__test__(self, d, name):
        import new
        # Expose `d` as a module-level __test__ mapping for the finder.
        m = new.module(name)
        m.__test__ = d
        return self.rundoc(m, name)

    def summarize(self, verbose=None):
        return self.testrunner.summarize(verbose)

    def merge(self, other):
        self.testrunner.merge(other.testrunner)
+
+######################################################################
+## 8. Unittest Support
+######################################################################
+
# Default reporting flags or'ed into any DocTestCase whose own option
# flags contain no REPORT_* bits (see DocTestCase.runTest).
_unittest_reportflags = 0

def set_unittest_reportflags(flags):
    """Sets the unittest option flags.

    The old flag is returned so that a runner could restore the old
    value if it wished to:

    >>> import doctest
    >>> old = doctest._unittest_reportflags
    >>> doctest.set_unittest_reportflags(REPORT_NDIFF |
    ...                                  REPORT_ONLY_FIRST_FAILURE) == old
    True

    >>> doctest._unittest_reportflags == (REPORT_NDIFF |
    ...                                   REPORT_ONLY_FIRST_FAILURE)
    True

    Only reporting flags can be set:

    >>> doctest.set_unittest_reportflags(ELLIPSIS)
    Traceback (most recent call last):
    ...
    ValueError: ('Only reporting flags allowed', 8)

    >>> doctest.set_unittest_reportflags(old) == (REPORT_NDIFF |
    ...                                   REPORT_ONLY_FIRST_FAILURE)
    True
    """
    global _unittest_reportflags

    # Reject anything outside the REPORT_* subset: the other doctest
    # option flags affect example matching, not reporting.
    if (flags & REPORTING_FLAGS) != flags:
        raise ValueError("Only reporting flags allowed", flags)
    old = _unittest_reportflags
    _unittest_reportflags = flags
    return old
+
+
class DocTestCase(unittest.TestCase):
    # Adapts one DocTest to the unittest.TestCase interface so doctest
    # failures surface through unittest reporting.

    def __init__(self, test, optionflags=0, setUp=None, tearDown=None,
                 checker=None):
        # `test`: the DocTest to run.  `setUp`/`tearDown`: optional
        # hooks, called with the DocTest object.  `checker`: optional
        # output checker passed through to the runner.
        unittest.TestCase.__init__(self)
        self._dt_optionflags = optionflags
        self._dt_checker = checker
        self._dt_test = test
        self._dt_setUp = setUp
        self._dt_tearDown = tearDown

    def setUp(self):
        test = self._dt_test

        if self._dt_setUp is not None:
            self._dt_setUp(test)

    def tearDown(self):
        test = self._dt_test

        if self._dt_tearDown is not None:
            self._dt_tearDown(test)

        # Drop whatever the examples left in the test namespace.
        test.globs.clear()

    def runTest(self):
        test = self._dt_test
        old = sys.stdout          # defensive save; restored in `finally`
        new = StringIO()
        optionflags = self._dt_optionflags

        if not (optionflags & REPORTING_FLAGS):
            # The option flags don't include any reporting flags,
            # so add the default reporting flags
            optionflags |= _unittest_reportflags

        runner = DocTestRunner(optionflags=optionflags,
                               checker=self._dt_checker, verbose=False)

        try:
            runner.DIVIDER = "-"*70
            # Collect the runner's report in `new` instead of stdout.
            failures, tries = runner.run(
                test, out=new.write, clear_globs=False)
        finally:
            sys.stdout = old

        if failures:
            raise self.failureException(self.format_failure(new.getvalue()))

    def format_failure(self, err):
        # Build the unittest failure message: test identity plus the
        # captured doctest report in `err`.
        test = self._dt_test
        if test.lineno is None:
            lineno = 'unknown line number'
        else:
            lineno = '%s' % test.lineno
        lname = '.'.join(test.name.split('.')[-1:])
        return ('Failed doctest test for %s\n'
                '  File "%s", line %s, in %s\n\n%s'
                % (test.name, test.filename, lineno, lname, err)
                )

    def debug(self):
        r"""Run the test case without results and without catching exceptions

        The unit test framework includes a debug method on test cases
        and test suites to support post-mortem debugging.  The test code
        is run in such a way that errors are not caught.  This way a
        caller can catch the errors and initiate post-mortem debugging.

        The DocTestCase provides a debug method that raises
        UnexpectedException errors if there is an unexpected
        exception:

        >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42',
        ...                                    {}, 'foo', 'foo.py', 0)
        >>> case = DocTestCase(test)
        >>> try:
        ...     case.debug()
        ... except UnexpectedException, failure:
        ...     pass

        The UnexpectedException contains the test, the example, and
        the original exception:

        >>> failure.test is test
        True

        >>> failure.example.want
        '42\n'

        >>> exc_info = failure.exc_info
        >>> raise exc_info[0], exc_info[1], exc_info[2]
        Traceback (most recent call last):
        ...
        KeyError

        If the output doesn't match, then a DocTestFailure is raised:

        >>> test = DocTestParser().get_doctest('''
        ...      >>> x = 1
        ...      >>> x
        ...      2
        ...      ''', {}, 'foo', 'foo.py', 0)
        >>> case = DocTestCase(test)

        >>> try:
        ...     case.debug()
        ... except DocTestFailure, failure:
        ...     pass

        DocTestFailure objects provide access to the test:

        >>> failure.test is test
        True

        As well as to the example:

        >>> failure.example.want
        '2\n'

        and the actual output:

        >>> failure.got
        '1\n'

        """

        self.setUp()
        # DebugRunner raises on first failure/unexpected exception
        # instead of reporting, so errors reach the caller uncaught.
        runner = DebugRunner(optionflags=self._dt_optionflags,
                             checker=self._dt_checker, verbose=False)
        runner.run(self._dt_test)
        self.tearDown()

    def id(self):
        return self._dt_test.name

    def __repr__(self):
        name = self._dt_test.name.split('.')
        return "%s (%s)" % (name[-1], '.'.join(name[:-1]))

    __str__ = __repr__

    def shortDescription(self):
        return "Doctest: " + self._dt_test.name
+
def DocTestSuite(module=None, globs=None, extraglobs=None, test_finder=None,
                 **options):
    """
    Convert doctest tests for a module to a unittest test suite.

    Every docstring in `module` that holds doctest examples becomes one
    unittest test case; the case fails if any example in that docstring
    fails, and the failure names the file and a (sometimes approximate)
    line number.

    `module` may be a module object or a module name; when omitted, the
    calling module is used.

    Keyword options:

    setUp, tearDown
      Functions called before/after running the tests in each docstring,
      with the DocTest object as argument; the test globals are reachable
      through its `globs` attribute.

    globs
      A dictionary containing initial global variables for the tests.

    optionflags
      A set of doctest option flags expressed as an integer.
    """

    finder = test_finder if test_finder is not None else DocTestFinder()

    module = _normalize_module(module)
    tests = finder.find(module, globs=globs, extraglobs=extraglobs)
    if globs is None:
        globs = module.__dict__
    if not tests:
        # An empty suite would silently hide a bug (e.g. a module whose
        # docstrings were all stripped), so complain loudly instead.
        raise ValueError(module, "has no tests")

    tests.sort()
    suite = unittest.TestSuite()
    for test in tests:
        if not test.examples:
            continue
        if not test.filename:
            filename = module.__file__
            # Point at the .py source rather than a compiled file.
            if filename.endswith((".pyc", ".pyo")):
                filename = filename[:-1]
            test.filename = filename
        suite.addTest(DocTestCase(test, **options))

    return suite
+
class DocFileCase(DocTestCase):
    # A DocTestCase for tests loaded from a text file rather than a
    # docstring; it identifies itself by file path instead of a dotted
    # test name.

    def id(self):
        return self._dt_test.name.replace('.', '_')

    def __repr__(self):
        return self._dt_test.filename
    __str__ = __repr__

    def format_failure(self, err):
        failed = self._dt_test
        return ('Failed doctest test for %s\n  File "%s", line 0\n\n%s'
                % (failed.name, failed.filename, err))
+
def DocFileTest(path, module_relative=True, package=None,
                globs=None, parser=DocTestParser(),
                encoding=None, **options):
    """Build a DocFileCase for the doctests in the file at `path`."""
    globs = {} if globs is None else globs.copy()

    if package and not module_relative:
        raise ValueError("Package may only be specified for module-"
                         "relative paths.")

    # Relativize the path and read the file.
    doc, path = _load_testfile(path, package, module_relative)

    # Expose the file's own path to the examples, unless overridden.
    globs.setdefault("__file__", path)

    # The test is named after the file.
    name = os.path.basename(path)

    # Decode to unicode when an encoding was supplied.
    if encoding is not None:
        doc = doc.decode(encoding)

    # Turn the text into a test and wrap it in a DocFileCase.
    test = parser.get_doctest(doc, globs, name, path, 0)
    return DocFileCase(test, **options)
+
def DocFileSuite(*paths, **kw):
    """A unittest suite for one or more doctest files.

    Each positional argument names a doctest file; how that name is
    interpreted depends on the keyword argument "module_relative".

    Keyword options:

    module_relative
      When true (the default), paths are os-independent module-relative
      paths: they use "/" to separate segments, may not be absolute, and
      are taken relative to the calling module's directory (or to
      "package" when that is given).  When false, paths are os-specific
      and may be absolute or relative to the current working directory.

    package
      A Python package, or package name, whose directory is the base for
      module-relative paths.  It is an error to specify "package" when
      "module_relative" is false.

    setUp, tearDown
      Functions called before/after running the tests in each file, with
      the DocTest object as argument; the test globals are reachable
      through its `globs` attribute.

    globs
      A dictionary containing initial global variables for the tests.

    optionflags
      A set of doctest option flags expressed as an integer.

    parser
      A DocTestParser (or subclass) used to extract tests from the files.

    encoding
      An encoding used to convert the files to unicode.
    """
    # Resolve the package at this level, not in DocFileTest: guessing
    # the caller's package walks the stack, so the depth must be right.
    if kw.get('module_relative', True):
        kw['package'] = _normalize_module(kw.get('package'))

    suite = unittest.TestSuite()
    for one_path in paths:
        suite.addTest(DocFileTest(one_path, **kw))
    return suite
+
+######################################################################
+## 9. Debugging Support
+######################################################################
+
def script_from_examples(s):
    r"""Extract script from text with examples.

    Converts text with examples to a Python script.  Example input is
    converted to regular code.  Example output and all other words
    are converted to comments:

    >>> text = '''
    ...       Here are examples of simple math.
    ...
    ...           Python has super accurate integer addition
    ...
    ...           >>> 2 + 2
    ...           5
    ...
    ...           And very friendly error messages:
    ...
    ...           >>> 1/0
    ...           To Infinity
    ...           And
    ...           Beyond
    ...
    ...           You can use logic if you want:
    ...
    ...           >>> if 0:
    ...           ...    blah
    ...           ...    blah
    ...           ...
    ...
    ...           Ho hum
    ...           '''

    >>> print script_from_examples(text)
    # Here are examples of simple math.
    #
    #     Python has super accurate integer addition
    #
    2 + 2
    # Expected:
    ## 5
    #
    #     And very friendly error messages:
    #
    1/0
    # Expected:
    ## To Infinity
    ## And
    ## Beyond
    #
    #     You can use logic if you want:
    #
    if 0:
       blah
       blah
    #
    # Ho hum
    <BLANKLINE>
    """
    output = []
    # The parser yields Example objects interleaved with plain-text
    # strings, in document order.
    for piece in DocTestParser().parse(s):
        if isinstance(piece, Example):
            # Add the example's source code (strip trailing NL)
            output.append(piece.source[:-1])
            # Add the expected output:
            want = piece.want
            if want:
                output.append('# Expected:')
                output += ['## '+l for l in want.split('\n')[:-1]]
        else:
            # Add non-example text.
            output += [_comment_line(l)
                       for l in piece.split('\n')[:-1]]

    # Trim junk on both ends.
    while output and output[-1] == '#':
        output.pop()
    while output and output[0] == '#':
        output.pop(0)
    # Combine the output, and return it.
    # Add a courtesy newline to prevent exec from choking (see bug #1172785)
    return '\n'.join(output) + '\n'
+
def testsource(module, name):
    """Extract the test sources from a doctest docstring as a script.

    `module` is the module (or dotted module name) containing the test
    to be debugged; `name` is the name, within that module, of the
    object whose docstring holds the tests.
    """
    module = _normalize_module(module)
    matches = [t for t in DocTestFinder().find(module) if t.name == name]
    if not matches:
        raise ValueError(name, "not found in tests")
    return script_from_examples(matches[0].docstring)
+
def debug_src(src, pm=False, globs=None):
    """Debug a single doctest docstring, given as the string `src`.

    `pm` and `globs` are passed straight through to debug_script."""
    testsrc = script_from_examples(src)
    debug_script(testsrc, pm, globs)
+
+def debug_script(src, pm=False, globs=None):
+ "Debug a test script. `src` is the script, as a string."
+ import pdb
+
+ # Note that tempfile.NameTemporaryFile() cannot be used. As the
+ # docs say, a file so created cannot be opened by name a second time
+ # on modern Windows boxes, and execfile() needs to open it.
+ srcfilename = tempfile.mktemp(".py", "doctestdebug")
+ f = open(srcfilename, 'w')
+ f.write(src)
+ f.close()
+
+ try:
+ if globs:
+ globs = globs.copy()
+ else:
+ globs = {}
+
+ if pm:
+ try:
+ execfile(srcfilename, globs, globs)
+ except:
+ print sys.exc_info()[1]
+ pdb.post_mortem(sys.exc_info()[2])
+ else:
+ # Note that %r is vital here. '%s' instead can, e.g., cause
+ # backslashes to get treated as metacharacters on Windows.
+ pdb.run("execfile(%r)" % srcfilename, globs, globs)
+
+ finally:
+ os.remove(srcfilename)
+
def debug(module, name, pm=False):
    """Debug a single doctest docstring.

    Provide the module (or dotted name of the module) containing the
    test to be debugged and the name (within the module) of the object
    with the docstring with tests to be debugged.
    """
    module = _normalize_module(module)
    testsrc = testsource(module, name)
    # Run with the module's own namespace so examples see its globals.
    debug_script(testsrc, pm, module.__dict__)
+
+######################################################################
+## 10. Example Usage
+######################################################################
class _TestClass:
    """
    A pointless class, for sanity-checking of docstring testing.

    Methods:
        square()
        get()

    >>> _TestClass(13).get() + _TestClass(-12).get()
    1
    >>> hex(_TestClass(13).square().get())
    '0xa9'
    """
    # NOTE: the docstrings below are executable doctests exercised by
    # this module's own test suite -- their exact text matters.

    def __init__(self, val):
        """val -> _TestClass object with associated value val.

        >>> t = _TestClass(123)
        >>> print t.get()
        123
        """

        self.val = val

    def square(self):
        """square() -> square TestClass's associated value

        >>> _TestClass(13).square().get()
        169
        """

        # Returns self so calls can be chained, as in the class doctest.
        self.val = self.val ** 2
        return self

    def get(self):
        """get() -> return TestClass's associated value.

        >>> x = _TestClass(-42)
        >>> print x.get()
        -42
        """

        return self.val
+
# Extra self-tests picked up by testmod()/DocTestSuite() through the
# module-level __test__ mapping; keys become test names, and string
# values are searched for doctest examples as-is.
__test__ = {"_TestClass": _TestClass,
            "string": r"""
                      Example of a string object, searched as-is.
                      >>> x = 1; y = 2
                      >>> x + y, x * y
                      (3, 2)
                      """,

            "bool-int equivalence": r"""
                      In 2.2, boolean expressions displayed
                      0 or 1.  By default, we still accept
                      them.  This can be disabled by passing
                      DONT_ACCEPT_TRUE_FOR_1 to the new
                      optionflags argument.
                      >>> 4 == 4
                      1
                      >>> 4 == 4
                      True
                      >>> 4 > 4
                      0
                      >>> 4 > 4
                      False
                      """,

            "blank lines": r"""
                      Blank lines can be marked with <BLANKLINE>:
                      >>> print 'foo\n\nbar\n'
                      foo
                      <BLANKLINE>
                      bar
                      <BLANKLINE>
                      """,

            "ellipsis": r"""
                      If the ellipsis flag is used, then '...' can be used to
                      elide substrings in the desired output:
                      >>> print range(1000) #doctest: +ELLIPSIS
                      [0, 1, 2, ..., 999]
                      """,

            "whitespace normalization": r"""
                      If the whitespace normalization flag is used, then
                      differences in whitespace are ignored.
                      >>> print range(30) #doctest: +NORMALIZE_WHITESPACE
                      [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
                      27, 28, 29]
                      """,
           }
+
def _test():
    # Run this module's own doctests (docstrings plus __test__) via the
    # unittest integration.
    text_runner = unittest.TextTestRunner()
    text_runner.run(DocTestSuite())

if __name__ == "__main__":
    _test()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/inspect.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/inspect.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,897 @@
+# -*- coding: iso-8859-1 -*-
+"""Get useful information from live Python objects.
+
+This module encapsulates the interface provided by the internal special
+attributes (func_*, co_*, im_*, tb_*, etc.) in a friendlier fashion.
+It also provides some help for examining source code and class layout.
+
+Here are some of the useful functions provided by this module:
+
+ ismodule(), isclass(), ismethod(), isfunction(), istraceback(),
+ isframe(), iscode(), isbuiltin(), isroutine() - check object types
+ getmembers() - get members of an object that satisfy a given condition
+
+ getfile(), getsourcefile(), getsource() - find an object's source code
+ getdoc(), getcomments() - get documentation on an object
+ getmodule() - determine the module that an object came from
+ getclasstree() - arrange classes so as to represent their hierarchy
+
+ getargspec(), getargvalues() - get info about function arguments
+ formatargspec(), formatargvalues() - format an argument spec
+ getouterframes(), getinnerframes() - get info about frames
+ currentframe() - get the current stack frame
+ stack(), trace() - get info about frames on the stack or in a traceback
+"""
+
+# This module is in the public domain. No warranties.
+
+__author__ = 'Ka-Ping Yee <ping at lfw.org>'
+__date__ = '1 Jan 2001'
+
+import sys, os, types, string, re, dis, imp, tokenize, linecache
+from operator import attrgetter
+
+# ----------------------------------------------------------- type-checking
def ismodule(object):
    """True when `object` is a module.

    Module objects carry these attributes:
        __doc__         documentation string
        __file__        filename (missing for built-in modules)"""
    return isinstance(object, types.ModuleType)
+
def isclass(object):
    """Return true if the object is a class.

    Class objects provide these attributes:
        __doc__         documentation string
        __module__      name of module in which this class was defined"""
    # Old-style classes are instances of types.ClassType; everything
    # else (new-style classes, extension types) is duck-typed by the
    # presence of __bases__.
    return isinstance(object, types.ClassType) or hasattr(object, '__bases__')
+
def ismethod(object):
    """True when `object` is an instance method.

    Instance method objects carry these attributes:
        __doc__         documentation string
        __name__        name with which this method was defined
        im_class        class object in which this method belongs
        im_func         function object with the method's implementation
        im_self         instance the method is bound to, or None"""
    return isinstance(object, types.MethodType)
+
def ismethoddescriptor(object):
    """True when `object` is a method descriptor, but not if ismethod(),
    isclass() or isfunction() are true.

    This is new in Python 2.2, and, for example, is true of int.__add__.
    An object passing this test has a __get__ attribute but not a
    __set__ attribute; beyond that the set of attributes varies
    (__name__ is usually sensible, and __doc__ often is).

    Methods implemented via descriptors that also pass one of the other
    tests return false here, simply because the other tests promise
    more -- e.g. you can count on the im_func attribute when an object
    passes ismethod()."""
    # Mutual exclusion: the more specific tests take precedence.
    if ismethod(object) or isclass(object) or isfunction(object):
        return False
    # Data descriptors also define __set__; method descriptors do not.
    return hasattr(object, "__get__") and not hasattr(object, "__set__")
+
def isdatadescriptor(object):
    """True when `object` is a data descriptor.

    Data descriptors have both a __get__ and a __set__ attribute.
    Examples are properties (defined in Python) and getsets and members
    (defined in C).  Such objects typically also carry __name__ and
    __doc__, but this is not guaranteed."""
    # Both protocol slots must be present; __get__ alone makes only a
    # non-data descriptor.
    if not hasattr(object, "__set__"):
        return False
    return hasattr(object, "__get__")
+
if hasattr(types, 'MemberDescriptorType'):
    # CPython and equivalent implementations expose the concrete type.
    def ismemberdescriptor(object):
        """True when `object` is a member descriptor (a specialized
        descriptor defined in an extension module)."""
        return isinstance(object, types.MemberDescriptorType)
else:
    # Implementations without that type can never produce one.
    def ismemberdescriptor(object):
        """True when `object` is a member descriptor (a specialized
        descriptor defined in an extension module)."""
        return False
+
if hasattr(types, 'GetSetDescriptorType'):
    # CPython and equivalent implementations expose the concrete type.
    def isgetsetdescriptor(object):
        """True when `object` is a getset descriptor (a specialized
        descriptor defined in an extension module)."""
        return isinstance(object, types.GetSetDescriptorType)
else:
    # Implementations without that type can never produce one.
    def isgetsetdescriptor(object):
        """True when `object` is a getset descriptor (a specialized
        descriptor defined in an extension module)."""
        return False
+
def isfunction(object):
    """True when `object` is a user-defined (Python-level) function.

    Function objects carry these attributes:
        __doc__         documentation string
        __name__        name with which this function was defined
        func_code       code object with the compiled bytecode
        func_defaults   tuple of any default argument values
        func_doc        (same as __doc__)
        func_globals    global namespace the function was defined in
        func_name       (same as __name__)"""
    return isinstance(object, types.FunctionType)
+
def istraceback(object):
    """True when `object` is a traceback.

    Traceback objects carry these attributes:
        tb_frame        frame object at this level
        tb_lasti        index of last attempted instruction in bytecode
        tb_lineno       current line number in Python source code
        tb_next         next inner traceback object (called by this level)"""
    return isinstance(object, types.TracebackType)
+
def isframe(object):
    """True when `object` is a frame object.

    Frame objects carry these attributes:
        f_back          next outer frame object (this frame's caller)
        f_builtins      built-in namespace seen by this frame
        f_code          code object being executed in this frame
        f_exc_traceback traceback if raised in this frame, or None
        f_exc_type      exception type if raised in this frame, or None
        f_exc_value     exception value if raised in this frame, or None
        f_globals       global namespace seen by this frame
        f_lasti         index of last attempted instruction in bytecode
        f_lineno        current line number in Python source code
        f_locals        local namespace seen by this frame
        f_restricted    0 or 1 if frame is in restricted execution mode
        f_trace         tracing function for this frame, or None"""
    return isinstance(object, types.FrameType)
+
def iscode(object):
    """True when `object` is a code object.

    Code objects carry these attributes:
        co_argcount     number of arguments (not including * or ** args)
        co_code         string of raw compiled bytecode
        co_consts       tuple of constants used in the bytecode
        co_filename     name of the file this code object was created in
        co_firstlineno  number of first line in Python source code
        co_flags        bitmap: 1=optimized | 2=newlocals | 4=*arg | 8=**arg
        co_lnotab       encoded mapping of line numbers to bytecode indices
        co_name         name with which this code object was defined
        co_names        tuple of names of local variables
        co_nlocals      number of local variables
        co_stacksize    virtual machine stack space required
        co_varnames     tuple of names of arguments and local variables"""
    return isinstance(object, types.CodeType)
+
def isbuiltin(object):
    """True when `object` is a built-in function or method.

    Built-in functions and methods carry these attributes:
        __doc__         documentation string
        __name__        original name of this function or method
        __self__        instance a method is bound to, or None"""
    return isinstance(object, types.BuiltinFunctionType)
+
def isroutine(object):
    """Return true if the object is any kind of function or method."""
    # Same checks, same order, as the individual predicates above.
    for check in (isbuiltin, isfunction, ismethod, ismethoddescriptor):
        if check(object):
            return True
    return False
+
def getmembers(object, predicate=None):
    """Return all members of an object as (name, value) pairs sorted by name.
    Optionally, only return members that satisfy a given predicate."""
    pairs = []
    for key in dir(object):
        value = getattr(object, key)
        # No predicate means "keep everything".
        if not predicate or predicate(value):
            pairs.append((key, value))
    return sorted(pairs)
+
def classify_class_attrs(cls):
    """Return list of attribute-descriptor tuples.

    For each name in dir(cls), the return list contains a 4-tuple
    with these elements:

        0. The name (a string).

        1. The kind of attribute this is, one of these strings:
               'class method'    created via classmethod()
               'static method'   created via staticmethod()
               'property'        created via property()
               'method'          any other flavor of method
               'data'            not a method

        2. The class which defined this attribute (a class).

        3. The object as obtained directly from the defining class's
           __dict__, not via getattr.  This is especially important for
           data attributes:  C.data is just a data object, but
           C.__dict__['data'] may be a data descriptor with additional
           info, like a __doc__ string.
    """

    mro = getmro(cls)
    names = dir(cls)
    result = []
    for name in names:
        # Get the object associated with the name.
        # Getting an obj from the __dict__ sometimes reveals more than
        # using getattr.  Static and class methods are dramatic examples.
        if name in cls.__dict__:
            obj = cls.__dict__[name]
        else:
            obj = getattr(cls, name)

        # Figure out where it was defined.
        homecls = getattr(obj, "__objclass__", None)
        if homecls is None:
            # search the dicts.
            for base in mro:
                if name in base.__dict__:
                    homecls = base
                    break
        # NOTE(review): homecls can still be None here (e.g. a name
        # reported by dir() but absent from every __dict__ in the MRO);
        # the result tuple then records None as the defining class.

        # Get the object again, in order to get it from the defining
        # __dict__ instead of via getattr (if possible).
        if homecls is not None and name in homecls.__dict__:
            obj = homecls.__dict__[name]

        # Also get the object via getattr.
        obj_via_getattr = getattr(cls, name)

        # Classify the object.  The raw __dict__ entry distinguishes
        # static/class methods and properties; the getattr view is what
        # reveals plain methods and C-level method descriptors.
        if isinstance(obj, staticmethod):
            kind = "static method"
        elif isinstance(obj, classmethod):
            kind = "class method"
        elif isinstance(obj, property):
            kind = "property"
        elif (ismethod(obj_via_getattr) or
              ismethoddescriptor(obj_via_getattr)):
            kind = "method"
        else:
            kind = "data"

        result.append((name, kind, homecls, obj))

    return result
+
+# ----------------------------------------------------------- class helpers
+def _searchbases(cls, accum):
+ # Simulate the "classic class" search order.
+ if cls in accum:
+ return
+ accum.append(cls)
+ for base in cls.__bases__:
+ _searchbases(base, accum)
+
def getmro(cls):
    "Return tuple of base classes (including cls) in method resolution order."
    # New-style classes publish their MRO directly.
    try:
        return cls.__mro__
    except AttributeError:
        pass
    # Old-style classes: emulate the classic depth-first search.
    collected = []
    _searchbases(cls, collected)
    return tuple(collected)
+
+# -------------------------------------------------- source code extraction
def indentsize(line):
    """Return the indent size, in spaces, at the start of a line of text."""
    # string-module functions (rather than str methods) keep this
    # working for both str and unicode under Python 2.
    expline = string.expandtabs(line)
    return len(expline) - len(string.lstrip(expline))
+
def getdoc(object):
    """Get the documentation string for an object.

    All tabs are expanded to spaces.  To clean up docstrings that are
    indented to line up with blocks of code, any whitespace that can be
    uniformly removed from the second line onwards is removed.
    Returns None when the object has no usable docstring."""
    try:
        doc = object.__doc__
    except AttributeError:
        return None
    if not isinstance(doc, types.StringTypes):
        return None
    try:
        lines = string.split(string.expandtabs(doc), '\n')
    except UnicodeError:
        # presumably from mixing 8-bit and unicode data -- give up.
        return None
    else:
        # Find minimum indentation of any non-blank lines after first line.
        margin = sys.maxint
        for line in lines[1:]:
            content = len(string.lstrip(line))
            if content:
                indent = len(line) - content
                margin = min(margin, indent)
        # Remove indentation.
        if lines:
            lines[0] = lines[0].lstrip()
        if margin < sys.maxint:
            for i in range(1, len(lines)): lines[i] = lines[i][margin:]
        # Remove any trailing or leading blank lines.
        while lines and not lines[-1]:
            lines.pop()
        while lines and not lines[0]:
            lines.pop(0)
        return string.join(lines, '\n')
+
def getfile(object):
    """Work out which source or compiled file an object was defined in.

    Raises TypeError for objects whose file cannot be determined
    (e.g. built-in modules and classes)."""
    if ismodule(object):
        if hasattr(object, '__file__'):
            return object.__file__
        raise TypeError('arg is a built-in module')
    if isclass(object):
        # A class has no file of its own; look at its defining module.
        object = sys.modules.get(object.__module__)
        if hasattr(object, '__file__'):
            return object.__file__
        raise TypeError('arg is a built-in class')
    # Unwrap step by step until a code object is reached:
    # method -> function -> code, traceback -> frame -> code.
    # The order of these checks matters.
    if ismethod(object):
        object = object.im_func
    if isfunction(object):
        object = object.func_code
    if istraceback(object):
        object = object.tb_frame
    if isframe(object):
        object = object.f_code
    if iscode(object):
        return object.co_filename
    raise TypeError('arg is not a module, class, method, '
                    'function, traceback, frame, or code object')
+
+def getmoduleinfo(path):
+    """Get the module name, suffix, mode, and module type for a given file.
+
+    Returns None (implicitly) if no import suffix matches the file name."""
+    filename = os.path.basename(path)
+    # Pair each suffix with its negated length so sorting puts longer
+    # suffixes first (Python 2 tuple-parameter lambda).
+    suffixes = map(lambda (suffix, mode, mtype):
+                   (-len(suffix), suffix, mode, mtype), imp.get_suffixes())
+    suffixes.sort() # try longest suffixes first, in case they overlap
+    for neglen, suffix, mode, mtype in suffixes:
+        if filename[neglen:] == suffix:
+            return filename[:neglen], suffix, mode, mtype
+
+def getmodulename(path):
+    """Return the module name for a given file, or None."""
+    info = getmoduleinfo(path)
+    # getmoduleinfo returns None when no import suffix matches.
+    if info: return info[0]
+
+def getsourcefile(object):
+    """Return the Python source file an object was defined in, if it exists.
+
+    Returns None for binary (compiled/extension) files with no source."""
+    filename = getfile(object)
+    # Map compiled file names (.pyc/.pyo) back to the .py source.
+    if string.lower(filename[-4:]) in ('.pyc', '.pyo'):
+        filename = filename[:-4] + '.py'
+    for suffix, mode, kind in imp.get_suffixes():
+        if 'b' in mode and string.lower(filename[-len(suffix):]) == suffix:
+            # Looks like a binary file.  We want to only return a text file.
+            return None
+    if os.path.exists(filename):
+        return filename
+    # only return a non-existent filename if the module has a PEP 302 loader
+    if hasattr(getmodule(object, filename), '__loader__'):
+        return filename
+
+def getabsfile(object, _filename=None):
+    """Return an absolute path to the source or compiled file for an object.
+
+    The idea is for each object to have a unique origin, so this routine
+    normalizes the result as much as possible.  _filename is an internal
+    shortcut used by getmodule() to avoid recomputing the file name."""
+    if _filename is None:
+        _filename = getsourcefile(object) or getfile(object)
+    return os.path.normcase(os.path.abspath(_filename))
+
+# Caches used by getmodule(): absolute file name -> module name, and
+# module name -> last __file__ seen (to skip already-mapped modules).
+modulesbyfile = {}
+_filesbymodname = {}
+
+def getmodule(object, _filename=None):
+    """Return the module an object was defined in, or None if not found.
+
+    _filename, when given, is the (possibly non-absolute) file name the
+    caller already computed for the object; it is used to consult the cache
+    before doing any file-system work."""
+    if ismodule(object):
+        return object
+    if hasattr(object, '__module__'):
+        return sys.modules.get(object.__module__)
+    # Try the filename to modulename cache
+    if _filename is not None and _filename in modulesbyfile:
+        return sys.modules.get(modulesbyfile[_filename])
+    # Try the cache again with the absolute file name
+    try:
+        file = getabsfile(object, _filename)
+    except TypeError:
+        return None
+    if file in modulesbyfile:
+        return sys.modules.get(modulesbyfile[file])
+    # Update the filename to module name cache and check yet again
+    # Copy sys.modules in order to cope with changes while iterating
+    for modname, module in sys.modules.items():
+        if ismodule(module) and hasattr(module, '__file__'):
+            f = module.__file__
+            if f == _filesbymodname.get(modname, None):
+                # Have already mapped this module, so skip it
+                continue
+            _filesbymodname[modname] = f
+            f = getabsfile(module)
+            # Always map to the name the module knows itself by
+            modulesbyfile[f] = modulesbyfile[
+                os.path.realpath(f)] = module.__name__
+    if file in modulesbyfile:
+        return sys.modules.get(modulesbyfile[file])
+    # Check the main module
+    main = sys.modules['__main__']
+    if not hasattr(object, '__name__'):
+        return None
+    if hasattr(main, object.__name__):
+        mainobject = getattr(main, object.__name__)
+        if mainobject is object:
+            return main
+    # Check builtins
+    builtin = sys.modules['__builtin__']
+    if hasattr(builtin, object.__name__):
+        builtinobject = getattr(builtin, object.__name__)
+        if builtinobject is object:
+            return builtin
+
+def findsource(object):
+    """Return the entire source file and starting line number for an object.
+
+    The argument may be a module, class, method, function, traceback, frame,
+    or code object.  The source code is returned as a list of all the lines
+    in the file and the line number indexes a line in that list.  An IOError
+    is raised if the source code cannot be retrieved."""
+    file = getsourcefile(object) or getfile(object)
+    module = getmodule(object, file)
+    if module:
+        # Pass the module globals so linecache can use a PEP 302 __loader__.
+        lines = linecache.getlines(file, module.__dict__)
+    else:
+        lines = linecache.getlines(file)
+    if not lines:
+        raise IOError('could not get source code')
+
+    if ismodule(object):
+        return lines, 0
+
+    if isclass(object):
+        name = object.__name__
+        pat = re.compile(r'^(\s*)class\s*' + name + r'\b')
+        # make some effort to find the best matching class definition:
+        # use the one with the least indentation, which is the one
+        # that's most probably not inside a function definition.
+        candidates = []
+        for i in range(len(lines)):
+            match = pat.match(lines[i])
+            if match:
+                # if it's at toplevel, it's already the best one
+                # (the line starts directly with the 'c' of 'class')
+                if lines[i][0] == 'c':
+                    return lines, i
+                # else add whitespace to candidate list
+                candidates.append((match.group(1), i))
+        if candidates:
+            # this will sort by whitespace, and by line number,
+            # less whitespace first
+            candidates.sort()
+            return lines, candidates[0][1]
+        else:
+            raise IOError('could not find class definition')
+
+    # Reduce everything else to a code object, as in getfile().
+    if ismethod(object):
+        object = object.im_func
+    if isfunction(object):
+        object = object.func_code
+    if istraceback(object):
+        object = object.tb_frame
+    if isframe(object):
+        object = object.f_code
+    if iscode(object):
+        if not hasattr(object, 'co_firstlineno'):
+            raise IOError('could not find function definition')
+        lnum = object.co_firstlineno - 1
+        # Scan backwards for the 'def', a lambda, or a decorator line.
+        pat = re.compile(r'^(\s*def\s)|(.*(?<!\w)lambda(:|\s))|^(\s*@)')
+        while lnum > 0:
+            if pat.match(lines[lnum]): break
+            lnum = lnum - 1
+        return lines, lnum
+    raise IOError('could not find code object')
+
+def getcomments(object):
+    """Get lines of comments immediately preceding an object's source code.
+
+    Returns None when source can't be found.
+    """
+    try:
+        lines, lnum = findsource(object)
+    except (IOError, TypeError):
+        return None
+
+    if ismodule(object):
+        # Look for a comment block at the top of the file.
+        start = 0
+        if lines and lines[0][:2] == '#!': start = 1
+        while start < len(lines) and string.strip(lines[start]) in ('', '#'):
+            start = start + 1
+        if start < len(lines) and lines[start][:1] == '#':
+            comments = []
+            end = start
+            while end < len(lines) and lines[end][:1] == '#':
+                comments.append(string.expandtabs(lines[end]))
+                end = end + 1
+            return string.join(comments, '')
+
+    # Look for a preceding block of comments at the same indentation.
+    elif lnum > 0:
+        indent = indentsize(lines[lnum])
+        end = lnum - 1
+        if end >= 0 and string.lstrip(lines[end])[:1] == '#' and \
+            indentsize(lines[end]) == indent:
+            comments = [string.lstrip(string.expandtabs(lines[end]))]
+            if end > 0:
+                # Walk upwards, prepending comment lines at the same indent.
+                end = end - 1
+                comment = string.lstrip(string.expandtabs(lines[end]))
+                while comment[:1] == '#' and indentsize(lines[end]) == indent:
+                    comments[:0] = [comment]
+                    end = end - 1
+                    if end < 0: break
+                    comment = string.lstrip(string.expandtabs(lines[end]))
+            # Drop bare '#' lines at either end of the collected block.
+            while comments and string.strip(comments[0]) == '#':
+                comments[:1] = []
+            while comments and string.strip(comments[-1]) == '#':
+                comments[-1:] = []
+            return string.join(comments, '')
+
+class EndOfBlock(Exception): pass
+
+class BlockFinder:
+    """Provide a tokeneater() method to detect the end of a code block."""
+    def __init__(self):
+        self.indent = 0        # current INDENT/DEDENT nesting depth
+        self.islambda = False  # True once a 'lambda' keyword starts the block
+        self.started = False   # True once 'def'/'class'/'lambda' is seen
+        self.passline = False  # skip tokens until the next NEWLINE
+        self.last = 1          # last physical line belonging to the block
+
+    # Python 2 tuple parameters: tokenize passes (row, col) position pairs.
+    def tokeneater(self, type, token, (srow, scol), (erow, ecol), line):
+        if not self.started:
+            # look for the first "def", "class" or "lambda"
+            if token in ("def", "class", "lambda"):
+                if token == "lambda":
+                    self.islambda = True
+                self.started = True
+            self.passline = True    # skip to the end of the line
+        elif type == tokenize.NEWLINE:
+            self.passline = False   # stop skipping when a NEWLINE is seen
+            self.last = srow
+            if self.islambda:       # lambdas always end at the first NEWLINE
+                raise EndOfBlock
+        elif self.passline:
+            pass
+        elif type == tokenize.INDENT:
+            self.indent = self.indent + 1
+            self.passline = True
+        elif type == tokenize.DEDENT:
+            self.indent = self.indent - 1
+            # the end of matching indent/dedent pairs end a block
+            # (note that this only works for "def"/"class" blocks,
+            #  not e.g. for "if: else:" or "try: finally:" blocks)
+            if self.indent <= 0:
+                raise EndOfBlock
+        elif self.indent == 0 and type not in (tokenize.COMMENT, tokenize.NL):
+            # any other token on the same indentation level end the previous
+            # block as well, except the pseudo-tokens COMMENT and NL.
+            raise EndOfBlock
+
+def getblock(lines):
+    """Extract the block of code at the top of the given list of lines."""
+    blockfinder = BlockFinder()
+    try:
+        # tokeneater raises EndOfBlock when the block is complete;
+        # IndentationError can occur on partial input and is also terminal.
+        tokenize.tokenize(iter(lines).next, blockfinder.tokeneater)
+    except (EndOfBlock, IndentationError):
+        pass
+    return lines[:blockfinder.last]
+
+def getsourcelines(object):
+    """Return a list of source lines and starting line number for an object.
+
+    The argument may be a module, class, method, function, traceback, frame,
+    or code object.  The source code is returned as a list of the lines
+    corresponding to the object and the line number indicates where in the
+    original source file the first line of code was found.  An IOError is
+    raised if the source code cannot be retrieved."""
+    lines, lnum = findsource(object)
+
+    # Modules return the whole file; everything else the enclosing block
+    # (lnum is 0-based internally, 1-based in the returned value).
+    if ismodule(object): return lines, 0
+    else: return getblock(lines[lnum:]), lnum + 1
+
+def getsource(object):
+    """Return the text of the source code for an object.
+
+    The argument may be a module, class, method, function, traceback, frame,
+    or code object.  The source code is returned as a single string.  An
+    IOError is raised if the source code cannot be retrieved."""
+    lines, lnum = getsourcelines(object)
+    # Lines already end with '\n', so join with the empty string.
+    return string.join(lines, '')
+
+# --------------------------------------------------- class tree extraction
+def walktree(classes, children, parent):
+    """Recursive helper function for getclasstree()."""
+    results = []
+    # Sort for stable, readable output; attrgetter comes from operator.
+    classes.sort(key=attrgetter('__module__', '__name__'))
+    for c in classes:
+        results.append((c, c.__bases__))
+        if c in children:
+            # Nested list of subclasses immediately follows the parent entry.
+            results.append(walktree(children[c], children, c))
+    return results
+
+def getclasstree(classes, unique=0):
+    """Arrange the given list of classes into a hierarchy of nested lists.
+
+    Where a nested list appears, it contains classes derived from the class
+    whose entry immediately precedes the list.  Each entry is a 2-tuple
+    containing a class and a tuple of its base classes.  If the 'unique'
+    argument is true, exactly one entry appears in the returned structure
+    for each class in the given list.  Otherwise, classes using multiple
+    inheritance and their descendants will appear multiple times."""
+    children = {}   # maps each base class to the list of its subclasses
+    roots = []      # classes with no bases among the input
+    for c in classes:
+        if c.__bases__:
+            for parent in c.__bases__:
+                if not parent in children:
+                    children[parent] = []
+                children[parent].append(c)
+                # With unique=1, record c under only its first in-list parent.
+                if unique and parent in classes: break
+        elif c not in roots:
+            roots.append(c)
+    # Bases that are not themselves in 'classes' become additional roots.
+    for parent in children:
+        if parent not in classes:
+            roots.append(parent)
+    return walktree(roots, children, None)
+
+# ------------------------------------------------ argument list extraction
+# These constants are from Python's compile.h: bit flags stored in
+# co_flags; CO_VARARGS/CO_VARKEYWORDS mark *args / **kwargs parameters.
+CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 1, 2, 4, 8
+
+def getargs(co):
+    """Get information about the arguments accepted by a code object.
+
+    Three things are returned: (args, varargs, varkw), where 'args' is
+    a list of argument names (possibly containing nested lists), and
+    'varargs' and 'varkw' are the names of the * and ** arguments or None."""
+
+    if not iscode(co):
+        if hasattr(len, 'func_code') and type(co) is type(len.func_code):
+            # PyPy extension: built-in function objects have a func_code too.
+            # There is no co_code on it, but co_argcount and co_varnames and
+            # co_flags are present.
+            pass
+        else:
+            raise TypeError('arg is not a code object')
+
+    # co_code may be absent on PyPy built-in code objects (see above).
+    code = getattr(co, 'co_code', '')
+    nargs = co.co_argcount
+    names = co.co_varnames
+    args = list(names[:nargs])
+    step = 0
+
+    # The following acrobatics are for anonymous (tuple) arguments.
+    # Python 2 'def f((a, b)):' stores the tuple in a nameless slot
+    # ('' or '.N'); the bytecode is scanned to recover the structure.
+    for i in range(nargs):
+        if args[i][:1] in ('', '.'):
+            stack, remain, count = [], [], []
+            while step < len(code):
+                op = ord(code[step])
+                step = step + 1
+                if op >= dis.HAVE_ARGUMENT:
+                    opname = dis.opname[op]
+                    value = ord(code[step]) + ord(code[step+1])*256
+                    step = step + 2
+                    if opname in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
+                        remain.append(value)
+                        count.append(value)
+                    elif opname == 'STORE_FAST':
+                        stack.append(names[value])
+
+                        # Special case for sublists of length 1: def foo((bar))
+                        # doesn't generate the UNPACK_TUPLE bytecode, so if
+                        # `remain` is empty here, we have such a sublist.
+                        if not remain:
+                            stack[0] = [stack[0]]
+                            break
+                        else:
+                            remain[-1] = remain[-1] - 1
+                            while remain[-1] == 0:
+                                remain.pop()
+                                size = count.pop()
+                                stack[-size:] = [stack[-size:]]
+                                if not remain: break
+                                remain[-1] = remain[-1] - 1
+                            if not remain: break
+            args[i] = stack[0]
+
+    varargs = None
+    if co.co_flags & CO_VARARGS:
+        varargs = co.co_varnames[nargs]
+        nargs = nargs + 1
+    varkw = None
+    if co.co_flags & CO_VARKEYWORDS:
+        varkw = co.co_varnames[nargs]
+    return args, varargs, varkw
+
+def getargspec(func):
+    """Get the names and default values of a function's arguments.
+
+    A tuple of four things is returned: (args, varargs, varkw, defaults).
+    'args' is a list of the argument names (it may contain nested lists).
+    'varargs' and 'varkw' are the names of the * and ** arguments or None.
+    'defaults' is an n-tuple of the default values of the last n arguments.
+    """
+
+    if ismethod(func):
+        func = func.im_func
+    if not (isfunction(func) or
+            isbuiltin(func) and hasattr(func, 'func_code')):
+        # PyPy extension: this works for built-in functions too
+        # (PyPy built-ins carry a func_code; see getargs above)
+        raise TypeError('arg is not a Python function')
+    args, varargs, varkw = getargs(func.func_code)
+    return args, varargs, varkw, func.func_defaults
+
+def getargvalues(frame):
+    """Get information about arguments passed into a particular frame.
+
+    A tuple of four things is returned: (args, varargs, varkw, locals).
+    'args' is a list of the argument names (it may contain nested lists).
+    'varargs' and 'varkw' are the names of the * and ** arguments or None.
+    'locals' is the locals dictionary of the given frame."""
+    args, varargs, varkw = getargs(frame.f_code)
+    return args, varargs, varkw, frame.f_locals
+
+def joinseq(seq):
+    # Format a sequence of strings as a parenthesized tuple display;
+    # a one-element sequence gets the trailing comma: '(x,)'.
+    if len(seq) == 1:
+        return '(' + seq[0] + ',)'
+    else:
+        return '(' + string.join(seq, ', ') + ')'
+
+def strseq(object, convert, join=joinseq):
+    """Recursively walk a sequence, stringifying each element.
+
+    Nested lists/tuples (tuple parameters) are rendered via 'join';
+    leaves are rendered via 'convert'."""
+    if type(object) in (list, tuple):
+        return join(map(lambda o, c=convert, j=join: strseq(o, c, j), object))
+    else:
+        return convert(object)
+
+def formatargspec(args, varargs=None, varkw=None, defaults=None,
+                  formatarg=str,
+                  formatvarargs=lambda name: '*' + name,
+                  formatvarkw=lambda name: '**' + name,
+                  formatvalue=lambda value: '=' + repr(value),
+                  join=joinseq):
+    """Format an argument spec from the 4 values returned by getargspec.
+
+    The first four arguments are (args, varargs, varkw, defaults).  The
+    other four arguments are the corresponding optional formatting functions
+    that are called to turn names and values into strings.  The ninth
+    argument is an optional function to format the sequence of arguments."""
+    specs = []
+    if defaults:
+        # Defaults align with the trailing arguments.
+        firstdefault = len(args) - len(defaults)
+    for i in range(len(args)):
+        spec = strseq(args[i], formatarg, join)
+        if defaults and i >= firstdefault:
+            spec = spec + formatvalue(defaults[i - firstdefault])
+        specs.append(spec)
+    if varargs is not None:
+        specs.append(formatvarargs(varargs))
+    if varkw is not None:
+        specs.append(formatvarkw(varkw))
+    return '(' + string.join(specs, ', ') + ')'
+
+def formatargvalues(args, varargs, varkw, locals,
+                    formatarg=str,
+                    formatvarargs=lambda name: '*' + name,
+                    formatvarkw=lambda name: '**' + name,
+                    formatvalue=lambda value: '=' + repr(value),
+                    join=joinseq):
+    """Format an argument spec from the 4 values returned by getargvalues.
+
+    The first four arguments are (args, varargs, varkw, locals).  The
+    next four arguments are the corresponding optional formatting functions
+    that are called to turn names and values into strings.  The ninth
+    argument is an optional function to format the sequence of arguments."""
+    # Render 'name=value' for each argument, pulling values from 'locals'.
+    def convert(name, locals=locals,
+                formatarg=formatarg, formatvalue=formatvalue):
+        return formatarg(name) + formatvalue(locals[name])
+    specs = []
+    for i in range(len(args)):
+        specs.append(strseq(args[i], convert, join))
+    if varargs:
+        specs.append(formatvarargs(varargs) + formatvalue(locals[varargs]))
+    if varkw:
+        specs.append(formatvarkw(varkw) + formatvalue(locals[varkw]))
+    return '(' + string.join(specs, ', ') + ')'
+
+# -------------------------------------------------- stack frame extraction
+def getframeinfo(frame, context=1):
+    """Get information about a frame or traceback object.
+
+    A tuple of five things is returned: the filename, the line number of
+    the current line, the function name, a list of lines of context from
+    the source code, and the index of the current line within that list.
+    The optional second argument specifies the number of lines of context
+    to return, which are centered around the current line."""
+    if istraceback(frame):
+        lineno = frame.tb_lineno
+        frame = frame.tb_frame
+    else:
+        lineno = frame.f_lineno
+    if not isframe(frame):
+        raise TypeError('arg is not a frame or traceback object')
+
+    filename = getsourcefile(frame) or getfile(frame)
+    if context > 0:
+        # Center the context window on the current line.
+        start = lineno - 1 - context//2
+        try:
+            lines, lnum = findsource(frame)
+        except IOError:
+            lines = index = None
+        else:
+            # Clamp the window to the file boundaries.
+            start = max(start, 1)
+            start = max(0, min(start, len(lines) - context))
+            lines = lines[start:start+context]
+            index = lineno - 1 - start
+    else:
+        lines = index = None
+
+    return (filename, lineno, frame.f_code.co_name, lines, index)
+
+def getlineno(frame):
+    """Get the line number from a frame object, allowing for optimization."""
+    # FrameType.f_lineno is now a descriptor that grovels co_lnotab
+    return frame.f_lineno
+
+def getouterframes(frame, context=1):
+    """Get a list of records for a frame and all higher (calling) frames.
+
+    Each record contains a frame object, filename, line number, function
+    name, a list of lines of context, and index within the context."""
+    framelist = []
+    # Walk outwards via f_back until the outermost frame (f_back is None).
+    while frame:
+        framelist.append((frame,) + getframeinfo(frame, context))
+        frame = frame.f_back
+    return framelist
+
+def getinnerframes(tb, context=1):
+    """Get a list of records for a traceback's frame and all lower frames.
+
+    Each record contains a frame object, filename, line number, function
+    name, a list of lines of context, and index within the context."""
+    framelist = []
+    # Walk inwards via tb_next toward the frame where the exception occurred.
+    while tb:
+        framelist.append((tb.tb_frame,) + getframeinfo(tb, context))
+        tb = tb.tb_next
+    return framelist
+
+currentframe = sys._getframe
+
+def stack(context=1):
+    """Return a list of records for the stack above the caller's frame."""
+    # _getframe(1) skips this function's own frame.
+    return getouterframes(sys._getframe(1), context)
+
+def trace(context=1):
+    """Return a list of records for the stack below the current exception."""
+    # sys.exc_info()[2] is the traceback of the exception being handled.
+    return getinnerframes(sys.exc_info()[2], context)
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/locale.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/locale.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,1562 @@
+""" Locale support.
+
+ The module provides low-level access to the C lib's locale APIs
+ and adds high level number formatting APIs as well as a locale
+ aliasing engine to complement these.
+
+ The aliasing engine includes support for many commonly used locale
+ names and maps them to values suitable for passing to the C lib's
+ setlocale() function. It also includes default encodings for all
+ supported locale names.
+
+"""
+
+import sys, encodings, encodings.aliases
+
+# Try importing the _locale module.
+#
+# If this fails, fall back on a basic 'C' locale emulation.
+
+# Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
+# trying the import. So __all__ is also fiddled at the end of the file.
+__all__ = ["setlocale","Error","localeconv","strcoll","strxfrm",
+ "format","str","atof","atoi","LC_CTYPE","LC_COLLATE",
+ "LC_TIME","LC_MONETARY","LC_NUMERIC", "LC_ALL","CHAR_MAX"]
+
+try:
+
+    from _locale import *
+
+except ImportError:
+
+    # Locale emulation: _locale is unavailable, so provide a minimal pure-
+    # Python stand-in that only supports the portable 'C' locale.
+
+    CHAR_MAX = 127
+    LC_ALL = 6
+    LC_COLLATE = 3
+    LC_CTYPE = 0
+    LC_MESSAGES = 5
+    LC_MONETARY = 4
+    LC_NUMERIC = 1
+    LC_TIME = 2
+    Error = ValueError
+
+    def localeconv():
+        """ localeconv() -> dict.
+            Returns numeric and monetary locale-specific parameters.
+        """
+        # 'C' locale default values (CHAR_MAX = 127 means "not available")
+        return {'grouping': [127],
+                'currency_symbol': '',
+                'n_sign_posn': 127,
+                'p_cs_precedes': 127,
+                'n_cs_precedes': 127,
+                'mon_grouping': [],
+                'n_sep_by_space': 127,
+                'decimal_point': '.',
+                'negative_sign': '',
+                'positive_sign': '',
+                'p_sep_by_space': 127,
+                'int_curr_symbol': '',
+                'p_sign_posn': 127,
+                'thousands_sep': '',
+                'mon_thousands_sep': '',
+                'frac_digits': 127,
+                'mon_decimal_point': '',
+                'int_frac_digits': 127}
+
+    def setlocale(category, value=None):
+        """ setlocale(integer,string=None) -> string.
+            Activates/queries locale processing.
+        """
+        if value not in (None, '', 'C'):
+            raise Error, '_locale emulation only supports "C" locale'
+        return 'C'
+
+    def strcoll(a,b):
+        """ strcoll(string,string) -> int.
+            Compares two strings according to the locale.
+        """
+        return cmp(a,b)
+
+    def strxfrm(s):
+        """ strxfrm(string) -> string.
+            Returns a string that behaves for cmp locale-aware.
+        """
+        return s
+
+### Number formatting APIs
+
+# Author: Martin von Loewis
+# improved by Georg Brandl
+
+#perform the grouping from right to left
+#perform the grouping from right to left
+def _group(s, monetary=False):
+    # Insert the locale's thousands separators into the digit string 's',
+    # working right-to-left per the locale's 'grouping' list.  Returns
+    # (grouped_string, number_of_separators_inserted).
+    conv = localeconv()
+    thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
+    grouping = conv[monetary and 'mon_grouping' or 'grouping']
+    if not grouping:
+        return (s, 0)
+    result = ""
+    seps = 0
+    spaces = ""
+    if s[-1] == ' ':
+        # Preserve trailing spaces (from a '%-...' padded format) untouched.
+        sp = s.find(' ')
+        spaces = s[sp:]
+        s = s[:sp]
+    while s and grouping:
+        # if grouping is -1, we are done
+        if grouping[0] == CHAR_MAX:
+            break
+        # 0: re-use last group ad infinitum
+        elif grouping[0] != 0:
+            #process last group
+            group = grouping[0]
+            grouping = grouping[1:]
+        if result:
+            result = s[-group:] + thousands_sep + result
+            seps += 1
+        else:
+            result = s[-group:]
+        s = s[:-group]
+        if s and s[-1] not in "0123456789":
+            # the leading string is only spaces and signs
+            return s + result + spaces, seps
+    if not result:
+        return s + spaces, seps
+    if s:
+        result = s + thousands_sep + result
+        seps += 1
+    return result + spaces, seps
+
+def format(percent, value, grouping=False, monetary=False, *additional):
+    """Returns the locale-aware substitution of a %? specifier
+    (percent).
+
+    additional is for format strings which contain one or more
+    '*' modifiers."""
+    # this is only for one-percent-specifier strings and this should be checked
+    if percent[0] != '%':
+        raise ValueError("format() must be given exactly one %char "
+                         "format specifier")
+    if additional:
+        # '*' width/precision values are consumed from 'additional'.
+        formatted = percent % ((value,) + additional)
+    else:
+        formatted = percent % value
+    # floats and decimal ints need special action!
+    if percent[-1] in 'eEfFgG':
+        seps = 0
+        parts = formatted.split('.')
+        if grouping:
+            parts[0], seps = _group(parts[0], monetary=monetary)
+        decimal_point = localeconv()[monetary and 'mon_decimal_point'
+                                              or 'decimal_point']
+        formatted = decimal_point.join(parts)
+        # Each inserted separator widened the string; drop one padding
+        # space per separator to keep the overall field width.
+        while seps:
+            sp = formatted.find(' ')
+            if sp == -1: break
+            formatted = formatted[:sp] + formatted[sp+1:]
+            seps -= 1
+    elif percent[-1] in 'diu':
+        if grouping:
+            formatted = _group(formatted, monetary=monetary)[0]
+    return formatted
+
+import re, operator
+# Matches a single %-style format specifier: optional '(key)' for mapping
+# formats, then modifier flags (including '*' width/precision) and the
+# conversion character.
+_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
+                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
+
+def format_string(f, val, grouping=False):
+    """Formats a string in the same way that the % formatting would use,
+    but takes the current locale into account.
+    Grouping is applied if the third parameter is true."""
+    percents = list(_percent_re.finditer(f))
+    # Replace every specifier with '%s'; each is formatted individually
+    # below and substituted as a ready-made string.
+    new_f = _percent_re.sub('%s', f)
+
+    if isinstance(val, tuple):
+        new_val = list(val)
+        i = 0
+        for perc in percents:
+            # A '*' modifier consumes extra tuple items for width/precision.
+            starcount = perc.group('modifiers').count('*')
+            new_val[i] = format(perc.group(), new_val[i], grouping, False, *new_val[i+1:i+1+starcount])
+            del new_val[i+1:i+1+starcount]
+            i += (1 + starcount)
+        val = tuple(new_val)
+    elif operator.isMappingType(val):
+        # NOTE(review): this formats the mapping's values in place.
+        for perc in percents:
+            key = perc.group("key")
+            val[key] = format(perc.group(), val[key], grouping)
+    else:
+        # val is a single value
+        val = format(percents[0].group(), val, grouping)
+
+    return new_f % val
+
+def currency(val, symbol=True, grouping=False, international=False):
+    """Formats val according to the currency settings
+    in the current locale."""
+    conv = localeconv()
+
+    # check for illegal values (127 == CHAR_MAX means "unspecified")
+    digits = conv[international and 'int_frac_digits' or 'frac_digits']
+    if digits == 127:
+        raise ValueError("Currency formatting is not possible using "
+                         "the 'C' locale.")
+
+    s = format('%%.%if' % digits, abs(val), grouping, monetary=True)
+    # '<' and '>' are markers if the sign must be inserted between symbol and value
+    s = '<' + s + '>'
+
+    if symbol:
+        smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
+        precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
+        separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
+
+        if precedes:
+            s = smb + (separated and ' ' or '') + s
+        else:
+            s = s + (separated and ' ' or '') + smb
+
+    sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
+    sign = conv[val<0 and 'negative_sign' or 'positive_sign']
+
+    # sign_pos semantics follow C's localeconv(): 0=parentheses,
+    # 1=before everything, 2=after everything, 3=before value, 4=after value.
+    if sign_pos == 0:
+        s = '(' + s + ')'
+    elif sign_pos == 1:
+        s = sign + s
+    elif sign_pos == 2:
+        s = s + sign
+    elif sign_pos == 3:
+        s = s.replace('<', sign)
+    elif sign_pos == 4:
+        s = s.replace('>', sign)
+    else:
+        # the default if nothing specified;
+        # this should be the most fitting sign position
+        s = sign + s
+
+    # Remove any leftover position markers.
+    return s.replace('<', '').replace('>', '')
+
+def str(val):
+    """Convert float to string, taking the locale into account.
+
+    Shadows the builtin str() within this module (stdlib locale API)."""
+    return format("%.12g", val)
+
+def atof(string, func=float):
+    "Parses a string as a float according to the locale settings."
+    #First, get rid of the grouping
+    ts = localeconv()['thousands_sep']
+    if ts:
+        string = string.replace(ts, '')
+    #next, replace the decimal point with a dot
+    dd = localeconv()['decimal_point']
+    if dd:
+        string = string.replace(dd, '.')
+    #finally, parse the string with 'func' (float by default, int for atoi)
+    return func(string)
+
+def atoi(str):
+    "Converts a string to an integer according to the locale settings."
+    # Reuse atof's separator handling, parsing with int instead of float.
+    return atof(str, int)
+
+def _test():
+    # Smoke test: round-trip grouping/formatting through atoi/atof
+    # using the user's default locale.
+    setlocale(LC_ALL, "")
+    #do grouping
+    s1 = format("%d", 123456789,1)
+    print s1, "is", atoi(s1)
+    #standard formatting
+    s1 = str(3.14)
+    print s1, "is", atof(s1)
+
+### Locale name aliasing engine
+
+# Author: Marc-Andre Lemburg, mal at lemburg.com
+# Various tweaks by Fredrik Lundh <fredrik at pythonware.com>
+
+# store away the low-level version of setlocale (it's
+# overridden below by the alias-aware wrapper)
+_setlocale = setlocale
+
+def normalize(localename):
+
+    """ Returns a normalized locale code for the given locale
+        name.
+
+        The returned locale code is formatted for use with
+        setlocale().
+
+        If normalization fails, the original name is returned
+        unchanged.
+
+        If the given encoding is not known, the function defaults to
+        the default encoding for the locale code just like setlocale()
+        does.
+
+    """
+    # NOTE(review): locale_alias and locale_encoding_alias are module-level
+    # tables defined further down in this file.
+    # Normalize the locale name and extract the encoding
+    fullname = localename.lower()
+    if ':' in fullname:
+        # ':' is sometimes used as encoding delimiter.
+        fullname = fullname.replace(':', '.')
+    if '.' in fullname:
+        langname, encoding = fullname.split('.')[:2]
+        fullname = langname + '.' + encoding
+    else:
+        langname = fullname
+        encoding = ''
+
+    # First lookup: fullname (possibly with encoding)
+    norm_encoding = encoding.replace('-', '')
+    norm_encoding = norm_encoding.replace('_', '')
+    lookup_name = langname + '.' + encoding
+    code = locale_alias.get(lookup_name, None)
+    if code is not None:
+        return code
+    #print 'first lookup failed'
+
+    # Second try: langname (without encoding)
+    code = locale_alias.get(langname, None)
+    if code is not None:
+        #print 'langname lookup succeeded'
+        if '.' in code:
+            langname, defenc = code.split('.')
+        else:
+            langname = code
+            defenc = ''
+        if encoding:
+            # Convert the encoding to a C lib compatible encoding string
+            norm_encoding = encodings.normalize_encoding(encoding)
+            #print 'norm encoding: %r' % norm_encoding
+            norm_encoding = encodings.aliases.aliases.get(norm_encoding,
+                                                          norm_encoding)
+            #print 'aliased encoding: %r' % norm_encoding
+            encoding = locale_encoding_alias.get(norm_encoding,
+                                                 norm_encoding)
+        else:
+            encoding = defenc
+        #print 'found encoding %r' % encoding
+        if encoding:
+            return langname + '.' + encoding
+        else:
+            return langname
+
+    else:
+        return localename
+
+def _parse_localename(localename):
+
+    """ Parses the locale code for localename and returns the
+        result as tuple (language code, encoding).
+
+        The localename is normalized and passed through the locale
+        alias engine.  A ValueError is raised in case the locale name
+        cannot be parsed.
+
+        The language code corresponds to RFC 1766.  code and encoding
+        can be None in case the values cannot be determined or are
+        unknown to this implementation.
+
+    """
+    code = normalize(localename)
+    if '@' in code:
+        # Deal with locale modifiers
+        code, modifier = code.split('@')
+        if modifier == 'euro' and '.' not in code:
+            # Assume Latin-9 for @euro locales. This is bogus,
+            # since some systems may use other encodings for these
+            # locales. Also, we ignore other modifiers.
+            return code, 'iso-8859-15'
+
+    if '.' in code:
+        return tuple(code.split('.')[:2])
+    elif code == 'C':
+        # The portable 'C' locale has neither a language nor an encoding.
+        return None, None
+    raise ValueError, 'unknown locale: %s' % localename
+
+def _build_localename(localetuple):
+
+    """ Builds a locale code from the given tuple (language code,
+        encoding).
+
+        No aliasing or normalizing takes place.
+
+    """
+    language, encoding = localetuple
+    if language is None:
+        # None maps back to the portable 'C' locale.
+        language = 'C'
+    if encoding is None:
+        return language
+    else:
+        return language + '.' + encoding
+
+def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
+
+    """ Tries to determine the default locale settings and returns
+        them as tuple (language code, encoding).
+
+        According to POSIX, a program which has not called
+        setlocale(LC_ALL, "") runs using the portable 'C' locale.
+        Calling setlocale(LC_ALL, "") lets it use the default locale as
+        defined by the LANG variable. Since we don't want to interfere
+        with the current locale setting we thus emulate the behavior
+        in the way described above.
+
+        To maintain compatibility with other platforms, not only the
+        LANG variable is tested, but a list of variables given as
+        envvars parameter. The first found to be defined will be
+        used. envvars defaults to the search path used in GNU gettext;
+        it must always contain the variable name 'LANG'.
+
+        Except for the code 'C', the language code corresponds to RFC
+        1766.  code and encoding can be None in case the values cannot
+        be determined.
+
+    """
+
+    try:
+        # check if it's supported by the _locale module
+        import _locale
+        code, encoding = _locale._getdefaultlocale()
+    except (ImportError, AttributeError):
+        pass
+    else:
+        # make sure the code/encoding values are valid
+        if sys.platform == "win32" and code and code[:2] == "0x":
+            # map windows language identifier to language name
+            code = windows_locale.get(int(code, 0))
+        # ...add other platform-specific processing here, if
+        # necessary...
+        return code, encoding
+
+    # fall back on POSIX behaviour
+    import os
+    lookup = os.environ.get
+    for variable in envvars:
+        localename = lookup(variable,None)
+        if localename:
+            if variable == 'LANGUAGE':
+                # LANGUAGE may hold a ':'-separated priority list; take first.
+                localename = localename.split(':')[0]
+            break
+    else:
+        localename = 'C'
+    return _parse_localename(localename)
+
+
def getlocale(category=LC_CTYPE):
    """Return the current setting for the given locale category as a
    (language code, encoding) tuple.

    category may be any of the LC_* values except LC_ALL; it defaults
    to LC_CTYPE.

    Except for the code 'C', the language code corresponds to RFC
    1766.  Either element of the result may be None if it cannot be
    determined.
    """
    setting = _setlocale(category)
    # A compound setting (per-category list joined with ';') cannot be
    # represented as a single (code, encoding) pair.
    if ';' in setting and category == LC_ALL:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(setting)
+
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine.  Locale strings are passed directly to the
        C lib.

        category may be given as one of the LC_* values.

    """
    # Non-string values (locale tuples) are run through the aliasing
    # engine first; plain strings go straight to the C library.
    if locale and type(locale) is not type(""):
        # convert to string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)
+
def resetlocale(category=LC_ALL):
    """Set the locale for *category* back to the default setting.

    The default setting is whatever getdefaultlocale() reports;
    category defaults to LC_ALL.
    """
    default = _build_localename(getdefaultlocale())
    _setlocale(category, default)
+
# getpreferredencoding(): one of three implementations is selected at
# import time, depending on what the platform provides.
if sys.platform in ('win32', 'darwin', 'mac'):
    # On Win32, this will return the ANSI code page
    # On the Mac, it should return the system encoding;
    # it might return "ascii" instead
    def getpreferredencoding(do_setlocale = True):
        """Return the charset that the user is likely using."""
        # Ask the C module directly; the encoding is the second item
        # of the (language code, encoding) tuple it returns.
        import _locale
        return _locale._getdefaultlocale()[1]
else:
    # On Unix, if CODESET is available, use that.
    try:
        CODESET
    except NameError:
        # Fall back to parsing environment variables :-(
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            by looking at environment variables."""
            return getdefaultlocale()[1]
    else:
        def getpreferredencoding(do_setlocale = True):
            """Return the charset that the user is likely using,
            according to the system configuration."""
            if do_setlocale:
                # nl_langinfo() only reports correctly after the
                # locale has been set; temporarily switch to the
                # user's default, then restore the previous setting.
                oldloc = setlocale(LC_CTYPE)
                setlocale(LC_CTYPE, "")
                result = nl_langinfo(CODESET)
                setlocale(LC_CTYPE, oldloc)
                return result
            else:
                return nl_langinfo(CODESET)
+
+
+### Database
+#
+# The following data was extracted from the locale.alias file which
+# comes with X11 and then hand edited removing the explicit encoding
+# definitions and adding some more aliases. The file is usually
+# available as /usr/lib/X11/locale/locale.alias.
+#
+
+#
+# The locale_encoding_alias table maps lowercase encoding alias names
+# to C locale encoding names (case-sensitive). Note that normalize()
+# first looks up the encoding in the encodings.aliases dictionary and
+# then applies this mapping to find the correct C lib name for the
+# encoding.
+#
# NOTE: keys must be lowercase; values are the case-sensitive encoding
# names expected by the C library.
locale_encoding_alias = {

    # Mappings for non-standard encoding names used in locale names
    '437': 'C',
    'c': 'C',
    'en': 'ISO8859-1',
    'jis': 'JIS7',
    'jis7': 'JIS7',
    'ajec': 'eucJP',

    # Mappings from Python codec names to C lib encoding names
    'ascii': 'ISO8859-1',
    'latin_1': 'ISO8859-1',
    'iso8859_1': 'ISO8859-1',
    'iso8859_10': 'ISO8859-10',
    'iso8859_11': 'ISO8859-11',
    'iso8859_13': 'ISO8859-13',
    'iso8859_14': 'ISO8859-14',
    'iso8859_15': 'ISO8859-15',
    'iso8859_2': 'ISO8859-2',
    'iso8859_3': 'ISO8859-3',
    'iso8859_4': 'ISO8859-4',
    'iso8859_5': 'ISO8859-5',
    'iso8859_6': 'ISO8859-6',
    'iso8859_7': 'ISO8859-7',
    'iso8859_8': 'ISO8859-8',
    'iso8859_9': 'ISO8859-9',
    'iso2022_jp': 'JIS7',
    'shift_jis': 'SJIS',
    'tactis': 'TACTIS',
    'euc_jp': 'eucJP',
    'euc_kr': 'eucKR',
    'utf_8': 'UTF8',
    'koi8_r': 'KOI8-R',
    'koi8_u': 'KOI8-U',
    # XXX This list is still incomplete. If you know more
    # mappings, please file a bug report. Thanks.
}
+
+#
+# The locale_alias table maps lowercase alias names to C locale names
+# (case-sensitive). Encodings are always separated from the locale
+# name using a dot ('.'); they should only be given in case the
+# language name is needed to interpret the given encoding alias
+# correctly (CJK codes often have this need).
+#
+# Note that the normalize() function which uses this table
+# removes '_' and '-' characters from the encoding part of the
+# locale name before doing the lookup. This saves a lot of
+# space in the table.
+#
+# MAL 2004-12-10:
+# Updated alias mapping to most recent locale.alias file
+# from X.org distribution using makelocalealias.py.
+#
+# These are the differences compared to the old mapping (Python 2.4
+# and older):
+#
+# updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
+# updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
+# updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
+# updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
+# updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
+# updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
+# updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
+# updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
+# updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
+# updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
+# updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
+# updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+# updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
+# updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
+# updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
+# updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
+# updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
+# updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
+# updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
+# updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
+# updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
+# updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
+#
# Keys are lowercase alias names; values are canonical C locale names.
# Modifier suffixes (e.g. '@euro') are part of the key.  The '@' signs,
# which the mail archive had mangled into ' at ', are restored here.
locale_alias = {
    'a3': 'a3_AZ.KOI8-C',
    'a3_az': 'a3_AZ.KOI8-C',
    'a3_az.koi8c': 'a3_AZ.KOI8-C',
    'af': 'af_ZA.ISO8859-1',
    'af_za': 'af_ZA.ISO8859-1',
    'af_za.iso88591': 'af_ZA.ISO8859-1',
    'am': 'am_ET.UTF-8',
    'american': 'en_US.ISO8859-1',
    'american.iso88591': 'en_US.ISO8859-1',
    'ar': 'ar_AA.ISO8859-6',
    'ar_aa': 'ar_AA.ISO8859-6',
    'ar_aa.iso88596': 'ar_AA.ISO8859-6',
    'ar_ae': 'ar_AE.ISO8859-6',
    'ar_bh': 'ar_BH.ISO8859-6',
    'ar_dz': 'ar_DZ.ISO8859-6',
    'ar_eg': 'ar_EG.ISO8859-6',
    'ar_eg.iso88596': 'ar_EG.ISO8859-6',
    'ar_iq': 'ar_IQ.ISO8859-6',
    'ar_jo': 'ar_JO.ISO8859-6',
    'ar_kw': 'ar_KW.ISO8859-6',
    'ar_lb': 'ar_LB.ISO8859-6',
    'ar_ly': 'ar_LY.ISO8859-6',
    'ar_ma': 'ar_MA.ISO8859-6',
    'ar_om': 'ar_OM.ISO8859-6',
    'ar_qa': 'ar_QA.ISO8859-6',
    'ar_sa': 'ar_SA.ISO8859-6',
    'ar_sa.iso88596': 'ar_SA.ISO8859-6',
    'ar_sd': 'ar_SD.ISO8859-6',
    'ar_sy': 'ar_SY.ISO8859-6',
    'ar_tn': 'ar_TN.ISO8859-6',
    'ar_ye': 'ar_YE.ISO8859-6',
    'arabic': 'ar_AA.ISO8859-6',
    'arabic.iso88596': 'ar_AA.ISO8859-6',
    'az': 'az_AZ.ISO8859-9E',
    'az_az': 'az_AZ.ISO8859-9E',
    'az_az.iso88599e': 'az_AZ.ISO8859-9E',
    'be': 'be_BY.CP1251',
    'be_by': 'be_BY.CP1251',
    'be_by.cp1251': 'be_BY.CP1251',
    'be_by.microsoftcp1251': 'be_BY.CP1251',
    'bg': 'bg_BG.CP1251',
    'bg_bg': 'bg_BG.CP1251',
    'bg_bg.cp1251': 'bg_BG.CP1251',
    'bg_bg.iso88595': 'bg_BG.ISO8859-5',
    'bg_bg.koi8r': 'bg_BG.KOI8-R',
    'bg_bg.microsoftcp1251': 'bg_BG.CP1251',
    'bokmal': 'nb_NO.ISO8859-1',
    'bokm\xe5l': 'nb_NO.ISO8859-1',
    'br': 'br_FR.ISO8859-1',
    'br_fr': 'br_FR.ISO8859-1',
    'br_fr.iso88591': 'br_FR.ISO8859-1',
    'br_fr.iso885914': 'br_FR.ISO8859-14',
    'br_fr.iso885915': 'br_FR.ISO8859-15',
    'br_fr@euro': 'br_FR.ISO8859-15',
    'bulgarian': 'bg_BG.CP1251',
    'c': 'C',
    'c-french': 'fr_CA.ISO8859-1',
    'c-french.iso88591': 'fr_CA.ISO8859-1',
    'c.en': 'C',
    'c.iso88591': 'en_US.ISO8859-1',
    'c_c': 'C',
    'c_c.c': 'C',
    'ca': 'ca_ES.ISO8859-1',
    'ca_es': 'ca_ES.ISO8859-1',
    'ca_es.iso88591': 'ca_ES.ISO8859-1',
    'ca_es.iso885915': 'ca_ES.ISO8859-15',
    'ca_es@euro': 'ca_ES.ISO8859-15',
    'catalan': 'ca_ES.ISO8859-1',
    'cextend': 'en_US.ISO8859-1',
    'cextend.en': 'en_US.ISO8859-1',
    'chinese-s': 'zh_CN.eucCN',
    'chinese-t': 'zh_TW.eucTW',
    'croatian': 'hr_HR.ISO8859-2',
    'cs': 'cs_CZ.ISO8859-2',
    'cs_cs': 'cs_CZ.ISO8859-2',
    'cs_cs.iso88592': 'cs_CZ.ISO8859-2',
    'cs_cz': 'cs_CZ.ISO8859-2',
    'cs_cz.iso88592': 'cs_CZ.ISO8859-2',
    'cy': 'cy_GB.ISO8859-1',
    'cy_gb': 'cy_GB.ISO8859-1',
    'cy_gb.iso88591': 'cy_GB.ISO8859-1',
    'cy_gb.iso885914': 'cy_GB.ISO8859-14',
    'cy_gb.iso885915': 'cy_GB.ISO8859-15',
    'cy_gb@euro': 'cy_GB.ISO8859-15',
    'cz': 'cs_CZ.ISO8859-2',
    'cz_cz': 'cs_CZ.ISO8859-2',
    'czech': 'cs_CZ.ISO8859-2',
    'da': 'da_DK.ISO8859-1',
    'da_dk': 'da_DK.ISO8859-1',
    'da_dk.88591': 'da_DK.ISO8859-1',
    'da_dk.885915': 'da_DK.ISO8859-15',
    'da_dk.iso88591': 'da_DK.ISO8859-1',
    'da_dk.iso885915': 'da_DK.ISO8859-15',
    'da_dk@euro': 'da_DK.ISO8859-15',
    'danish': 'da_DK.ISO8859-1',
    'danish.iso88591': 'da_DK.ISO8859-1',
    'dansk': 'da_DK.ISO8859-1',
    'de': 'de_DE.ISO8859-1',
    'de_at': 'de_AT.ISO8859-1',
    'de_at.iso88591': 'de_AT.ISO8859-1',
    'de_at.iso885915': 'de_AT.ISO8859-15',
    'de_at@euro': 'de_AT.ISO8859-15',
    'de_be': 'de_BE.ISO8859-1',
    'de_be.iso88591': 'de_BE.ISO8859-1',
    'de_be.iso885915': 'de_BE.ISO8859-15',
    'de_be@euro': 'de_BE.ISO8859-15',
    'de_ch': 'de_CH.ISO8859-1',
    'de_ch.iso88591': 'de_CH.ISO8859-1',
    'de_ch.iso885915': 'de_CH.ISO8859-15',
    'de_ch@euro': 'de_CH.ISO8859-15',
    'de_de': 'de_DE.ISO8859-1',
    'de_de.88591': 'de_DE.ISO8859-1',
    'de_de.885915': 'de_DE.ISO8859-15',
    'de_de.885915@euro': 'de_DE.ISO8859-15',
    'de_de.iso88591': 'de_DE.ISO8859-1',
    'de_de.iso885915': 'de_DE.ISO8859-15',
    'de_de@euro': 'de_DE.ISO8859-15',
    'de_lu': 'de_LU.ISO8859-1',
    'de_lu.iso88591': 'de_LU.ISO8859-1',
    'de_lu.iso885915': 'de_LU.ISO8859-15',
    'de_lu@euro': 'de_LU.ISO8859-15',
    'deutsch': 'de_DE.ISO8859-1',
    'dutch': 'nl_NL.ISO8859-1',
    'dutch.iso88591': 'nl_BE.ISO8859-1',
    'ee': 'ee_EE.ISO8859-4',
    'ee_ee': 'ee_EE.ISO8859-4',
    'ee_ee.iso88594': 'ee_EE.ISO8859-4',
    'eesti': 'et_EE.ISO8859-1',
    'el': 'el_GR.ISO8859-7',
    'el_gr': 'el_GR.ISO8859-7',
    'el_gr.iso88597': 'el_GR.ISO8859-7',
    'el_gr@euro': 'el_GR.ISO8859-15',
    'en': 'en_US.ISO8859-1',
    'en.iso88591': 'en_US.ISO8859-1',
    'en_au': 'en_AU.ISO8859-1',
    'en_au.iso88591': 'en_AU.ISO8859-1',
    'en_be': 'en_BE.ISO8859-1',
    'en_be@euro': 'en_BE.ISO8859-15',
    'en_bw': 'en_BW.ISO8859-1',
    'en_ca': 'en_CA.ISO8859-1',
    'en_ca.iso88591': 'en_CA.ISO8859-1',
    'en_gb': 'en_GB.ISO8859-1',
    'en_gb.88591': 'en_GB.ISO8859-1',
    'en_gb.iso88591': 'en_GB.ISO8859-1',
    'en_gb.iso885915': 'en_GB.ISO8859-15',
    'en_gb@euro': 'en_GB.ISO8859-15',
    'en_hk': 'en_HK.ISO8859-1',
    'en_ie': 'en_IE.ISO8859-1',
    'en_ie.iso88591': 'en_IE.ISO8859-1',
    'en_ie.iso885915': 'en_IE.ISO8859-15',
    'en_ie@euro': 'en_IE.ISO8859-15',
    'en_in': 'en_IN.ISO8859-1',
    'en_nz': 'en_NZ.ISO8859-1',
    'en_nz.iso88591': 'en_NZ.ISO8859-1',
    'en_ph': 'en_PH.ISO8859-1',
    'en_sg': 'en_SG.ISO8859-1',
    'en_uk': 'en_GB.ISO8859-1',
    'en_us': 'en_US.ISO8859-1',
    'en_us.88591': 'en_US.ISO8859-1',
    'en_us.885915': 'en_US.ISO8859-15',
    'en_us.iso88591': 'en_US.ISO8859-1',
    'en_us.iso885915': 'en_US.ISO8859-15',
    'en_us.iso885915@euro': 'en_US.ISO8859-15',
    'en_us@euro': 'en_US.ISO8859-15',
    'en_us@euro@euro': 'en_US.ISO8859-15',
    'en_za': 'en_ZA.ISO8859-1',
    'en_za.88591': 'en_ZA.ISO8859-1',
    'en_za.iso88591': 'en_ZA.ISO8859-1',
    'en_za.iso885915': 'en_ZA.ISO8859-15',
    'en_za@euro': 'en_ZA.ISO8859-15',
    'en_zw': 'en_ZW.ISO8859-1',
    'eng_gb': 'en_GB.ISO8859-1',
    'eng_gb.8859': 'en_GB.ISO8859-1',
    'english': 'en_EN.ISO8859-1',
    'english.iso88591': 'en_EN.ISO8859-1',
    'english_uk': 'en_GB.ISO8859-1',
    'english_uk.8859': 'en_GB.ISO8859-1',
    'english_united-states': 'en_US.ISO8859-1',
    'english_united-states.437': 'C',
    'english_us': 'en_US.ISO8859-1',
    'english_us.8859': 'en_US.ISO8859-1',
    'english_us.ascii': 'en_US.ISO8859-1',
    'eo': 'eo_XX.ISO8859-3',
    'eo_eo': 'eo_EO.ISO8859-3',
    'eo_eo.iso88593': 'eo_EO.ISO8859-3',
    'eo_xx': 'eo_XX.ISO8859-3',
    'eo_xx.iso88593': 'eo_XX.ISO8859-3',
    'es': 'es_ES.ISO8859-1',
    'es_ar': 'es_AR.ISO8859-1',
    'es_ar.iso88591': 'es_AR.ISO8859-1',
    'es_bo': 'es_BO.ISO8859-1',
    'es_bo.iso88591': 'es_BO.ISO8859-1',
    'es_cl': 'es_CL.ISO8859-1',
    'es_cl.iso88591': 'es_CL.ISO8859-1',
    'es_co': 'es_CO.ISO8859-1',
    'es_co.iso88591': 'es_CO.ISO8859-1',
    'es_cr': 'es_CR.ISO8859-1',
    'es_cr.iso88591': 'es_CR.ISO8859-1',
    'es_do': 'es_DO.ISO8859-1',
    'es_do.iso88591': 'es_DO.ISO8859-1',
    'es_ec': 'es_EC.ISO8859-1',
    'es_ec.iso88591': 'es_EC.ISO8859-1',
    'es_es': 'es_ES.ISO8859-1',
    'es_es.88591': 'es_ES.ISO8859-1',
    'es_es.iso88591': 'es_ES.ISO8859-1',
    'es_es.iso885915': 'es_ES.ISO8859-15',
    'es_es@euro': 'es_ES.ISO8859-15',
    'es_gt': 'es_GT.ISO8859-1',
    'es_gt.iso88591': 'es_GT.ISO8859-1',
    'es_hn': 'es_HN.ISO8859-1',
    'es_hn.iso88591': 'es_HN.ISO8859-1',
    'es_mx': 'es_MX.ISO8859-1',
    'es_mx.iso88591': 'es_MX.ISO8859-1',
    'es_ni': 'es_NI.ISO8859-1',
    'es_ni.iso88591': 'es_NI.ISO8859-1',
    'es_pa': 'es_PA.ISO8859-1',
    'es_pa.iso88591': 'es_PA.ISO8859-1',
    'es_pa.iso885915': 'es_PA.ISO8859-15',
    'es_pa@euro': 'es_PA.ISO8859-15',
    'es_pe': 'es_PE.ISO8859-1',
    'es_pe.iso88591': 'es_PE.ISO8859-1',
    'es_pe.iso885915': 'es_PE.ISO8859-15',
    'es_pe@euro': 'es_PE.ISO8859-15',
    'es_pr': 'es_PR.ISO8859-1',
    'es_pr.iso88591': 'es_PR.ISO8859-1',
    'es_py': 'es_PY.ISO8859-1',
    'es_py.iso88591': 'es_PY.ISO8859-1',
    'es_py.iso885915': 'es_PY.ISO8859-15',
    'es_py@euro': 'es_PY.ISO8859-15',
    'es_sv': 'es_SV.ISO8859-1',
    'es_sv.iso88591': 'es_SV.ISO8859-1',
    'es_sv.iso885915': 'es_SV.ISO8859-15',
    'es_sv@euro': 'es_SV.ISO8859-15',
    'es_us': 'es_US.ISO8859-1',
    'es_uy': 'es_UY.ISO8859-1',
    'es_uy.iso88591': 'es_UY.ISO8859-1',
    'es_uy.iso885915': 'es_UY.ISO8859-15',
    'es_uy@euro': 'es_UY.ISO8859-15',
    'es_ve': 'es_VE.ISO8859-1',
    'es_ve.iso88591': 'es_VE.ISO8859-1',
    'es_ve.iso885915': 'es_VE.ISO8859-15',
    'es_ve@euro': 'es_VE.ISO8859-15',
    'estonian': 'et_EE.ISO8859-1',
    'et': 'et_EE.ISO8859-15',
    'et_ee': 'et_EE.ISO8859-15',
    'et_ee.iso88591': 'et_EE.ISO8859-1',
    'et_ee.iso885913': 'et_EE.ISO8859-13',
    'et_ee.iso885915': 'et_EE.ISO8859-15',
    'et_ee.iso88594': 'et_EE.ISO8859-4',
    'et_ee@euro': 'et_EE.ISO8859-15',
    'eu': 'eu_ES.ISO8859-1',
    'eu_es': 'eu_ES.ISO8859-1',
    'eu_es.iso88591': 'eu_ES.ISO8859-1',
    'eu_es.iso885915': 'eu_ES.ISO8859-15',
    'eu_es@euro': 'eu_ES.ISO8859-15',
    'fa': 'fa_IR.UTF-8',
    'fa_ir': 'fa_IR.UTF-8',
    'fa_ir.isiri3342': 'fa_IR.ISIRI-3342',
    'fi': 'fi_FI.ISO8859-15',
    'fi_fi': 'fi_FI.ISO8859-15',
    'fi_fi.88591': 'fi_FI.ISO8859-1',
    'fi_fi.iso88591': 'fi_FI.ISO8859-1',
    'fi_fi.iso885915': 'fi_FI.ISO8859-15',
    'fi_fi.utf8@euro': 'fi_FI.UTF-8',
    'fi_fi@euro': 'fi_FI.ISO8859-15',
    'finnish': 'fi_FI.ISO8859-1',
    'finnish.iso88591': 'fi_FI.ISO8859-1',
    'fo': 'fo_FO.ISO8859-1',
    'fo_fo': 'fo_FO.ISO8859-1',
    'fo_fo.iso88591': 'fo_FO.ISO8859-1',
    'fo_fo.iso885915': 'fo_FO.ISO8859-15',
    'fo_fo@euro': 'fo_FO.ISO8859-15',
    'fr': 'fr_FR.ISO8859-1',
    'fr_be': 'fr_BE.ISO8859-1',
    'fr_be.88591': 'fr_BE.ISO8859-1',
    'fr_be.iso88591': 'fr_BE.ISO8859-1',
    'fr_be.iso885915': 'fr_BE.ISO8859-15',
    'fr_be@euro': 'fr_BE.ISO8859-15',
    'fr_ca': 'fr_CA.ISO8859-1',
    'fr_ca.88591': 'fr_CA.ISO8859-1',
    'fr_ca.iso88591': 'fr_CA.ISO8859-1',
    'fr_ca.iso885915': 'fr_CA.ISO8859-15',
    'fr_ca@euro': 'fr_CA.ISO8859-15',
    'fr_ch': 'fr_CH.ISO8859-1',
    'fr_ch.88591': 'fr_CH.ISO8859-1',
    'fr_ch.iso88591': 'fr_CH.ISO8859-1',
    'fr_ch.iso885915': 'fr_CH.ISO8859-15',
    'fr_ch@euro': 'fr_CH.ISO8859-15',
    'fr_fr': 'fr_FR.ISO8859-1',
    'fr_fr.88591': 'fr_FR.ISO8859-1',
    'fr_fr.iso88591': 'fr_FR.ISO8859-1',
    'fr_fr.iso885915': 'fr_FR.ISO8859-15',
    'fr_fr@euro': 'fr_FR.ISO8859-15',
    'fr_lu': 'fr_LU.ISO8859-1',
    'fr_lu.88591': 'fr_LU.ISO8859-1',
    'fr_lu.iso88591': 'fr_LU.ISO8859-1',
    'fr_lu.iso885915': 'fr_LU.ISO8859-15',
    'fr_lu@euro': 'fr_LU.ISO8859-15',
    'fran\xe7ais': 'fr_FR.ISO8859-1',
    'fre_fr': 'fr_FR.ISO8859-1',
    'fre_fr.8859': 'fr_FR.ISO8859-1',
    'french': 'fr_FR.ISO8859-1',
    'french.iso88591': 'fr_CH.ISO8859-1',
    'french_france': 'fr_FR.ISO8859-1',
    'french_france.8859': 'fr_FR.ISO8859-1',
    'ga': 'ga_IE.ISO8859-1',
    'ga_ie': 'ga_IE.ISO8859-1',
    'ga_ie.iso88591': 'ga_IE.ISO8859-1',
    'ga_ie.iso885914': 'ga_IE.ISO8859-14',
    'ga_ie.iso885915': 'ga_IE.ISO8859-15',
    'ga_ie@euro': 'ga_IE.ISO8859-15',
    'galego': 'gl_ES.ISO8859-1',
    'galician': 'gl_ES.ISO8859-1',
    'gd': 'gd_GB.ISO8859-1',
    'gd_gb': 'gd_GB.ISO8859-1',
    'gd_gb.iso88591': 'gd_GB.ISO8859-1',
    'gd_gb.iso885914': 'gd_GB.ISO8859-14',
    'gd_gb.iso885915': 'gd_GB.ISO8859-15',
    'gd_gb@euro': 'gd_GB.ISO8859-15',
    'ger_de': 'de_DE.ISO8859-1',
    'ger_de.8859': 'de_DE.ISO8859-1',
    'german': 'de_DE.ISO8859-1',
    'german.iso88591': 'de_CH.ISO8859-1',
    'german_germany': 'de_DE.ISO8859-1',
    'german_germany.8859': 'de_DE.ISO8859-1',
    'gl': 'gl_ES.ISO8859-1',
    'gl_es': 'gl_ES.ISO8859-1',
    'gl_es.iso88591': 'gl_ES.ISO8859-1',
    'gl_es.iso885915': 'gl_ES.ISO8859-15',
    'gl_es@euro': 'gl_ES.ISO8859-15',
    'greek': 'el_GR.ISO8859-7',
    'greek.iso88597': 'el_GR.ISO8859-7',
    'gv': 'gv_GB.ISO8859-1',
    'gv_gb': 'gv_GB.ISO8859-1',
    'gv_gb.iso88591': 'gv_GB.ISO8859-1',
    'gv_gb.iso885914': 'gv_GB.ISO8859-14',
    'gv_gb.iso885915': 'gv_GB.ISO8859-15',
    'gv_gb@euro': 'gv_GB.ISO8859-15',
    'he': 'he_IL.ISO8859-8',
    'he_il': 'he_IL.ISO8859-8',
    'he_il.cp1255': 'he_IL.CP1255',
    'he_il.iso88598': 'he_IL.ISO8859-8',
    'he_il.microsoftcp1255': 'he_IL.CP1255',
    'hebrew': 'iw_IL.ISO8859-8',
    'hebrew.iso88598': 'iw_IL.ISO8859-8',
    'hi': 'hi_IN.ISCII-DEV',
    'hi_in': 'hi_IN.ISCII-DEV',
    'hi_in.isciidev': 'hi_IN.ISCII-DEV',
    'hr': 'hr_HR.ISO8859-2',
    'hr_hr': 'hr_HR.ISO8859-2',
    'hr_hr.iso88592': 'hr_HR.ISO8859-2',
    'hrvatski': 'hr_HR.ISO8859-2',
    'hu': 'hu_HU.ISO8859-2',
    'hu_hu': 'hu_HU.ISO8859-2',
    'hu_hu.iso88592': 'hu_HU.ISO8859-2',
    'hungarian': 'hu_HU.ISO8859-2',
    'icelandic': 'is_IS.ISO8859-1',
    'icelandic.iso88591': 'is_IS.ISO8859-1',
    'id': 'id_ID.ISO8859-1',
    'id_id': 'id_ID.ISO8859-1',
    'in': 'id_ID.ISO8859-1',
    'in_id': 'id_ID.ISO8859-1',
    'is': 'is_IS.ISO8859-1',
    'is_is': 'is_IS.ISO8859-1',
    'is_is.iso88591': 'is_IS.ISO8859-1',
    'is_is.iso885915': 'is_IS.ISO8859-15',
    'is_is@euro': 'is_IS.ISO8859-15',
    'iso-8859-1': 'en_US.ISO8859-1',
    'iso-8859-15': 'en_US.ISO8859-15',
    'iso8859-1': 'en_US.ISO8859-1',
    'iso8859-15': 'en_US.ISO8859-15',
    'iso_8859_1': 'en_US.ISO8859-1',
    'iso_8859_15': 'en_US.ISO8859-15',
    'it': 'it_IT.ISO8859-1',
    'it_ch': 'it_CH.ISO8859-1',
    'it_ch.iso88591': 'it_CH.ISO8859-1',
    'it_ch.iso885915': 'it_CH.ISO8859-15',
    'it_ch@euro': 'it_CH.ISO8859-15',
    'it_it': 'it_IT.ISO8859-1',
    'it_it.88591': 'it_IT.ISO8859-1',
    'it_it.iso88591': 'it_IT.ISO8859-1',
    'it_it.iso885915': 'it_IT.ISO8859-15',
    'it_it@euro': 'it_IT.ISO8859-15',
    'italian': 'it_IT.ISO8859-1',
    'italian.iso88591': 'it_IT.ISO8859-1',
    'iu': 'iu_CA.NUNACOM-8',
    'iu_ca': 'iu_CA.NUNACOM-8',
    'iu_ca.nunacom8': 'iu_CA.NUNACOM-8',
    'iw': 'he_IL.ISO8859-8',
    'iw_il': 'he_IL.ISO8859-8',
    'iw_il.iso88598': 'he_IL.ISO8859-8',
    'ja': 'ja_JP.eucJP',
    'ja.jis': 'ja_JP.JIS7',
    'ja.sjis': 'ja_JP.SJIS',
    'ja_jp': 'ja_JP.eucJP',
    'ja_jp.ajec': 'ja_JP.eucJP',
    'ja_jp.euc': 'ja_JP.eucJP',
    'ja_jp.eucjp': 'ja_JP.eucJP',
    'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
    'ja_jp.iso2022jp': 'ja_JP.JIS7',
    'ja_jp.jis': 'ja_JP.JIS7',
    'ja_jp.jis7': 'ja_JP.JIS7',
    'ja_jp.mscode': 'ja_JP.SJIS',
    'ja_jp.sjis': 'ja_JP.SJIS',
    'ja_jp.ujis': 'ja_JP.eucJP',
    'japan': 'ja_JP.eucJP',
    'japanese': 'ja_JP.eucJP',
    'japanese-euc': 'ja_JP.eucJP',
    'japanese.euc': 'ja_JP.eucJP',
    'japanese.sjis': 'ja_JP.SJIS',
    'jp_jp': 'ja_JP.eucJP',
    'ka': 'ka_GE.GEORGIAN-ACADEMY',
    'ka_ge': 'ka_GE.GEORGIAN-ACADEMY',
    'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY',
    'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS',
    'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY',
    'kl': 'kl_GL.ISO8859-1',
    'kl_gl': 'kl_GL.ISO8859-1',
    'kl_gl.iso88591': 'kl_GL.ISO8859-1',
    'kl_gl.iso885915': 'kl_GL.ISO8859-15',
    'kl_gl@euro': 'kl_GL.ISO8859-15',
    'ko': 'ko_KR.eucKR',
    'ko_kr': 'ko_KR.eucKR',
    'ko_kr.euc': 'ko_KR.eucKR',
    'ko_kr.euckr': 'ko_KR.eucKR',
    'korean': 'ko_KR.eucKR',
    'korean.euc': 'ko_KR.eucKR',
    'kw': 'kw_GB.ISO8859-1',
    'kw_gb': 'kw_GB.ISO8859-1',
    'kw_gb.iso88591': 'kw_GB.ISO8859-1',
    'kw_gb.iso885914': 'kw_GB.ISO8859-14',
    'kw_gb.iso885915': 'kw_GB.ISO8859-15',
    'kw_gb@euro': 'kw_GB.ISO8859-15',
    'lithuanian': 'lt_LT.ISO8859-13',
    'lo': 'lo_LA.MULELAO-1',
    'lo_la': 'lo_LA.MULELAO-1',
    'lo_la.cp1133': 'lo_LA.IBM-CP1133',
    'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133',
    'lo_la.mulelao1': 'lo_LA.MULELAO-1',
    'lt': 'lt_LT.ISO8859-13',
    'lt_lt': 'lt_LT.ISO8859-13',
    'lt_lt.iso885913': 'lt_LT.ISO8859-13',
    'lt_lt.iso88594': 'lt_LT.ISO8859-4',
    'lv': 'lv_LV.ISO8859-13',
    'lv_lv': 'lv_LV.ISO8859-13',
    'lv_lv.iso885913': 'lv_LV.ISO8859-13',
    'lv_lv.iso88594': 'lv_LV.ISO8859-4',
    'mi': 'mi_NZ.ISO8859-1',
    'mi_nz': 'mi_NZ.ISO8859-1',
    'mi_nz.iso88591': 'mi_NZ.ISO8859-1',
    'mk': 'mk_MK.ISO8859-5',
    'mk_mk': 'mk_MK.ISO8859-5',
    'mk_mk.cp1251': 'mk_MK.CP1251',
    'mk_mk.iso88595': 'mk_MK.ISO8859-5',
    'mk_mk.microsoftcp1251': 'mk_MK.CP1251',
    'ms': 'ms_MY.ISO8859-1',
    'ms_my': 'ms_MY.ISO8859-1',
    'ms_my.iso88591': 'ms_MY.ISO8859-1',
    'mt': 'mt_MT.ISO8859-3',
    'mt_mt': 'mt_MT.ISO8859-3',
    'mt_mt.iso88593': 'mt_MT.ISO8859-3',
    'nb': 'nb_NO.ISO8859-1',
    'nb_no': 'nb_NO.ISO8859-1',
    'nb_no.88591': 'nb_NO.ISO8859-1',
    'nb_no.iso88591': 'nb_NO.ISO8859-1',
    'nb_no.iso885915': 'nb_NO.ISO8859-15',
    'nb_no@euro': 'nb_NO.ISO8859-15',
    'nl': 'nl_NL.ISO8859-1',
    'nl_be': 'nl_BE.ISO8859-1',
    'nl_be.88591': 'nl_BE.ISO8859-1',
    'nl_be.iso88591': 'nl_BE.ISO8859-1',
    'nl_be.iso885915': 'nl_BE.ISO8859-15',
    'nl_be@euro': 'nl_BE.ISO8859-15',
    'nl_nl': 'nl_NL.ISO8859-1',
    'nl_nl.88591': 'nl_NL.ISO8859-1',
    'nl_nl.iso88591': 'nl_NL.ISO8859-1',
    'nl_nl.iso885915': 'nl_NL.ISO8859-15',
    'nl_nl@euro': 'nl_NL.ISO8859-15',
    'nn': 'nn_NO.ISO8859-1',
    'nn_no': 'nn_NO.ISO8859-1',
    'nn_no.88591': 'nn_NO.ISO8859-1',
    'nn_no.iso88591': 'nn_NO.ISO8859-1',
    'nn_no.iso885915': 'nn_NO.ISO8859-15',
    'nn_no@euro': 'nn_NO.ISO8859-15',
    'no': 'no_NO.ISO8859-1',
    'no@nynorsk': 'ny_NO.ISO8859-1',
    'no_no': 'no_NO.ISO8859-1',
    'no_no.88591': 'no_NO.ISO8859-1',
    'no_no.iso88591': 'no_NO.ISO8859-1',
    'no_no.iso885915': 'no_NO.ISO8859-15',
    'no_no@euro': 'no_NO.ISO8859-15',
    'norwegian': 'no_NO.ISO8859-1',
    'norwegian.iso88591': 'no_NO.ISO8859-1',
    'ny': 'ny_NO.ISO8859-1',
    'ny_no': 'ny_NO.ISO8859-1',
    'ny_no.88591': 'ny_NO.ISO8859-1',
    'ny_no.iso88591': 'ny_NO.ISO8859-1',
    'ny_no.iso885915': 'ny_NO.ISO8859-15',
    'ny_no@euro': 'ny_NO.ISO8859-15',
    'nynorsk': 'nn_NO.ISO8859-1',
    'oc': 'oc_FR.ISO8859-1',
    'oc_fr': 'oc_FR.ISO8859-1',
    'oc_fr.iso88591': 'oc_FR.ISO8859-1',
    'oc_fr.iso885915': 'oc_FR.ISO8859-15',
    'oc_fr@euro': 'oc_FR.ISO8859-15',
    'pd': 'pd_US.ISO8859-1',
    'pd_de': 'pd_DE.ISO8859-1',
    'pd_de.iso88591': 'pd_DE.ISO8859-1',
    'pd_de.iso885915': 'pd_DE.ISO8859-15',
    'pd_de@euro': 'pd_DE.ISO8859-15',
    'pd_us': 'pd_US.ISO8859-1',
    'pd_us.iso88591': 'pd_US.ISO8859-1',
    'pd_us.iso885915': 'pd_US.ISO8859-15',
    'pd_us@euro': 'pd_US.ISO8859-15',
    'ph': 'ph_PH.ISO8859-1',
    'ph_ph': 'ph_PH.ISO8859-1',
    'ph_ph.iso88591': 'ph_PH.ISO8859-1',
    'pl': 'pl_PL.ISO8859-2',
    'pl_pl': 'pl_PL.ISO8859-2',
    'pl_pl.iso88592': 'pl_PL.ISO8859-2',
    'polish': 'pl_PL.ISO8859-2',
    'portuguese': 'pt_PT.ISO8859-1',
    'portuguese.iso88591': 'pt_PT.ISO8859-1',
    'portuguese_brazil': 'pt_BR.ISO8859-1',
    'portuguese_brazil.8859': 'pt_BR.ISO8859-1',
    'posix': 'C',
    'posix-utf2': 'C',
    'pp': 'pp_AN.ISO8859-1',
    'pp_an': 'pp_AN.ISO8859-1',
    'pp_an.iso88591': 'pp_AN.ISO8859-1',
    'pt': 'pt_PT.ISO8859-1',
    'pt_br': 'pt_BR.ISO8859-1',
    'pt_br.88591': 'pt_BR.ISO8859-1',
    'pt_br.iso88591': 'pt_BR.ISO8859-1',
    'pt_br.iso885915': 'pt_BR.ISO8859-15',
    'pt_br@euro': 'pt_BR.ISO8859-15',
    'pt_pt': 'pt_PT.ISO8859-1',
    'pt_pt.88591': 'pt_PT.ISO8859-1',
    'pt_pt.iso88591': 'pt_PT.ISO8859-1',
    'pt_pt.iso885915': 'pt_PT.ISO8859-15',
    'pt_pt.utf8@euro': 'pt_PT.UTF-8',
    'pt_pt@euro': 'pt_PT.ISO8859-15',
    'ro': 'ro_RO.ISO8859-2',
    'ro_ro': 'ro_RO.ISO8859-2',
    'ro_ro.iso88592': 'ro_RO.ISO8859-2',
    'romanian': 'ro_RO.ISO8859-2',
    'ru': 'ru_RU.ISO8859-5',
    'ru_ru': 'ru_RU.ISO8859-5',
    'ru_ru.cp1251': 'ru_RU.CP1251',
    'ru_ru.iso88595': 'ru_RU.ISO8859-5',
    'ru_ru.koi8r': 'ru_RU.KOI8-R',
    'ru_ru.microsoftcp1251': 'ru_RU.CP1251',
    'ru_ua': 'ru_UA.KOI8-U',
    'ru_ua.cp1251': 'ru_UA.CP1251',
    'ru_ua.koi8u': 'ru_UA.KOI8-U',
    'ru_ua.microsoftcp1251': 'ru_UA.CP1251',
    'rumanian': 'ro_RO.ISO8859-2',
    'russian': 'ru_RU.ISO8859-5',
    'se_no': 'se_NO.UTF-8',
    'serbocroatian': 'sh_YU.ISO8859-2',
    'sh': 'sh_YU.ISO8859-2',
    'sh_hr': 'sh_HR.ISO8859-2',
    'sh_hr.iso88592': 'sh_HR.ISO8859-2',
    'sh_sp': 'sh_YU.ISO8859-2',
    'sh_yu': 'sh_YU.ISO8859-2',
    'sk': 'sk_SK.ISO8859-2',
    'sk_sk': 'sk_SK.ISO8859-2',
    'sk_sk.iso88592': 'sk_SK.ISO8859-2',
    'sl': 'sl_SI.ISO8859-2',
    'sl_cs': 'sl_CS.ISO8859-2',
    'sl_si': 'sl_SI.ISO8859-2',
    'sl_si.iso88592': 'sl_SI.ISO8859-2',
    'slovak': 'sk_SK.ISO8859-2',
    'slovene': 'sl_SI.ISO8859-2',
    'slovenian': 'sl_SI.ISO8859-2',
    'sp': 'sp_YU.ISO8859-5',
    'sp_yu': 'sp_YU.ISO8859-5',
    'spanish': 'es_ES.ISO8859-1',
    'spanish.iso88591': 'es_ES.ISO8859-1',
    'spanish_spain': 'es_ES.ISO8859-1',
    'spanish_spain.8859': 'es_ES.ISO8859-1',
    'sq': 'sq_AL.ISO8859-2',
    'sq_al': 'sq_AL.ISO8859-2',
    'sq_al.iso88592': 'sq_AL.ISO8859-2',
    'sr': 'sr_YU.ISO8859-5',
    'sr@cyrillic': 'sr_YU.ISO8859-5',
    'sr_sp': 'sr_SP.ISO8859-2',
    'sr_yu': 'sr_YU.ISO8859-5',
    'sr_yu.cp1251@cyrillic': 'sr_YU.CP1251',
    'sr_yu.iso88592': 'sr_YU.ISO8859-2',
    'sr_yu.iso88595': 'sr_YU.ISO8859-5',
    'sr_yu.iso88595@cyrillic': 'sr_YU.ISO8859-5',
    'sr_yu.microsoftcp1251@cyrillic': 'sr_YU.CP1251',
    'sr_yu.utf8@cyrillic': 'sr_YU.UTF-8',
    'sr_yu@cyrillic': 'sr_YU.ISO8859-5',
    'sv': 'sv_SE.ISO8859-1',
    'sv_fi': 'sv_FI.ISO8859-1',
    'sv_fi.iso88591': 'sv_FI.ISO8859-1',
    'sv_fi.iso885915': 'sv_FI.ISO8859-15',
    'sv_fi@euro': 'sv_FI.ISO8859-15',
    'sv_se': 'sv_SE.ISO8859-1',
    'sv_se.88591': 'sv_SE.ISO8859-1',
    'sv_se.iso88591': 'sv_SE.ISO8859-1',
    'sv_se.iso885915': 'sv_SE.ISO8859-15',
    'sv_se@euro': 'sv_SE.ISO8859-15',
    'swedish': 'sv_SE.ISO8859-1',
    'swedish.iso88591': 'sv_SE.ISO8859-1',
    'ta': 'ta_IN.TSCII-0',
    'ta_in': 'ta_IN.TSCII-0',
    'ta_in.tscii': 'ta_IN.TSCII-0',
    'ta_in.tscii0': 'ta_IN.TSCII-0',
    'tg': 'tg_TJ.KOI8-C',
    'tg_tj': 'tg_TJ.KOI8-C',
    'tg_tj.koi8c': 'tg_TJ.KOI8-C',
    'th': 'th_TH.ISO8859-11',
    'th_th': 'th_TH.ISO8859-11',
    'th_th.iso885911': 'th_TH.ISO8859-11',
    'th_th.tactis': 'th_TH.TIS620',
    'th_th.tis620': 'th_TH.TIS620',
    'thai': 'th_TH.ISO8859-11',
    'tl': 'tl_PH.ISO8859-1',
    'tl_ph': 'tl_PH.ISO8859-1',
    'tl_ph.iso88591': 'tl_PH.ISO8859-1',
    'tr': 'tr_TR.ISO8859-9',
    'tr_tr': 'tr_TR.ISO8859-9',
    'tr_tr.iso88599': 'tr_TR.ISO8859-9',
    'tt': 'tt_RU.TATAR-CYR',
    'tt_ru': 'tt_RU.TATAR-CYR',
    'tt_ru.koi8c': 'tt_RU.KOI8-C',
    'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR',
    'turkish': 'tr_TR.ISO8859-9',
    'turkish.iso88599': 'tr_TR.ISO8859-9',
    'uk': 'uk_UA.KOI8-U',
    'uk_ua': 'uk_UA.KOI8-U',
    'uk_ua.cp1251': 'uk_UA.CP1251',
    'uk_ua.iso88595': 'uk_UA.ISO8859-5',
    'uk_ua.koi8u': 'uk_UA.KOI8-U',
    'uk_ua.microsoftcp1251': 'uk_UA.CP1251',
    'univ': 'en_US.utf',
    'universal': 'en_US.utf',
    'universal.utf8@ucs4': 'en_US.UTF-8',
    'ur': 'ur_PK.CP1256',
    'ur_pk': 'ur_PK.CP1256',
    'ur_pk.cp1256': 'ur_PK.CP1256',
    'ur_pk.microsoftcp1256': 'ur_PK.CP1256',
    'uz': 'uz_UZ.UTF-8',
    'uz_uz': 'uz_UZ.UTF-8',
    'vi': 'vi_VN.TCVN',
    'vi_vn': 'vi_VN.TCVN',
    'vi_vn.tcvn': 'vi_VN.TCVN',
    'vi_vn.tcvn5712': 'vi_VN.TCVN',
    'vi_vn.viscii': 'vi_VN.VISCII',
    'vi_vn.viscii111': 'vi_VN.VISCII',
    'wa': 'wa_BE.ISO8859-1',
    'wa_be': 'wa_BE.ISO8859-1',
    'wa_be.iso88591': 'wa_BE.ISO8859-1',
    'wa_be.iso885915': 'wa_BE.ISO8859-15',
    'wa_be@euro': 'wa_BE.ISO8859-15',
    'yi': 'yi_US.CP1255',
    'yi_us': 'yi_US.CP1255',
    'yi_us.cp1255': 'yi_US.CP1255',
    'yi_us.microsoftcp1255': 'yi_US.CP1255',
    'zh': 'zh_CN.eucCN',
    'zh_cn': 'zh_CN.gb2312',
    'zh_cn.big5': 'zh_TW.big5',
    'zh_cn.euc': 'zh_CN.eucCN',
    'zh_cn.gb18030': 'zh_CN.gb18030',
    'zh_cn.gb2312': 'zh_CN.gb2312',
    'zh_cn.gbk': 'zh_CN.gbk',
    'zh_hk': 'zh_HK.big5hkscs',
    'zh_hk.big5': 'zh_HK.big5',
    'zh_hk.big5hkscs': 'zh_HK.big5hkscs',
    'zh_tw': 'zh_TW.big5',
    'zh_tw.big5': 'zh_TW.big5',
    'zh_tw.euc': 'zh_TW.eucTW',
}
+
+#
+# This maps Windows language identifiers to locale strings.
+#
+# This list has been updated from
+# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
+# to include every locale up to Windows XP.
+#
+# NOTE: this mapping is incomplete. If your language is missing, please
+# submit a bug report to Python bug manager, which you can find via:
+# http://www.python.org/dev/
+# Make sure you include the missing language identifier and the suggested
+# locale code.
+#
+
+windows_locale = {
+ 0x0436: "af_ZA", # Afrikaans
+ 0x041c: "sq_AL", # Albanian
+ 0x0401: "ar_SA", # Arabic - Saudi Arabia
+ 0x0801: "ar_IQ", # Arabic - Iraq
+ 0x0c01: "ar_EG", # Arabic - Egypt
+ 0x1001: "ar_LY", # Arabic - Libya
+ 0x1401: "ar_DZ", # Arabic - Algeria
+ 0x1801: "ar_MA", # Arabic - Morocco
+ 0x1c01: "ar_TN", # Arabic - Tunisia
+ 0x2001: "ar_OM", # Arabic - Oman
+ 0x2401: "ar_YE", # Arabic - Yemen
+ 0x2801: "ar_SY", # Arabic - Syria
+ 0x2c01: "ar_JO", # Arabic - Jordan
+ 0x3001: "ar_LB", # Arabic - Lebanon
+ 0x3401: "ar_KW", # Arabic - Kuwait
+ 0x3801: "ar_AE", # Arabic - United Arab Emirates
+ 0x3c01: "ar_BH", # Arabic - Bahrain
+ 0x4001: "ar_QA", # Arabic - Qatar
+ 0x042b: "hy_AM", # Armenian
+ 0x042c: "az_AZ", # Azeri Latin
+ 0x082c: "az_AZ", # Azeri - Cyrillic
+ 0x042d: "eu_ES", # Basque
+ 0x0423: "be_BY", # Belarusian
+ 0x0445: "bn_IN", # Begali
+ 0x201a: "bs_BA", # Bosnian
+ 0x141a: "bs_BA", # Bosnian - Cyrillic
+ 0x047e: "br_FR", # Breton - France
+ 0x0402: "bg_BG", # Bulgarian
+ 0x0403: "ca_ES", # Catalan
+ 0x0004: "zh_CHS",# Chinese - Simplified
+ 0x0404: "zh_TW", # Chinese - Taiwan
+ 0x0804: "zh_CN", # Chinese - PRC
+ 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
+ 0x1004: "zh_SG", # Chinese - Singapore
+ 0x1404: "zh_MO", # Chinese - Macao S.A.R.
+ 0x7c04: "zh_CHT",# Chinese - Traditional
+ 0x041a: "hr_HR", # Croatian
+ 0x101a: "hr_BA", # Croatian - Bosnia
+ 0x0405: "cs_CZ", # Czech
+ 0x0406: "da_DK", # Danish
+ 0x048c: "gbz_AF",# Dari - Afghanistan
+ 0x0465: "div_MV",# Divehi - Maldives
+ 0x0413: "nl_NL", # Dutch - The Netherlands
+ 0x0813: "nl_BE", # Dutch - Belgium
+ 0x0409: "en_US", # English - United States
+ 0x0809: "en_GB", # English - United Kingdom
+ 0x0c09: "en_AU", # English - Australia
+ 0x1009: "en_CA", # English - Canada
+ 0x1409: "en_NZ", # English - New Zealand
+ 0x1809: "en_IE", # English - Ireland
+ 0x1c09: "en_ZA", # English - South Africa
+ 0x2009: "en_JA", # English - Jamaica
+ 0x2409: "en_CB", # English - Carribbean
+ 0x2809: "en_BZ", # English - Belize
+ 0x2c09: "en_TT", # English - Trinidad
+ 0x3009: "en_ZW", # English - Zimbabwe
+ 0x3409: "en_PH", # English - Philippines
+ 0x0425: "et_EE", # Estonian
+ 0x0438: "fo_FO", # Faroese
+ 0x0464: "fil_PH",# Filipino
+ 0x040b: "fi_FI", # Finnish
+ 0x040c: "fr_FR", # French - France
+ 0x080c: "fr_BE", # French - Belgium
+ 0x0c0c: "fr_CA", # French - Canada
+ 0x100c: "fr_CH", # French - Switzerland
+ 0x140c: "fr_LU", # French - Luxembourg
+ 0x180c: "fr_MC", # French - Monaco
+ 0x0462: "fy_NL", # Frisian - Netherlands
+ 0x0456: "gl_ES", # Galician
+ 0x0437: "ka_GE", # Georgian
+ 0x0407: "de_DE", # German - Germany
+ 0x0807: "de_CH", # German - Switzerland
+ 0x0c07: "de_AT", # German - Austria
+ 0x1007: "de_LU", # German - Luxembourg
+ 0x1407: "de_LI", # German - Liechtenstein
+ 0x0408: "el_GR", # Greek
+ 0x0447: "gu_IN", # Gujarati
+ 0x040d: "he_IL", # Hebrew
+ 0x0439: "hi_IN", # Hindi
+ 0x040e: "hu_HU", # Hungarian
+ 0x040f: "is_IS", # Icelandic
+ 0x0421: "id_ID", # Indonesian
+ 0x045d: "iu_CA", # Inuktitut
+ 0x085d: "iu_CA", # Inuktitut - Latin
+ 0x083c: "ga_IE", # Irish - Ireland
+ 0x0434: "xh_ZA", # Xhosa - South Africa
+ 0x0435: "zu_ZA", # Zulu
+ 0x0410: "it_IT", # Italian - Italy
+ 0x0810: "it_CH", # Italian - Switzerland
+ 0x0411: "ja_JP", # Japanese
+ 0x044b: "kn_IN", # Kannada - India
+ 0x043f: "kk_KZ", # Kazakh
+ 0x0457: "kok_IN",# Konkani
+ 0x0412: "ko_KR", # Korean
+ 0x0440: "ky_KG", # Kyrgyz
+ 0x0426: "lv_LV", # Latvian
+ 0x0427: "lt_LT", # Lithuanian
+ 0x046e: "lb_LU", # Luxembourgish
+ 0x042f: "mk_MK", # FYRO Macedonian
+ 0x043e: "ms_MY", # Malay - Malaysia
+ 0x083e: "ms_BN", # Malay - Brunei
+ 0x044c: "ml_IN", # Malayalam - India
+ 0x043a: "mt_MT", # Maltese
+ 0x0481: "mi_NZ", # Maori
+ 0x047a: "arn_CL",# Mapudungun
+ 0x044e: "mr_IN", # Marathi
+ 0x047c: "moh_CA",# Mohawk - Canada
+ 0x0450: "mn_MN", # Mongolian
+ 0x0461: "ne_NP", # Nepali
+ 0x0414: "nb_NO", # Norwegian - Bokmal
+ 0x0814: "nn_NO", # Norwegian - Nynorsk
+ 0x0482: "oc_FR", # Occitan - France
+ 0x0448: "or_IN", # Oriya - India
+ 0x0463: "ps_AF", # Pashto - Afghanistan
+ 0x0429: "fa_IR", # Persian
+ 0x0415: "pl_PL", # Polish
+ 0x0416: "pt_BR", # Portuguese - Brazil
+ 0x0816: "pt_PT", # Portuguese - Portugal
+ 0x0446: "pa_IN", # Punjabi
+ 0x046b: "quz_BO",# Quechua (Bolivia)
+ 0x086b: "quz_EC",# Quechua (Ecuador)
+ 0x0c6b: "quz_PE",# Quechua (Peru)
+ 0x0418: "ro_RO", # Romanian - Romania
+ 0x0417: "rm_CH", # Raeto-Romanese
+ 0x0419: "ru_RU", # Russian
+ 0x243b: "smn_FI",# Sami Finland
+ 0x103b: "smj_NO",# Sami Norway
+ 0x143b: "smj_SE",# Sami Sweden
+ 0x043b: "se_NO", # Sami Northern Norway
+ 0x083b: "se_SE", # Sami Northern Sweden
+ 0x0c3b: "se_FI", # Sami Northern Finland
+ 0x203b: "sms_FI",# Sami Skolt
+ 0x183b: "sma_NO",# Sami Southern Norway
+ 0x1c3b: "sma_SE",# Sami Southern Sweden
+ 0x044f: "sa_IN", # Sanskrit
+ 0x0c1a: "sr_SP", # Serbian - Cyrillic
+ 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
+ 0x081a: "sr_SP", # Serbian - Latin
+ 0x181a: "sr_BA", # Serbian - Bosnia Latin
+ 0x046c: "ns_ZA", # Northern Sotho
+ 0x0432: "tn_ZA", # Setswana - Southern Africa
+ 0x041b: "sk_SK", # Slovak
+ 0x0424: "sl_SI", # Slovenian
+ 0x040a: "es_ES", # Spanish - Spain
+ 0x080a: "es_MX", # Spanish - Mexico
+ 0x0c0a: "es_ES", # Spanish - Spain (Modern)
+ 0x100a: "es_GT", # Spanish - Guatemala
+ 0x140a: "es_CR", # Spanish - Costa Rica
+ 0x180a: "es_PA", # Spanish - Panama
+ 0x1c0a: "es_DO", # Spanish - Dominican Republic
+ 0x200a: "es_VE", # Spanish - Venezuela
+ 0x240a: "es_CO", # Spanish - Colombia
+ 0x280a: "es_PE", # Spanish - Peru
+ 0x2c0a: "es_AR", # Spanish - Argentina
+ 0x300a: "es_EC", # Spanish - Ecuador
+ 0x340a: "es_CL", # Spanish - Chile
+ 0x380a: "es_UR", # Spanish - Uruguay
+ 0x3c0a: "es_PY", # Spanish - Paraguay
+ 0x400a: "es_BO", # Spanish - Bolivia
+ 0x440a: "es_SV", # Spanish - El Salvador
+ 0x480a: "es_HN", # Spanish - Honduras
+ 0x4c0a: "es_NI", # Spanish - Nicaragua
+ 0x500a: "es_PR", # Spanish - Puerto Rico
+ 0x0441: "sw_KE", # Swahili
+ 0x041d: "sv_SE", # Swedish - Sweden
+ 0x081d: "sv_FI", # Swedish - Finland
+ 0x045a: "syr_SY",# Syriac
+ 0x0449: "ta_IN", # Tamil
+ 0x0444: "tt_RU", # Tatar
+ 0x044a: "te_IN", # Telugu
+ 0x041e: "th_TH", # Thai
+ 0x041f: "tr_TR", # Turkish
+ 0x0422: "uk_UA", # Ukrainian
+ 0x0420: "ur_PK", # Urdu
+ 0x0820: "ur_IN", # Urdu - India
+ 0x0443: "uz_UZ", # Uzbek - Latin
+ 0x0843: "uz_UZ", # Uzbek - Cyrillic
+ 0x042a: "vi_VN", # Vietnamese
+ 0x0452: "cy_GB", # Welsh
+}
+
+def _print_locale():
+
+ """ Test function.
+ """
+ categories = {}
+ def _init_categories(categories=categories):
+ for k,v in globals().items():
+ if k[:3] == 'LC_':
+ categories[k] = v
+ _init_categories()
+ del categories['LC_ALL']
+
+ print 'Locale defaults as determined by getdefaultlocale():'
+ print '-'*72
+ lang, enc = getdefaultlocale()
+ print 'Language: ', lang or '(undefined)'
+ print 'Encoding: ', enc or '(undefined)'
+ print
+
+ print 'Locale settings on startup:'
+ print '-'*72
+ for name,category in categories.items():
+ print name, '...'
+ lang, enc = getlocale(category)
+ print ' Language: ', lang or '(undefined)'
+ print ' Encoding: ', enc or '(undefined)'
+ print
+
+ print
+ print 'Locale settings after calling resetlocale():'
+ print '-'*72
+ resetlocale()
+ for name,category in categories.items():
+ print name, '...'
+ lang, enc = getlocale(category)
+ print ' Language: ', lang or '(undefined)'
+ print ' Encoding: ', enc or '(undefined)'
+ print
+
+ try:
+ setlocale(LC_ALL, "")
+ except:
+ print 'NOTE:'
+ print 'setlocale(LC_ALL, "") does not support the default locale'
+ print 'given in the OS environment variables.'
+ else:
+ print
+ print 'Locale settings after calling setlocale(LC_ALL, ""):'
+ print '-'*72
+ for name,category in categories.items():
+ print name, '...'
+ lang, enc = getlocale(category)
+ print ' Language: ', lang or '(undefined)'
+ print ' Encoding: ', enc or '(undefined)'
+ print
+
+###
+
+try:
+ LC_MESSAGES
+except NameError:
+ pass
+else:
+ __all__.append("LC_MESSAGES")
+
+if __name__=='__main__':
+ print 'Locale aliasing:'
+ print
+ _print_locale()
+ print
+ print 'Number formatting:'
+ print
+ _test()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/opcode.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/opcode.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,185 @@
+
+"""
+opcode module - potentially shared between dis and other modules which
+operate on bytecodes (e.g. peephole optimizers).
+"""
+
+__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
+ "haslocal", "hascompare", "hasfree", "opname", "opmap",
+ "HAVE_ARGUMENT", "EXTENDED_ARG"]
+
+cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
+ 'is not', 'exception match', 'BAD')
+
+hasconst = []
+hasname = []
+hasjrel = []
+hasjabs = []
+haslocal = []
+hascompare = []
+hasfree = []
+
+opmap = {}
+opname = [''] * 256
+for op in range(256): opname[op] = '<%r>' % (op,)
+del op
+
+def def_op(name, op):
+ opname[op] = name
+ opmap[name] = op
+
+def name_op(name, op):
+ def_op(name, op)
+ hasname.append(op)
+
+def jrel_op(name, op):
+ def_op(name, op)
+ hasjrel.append(op)
+
+def jabs_op(name, op):
+ def_op(name, op)
+ hasjabs.append(op)
+
+# Instruction opcodes for compiled code
+# Blank lines correspond to available opcodes
+
+def_op('STOP_CODE', 0)
+def_op('POP_TOP', 1)
+def_op('ROT_TWO', 2)
+def_op('ROT_THREE', 3)
+def_op('DUP_TOP', 4)
+def_op('ROT_FOUR', 5)
+
+def_op('NOP', 9)
+def_op('UNARY_POSITIVE', 10)
+def_op('UNARY_NEGATIVE', 11)
+def_op('UNARY_NOT', 12)
+def_op('UNARY_CONVERT', 13)
+
+def_op('UNARY_INVERT', 15)
+
+def_op('LIST_APPEND', 18)
+def_op('BINARY_POWER', 19)
+def_op('BINARY_MULTIPLY', 20)
+def_op('BINARY_DIVIDE', 21)
+def_op('BINARY_MODULO', 22)
+def_op('BINARY_ADD', 23)
+def_op('BINARY_SUBTRACT', 24)
+def_op('BINARY_SUBSCR', 25)
+def_op('BINARY_FLOOR_DIVIDE', 26)
+def_op('BINARY_TRUE_DIVIDE', 27)
+def_op('INPLACE_FLOOR_DIVIDE', 28)
+def_op('INPLACE_TRUE_DIVIDE', 29)
+def_op('SLICE+0', 30)
+def_op('SLICE+1', 31)
+def_op('SLICE+2', 32)
+def_op('SLICE+3', 33)
+
+def_op('STORE_SLICE+0', 40)
+def_op('STORE_SLICE+1', 41)
+def_op('STORE_SLICE+2', 42)
+def_op('STORE_SLICE+3', 43)
+
+def_op('DELETE_SLICE+0', 50)
+def_op('DELETE_SLICE+1', 51)
+def_op('DELETE_SLICE+2', 52)
+def_op('DELETE_SLICE+3', 53)
+
+def_op('INPLACE_ADD', 55)
+def_op('INPLACE_SUBTRACT', 56)
+def_op('INPLACE_MULTIPLY', 57)
+def_op('INPLACE_DIVIDE', 58)
+def_op('INPLACE_MODULO', 59)
+def_op('STORE_SUBSCR', 60)
+def_op('DELETE_SUBSCR', 61)
+def_op('BINARY_LSHIFT', 62)
+def_op('BINARY_RSHIFT', 63)
+def_op('BINARY_AND', 64)
+def_op('BINARY_XOR', 65)
+def_op('BINARY_OR', 66)
+def_op('INPLACE_POWER', 67)
+def_op('GET_ITER', 68)
+
+def_op('PRINT_EXPR', 70)
+def_op('PRINT_ITEM', 71)
+def_op('PRINT_NEWLINE', 72)
+def_op('PRINT_ITEM_TO', 73)
+def_op('PRINT_NEWLINE_TO', 74)
+def_op('INPLACE_LSHIFT', 75)
+def_op('INPLACE_RSHIFT', 76)
+def_op('INPLACE_AND', 77)
+def_op('INPLACE_XOR', 78)
+def_op('INPLACE_OR', 79)
+def_op('BREAK_LOOP', 80)
+def_op('WITH_CLEANUP', 81)
+def_op('LOAD_LOCALS', 82)
+def_op('RETURN_VALUE', 83)
+def_op('IMPORT_STAR', 84)
+def_op('EXEC_STMT', 85)
+def_op('YIELD_VALUE', 86)
+def_op('POP_BLOCK', 87)
+def_op('END_FINALLY', 88)
+def_op('BUILD_CLASS', 89)
+
+HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
+
+name_op('STORE_NAME', 90) # Index in name list
+name_op('DELETE_NAME', 91) # ""
+def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
+jrel_op('FOR_ITER', 93)
+
+name_op('STORE_ATTR', 95) # Index in name list
+name_op('DELETE_ATTR', 96) # ""
+name_op('STORE_GLOBAL', 97) # ""
+name_op('DELETE_GLOBAL', 98) # ""
+def_op('DUP_TOPX', 99) # number of items to duplicate
+def_op('LOAD_CONST', 100) # Index in const list
+hasconst.append(100)
+name_op('LOAD_NAME', 101) # Index in name list
+def_op('BUILD_TUPLE', 102) # Number of tuple items
+def_op('BUILD_LIST', 103) # Number of list items
+def_op('BUILD_MAP', 104) # Always zero for now
+name_op('LOAD_ATTR', 105) # Index in name list
+def_op('COMPARE_OP', 106) # Comparison operator
+hascompare.append(106)
+name_op('IMPORT_NAME', 107) # Index in name list
+name_op('IMPORT_FROM', 108) # Index in name list
+
+jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
+jrel_op('JUMP_IF_FALSE', 111) # ""
+jrel_op('JUMP_IF_TRUE', 112) # ""
+jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code
+
+name_op('LOAD_GLOBAL', 116) # Index in name list
+
+jabs_op('CONTINUE_LOOP', 119) # Target address
+jrel_op('SETUP_LOOP', 120) # Distance to target address
+jrel_op('SETUP_EXCEPT', 121) # ""
+jrel_op('SETUP_FINALLY', 122) # ""
+
+def_op('LOAD_FAST', 124) # Local variable number
+haslocal.append(124)
+def_op('STORE_FAST', 125) # Local variable number
+haslocal.append(125)
+def_op('DELETE_FAST', 126) # Local variable number
+haslocal.append(126)
+
+def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
+def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
+def_op('MAKE_FUNCTION', 132) # Number of args with default values
+def_op('BUILD_SLICE', 133) # Number of items
+def_op('MAKE_CLOSURE', 134)
+def_op('LOAD_CLOSURE', 135)
+hasfree.append(135)
+def_op('LOAD_DEREF', 136)
+hasfree.append(136)
+def_op('STORE_DEREF', 137)
+hasfree.append(137)
+
+def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
+def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
+def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
+def_op('EXTENDED_ARG', 143)
+EXTENDED_ARG = 143
+
+del def_op, name_op, jrel_op, jabs_op
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/pickle.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/pickle.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,1435 @@
+"""Create portable serialized representations of Python objects.
+
+See module cPickle for a (much) faster implementation.
+See module copy_reg for a mechanism for registering custom picklers.
+See module pickletools source for extensive comments.
+
+Classes:
+
+ Pickler
+ Unpickler
+
+Functions:
+
+ dump(object, file)
+ dumps(object) -> string
+ load(file) -> object
+ loads(string) -> object
+
+Misc variables:
+
+ __version__
+ format_version
+ compatible_formats
+
+"""
+
+__version__ = "$Revision: 38432 $" # Code version
+
+from types import *
+from copy_reg import dispatch_table
+from copy_reg import _extension_registry, _inverted_registry, _extension_cache
+import marshal
+import sys
+import struct
+
+__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
+ "Unpickler", "dump", "dumps", "load", "loads"]
+
+# These are purely informational; no code uses these.
+format_version = "2.0" # File format version we write
+compatible_formats = ["1.0", # Original protocol 0
+ "1.1", # Protocol 0 with INST added
+ "1.2", # Original protocol 1
+ "1.3", # Protocol 1 with BINFLOAT added
+ "2.0", # Protocol 2
+ ] # Old format versions we can read
+
+# Keep in synch with cPickle. This is the highest protocol number we
+# know how to read.
+HIGHEST_PROTOCOL = 2
+
+# Why use struct.pack() for pickling but marshal.loads() for
+# unpickling? struct.pack() is 40% faster than marshal.dumps(), but
+# marshal.loads() is twice as fast as struct.unpack()!
+mloads = marshal.loads
+
+class PickleError(Exception):
+ """A common base class for the other pickling exceptions."""
+ pass
+
+class PicklingError(PickleError):
+ """This exception is raised when an unpicklable object is passed to the
+ dump() method.
+
+ """
+ pass
+
+class UnpicklingError(PickleError):
+ """This exception is raised when there is a problem unpickling an object,
+ such as a security violation.
+
+ Note that other exceptions may also be raised during unpickling, including
+ (but not necessarily limited to) AttributeError, EOFError, ImportError,
+ and IndexError.
+
+ """
+ pass
+
+# An instance of _Stop is raised by Unpickler.load_stop() in response to
+# the STOP opcode, passing the object that is the result of unpickling.
+class _Stop(Exception):
+ def __init__(self, value):
+ self.value = value
+
+# Jython has PyStringMap; it's a dict subclass with string keys
+try:
+ from org.python.core import PyStringMap
+except ImportError:
+ PyStringMap = None
+
+# UnicodeType may or may not be exported (normally imported from types)
+try:
+ UnicodeType
+except NameError:
+ UnicodeType = None
+
+# Pickle opcodes. See pickletools.py for extensive docs. The listing
+# here is in kind-of alphabetical order of 1-character pickle code.
+# pickletools groups them by purpose.
+
+MARK = '(' # push special markobject on stack
+STOP = '.' # every pickle ends with STOP
+POP = '0' # discard topmost stack item
+POP_MARK = '1' # discard stack top through topmost markobject
+DUP = '2' # duplicate top stack item
+FLOAT = 'F' # push float object; decimal string argument
+INT = 'I' # push integer or bool; decimal string argument
+BININT = 'J' # push four-byte signed int
+BININT1 = 'K' # push 1-byte unsigned int
+LONG = 'L' # push long; decimal string argument
+BININT2 = 'M' # push 2-byte unsigned int
+NONE = 'N' # push None
+PERSID = 'P' # push persistent object; id is taken from string arg
+BINPERSID = 'Q' # " " " ; " " " " stack
+REDUCE = 'R' # apply callable to argtuple, both on stack
+STRING = 'S' # push string; NL-terminated string argument
+BINSTRING = 'T' # push string; counted binary string argument
+SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes
+UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument
+BINUNICODE = 'X' # " " " ; counted UTF-8 string argument
+APPEND = 'a' # append stack top to list below it
+BUILD = 'b' # call __setstate__ or __dict__.update()
+GLOBAL = 'c' # push self.find_class(modname, name); 2 string args
+DICT = 'd' # build a dict from stack items
+EMPTY_DICT = '}' # push empty dict
+APPENDS = 'e' # extend list on stack by topmost stack slice
+GET = 'g' # push item from memo on stack; index is string arg
+BINGET = 'h' # " " " " " " ; " " 1-byte arg
+INST = 'i' # build & push class instance
+LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg
+LIST = 'l' # build list from topmost stack items
+EMPTY_LIST = ']' # push empty list
+OBJ = 'o' # build & push class instance
+PUT = 'p' # store stack top in memo; index is string arg
+BINPUT = 'q' # " " " " " ; " " 1-byte arg
+LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg
+SETITEM = 's' # add key+value pair to dict
+TUPLE = 't' # build tuple from topmost stack items
+EMPTY_TUPLE = ')' # push empty tuple
+SETITEMS = 'u' # modify dict by adding topmost key+value pairs
+BINFLOAT = 'G' # push float; arg is 8-byte float encoding
+
+TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py
+FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
+
+# Protocol 2
+
+PROTO = '\x80' # identify pickle protocol
+NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple
+EXT1 = '\x82' # push object from extension registry; 1-byte index
+EXT2 = '\x83' # ditto, but 2-byte index
+EXT4 = '\x84' # ditto, but 4-byte index
+TUPLE1 = '\x85' # build 1-tuple from stack top
+TUPLE2 = '\x86' # build 2-tuple from two topmost stack items
+TUPLE3 = '\x87' # build 3-tuple from three topmost stack items
+NEWTRUE = '\x88' # push True
+NEWFALSE = '\x89' # push False
+LONG1 = '\x8a' # push long from < 256 bytes
+LONG4 = '\x8b' # push really big long
+
+_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
+
+
+__all__.extend([x for x in dir() if x[0].isalpha() and x == x.upper()])
+del x
+
+
+# Pickling machinery
+
+class Pickler:
+
+ def __init__(self, file, protocol=None):
+ """This takes a file-like object for writing a pickle data stream.
+
+ The optional protocol argument tells the pickler to use the
+ given protocol; supported protocols are 0, 1, 2. The default
+ protocol is 0, to be backwards compatible. (Protocol 0 is the
+ only protocol that can be written to a file opened in text
+ mode and read back successfully. When using a protocol higher
+ than 0, make sure the file is opened in binary mode, both when
+ pickling and unpickling.)
+
+ Protocol 1 is more efficient than protocol 0; protocol 2 is
+ more efficient than protocol 1.
+
+ Specifying a negative protocol version selects the highest
+ protocol version supported. The higher the protocol used, the
+ more recent the version of Python needed to read the pickle
+ produced.
+
+ The file parameter must have a write() method that accepts a single
+ string argument. It can thus be an open file object, a StringIO
+ object, or any other custom object that meets this interface.
+
+ """
+ if protocol is None:
+ protocol = 0
+ if protocol < 0:
+ protocol = HIGHEST_PROTOCOL
+ elif not 0 <= protocol <= HIGHEST_PROTOCOL:
+ raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
+ self.write = file.write
+ self.memo = {}
+ self.proto = int(protocol)
+ self.bin = protocol >= 1
+ self.fast = 0
+
+ def _pickle_moduledict(self, obj):
+ try:
+ modict = self.module_dict_ids
+ except AttributeError:
+ modict = {}
+ from sys import modules
+ for mod in modules.values():
+ if isinstance(mod, ModuleType):
+ try:
+ modict[id(mod.__dict__)] = mod
+ except KeyboardInterrupt:
+ raise
+ except: # obscure: the above can fail for
+ # arbitrary reasons, because of the py lib
+ pass
+ self.module_dict_ids = modict
+
+ thisid = id(obj)
+ try:
+ themodule = modict[thisid]
+ except KeyError:
+ return None
+ from __builtin__ import getattr
+ return getattr, (themodule, '__dict__')
+
+ def clear_memo(self):
+ """Clears the pickler's "memo".
+
+ The memo is the data structure that remembers which objects the
+ pickler has already seen, so that shared or recursive objects are
+ pickled by reference and not by value. This method is useful when
+ re-using picklers.
+
+ """
+ self.memo.clear()
+
+ def dump(self, obj):
+ """Write a pickled representation of obj to the open file."""
+ if self.proto >= 2:
+ self.write(PROTO + chr(self.proto))
+ self.save(obj)
+ self.write(STOP)
+
+ def memoize(self, obj):
+ """Store an object in the memo."""
+
+ # The Pickler memo is a dictionary mapping object ids to 2-tuples
+ # that contain the Unpickler memo key and the object being memoized.
+ # The memo key is written to the pickle and will become
+ # the key in the Unpickler's memo. The object is stored in the
+ # Pickler memo so that transient objects are kept alive during
+ # pickling.
+
+ # The use of the Unpickler memo length as the memo key is just a
+ # convention. The only requirement is that the memo values be unique.
+ # But there appears no advantage to any other scheme, and this
+ # scheme allows the Unpickler memo to be implemented as a plain (but
+ # growable) array, indexed by memo key.
+ if self.fast:
+ return
+ assert id(obj) not in self.memo
+ memo_len = len(self.memo)
+ self.write(self.put(memo_len))
+ self.memo[id(obj)] = memo_len, obj
+
+ # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
+ def put(self, i, pack=struct.pack):
+ if self.bin:
+ if i < 256:
+ return BINPUT + chr(i)
+ else:
+ return LONG_BINPUT + pack("<i", i)
+
+ return PUT + repr(i) + '\n'
+
+ # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
+ def get(self, i, pack=struct.pack):
+ if self.bin:
+ if i < 256:
+ return BINGET + chr(i)
+ else:
+ return LONG_BINGET + pack("<i", i)
+
+ return GET + repr(i) + '\n'
+
+ def save(self, obj):
+ # Check for persistent id (defined by a subclass)
+ pid = self.persistent_id(obj)
+ if pid:
+ self.save_pers(pid)
+ return
+
+ # Check the memo
+ x = self.memo.get(id(obj))
+ if x:
+ self.write(self.get(x[0]))
+ return
+
+ # Check the type dispatch table
+ t = type(obj)
+ f = self.dispatch.get(t)
+ if f:
+ f(self, obj) # Call unbound method with explicit self
+ return
+
+ # Check for a class with a custom metaclass; treat as regular class
+ try:
+ issc = issubclass(t, TypeType)
+ except TypeError: # t is not a class (old Boost; see SF #502085)
+ issc = 0
+ if issc:
+ self.save_global(obj)
+ return
+
+ # Check copy_reg.dispatch_table
+ reduce = dispatch_table.get(t)
+ if reduce:
+ rv = reduce(obj)
+ else:
+ # Check for a __reduce_ex__ method, fall back to __reduce__
+ reduce = getattr(obj, "__reduce_ex__", None)
+ if reduce:
+ rv = reduce(self.proto)
+ else:
+ reduce = getattr(obj, "__reduce__", None)
+ if reduce:
+ rv = reduce()
+ else:
+ raise PicklingError("Can't pickle %r object: %r" %
+ (t.__name__, obj))
+
+ # Check for string returned by reduce(), meaning "save as global"
+ if type(rv) is StringType:
+ self.save_global(obj, rv)
+ return
+
+ # Assert that reduce() returned a tuple
+ if type(rv) is not TupleType:
+ raise PicklingError("%s must return string or tuple" % reduce)
+
+ # Assert that it returned an appropriately sized tuple
+ l = len(rv)
+ if not (2 <= l <= 5):
+ raise PicklingError("Tuple returned by %s must have "
+ "two to five elements" % reduce)
+
+ # Save the reduce() output and finally memoize the object
+ self.save_reduce(obj=obj, *rv)
+
+ def persistent_id(self, obj):
+ # This exists so a subclass can override it
+ return None
+
+ def save_pers(self, pid):
+ # Save a persistent id reference
+ if self.bin:
+ self.save(pid)
+ self.write(BINPERSID)
+ else:
+ self.write(PERSID + str(pid) + '\n')
+
+ def save_reduce(self, func, args, state=None,
+ listitems=None, dictitems=None, obj=None):
+ # This API is called by some subclasses
+
+ # Assert that args is a tuple or None
+ if not isinstance(args, TupleType):
+ raise PicklingError("args from reduce() should be a tuple")
+
+ # Assert that func is callable
+ if not callable(func):
+ raise PicklingError("func from reduce should be callable")
+
+ save = self.save
+ write = self.write
+
+ # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
+ if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
+ # A __reduce__ implementation can direct protocol 2 to
+ # use the more efficient NEWOBJ opcode, while still
+ # allowing protocol 0 and 1 to work normally. For this to
+ # work, the function returned by __reduce__ should be
+ # called __newobj__, and its first argument should be a
+ # new-style class. The implementation for __newobj__
+ # should be as follows, although pickle has no way to
+ # verify this:
+ #
+ # def __newobj__(cls, *args):
+ # return cls.__new__(cls, *args)
+ #
+ # Protocols 0 and 1 will pickle a reference to __newobj__,
+ # while protocol 2 (and above) will pickle a reference to
+ # cls, the remaining args tuple, and the NEWOBJ code,
+ # which calls cls.__new__(cls, *args) at unpickling time
+ # (see load_newobj below). If __reduce__ returns a
+ # three-tuple, the state from the third tuple item will be
+ # pickled regardless of the protocol, calling __setstate__
+ # at unpickling time (see load_build below).
+ #
+ # Note that no standard __newobj__ implementation exists;
+ # you have to provide your own. This is to enforce
+ # compatibility with Python 2.2 (pickles written using
+ # protocol 0 or 1 in Python 2.3 should be unpicklable by
+ # Python 2.2).
+ cls = args[0]
+ if not hasattr(cls, "__new__"):
+ raise PicklingError(
+ "args[0] from __newobj__ args has no __new__")
+ if obj is not None and cls is not obj.__class__:
+ raise PicklingError(
+ "args[0] from __newobj__ args has the wrong class")
+ args = args[1:]
+ save(cls)
+ save(args)
+ write(NEWOBJ)
+ else:
+ save(func)
+ save(args)
+ write(REDUCE)
+
+ if obj is not None:
+ self.memoize(obj)
+
+ # More new special cases (that work with older protocols as
+ # well): when __reduce__ returns a tuple with 4 or 5 items,
+ # the 4th and 5th item should be iterators that provide list
+ # items and dict items (as (key, value) tuples), or None.
+
+ if listitems is not None:
+ self._batch_appends(listitems)
+
+ if dictitems is not None:
+ self._batch_setitems(dictitems)
+
+ if state is not None:
+ save(state)
+ write(BUILD)
+
+ # Methods below this point are dispatched through the dispatch table
+
+ dispatch = {}
+
+ def save_none(self, obj):
+ self.write(NONE)
+ dispatch[NoneType] = save_none
+
+ def save_bool(self, obj):
+ if self.proto >= 2:
+ self.write(obj and NEWTRUE or NEWFALSE)
+ else:
+ self.write(obj and TRUE or FALSE)
+ dispatch[bool] = save_bool
+
+ def save_int(self, obj, pack=struct.pack):
+ if self.bin:
+ # If the int is small enough to fit in a signed 4-byte 2's-comp
+ # format, we can store it more efficiently than the general
+ # case.
+ # First one- and two-byte unsigned ints:
+ if obj >= 0:
+ if obj <= 0xff:
+ self.write(BININT1 + chr(obj))
+ return
+ if obj <= 0xffff:
+ self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
+ return
+ # Next check for 4-byte signed ints:
+ high_bits = obj >> 31 # note that Python shift sign-extends
+ if high_bits == 0 or high_bits == -1:
+ # All high bits are copies of bit 2**31, so the value
+ # fits in a 4-byte signed int.
+ self.write(BININT + pack("<i", obj))
+ return
+ # Text pickle, or int too big to fit in signed 4-byte format.
+ self.write(INT + repr(obj) + '\n')
+ dispatch[IntType] = save_int
+
+ def save_long(self, obj, pack=struct.pack):
+ if self.proto >= 2:
+ bytes = encode_long(obj)
+ n = len(bytes)
+ if n < 256:
+ self.write(LONG1 + chr(n) + bytes)
+ else:
+ self.write(LONG4 + pack("<i", n) + bytes)
+ return
+ self.write(LONG + repr(obj) + '\n')
+ dispatch[LongType] = save_long
+
+ def save_float(self, obj, pack=struct.pack):
+ if self.bin:
+ self.write(BINFLOAT + pack('>d', obj))
+ else:
+ self.write(FLOAT + repr(obj) + '\n')
+ dispatch[FloatType] = save_float
+
+ def save_string(self, obj, pack=struct.pack):
+ if self.bin:
+ n = len(obj)
+ if n < 256:
+ self.write(SHORT_BINSTRING + chr(n) + obj)
+ else:
+ self.write(BINSTRING + pack("<i", n) + obj)
+ else:
+ self.write(STRING + repr(obj) + '\n')
+ self.memoize(obj)
+ dispatch[StringType] = save_string
+
+ def save_unicode(self, obj, pack=struct.pack):
+ if self.bin:
+ encoding = obj.encode('utf-8')
+ n = len(encoding)
+ self.write(BINUNICODE + pack("<i", n) + encoding)
+ else:
+ obj = obj.replace("\\", "\\u005c")
+ obj = obj.replace("\n", "\\u000a")
+ self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
+ self.memoize(obj)
+ dispatch[UnicodeType] = save_unicode
+
+ if StringType == UnicodeType:
+ # This is true for Jython
+ def save_string(self, obj, pack=struct.pack):
+ unicode = obj.isunicode()
+
+ if self.bin:
+ if unicode:
+ obj = obj.encode("utf-8")
+ l = len(obj)
+ if l < 256 and not unicode:
+ self.write(SHORT_BINSTRING + chr(l) + obj)
+ else:
+ s = pack("<i", l)
+ if unicode:
+ self.write(BINUNICODE + s + obj)
+ else:
+ self.write(BINSTRING + s + obj)
+ else:
+ if unicode:
+ obj = obj.replace("\\", "\\u005c")
+ obj = obj.replace("\n", "\\u000a")
+ obj = obj.encode('raw-unicode-escape')
+ self.write(UNICODE + obj + '\n')
+ else:
+ self.write(STRING + repr(obj) + '\n')
+ self.memoize(obj)
+ dispatch[StringType] = save_string
+
+ def save_tuple(self, obj):
+ write = self.write
+ proto = self.proto
+
+ n = len(obj)
+ if n == 0:
+ if proto:
+ write(EMPTY_TUPLE)
+ else:
+ write(MARK + TUPLE)
+ return
+
+ save = self.save
+ memo = self.memo
+ if n <= 3 and proto >= 2:
+ for element in obj:
+ save(element)
+ # Subtle. Same as in the big comment below.
+ if id(obj) in memo:
+ get = self.get(memo[id(obj)][0])
+ write(POP * n + get)
+ else:
+ write(_tuplesize2code[n])
+ self.memoize(obj)
+ return
+
+ # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
+ # has more than 3 elements.
+ write(MARK)
+ for element in obj:
+ save(element)
+
+ if id(obj) in memo:
+ # Subtle. d was not in memo when we entered save_tuple(), so
+ # the process of saving the tuple's elements must have saved
+ # the tuple itself: the tuple is recursive. The proper action
+ # now is to throw away everything we put on the stack, and
+ # simply GET the tuple (it's already constructed). This check
+ # could have been done in the "for element" loop instead, but
+ # recursive tuples are a rare thing.
+ get = self.get(memo[id(obj)][0])
+ if proto:
+ write(POP_MARK + get)
+ else: # proto 0 -- POP_MARK not available
+ write(POP * (n+1) + get)
+ return
+
+ # No recursion.
+ self.write(TUPLE)
+ self.memoize(obj)
+
+ dispatch[TupleType] = save_tuple
+
+ # save_empty_tuple() isn't used by anything in Python 2.3. However, I
+ # found a Pickler subclass in Zope3 that calls it, so it's not harmless
+ # to remove it.
+ def save_empty_tuple(self, obj):
+ self.write(EMPTY_TUPLE)
+
+ def save_list(self, obj):
+ write = self.write
+
+ if self.bin:
+ write(EMPTY_LIST)
+ else: # proto 0 -- can't use EMPTY_LIST
+ write(MARK + LIST)
+
+ self.memoize(obj)
+ self._batch_appends(iter(obj))
+
+ dispatch[ListType] = save_list
+
+ # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets
+ # out of synch, though.
+ _BATCHSIZE = 1000
+
+ def _batch_appends(self, items):
+ # Helper to batch up APPENDS sequences
+ save = self.save
+ write = self.write
+
+ if not self.bin:
+ for x in items:
+ save(x)
+ write(APPEND)
+ return
+
+ r = xrange(self._BATCHSIZE)
+ while items is not None:
+ tmp = []
+ for i in r:
+ try:
+ x = items.next()
+ tmp.append(x)
+ except StopIteration:
+ items = None
+ break
+ n = len(tmp)
+ if n > 1:
+ write(MARK)
+ for x in tmp:
+ save(x)
+ write(APPENDS)
+ elif n:
+ save(tmp[0])
+ write(APPEND)
+ # else tmp is empty, and we're done
+
+ def save_dict(self, obj):
+ ## Stackless addition BEGIN
+ modict_saver = self._pickle_moduledict(obj)
+ if modict_saver is not None:
+ return self.save_reduce(*modict_saver)
+ ## Stackless addition END
+
+ write = self.write
+
+ if self.bin:
+ write(EMPTY_DICT)
+ else: # proto 0 -- can't use EMPTY_DICT
+ write(MARK + DICT)
+
+ self.memoize(obj)
+ self._batch_setitems(obj.iteritems())
+
+ dispatch[DictionaryType] = save_dict
+ if not PyStringMap is None:
+ dispatch[PyStringMap] = save_dict
+
    def _batch_setitems(self, items):
        # Helper to batch up SETITEMS sequences; proto >= 1 only
        #
        # `items` is an iterator of (key, value) pairs.  Mirrors
        # _batch_appends: protocol 0 emits one SETITEM per pair, binary
        # protocols emit chunks of up to _BATCHSIZE pairs between a MARK
        # and a single SETITEMS.
        save = self.save
        write = self.write

        if not self.bin:
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        r = xrange(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    tmp.append(items.next())
                except StopIteration:
                    # Iterator exhausted; None ends the outer loop after
                    # this final batch is written.
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                # A single trailing pair avoids the MARK overhead.
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done
+
    def save_inst(self, obj):
        # Pickle an old-style class instance using the INST (proto 0) or
        # OBJ (proto >= 1) opcode, then its state via BUILD.
        cls = obj.__class__

        memo = self.memo
        write = self.write
        save = self.save

        if hasattr(obj, '__getinitargs__'):
            args = obj.__getinitargs__()
            len(args) # XXX Assert it's a sequence
            # Memoization keys on id(); keep the temporary alive so its
            # id can't be recycled by another object mid-pickle.
            _keep_alive(args, memo)
        else:
            args = ()

        write(MARK)

        if self.bin:
            # OBJ form: class object and init args all come from the stack.
            save(cls)
            for arg in args:
                save(arg)
            write(OBJ)
        else:
            # INST form: args on the stack, class named inline in the stream.
            for arg in args:
                save(arg)
            write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')

        self.memoize(obj)

        try:
            getstate = obj.__getstate__
        except AttributeError:
            stuff = obj.__dict__
        else:
            stuff = getstate()
            _keep_alive(stuff, memo)
        save(stuff)
        write(BUILD)

    dispatch[InstanceType] = save_inst
+
    def save_global(self, obj, name=None, pack=struct.pack):
        # Pickle a class/function/type by reference: module and qualified
        # name, re-imported at unpickling time.  Raises PicklingError when
        # the object can't be found again under that name (or is shadowed
        # by a different object).
        write = self.write
        memo = self.memo

        if name is None:
            name = obj.__name__

        module = getattr(obj, "__module__", None)
        if module is None:
            # Fall back to scanning sys.modules (see whichmodule()).
            module = whichmodule(obj, name)

        try:
            __import__(module)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module, name))
        else:
            if klass is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module, name))

        if self.proto >= 2:
            # Protocol 2: popular globals may have a registered extension
            # code, letting us emit a small integer instead of two names.
            code = _extension_registry.get((module, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + chr(code))
                elif code <= 0xffff:
                    # EXT2 argument is little-endian: low byte first.
                    write("%c%c%c" % (EXT2, code&0xff, code>>8))
                else:
                    write(EXT4 + pack("<i", code))
                return

        write(GLOBAL + module + '\n' + name + '\n')
        self.memoize(obj)
+
    def save_function(self, obj):
        # PyPy/Stackless addition: try the normal by-name pickling first;
        # if the function isn't reachable by name, fall back to the
        # copy_reg / __reduce_ex__ / __reduce__ protocols so that e.g.
        # dynamically created functions may still be pickled.
        try:
            return self.save_global(obj)
        except PicklingError, e:
            pass
        # Check copy_reg.dispatch_table
        reduce = dispatch_table.get(type(obj))
        if reduce:
            rv = reduce(obj)
        else:
            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce:
                    rv = reduce()
                else:
                    # No reduce protocol available either; re-raise the
                    # original PicklingError from save_global.
                    raise e
        return self.save_reduce(obj=obj, *rv)

    dispatch[ClassType] = save_global
    dispatch[FunctionType] = save_function
    dispatch[BuiltinFunctionType] = save_global
    dispatch[TypeType] = save_global
+
+# Pickling helpers
+
def _keep_alive(x, memo):
    """Keep a reference to the object x in the memo.

    Objects are remembered by id(), so possibly-temporary objects must be
    kept alive for the duration of the pickling; otherwise their ids could
    be recycled.  The references are parked under id(memo) itself -- a key
    that can never collide with a pickled object's id unless someone tries
    to deepcopy the memo.
    """
    key = id(memo)
    if key in memo:
        memo[key].append(x)
    else:
        # First temporary kept alive through this memo.
        memo[key] = [x]
+
+
+# A cache for whichmodule(), mapping a function object to the name of
+# the module in which the function was found.
+
classmap = {} # called classmap for backwards compatibility

def whichmodule(func, funcname):
    """Figure out the module in which a function occurs.

    Search sys.modules for the module; cache the answer in classmap.
    Return a module name, or "__main__" if the function cannot be found.
    """
    # Python functions should always get an __module__ from their globals.
    mod = getattr(func, "__module__", None)
    if mod is not None:
        return mod
    if func in classmap:
        return classmap[func]

    found = '__main__'
    for modname, module in sys.modules.items():
        # Skip dummy package entries and the main script itself.
        if module is None or modname == '__main__':
            continue
        if getattr(module, funcname, None) is func:
            found = modname
            break
    classmap[func] = found
    return found
+
+
+# Unpickling machinery
+
class Unpickler:
    """Reconstruct an object hierarchy from a pickle data stream.

    A straightforward stack machine: each one-byte opcode read from the
    stream is looked up in the `dispatch` dict and its handler manipulates
    `self.stack` (the pickle machine's stack) and `self.memo` (its long
    term memory).  Note that memo keys are *strings*: the decimal text of
    the index for PUT/GET, repr() of the integer for the binary variants.
    """

    def __init__(self, file):
        """This takes a file-like object for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so no
        proto argument is needed.

        The file-like object must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires no
        arguments. Both methods should return a string. Thus file-like
        object can be a file object opened for reading, a StringIO object,
        or any other custom object that meets this interface.
        """
        self.readline = file.readline
        self.read = file.read
        self.memo = {}

    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        self.mark = object() # any new unique object
        self.stack = []
        self.append = self.stack.append
        read = self.read
        dispatch = self.dispatch
        try:
            while 1:
                key = read(1)
                dispatch[key](self)
        except _Stop, stopinst:
            # load_stop raises _Stop carrying the fully unpickled root.
            return stopinst.value

    # Return largest index k such that self.stack[k] is self.mark.
    # If the stack doesn't contain a mark, eventually raises IndexError.
    # This could be sped by maintaining another stack, of indices at which
    # the mark appears. For that matter, the latter stack would suffice,
    # and we wouldn't need to push mark objects on self.stack at all.
    # Doing so is probably a good thing, though, since if the pickle is
    # corrupt (or hostile) we may get a clue from finding self.mark embedded
    # in unpickled objects.
    def marker(self):
        stack = self.stack
        mark = self.mark
        k = len(stack)-1
        while stack[k] is not mark: k = k-1
        return k

    # Maps each one-byte opcode to its (unbound) handler method.
    dispatch = {}

    def load_eof(self):
        # read(1) returned '' -- the stream ended without a STOP opcode.
        raise EOFError
    dispatch[''] = load_eof

    def load_proto(self):
        # The protocol number is validated but deliberately not stored;
        # each opcode is self-describing from here on.
        proto = ord(self.read(1))
        if not 0 <= proto <= 2:
            raise ValueError, "unsupported pickle protocol: %d" % proto
    dispatch[PROTO] = load_proto

    def load_persid(self):
        # Protocol 0 persistent id: one text line, resolved by the
        # (subclass-provided) persistent_load hook.
        pid = self.readline()[:-1]
        self.append(self.persistent_load(pid))
    dispatch[PERSID] = load_persid

    def load_binpersid(self):
        # Binary persistent id: taken from the stack instead of the stream.
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID] = load_binpersid

    def load_none(self):
        self.append(None)
    dispatch[NONE] = load_none

    def load_false(self):
        self.append(False)
    dispatch[NEWFALSE] = load_false

    def load_true(self):
        self.append(True)
    dispatch[NEWTRUE] = load_true

    def load_int(self):
        # Protocol 0 INT; "00"/"01" lines encode the bools for
        # backward compatibility.
        data = self.readline()
        if data == FALSE[1:]:
            val = False
        elif data == TRUE[1:]:
            val = True
        else:
            try:
                val = int(data)
            except ValueError:
                # Out of int range; fall back to a long.
                val = long(data)
        self.append(val)
    dispatch[INT] = load_int

    def load_binint(self):
        self.append(mloads('i' + self.read(4)))
    dispatch[BININT] = load_binint

    def load_binint1(self):
        self.append(ord(self.read(1)))
    dispatch[BININT1] = load_binint1

    def load_binint2(self):
        # Two little-endian bytes, zero-extended to marshal's 4-byte int.
        self.append(mloads('i' + self.read(2) + '\000\000'))
    dispatch[BININT2] = load_binint2

    def load_long(self):
        # base 0: the text carries its own radix prefix, if any.
        self.append(long(self.readline()[:-1], 0))
    dispatch[LONG] = load_long

    def load_long1(self):
        # Proto 2: 1-byte length, then that many 256's-complement bytes.
        n = ord(self.read(1))
        bytes = self.read(n)
        self.append(decode_long(bytes))
    dispatch[LONG1] = load_long1

    def load_long4(self):
        # Proto 2: 4-byte length, then that many 256's-complement bytes.
        n = mloads('i' + self.read(4))
        bytes = self.read(n)
        self.append(decode_long(bytes))
    dispatch[LONG4] = load_long4

    def load_float(self):
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT] = load_float

    def load_binfloat(self, unpack=struct.unpack):
        # 8-byte big-endian IEEE double.
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT] = load_binfloat

    def load_string(self):
        # Protocol 0 repr-style string; insist on surrounding quotes so a
        # hostile pickle can't smuggle arbitrary text past the eval-free
        # string-escape decoding below.
        rep = self.readline()[:-1]
        for q in "\"'": # double or single quote
            if rep.startswith(q):
                if not rep.endswith(q):
                    raise ValueError, "insecure string pickle"
                rep = rep[len(q):-len(q)]
                break
        else:
            raise ValueError, "insecure string pickle"
        self.append(rep.decode("string-escape"))
    dispatch[STRING] = load_string

    def load_binstring(self):
        len = mloads('i' + self.read(4))
        self.append(self.read(len))
    dispatch[BINSTRING] = load_binstring

    def load_unicode(self):
        self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
    dispatch[UNICODE] = load_unicode

    def load_binunicode(self):
        len = mloads('i' + self.read(4))
        self.append(unicode(self.read(len),'utf-8'))
    dispatch[BINUNICODE] = load_binunicode

    def load_short_binstring(self):
        len = ord(self.read(1))
        self.append(self.read(len))
    dispatch[SHORT_BINSTRING] = load_short_binstring

    def load_tuple(self):
        # Collapse everything above the topmost mark into one tuple.
        k = self.marker()
        self.stack[k:] = [tuple(self.stack[k+1:])]
    dispatch[TUPLE] = load_tuple

    def load_empty_tuple(self):
        self.stack.append(())
    dispatch[EMPTY_TUPLE] = load_empty_tuple

    def load_tuple1(self):
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1] = load_tuple1

    def load_tuple2(self):
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2] = load_tuple2

    def load_tuple3(self):
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3] = load_tuple3

    def load_empty_list(self):
        self.stack.append([])
    dispatch[EMPTY_LIST] = load_empty_list

    def load_empty_dictionary(self):
        self.stack.append({})
    dispatch[EMPTY_DICT] = load_empty_dictionary

    def load_list(self):
        # Collapse everything above the topmost mark into one list.
        k = self.marker()
        self.stack[k:] = [self.stack[k+1:]]
    dispatch[LIST] = load_list

    def load_dict(self):
        # Items above the mark alternate key, value, key, value, ...
        k = self.marker()
        d = {}
        items = self.stack[k+1:]
        for i in range(0, len(items), 2):
            key = items[i]
            value = items[i+1]
            d[key] = value
        self.stack[k:] = [d]
    dispatch[DICT] = load_dict

    # INST and OBJ differ only in how they get a class object. It's not
    # only sensible to do the rest in a common routine, the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, k):
        args = tuple(self.stack[k+1:])
        del self.stack[k:]
        instantiated = 0
        if (not args and
                type(klass) is ClassType and
                not hasattr(klass, "__getinitargs__")):
            # Cheap path: build an empty shell and graft the class on,
            # skipping __init__ entirely.
            try:
                value = _EmptyClass()
                value.__class__ = klass
                instantiated = 1
            except RuntimeError:
                # In restricted execution, assignment to inst.__class__ is
                # prohibited
                pass
        if not instantiated:
            try:
                value = klass(*args)
            except TypeError, err:
                # Re-raise with the class named, preserving the traceback.
                raise TypeError, "in constructor for %s: %s" % (
                    klass.__name__, str(err)), sys.exc_info()[2]
        self.append(value)

    def load_inst(self):
        module = self.readline()[:-1]
        name = self.readline()[:-1]
        klass = self.find_class(module, name)
        self._instantiate(klass, self.marker())
    dispatch[INST] = load_inst

    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        k = self.marker()
        klass = self.stack.pop(k+1)
        self._instantiate(klass, k)
    dispatch[OBJ] = load_obj

    def load_newobj(self):
        # Proto 2: cls.__new__(cls, *args), bypassing __init__.
        args = self.stack.pop()
        cls = self.stack[-1]
        obj = cls.__new__(cls, *args)
        self.stack[-1] = obj
    dispatch[NEWOBJ] = load_newobj

    def load_global(self):
        module = self.readline()[:-1]
        name = self.readline()[:-1]
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL] = load_global

    def load_ext1(self):
        code = ord(self.read(1))
        self.get_extension(code)
    dispatch[EXT1] = load_ext1

    def load_ext2(self):
        # Two little-endian bytes, zero-extended for marshal.
        code = mloads('i' + self.read(2) + '\000\000')
        self.get_extension(code)
    dispatch[EXT2] = load_ext2

    def load_ext4(self):
        code = mloads('i' + self.read(4))
        self.get_extension(code)
    dispatch[EXT4] = load_ext4

    def get_extension(self, code):
        # Resolve an extension-registry code to its object, memoizing the
        # result in _extension_cache.  `nil` is a unique sentinel so that
        # any cached value (even None) is distinguishable from a miss.
        nil = []
        obj = _extension_cache.get(code, nil)
        if obj is not nil:
            self.append(obj)
            return
        key = _inverted_registry.get(code)
        if not key:
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
        self.append(obj)

    def find_class(self, module, name):
        # Subclasses may override this
        __import__(module)
        mod = sys.modules[module]
        klass = getattr(mod, name)
        return klass

    def load_reduce(self):
        # Apply callable(*args): stack holds [..., callable, args].
        stack = self.stack
        args = stack.pop()
        func = stack[-1]
        value = func(*args)
        stack[-1] = value
    dispatch[REDUCE] = load_reduce

    def load_pop(self):
        del self.stack[-1]
    dispatch[POP] = load_pop

    def load_pop_mark(self):
        k = self.marker()
        del self.stack[k:]
    dispatch[POP_MARK] = load_pop_mark

    def load_dup(self):
        self.append(self.stack[-1])
    dispatch[DUP] = load_dup

    def load_get(self):
        # Memo key is the decimal text of the index, newline-stripped.
        self.append(self.memo[self.readline()[:-1]])
    dispatch[GET] = load_get

    def load_binget(self):
        # Binary GET/PUT opcodes key the memo on repr() of the index.
        i = ord(self.read(1))
        self.append(self.memo[repr(i)])
    dispatch[BINGET] = load_binget

    def load_long_binget(self):
        i = mloads('i' + self.read(4))
        self.append(self.memo[repr(i)])
    dispatch[LONG_BINGET] = load_long_binget

    def load_put(self):
        self.memo[self.readline()[:-1]] = self.stack[-1]
    dispatch[PUT] = load_put

    def load_binput(self):
        i = ord(self.read(1))
        self.memo[repr(i)] = self.stack[-1]
    dispatch[BINPUT] = load_binput

    def load_long_binput(self):
        i = mloads('i' + self.read(4))
        self.memo[repr(i)] = self.stack[-1]
    dispatch[LONG_BINPUT] = load_long_binput

    def load_append(self):
        stack = self.stack
        value = stack.pop()
        list = stack[-1]
        list.append(value)
    dispatch[APPEND] = load_append

    def load_appends(self):
        # Extend the list just below the mark with everything above it.
        stack = self.stack
        mark = self.marker()
        list = stack[mark - 1]
        list.extend(stack[mark + 1:])
        del stack[mark:]
    dispatch[APPENDS] = load_appends

    def load_setitem(self):
        stack = self.stack
        value = stack.pop()
        key = stack.pop()
        dict = stack[-1]
        dict[key] = value
    dispatch[SETITEM] = load_setitem

    def load_setitems(self):
        # Items above the mark alternate key, value; target dict is just
        # below the mark.
        stack = self.stack
        mark = self.marker()
        dict = stack[mark - 1]
        for i in range(mark + 1, len(stack), 2):
            dict[stack[i]] = stack[i + 1]

        del stack[mark:]
    dispatch[SETITEMS] = load_setitems

    def load_build(self):
        # Apply pickled state to the object below it on the stack:
        # __setstate__ wins if present; otherwise state is a dict (or,
        # for proto 2 new-style classes, a (dict, slots-dict) pair).
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        setstate = getattr(inst, "__setstate__", None)
        if setstate:
            setstate(state)
            return
        slotstate = None
        if isinstance(state, tuple) and len(state) == 2:
            state, slotstate = state
        if state:
            try:
                inst.__dict__.update(state)
            except RuntimeError:
                # XXX In restricted execution, the instance's __dict__
                # is not accessible. Use the old way of unpickling
                # the instance variables. This is a semantic
                # difference when unpickling in restricted
                # vs. unrestricted modes.
                # Note, however, that cPickle has never tried to do the
                # .update() business, and always uses
                # PyObject_SetItem(inst.__dict__, key, value) in a
                # loop over state.items().
                for k, v in state.items():
                    setattr(inst, k, v)
        if slotstate:
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD] = load_build

    def load_mark(self):
        self.append(self.mark)
    dispatch[MARK] = load_mark

    def load_stop(self):
        # Terminates load(): the remaining stack top is the result.
        value = self.stack.pop()
        raise _Stop(value)
    dispatch[STOP] = load_stop
+
# Helper class for load_inst/load_obj

class _EmptyClass:
    """Bare old-style shell; _instantiate() reassigns its __class__."""
    pass
+
+# Encode/decode longs in linear time.
+
+import binascii as _binascii
+
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0L is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """
    # Strategy: format via hex() (linear time), fix up the nibble count
    # and sign byte, then unhexlify and reverse into little-endian order.
    # Note that hex() of a Python 2 long ends with a junk 'L' character.

    if x == 0:
        return ''
    if x > 0:
        ashex = hex(x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # need an even # of nibbles for unhexlify
            ashex = "0x0" + ashex[2:]
        elif int(ashex[2], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            ashex = "0x00" + ashex[2:]
    else:
        # Build the 256's-complement: (1L << nbytes) + x. The trick is
        # to find the number of bytes in linear time (although that should
        # really be a constant-time task).
        ashex = hex(-x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        nbits = nibbles * 4
        x += 1L << nbits
        assert x > 0
        ashex = hex(x)
        njunkchars = 2 + ashex.endswith('L')
        newnibbles = len(ashex) - njunkchars
        if newnibbles < nibbles:
            # Adding the complement can lose leading zero nibbles; pad back.
            ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
        if int(ashex[2], 16) < 8:
            # "looks positive", so need a byte of sign bits
            ashex = "0xff" + ashex[2:]

    if ashex.endswith('L'):
        ashex = ashex[2:-1]
    else:
        ashex = ashex[2:]
    assert len(ashex) & 1 == 0, (x, ashex)
    binary = _binascii.unhexlify(ashex)
    # Reverse into little-endian byte order.
    return binary[::-1]
+
+def decode_long(data):
+ r"""Decode a long from a two's complement little-endian binary string.
+
+ >>> decode_long('')
+ 0L
+ >>> decode_long("\xff\x00")
+ 255L
+ >>> decode_long("\xff\x7f")
+ 32767L
+ >>> decode_long("\x00\xff")
+ -256L
+ >>> decode_long("\x00\x80")
+ -32768L
+ >>> decode_long("\x80")
+ -128L
+ >>> decode_long("\x7f")
+ 127L
+ """
+
+ nbytes = len(data)
+ if nbytes == 0:
+ return 0L
+ ashex = _binascii.hexlify(data[::-1])
+ n = long(ashex, 16) # quadratic time before Python 2.3; linear now
+ if data[-1] >= '\x80':
+ n -= 1L << (nbytes * 8)
+ return n
+
+# Shorthands
+
+try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+
def dump(obj, file, protocol=None):
    """Write a pickled representation of obj to the open file object."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
+
def dumps(obj, protocol=None):
    """Return the pickled representation of obj as a string."""
    buf = StringIO()
    Pickler(buf, protocol).dump(obj)
    return buf.getvalue()
+
def load(file):
    """Read a pickled object hierarchy from the open file object."""
    unpickler = Unpickler(file)
    return unpickler.load()
+
def loads(str):
    """Reconstitute the object hierarchy pickled in the given string.

    NOTE: the parameter name shadows the builtin ``str``; it is kept for
    backward compatibility with keyword callers.
    """
    return Unpickler(StringIO(str)).load()
+
+# Doctest
+
def _test():
    """Run this module's doctests (encode_long/decode_long examples)."""
    import doctest
    return doctest.testmod()

if __name__ == "__main__":
    _test()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/pickletools.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/pickletools.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,2245 @@
+'''"Executable documentation" for the pickle module.
+
+Extensive comments about the pickle protocols and pickle-machine opcodes
+can be found here. Some functions meant for external use:
+
+genops(pickle)
+ Generate all the opcodes in a pickle, as (opcode, arg, position) triples.
+
+dis(pickle, out=None, memo=None, indentlevel=4)
+ Print a symbolic disassembly of a pickle.
+'''
+
# Public API: only dis() and genops() are meant for external use; the
# rest of the module is documentation and opcode-descriptor machinery.
__all__ = ['dis',
           'genops',
          ]
+
+# Other ideas:
+#
+# - A pickle verifier: read a pickle and check it exhaustively for
+# well-formedness. dis() does a lot of this already.
+#
+# - A protocol identifier: examine a pickle and return its protocol number
+# (== the highest .proto attr value among all the opcodes in the pickle).
+# dis() already prints this info at the end.
+#
+# - A pickle optimizer: for example, tuple-building code is sometimes more
+# elaborate than necessary, catering for the possibility that the tuple
+# is recursive. Or lots of times a PUT is generated that's never accessed
+# by a later GET.
+
+
+"""
+"A pickle" is a program for a virtual pickle machine (PM, but more accurately
+called an unpickling machine). It's a sequence of opcodes, interpreted by the
+PM, building an arbitrarily complex Python object.
+
+For the most part, the PM is very simple: there are no looping, testing, or
+conditional instructions, no arithmetic and no function calls. Opcodes are
+executed once each, from first to last, until a STOP opcode is reached.
+
+The PM has two data areas, "the stack" and "the memo".
+
+Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
+integer object on the stack, whose value is gotten from a decimal string
+literal immediately following the INT opcode in the pickle bytestream. Other
+opcodes take Python objects off the stack. The result of unpickling is
+whatever object is left on the stack when the final STOP opcode is executed.
+
+The memo is simply an array of objects, or it can be implemented as a dict
+mapping little integers to objects. The memo serves as the PM's "long term
+memory", and the little integers indexing the memo are akin to variable
+names. Some opcodes pop a stack object into the memo at a given index,
+and others push a memo object at a given index onto the stack again.
+
+At heart, that's all the PM has. Subtleties arise for these reasons:
+
++ Object identity. Objects can be arbitrarily complex, and subobjects
+ may be shared (for example, the list [a, a] refers to the same object a
+ twice). It can be vital that unpickling recreate an isomorphic object
+ graph, faithfully reproducing sharing.
+
++ Recursive objects. For example, after "L = []; L.append(L)", L is a
+ list, and L[0] is the same list. This is related to the object identity
+ point, and some sequences of pickle opcodes are subtle in order to
+ get the right result in all cases.
+
++ Things pickle doesn't know everything about. Examples of things pickle
+ does know everything about are Python's builtin scalar and container
+ types, like ints and tuples. They generally have opcodes dedicated to
+ them. For things like module references and instances of user-defined
+ classes, pickle's knowledge is limited. Historically, many enhancements
+ have been made to the pickle protocol in order to do a better (faster,
+ and/or more compact) job on those.
+
++ Backward compatibility and micro-optimization. As explained below,
+ pickle opcodes never go away, not even when better ways to do a thing
+ get invented. The repertoire of the PM just keeps growing over time.
+ For example, protocol 0 had two opcodes for building Python integers (INT
+ and LONG), protocol 1 added three more for more-efficient pickling of short
+ integers, and protocol 2 added two more for more-efficient pickling of
+ long integers (before protocol 2, the only ways to pickle a Python long
+ took time quadratic in the number of digits, for both pickling and
+ unpickling). "Opcode bloat" isn't so much a subtlety as a source of
+ wearying complication.
+
+
+Pickle protocols:
+
+For compatibility, the meaning of a pickle opcode never changes. Instead new
+pickle opcodes get added, and each version's unpickler can handle all the
+pickle opcodes in all protocol versions to date. So old pickles continue to
+be readable forever. The pickler can generally be told to restrict itself to
+the subset of opcodes available under previous protocol versions too, so that
+users can create pickles under the current version readable by older
+versions. However, a pickle does not contain its version number embedded
+within it. If an older unpickler tries to read a pickle using a later
+protocol, the result is most likely an exception due to seeing an unknown (in
+the older unpickler) opcode.
+
+The original pickle used what's now called "protocol 0", and what was called
+"text mode" before Python 2.3. The entire pickle bytestream is made up of
+printable 7-bit ASCII characters, plus the newline character, in protocol 0.
+That's why it was called text mode. Protocol 0 is small and elegant, but
+sometimes painfully inefficient.
+
+The second major set of additions is now called "protocol 1", and was called
+"binary mode" before Python 2.3. This added many opcodes with arguments
+consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
+bytes. Binary mode pickles can be substantially smaller than equivalent
+text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
+int as 4 bytes following the opcode, which is cheaper to unpickle than the
+(perhaps) 11-character decimal string attached to INT. Protocol 1 also added
+a number of opcodes that operate on many stack elements at once (like APPENDS
+and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
+
+The third major set of additions came in Python 2.3, and is called "protocol
+2". This added:
+
+- A better way to pickle instances of new-style classes (NEWOBJ).
+
+- A way for a pickle to identify its protocol (PROTO).
+
+- Time- and space- efficient pickling of long ints (LONG{1,4}).
+
+- Shortcuts for small tuples (TUPLE{1,2,3}).
+
+- Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
+
+- The "extension registry", a vector of popular objects that can be pushed
+ efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
+ the registry contents are predefined (there's nothing akin to the memo's
+ PUT).
+
+Another independent change with Python 2.3 is the abandonment of any
+pretense that it might be safe to load pickles received from untrusted
+parties -- no sufficient security analysis has been done to guarantee
+this and there isn't a use case that warrants the expense of such an
+analysis.
+
+To this end, all tests for __safe_for_unpickling__ or for
+copy_reg.safe_constructors are removed from the unpickling code.
+References to these variables in the descriptions below are to be seen
+as describing unpickling in Python 2.2 and before.
+"""
+
+# Meta-rule: Descriptions are stored in instances of descriptor objects,
+# with plain constructors. No meta-language is defined from which
+# descriptors could be constructed. If you want, e.g., XML, write a little
+# program to generate XML from the objects.
+
+##############################################################################
+# Some pickle opcodes have an argument, following the opcode in the
+# bytestream. An argument is of a specific type, described by an instance
+# of ArgumentDescriptor. These are not to be confused with arguments taken
+# off the stack -- ArgumentDescriptor applies only to arguments embedded in
+# the opcode stream, immediately following an opcode.
+
# Represents the number of bytes consumed by an argument delimited by the
# next newline character.
UP_TO_NEWLINE = -1

# Represents the number of bytes consumed by a two-argument opcode where
# the first argument gives the number of bytes in the second argument.
# These negative values are sentinels stored in ArgumentDescriptor.n.
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
+
class ArgumentDescriptor(object):
    """Describes one argument type embedded in the opcode bytestream."""

    __slots__ = (
        # name of descriptor record, also a module global name; a string
        'name',

        # length of argument, in bytes; an int; UP_TO_NEWLINE and
        # TAKEN_FROM_ARGUMENT{1,4} are negative sentinels for the
        # variable-length cases
        'n',

        # a function taking a file-like object, reading this kind of
        # argument from the current position, advancing by n bytes, and
        # returning the argument's value
        'reader',

        # human-readable docs for this arg descriptor; a string
        'doc',
    )

    def __init__(self, name, n, reader, doc):
        assert isinstance(name, str)
        self.name = name

        # Fixed lengths are non-negative; otherwise n must be one of the
        # variable-length sentinels.
        assert isinstance(n, int) and (n >= 0 or
                                       n in (UP_TO_NEWLINE,
                                             TAKEN_FROM_ARGUMENT1,
                                             TAKEN_FROM_ARGUMENT4))
        self.n = n

        self.reader = reader

        assert isinstance(doc, str)
        self.doc = doc
+
+from struct import unpack as _unpack
+
def read_uint1(f):
    r"""Read a one-byte unsigned integer from file-like object f.

    >>> import StringIO
    >>> read_uint1(StringIO.StringIO('\xff'))
    255
    """
    data = f.read(1)
    if not data:
        # Stream exhausted before the byte could be read.
        raise ValueError("not enough data in stream to read uint1")
    return ord(data)
+
# Descriptor wiring read_uint1 into the opcode argument tables.
uint1 = ArgumentDescriptor(
    name='uint1',
    n=1,
    reader=read_uint1,
    doc="One-byte unsigned integer.")
+
+
def read_uint2(f):
    r"""Read a two-byte unsigned little-endian integer from f.

    >>> import StringIO
    >>> read_uint2(StringIO.StringIO('\xff\x00'))
    255
    >>> read_uint2(StringIO.StringIO('\xff\xff'))
    65535
    """
    data = f.read(2)
    if len(data) < 2:
        raise ValueError("not enough data in stream to read uint2")
    return _unpack("<H", data)[0]
+
# Descriptor wiring read_uint2 into the opcode argument tables.
uint2 = ArgumentDescriptor(
    name='uint2',
    n=2,
    reader=read_uint2,
    doc="Two-byte unsigned integer, little-endian.")
+
+
def read_int4(f):
    r"""Read a four-byte signed little-endian integer from f.

    >>> import StringIO
    >>> read_int4(StringIO.StringIO('\xff\x00\x00\x00'))
    255
    >>> read_int4(StringIO.StringIO('\x00\x00\x00\x80')) == -(2**31)
    True
    """
    data = f.read(4)
    if len(data) < 4:
        raise ValueError("not enough data in stream to read int4")
    return _unpack("<i", data)[0]
+
# Descriptor wiring read_int4 into the opcode argument tables.
int4 = ArgumentDescriptor(
    name='int4',
    n=4,
    reader=read_int4,
    doc="Four-byte signed integer, little-endian, 2's complement.")
+
+
def read_stringnl(f, decode=True, stripquotes=True):
    r"""Read a newline-terminated, repr-style string from file-like f.

    >>> import StringIO
    >>> read_stringnl(StringIO.StringIO("'abcd'\nefg\n"))
    'abcd'

    >>> read_stringnl(StringIO.StringIO("\n"))
    Traceback (most recent call last):
    ...
    ValueError: no string quotes around ''

    >>> read_stringnl(StringIO.StringIO("\n"), stripquotes=False)
    ''

    >>> read_stringnl(StringIO.StringIO("''\n"))
    ''

    >>> read_stringnl(StringIO.StringIO('"abcd"'))
    Traceback (most recent call last):
    ...
    ValueError: no newline found when trying to read stringnl

    Embedded escapes are undone in the result.
    >>> read_stringnl(StringIO.StringIO(r"'a\n\\b\x00c\td'" + "\n'e'"))
    'a\n\\b\x00c\td'
    """

    data = f.readline()
    if not data.endswith('\n'):
        raise ValueError("no newline found when trying to read stringnl")
    data = data[:-1] # lose the newline

    if stripquotes:
        for q in "'\"":
            if data.startswith(q):
                if not data.endswith(q):
                    # Bug fix: this message used to read "strinq quote".
                    raise ValueError("string quote %r not found at both "
                                     "ends of %r" % (q, data))
                data = data[1:-1]
                break
        else:
            raise ValueError("no string quotes around %r" % data)

    # I'm not sure when 'string_escape' was added to the std codecs; it's
    # crazy not to use it if it's there.
    if decode:
        data = data.decode('string_escape')
    return data
+
# Descriptor wiring read_stringnl into the opcode argument tables.
stringnl = ArgumentDescriptor(
    name='stringnl',
    n=UP_TO_NEWLINE,
    reader=read_stringnl,
    doc="""A newline-terminated string.

    This is a repr-style string, with embedded escapes, and
    bracketing quotes.
    """)
+
def read_stringnl_noescape(f):
    """Read a newline-terminated string verbatim: no quotes, no escapes."""
    return read_stringnl(f, stripquotes=False, decode=False)

stringnl_noescape = ArgumentDescriptor(
    name='stringnl_noescape',
    n=UP_TO_NEWLINE,
    reader=read_stringnl_noescape,
    doc="""A newline-terminated string.

    This is a str-style string, without embedded escapes,
    or bracketing quotes. It should consist solely of
    printable ASCII characters.
    """)
+
def read_stringnl_noescape_pair(f):
    r"""Read two consecutive noescape strings, joined by a single blank.

    >>> import StringIO
    >>> read_stringnl_noescape_pair(StringIO.StringIO("Queue\nEmpty\njunk"))
    'Queue Empty'
    """
    # Evaluation order matters: the first line read is the first part.
    first = read_stringnl_noescape(f)
    second = read_stringnl_noescape(f)
    return "%s %s" % (first, second)

stringnl_noescape_pair = ArgumentDescriptor(
    name='stringnl_noescape_pair',
    n=UP_TO_NEWLINE,
    reader=read_stringnl_noescape_pair,
    doc="""A pair of newline-terminated strings.

    These are str-style strings, without embedded
    escapes, or bracketing quotes. They should
    consist solely of printable ASCII characters.
    The pair is returned as a single string, with
    a single blank separating the two strings.
    """)
+
def read_string4(f):
    r"""Read a counted string with a 4-byte little-endian length prefix.

    >>> import StringIO
    >>> read_string4(StringIO.StringIO("\x00\x00\x00\x00abc"))
    ''
    >>> read_string4(StringIO.StringIO("\x03\x00\x00\x00abcdef"))
    'abc'
    >>> read_string4(StringIO.StringIO("\x00\x00\x00\x03abcdef"))
    Traceback (most recent call last):
    ...
    ValueError: expected 50331648 bytes in a string4, but only 6 remain
    """
    n = read_int4(f)
    if n < 0:
        raise ValueError("string4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("expected %d bytes in a string4, but only %d remain" %
                         (n, len(data)))
    return data

string4 = ArgumentDescriptor(
    name="string4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_string4,
    doc="""A counted string.

    The first argument is a 4-byte little-endian signed int giving
    the number of bytes in the string, and the second argument is
    that many bytes.
    """)
+
+
def read_string1(f):
    r"""Read a string preceded by a 1-byte unsigned byte count.

    >>> import StringIO
    >>> read_string1(StringIO.StringIO("\x00"))
    ''
    >>> read_string1(StringIO.StringIO("\x03abcdef"))
    'abc'
    """
    nbytes = read_uint1(f)
    assert nbytes >= 0   # read_uint1 can't return a negative count
    data = f.read(nbytes)
    if len(data) != nbytes:
        raise ValueError("expected %d bytes in a string1, but only %d remain" %
                         (nbytes, len(data)))
    return data
+
# Descriptor for SHORT_BINSTRING's argument: 1-byte count followed by that
# many literal bytes.
string1 = ArgumentDescriptor(
    name="string1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_string1,
    doc="""A counted string.

    The first argument is a 1-byte unsigned int giving the number
    of bytes in the string, and the second argument is that many
    bytes.
    """)
+
+
def read_unicodestringnl(f):
    r"""Read a raw-unicode-escape encoded string, up to the next newline.

    >>> import StringIO
    >>> read_unicodestringnl(StringIO.StringIO("abc\uabcd\njunk"))
    u'abc\uabcd'
    """
    line = f.readline()
    if not line.endswith('\n'):
        raise ValueError("no newline found when trying to read "
                         "unicodestringnl")
    # Strip the trailing newline, then decode the escapes.
    return unicode(line[:-1], 'raw-unicode-escape')
+
# Descriptor for UNICODE's argument: a raw-unicode-escape encoded line.
unicodestringnl = ArgumentDescriptor(
    name='unicodestringnl',
    n=UP_TO_NEWLINE,
    reader=read_unicodestringnl,
    doc="""A newline-terminated Unicode string.

    This is raw-unicode-escape encoded, so consists of
    printable ASCII characters, and may contain embedded
    escape sequences.
    """)
+
def read_unicodestring4(f):
    r"""Read a 4-byte-counted UTF-8 blob and decode it to a unicode object.

    >>> import StringIO
    >>> s = u'abcd\uabcd'
    >>> enc = s.encode('utf-8')
    >>> enc
    'abcd\xea\xaf\x8d'
    >>> n = chr(len(enc)) + chr(0) * 3 # little-endian 4-byte length
    >>> t = read_unicodestring4(StringIO.StringIO(n + enc + 'junk'))
    >>> s == t
    True

    >>> read_unicodestring4(StringIO.StringIO(n + enc[:-1]))
    Traceback (most recent call last):
    ...
    ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
    """
    nbytes = read_int4(f)
    if nbytes < 0:
        raise ValueError("unicodestring4 byte count < 0: %d" % nbytes)
    data = f.read(nbytes)
    if len(data) != nbytes:
        raise ValueError("expected %d bytes in a unicodestring4, but only %d "
                         "remain" % (nbytes, len(data)))
    return unicode(data, 'utf-8')
+
# Descriptor for BINUNICODE's argument: 4-byte count followed by the UTF-8
# encoding of the string.
unicodestring4 = ArgumentDescriptor(
    name="unicodestring4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_unicodestring4,
    doc="""A counted Unicode string.

    The first argument is a 4-byte little-endian signed int
    giving the number of bytes in the string, and the second
    argument-- the UTF-8 encoding of the Unicode string --
    contains that many bytes.
    """)
+
+
def read_decimalnl_short(f):
    r"""Read a newline-terminated decimal int literal; a trailing 'L' is an error.

    >>> import StringIO
    >>> read_decimalnl_short(StringIO.StringIO("1234\n56"))
    1234

    >>> read_decimalnl_short(StringIO.StringIO("1234L\n56"))
    Traceback (most recent call last):
    ...
    ValueError: trailing 'L' not allowed in '1234L'
    """
    s = read_stringnl(f, decode=False, stripquotes=False)
    if s.endswith("L"):
        raise ValueError("trailing 'L' not allowed in %r" % s)

    # "01" and "00" are the protocol-0 spellings of True and False (a hack
    # added when bool became a distinct type).
    if s == "01":
        return True
    if s == "00":
        return False

    # The pickle may have been written on a 64-bit box, so the value isn't
    # guaranteed to fit in a short int here.
    try:
        return int(s)
    except OverflowError:
        return long(s)
+
def read_decimalnl_long(f):
    r"""Read a newline-terminated decimal literal that must end with 'L'.

    >>> import StringIO

    >>> read_decimalnl_long(StringIO.StringIO("1234\n56"))
    Traceback (most recent call last):
    ...
    ValueError: trailing 'L' required in '1234'

    Someday the trailing 'L' will probably go away from this output.

    >>> read_decimalnl_long(StringIO.StringIO("1234L\n56"))
    1234L

    >>> read_decimalnl_long(StringIO.StringIO("123456789012345678901234L\n6"))
    123456789012345678901234L
    """
    literal = read_stringnl(f, decode=False, stripquotes=False)
    if not literal.endswith("L"):
        raise ValueError("trailing 'L' required in %r" % literal)
    # long() accepts the trailing 'L' directly.
    return long(literal)
+
+
# Descriptors for the INT and LONG opcode arguments: decimal literals
# without (short) and with (long) a trailing 'L'.
decimalnl_short = ArgumentDescriptor(
    name='decimalnl_short',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_short,
    doc="""A newline-terminated decimal integer literal.

    This never has a trailing 'L', and the integer fit
    in a short Python int on the box where the pickle
    was written -- but there's no guarantee it will fit
    in a short Python int on the box where the pickle
    is read.
    """)

decimalnl_long = ArgumentDescriptor(
    name='decimalnl_long',
    n=UP_TO_NEWLINE,
    reader=read_decimalnl_long,
    doc="""A newline-terminated decimal integer literal.

    This has a trailing 'L', and can represent integers
    of any size.
    """)
+
+
def read_floatnl(f):
    r"""Read a newline-terminated decimal float literal.

    >>> import StringIO
    >>> read_floatnl(StringIO.StringIO("-1.25\n6"))
    -1.25
    """
    literal = read_stringnl(f, decode=False, stripquotes=False)
    return float(literal)
+
# Descriptor for FLOAT's argument: a repr-style decimal float literal.
floatnl = ArgumentDescriptor(
    name='floatnl',
    n=UP_TO_NEWLINE,
    reader=read_floatnl,
    doc="""A newline-terminated decimal floating literal.

    In general this requires 17 significant digits for roundtrip
    identity, and pickling then unpickling infinities, NaNs, and
    minus zero doesn't work across boxes, or on some boxes even
    on itself (e.g., Windows can't read the strings it produces
    for infinities or NaNs).
    """)
+
def read_float8(f):
    r"""Read an 8-byte big-endian (struct '>d') binary float.

    >>> import StringIO, struct
    >>> raw = struct.pack(">d", -1.25)
    >>> raw
    '\xbf\xf4\x00\x00\x00\x00\x00\x00'
    >>> read_float8(StringIO.StringIO(raw + "\n"))
    -1.25
    """
    data = f.read(8)
    if len(data) != 8:
        raise ValueError("not enough data in stream to read float8")
    return _unpack(">d", data)[0]
+
+
# Descriptor for BINFLOAT's argument: a fixed 8-byte big-endian double.
float8 = ArgumentDescriptor(
    name='float8',
    n=8,
    reader=read_float8,
    doc="""An 8-byte binary representation of a float, big-endian.

    The format is unique to Python, and shared with the struct
    module (format string '>d') "in theory" (the struct and cPickle
    implementations don't share the code -- they should). It's
    strongly related to the IEEE-754 double format, and, in normal
    cases, is in fact identical to the big-endian 754 double format.
    On other boxes the dynamic range is limited to that of a 754
    double, and "add a half and chop" rounding is used to reduce
    the precision to 53 bits. However, even on a 754 box,
    infinities, NaNs, and minus zero may not be handled correctly
    (may not survive roundtrip pickling intact).
    """)
+
+# Protocol 2 formats
+
+from pickle import decode_long
+
def read_long1(f):
    r"""Read a 1-byte-counted little-endian 2's-complement long.

    >>> import StringIO
    >>> read_long1(StringIO.StringIO("\x00"))
    0L
    >>> read_long1(StringIO.StringIO("\x02\xff\x00"))
    255L
    >>> read_long1(StringIO.StringIO("\x02\xff\x7f"))
    32767L
    >>> read_long1(StringIO.StringIO("\x02\x00\xff"))
    -256L
    >>> read_long1(StringIO.StringIO("\x02\x00\x80"))
    -32768L
    """
    nbytes = read_uint1(f)
    data = f.read(nbytes)
    if len(data) != nbytes:
        raise ValueError("not enough data in stream to read long1")
    return decode_long(data)
+
# Descriptor for LONG1's argument (protocol 2): 1-byte count, then that many
# bytes of little-endian 2's-complement data.
long1 = ArgumentDescriptor(
    name="long1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_long1,
    doc="""A binary long, little-endian, using 1-byte size.

    This first reads one byte as an unsigned size, then reads that
    many bytes and interprets them as a little-endian 2's-complement long.
    If the size is 0, that's taken as a shortcut for the long 0L.
    """)
+
def read_long4(f):
    r"""Read a 4-byte-counted little-endian 2's-complement long.

    The count is a 4-byte little-endian signed int (required to be >= 0);
    an empty payload (count 0) decodes to 0L.  Raises ValueError if the
    count is negative or the stream is truncated.

    >>> import StringIO
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x00"))
    255L
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\xff\x7f"))
    32767L
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\xff"))
    -256L
    >>> read_long4(StringIO.StringIO("\x02\x00\x00\x00\x00\x80"))
    -32768L
    >>> read_long4(StringIO.StringIO("\x00\x00\x00\x00"))
    0L
    """

    n = read_int4(f)
    if n < 0:
        raise ValueError("long4 byte count < 0: %d" % n)
    data = f.read(n)
    if len(data) != n:
        raise ValueError("not enough data in stream to read long4")
    return decode_long(data)
+
# Descriptor for LONG4's argument (protocol 2): 4-byte count, then that many
# bytes of little-endian 2's-complement data.
long4 = ArgumentDescriptor(
    name="long4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_long4,
    doc="""A binary representation of a long, little-endian.

    This first reads four bytes as a signed size (but requires the
    size to be >= 0), then reads that many bytes and interprets them
    as a little-endian 2's-complement long. If the size is 0, that's taken
    as a shortcut for the long 0L, although LONG1 should really be used
    then instead (and in any case where # of bytes < 256).
    """)
+
+
+##############################################################################
+# Object descriptors. The stack used by the pickle machine holds objects,
+# and in the stack_before and stack_after attributes of OpcodeInfo
+# descriptors we need names to describe the various types of objects that can
+# appear on the stack.
+
class StackObject(object):
    """Descriptor for one kind of object the pickle machine's stack can hold.

    Instances of this class name the stack-entry types used in the
    stack_before and stack_after attributes of OpcodeInfo descriptors.
    """

    __slots__ = (
        'name',    # descriptor record name, for info only
        'obtype',  # a type object, or tuple of acceptable type objects
        'doc',     # human-readable description; a string
    )

    def __init__(self, name, obtype, doc):
        assert isinstance(name, str)
        assert isinstance(obtype, type) or isinstance(obtype, tuple)
        if isinstance(obtype, tuple):
            # A tuple means "any of these types"; each member must be a type.
            for member in obtype:
                assert isinstance(member, type)
        assert isinstance(doc, str)

        self.name = name
        self.obtype = obtype
        self.doc = doc

    def __repr__(self):
        return self.name
+
+
# Stack-object singletons: one StackObject per kind of value the pickle
# machine can put on its stack.  These are referenced from the stack_before
# and stack_after lists of the OpcodeInfo descriptors below.

pyint = StackObject(
    name='int',
    obtype=int,
    doc="A short (as opposed to long) Python integer object.")

# NOTE: `long` and `unicode` below are the Python 2 builtins.
pylong = StackObject(
    name='long',
    obtype=long,
    doc="A long (as opposed to short) Python integer object.")

pyinteger_or_bool = StackObject(
    name='int_or_bool',
    obtype=(int, long, bool),
    doc="A Python integer object (short or long), or "
    "a Python bool.")

pybool = StackObject(
    name='bool',
    obtype=(bool,),
    doc="A Python bool object.")

pyfloat = StackObject(
    name='float',
    obtype=float,
    doc="A Python float object.")

pystring = StackObject(
    name='str',
    obtype=str,
    doc="A Python string object.")

pyunicode = StackObject(
    name='unicode',
    obtype=unicode,
    doc="A Python Unicode string object.")

pynone = StackObject(
    name="None",
    obtype=type(None),
    doc="The Python None object.")

pytuple = StackObject(
    name="tuple",
    obtype=tuple,
    doc="A Python tuple object.")

pylist = StackObject(
    name="list",
    obtype=list,
    doc="A Python list object.")

pydict = StackObject(
    name="dict",
    obtype=dict,
    doc="A Python dict object.")

anyobject = StackObject(
    name='any',
    obtype=object,
    doc="Any kind of object whatsoever.")

# The next two aren't real stack-value types: they are pseudo-objects used
# in stack_before/stack_after lists to describe variable-length stack
# regions, so their obtype is just StackObject itself.
markobject = StackObject(
    name="mark",
    obtype=StackObject,
    doc="""'The mark' is a unique object.

    Opcodes that operate on a variable number of objects
    generally don't embed the count of objects in the opcode,
    or pull it off the stack. Instead the MARK opcode is used
    to push a special marker object on the stack, and then
    some other opcodes grab all the objects from the top of
    the stack down to (but not including) the topmost marker
    object.
    """)

stackslice = StackObject(
    name="stackslice",
    obtype=StackObject,
    doc="""An object representing a contiguous slice of the stack.

    This is used in conjuction with markobject, to represent all
    of the stack following the topmost markobject. For example,
    the POP_MARK opcode changes the stack from

    [..., markobject, stackslice]
    to
    [...]

    No matter how many object are on the stack after the topmost
    markobject, POP_MARK gets rid of all of them (including the
    topmost markobject too).
    """)
+
+##############################################################################
+# Descriptors for pickle opcodes.
+
class OpcodeInfo(object):
    """Everything there is to know about one pickle opcode."""

    __slots__ = (
        # symbolic name of the opcode; a string
        'name',

        # the one-character string used in a bytestream for this opcode
        'code',

        # ArgumentDescriptor for the argument embedded in the bytestream
        # after the opcode, or None if the opcode takes no argument.
        # arg.reader(s) reads and decodes the argument from bytestream s,
        # and arg.doc documents the raw argument format.
        'arg',

        # list of StackObject: stack layout before this opcode runs
        'stack_before',

        # list of StackObject: stack layout after this opcode runs
        'stack_after',

        # protocol number in which this opcode was introduced; an int
        'proto',

        # human-readable documentation for this opcode; a string
        'doc',
    )

    def __init__(self, name, code, arg,
                 stack_before, stack_after, proto, doc):
        assert isinstance(name, str)
        assert isinstance(code, str) and len(code) == 1
        assert arg is None or isinstance(arg, ArgumentDescriptor)
        # Both stack pictures must be lists of StackObject descriptors.
        for picture in (stack_before, stack_after):
            assert isinstance(picture, list)
            for entry in picture:
                assert isinstance(entry, StackObject)
        assert isinstance(proto, int) and 0 <= proto <= 2
        assert isinstance(doc, str)

        self.name = name
        self.code = code
        self.arg = arg
        self.stack_before = stack_before
        self.stack_after = stack_after
        self.proto = proto
        self.doc = doc
+
+I = OpcodeInfo
+opcodes = [
+
+ # Ways to spell integers.
+
+ I(name='INT',
+ code='I',
+ arg=decimalnl_short,
+ stack_before=[],
+ stack_after=[pyinteger_or_bool],
+ proto=0,
+ doc="""Push an integer or bool.
+
+ The argument is a newline-terminated decimal literal string.
+
+ The intent may have been that this always fit in a short Python int,
+ but INT can be generated in pickles written on a 64-bit box that
+ require a Python long on a 32-bit box. The difference between this
+ and LONG then is that INT skips a trailing 'L', and produces a short
+ int whenever possible.
+
+ Another difference is due to that, when bool was introduced as a
+ distinct type in 2.3, builtin names True and False were also added to
+ 2.2.2, mapping to ints 1 and 0. For compatibility in both directions,
+ True gets pickled as INT + "I01\\n", and False as INT + "I00\\n".
+ Leading zeroes are never produced for a genuine integer. The 2.3
+ (and later) unpicklers special-case these and return bool instead;
+ earlier unpicklers ignore the leading "0" and return the int.
+ """),
+
+ I(name='BININT',
+ code='J',
+ arg=int4,
+ stack_before=[],
+ stack_after=[pyint],
+ proto=1,
+ doc="""Push a four-byte signed integer.
+
+ This handles the full range of Python (short) integers on a 32-bit
+ box, directly as binary bytes (1 for the opcode and 4 for the integer).
+ If the integer is non-negative and fits in 1 or 2 bytes, pickling via
+ BININT1 or BININT2 saves space.
+ """),
+
+ I(name='BININT1',
+ code='K',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[pyint],
+ proto=1,
+ doc="""Push a one-byte unsigned integer.
+
+ This is a space optimization for pickling very small non-negative ints,
+ in range(256).
+ """),
+
+ I(name='BININT2',
+ code='M',
+ arg=uint2,
+ stack_before=[],
+ stack_after=[pyint],
+ proto=1,
+ doc="""Push a two-byte unsigned integer.
+
+ This is a space optimization for pickling small positive ints, in
+ range(256, 2**16). Integers in range(256) can also be pickled via
+ BININT2, but BININT1 instead saves a byte.
+ """),
+
+ I(name='LONG',
+ code='L',
+ arg=decimalnl_long,
+ stack_before=[],
+ stack_after=[pylong],
+ proto=0,
+ doc="""Push a long integer.
+
+ The same as INT, except that the literal ends with 'L', and always
+ unpickles to a Python long. There doesn't seem a real purpose to the
+ trailing 'L'.
+
+ Note that LONG takes time quadratic in the number of digits when
+ unpickling (this is simply due to the nature of decimal->binary
+ conversion). Proto 2 added linear-time (in C; still quadratic-time
+ in Python) LONG1 and LONG4 opcodes.
+ """),
+
+ I(name="LONG1",
+ code='\x8a',
+ arg=long1,
+ stack_before=[],
+ stack_after=[pylong],
+ proto=2,
+ doc="""Long integer using one-byte length.
+
+ A more efficient encoding of a Python long; the long1 encoding
+ says it all."""),
+
+ I(name="LONG4",
+ code='\x8b',
+ arg=long4,
+ stack_before=[],
+ stack_after=[pylong],
+ proto=2,
+ doc="""Long integer using found-byte length.
+
+ A more efficient encoding of a Python long; the long4 encoding
+ says it all."""),
+
+ # Ways to spell strings (8-bit, not Unicode).
+
+ I(name='STRING',
+ code='S',
+ arg=stringnl,
+ stack_before=[],
+ stack_after=[pystring],
+ proto=0,
+ doc="""Push a Python string object.
+
+ The argument is a repr-style string, with bracketing quote characters,
+ and perhaps embedded escapes. The argument extends until the next
+ newline character.
+ """),
+
+ I(name='BINSTRING',
+ code='T',
+ arg=string4,
+ stack_before=[],
+ stack_after=[pystring],
+ proto=1,
+ doc="""Push a Python string object.
+
+ There are two arguments: the first is a 4-byte little-endian signed int
+ giving the number of bytes in the string, and the second is that many
+ bytes, which are taken literally as the string content.
+ """),
+
+ I(name='SHORT_BINSTRING',
+ code='U',
+ arg=string1,
+ stack_before=[],
+ stack_after=[pystring],
+ proto=1,
+ doc="""Push a Python string object.
+
+ There are two arguments: the first is a 1-byte unsigned int giving
+ the number of bytes in the string, and the second is that many bytes,
+ which are taken literally as the string content.
+ """),
+
+ # Ways to spell None.
+
+ I(name='NONE',
+ code='N',
+ arg=None,
+ stack_before=[],
+ stack_after=[pynone],
+ proto=0,
+ doc="Push None on the stack."),
+
+ # Ways to spell bools, starting with proto 2. See INT for how this was
+ # done before proto 2.
+
+ I(name='NEWTRUE',
+ code='\x88',
+ arg=None,
+ stack_before=[],
+ stack_after=[pybool],
+ proto=2,
+ doc="""True.
+
+ Push True onto the stack."""),
+
+ I(name='NEWFALSE',
+ code='\x89',
+ arg=None,
+ stack_before=[],
+ stack_after=[pybool],
+ proto=2,
+ doc="""True.
+
+ Push False onto the stack."""),
+
+ # Ways to spell Unicode strings.
+
+ I(name='UNICODE',
+ code='V',
+ arg=unicodestringnl,
+ stack_before=[],
+ stack_after=[pyunicode],
+ proto=0, # this may be pure-text, but it's a later addition
+ doc="""Push a Python Unicode string object.
+
+ The argument is a raw-unicode-escape encoding of a Unicode string,
+ and so may contain embedded escape sequences. The argument extends
+ until the next newline character.
+ """),
+
+ I(name='BINUNICODE',
+ code='X',
+ arg=unicodestring4,
+ stack_before=[],
+ stack_after=[pyunicode],
+ proto=1,
+ doc="""Push a Python Unicode string object.
+
+ There are two arguments: the first is a 4-byte little-endian signed int
+ giving the number of bytes in the string. The second is that many
+ bytes, and is the UTF-8 encoding of the Unicode string.
+ """),
+
+ # Ways to spell floats.
+
+ I(name='FLOAT',
+ code='F',
+ arg=floatnl,
+ stack_before=[],
+ stack_after=[pyfloat],
+ proto=0,
+ doc="""Newline-terminated decimal float literal.
+
+ The argument is repr(a_float), and in general requires 17 significant
+ digits for roundtrip conversion to be an identity (this is so for
+ IEEE-754 double precision values, which is what Python float maps to
+ on most boxes).
+
+ In general, FLOAT cannot be used to transport infinities, NaNs, or
+ minus zero across boxes (or even on a single box, if the platform C
+ library can't read the strings it produces for such things -- Windows
+ is like that), but may do less damage than BINFLOAT on boxes with
+ greater precision or dynamic range than IEEE-754 double.
+ """),
+
+ I(name='BINFLOAT',
+ code='G',
+ arg=float8,
+ stack_before=[],
+ stack_after=[pyfloat],
+ proto=1,
+ doc="""Float stored in binary form, with 8 bytes of data.
+
+ This generally requires less than half the space of FLOAT encoding.
+ In general, BINFLOAT cannot be used to transport infinities, NaNs, or
+ minus zero, raises an exception if the exponent exceeds the range of
+ an IEEE-754 double, and retains no more than 53 bits of precision (if
+ there are more than that, "add a half and chop" rounding is used to
+ cut it back to 53 significant bits).
+ """),
+
+ # Ways to build lists.
+
+ I(name='EMPTY_LIST',
+ code=']',
+ arg=None,
+ stack_before=[],
+ stack_after=[pylist],
+ proto=1,
+ doc="Push an empty list."),
+
+ I(name='APPEND',
+ code='a',
+ arg=None,
+ stack_before=[pylist, anyobject],
+ stack_after=[pylist],
+ proto=0,
+ doc="""Append an object to a list.
+
+ Stack before: ... pylist anyobject
+ Stack after: ... pylist+[anyobject]
+
+ although pylist is really extended in-place.
+ """),
+
+ I(name='APPENDS',
+ code='e',
+ arg=None,
+ stack_before=[pylist, markobject, stackslice],
+ stack_after=[pylist],
+ proto=1,
+ doc="""Extend a list by a slice of stack objects.
+
+ Stack before: ... pylist markobject stackslice
+ Stack after: ... pylist+stackslice
+
+ although pylist is really extended in-place.
+ """),
+
+ I(name='LIST',
+ code='l',
+ arg=None,
+ stack_before=[markobject, stackslice],
+ stack_after=[pylist],
+ proto=0,
+ doc="""Build a list out of the topmost stack slice, after markobject.
+
+ All the stack entries following the topmost markobject are placed into
+ a single Python list, which single list object replaces all of the
+ stack from the topmost markobject onward. For example,
+
+ Stack before: ... markobject 1 2 3 'abc'
+ Stack after: ... [1, 2, 3, 'abc']
+ """),
+
+ # Ways to build tuples.
+
+ I(name='EMPTY_TUPLE',
+ code=')',
+ arg=None,
+ stack_before=[],
+ stack_after=[pytuple],
+ proto=1,
+ doc="Push an empty tuple."),
+
+ I(name='TUPLE',
+ code='t',
+ arg=None,
+ stack_before=[markobject, stackslice],
+ stack_after=[pytuple],
+ proto=0,
+ doc="""Build a tuple out of the topmost stack slice, after markobject.
+
+ All the stack entries following the topmost markobject are placed into
+ a single Python tuple, which single tuple object replaces all of the
+ stack from the topmost markobject onward. For example,
+
+ Stack before: ... markobject 1 2 3 'abc'
+ Stack after: ... (1, 2, 3, 'abc')
+ """),
+
+ I(name='TUPLE1',
+ code='\x85',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[pytuple],
+ proto=2,
+ doc="""One-tuple.
+
+ This code pops one value off the stack and pushes a tuple of
+ length 1 whose one item is that value back onto it. IOW:
+
+ stack[-1] = tuple(stack[-1:])
+ """),
+
+ I(name='TUPLE2',
+ code='\x86',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[pytuple],
+ proto=2,
+ doc="""One-tuple.
+
+ This code pops two values off the stack and pushes a tuple
+ of length 2 whose items are those values back onto it. IOW:
+
+ stack[-2:] = [tuple(stack[-2:])]
+ """),
+
+ I(name='TUPLE3',
+ code='\x87',
+ arg=None,
+ stack_before=[anyobject, anyobject, anyobject],
+ stack_after=[pytuple],
+ proto=2,
+ doc="""One-tuple.
+
+ This code pops three values off the stack and pushes a tuple
+ of length 3 whose items are those values back onto it. IOW:
+
+ stack[-3:] = [tuple(stack[-3:])]
+ """),
+
+ # Ways to build dicts.
+
+ I(name='EMPTY_DICT',
+ code='}',
+ arg=None,
+ stack_before=[],
+ stack_after=[pydict],
+ proto=1,
+ doc="Push an empty dict."),
+
+ I(name='DICT',
+ code='d',
+ arg=None,
+ stack_before=[markobject, stackslice],
+ stack_after=[pydict],
+ proto=0,
+ doc="""Build a dict out of the topmost stack slice, after markobject.
+
+ All the stack entries following the topmost markobject are placed into
+ a single Python dict, which single dict object replaces all of the
+ stack from the topmost markobject onward. The stack slice alternates
+ key, value, key, value, .... For example,
+
+ Stack before: ... markobject 1 2 3 'abc'
+ Stack after: ... {1: 2, 3: 'abc'}
+ """),
+
+ I(name='SETITEM',
+ code='s',
+ arg=None,
+ stack_before=[pydict, anyobject, anyobject],
+ stack_after=[pydict],
+ proto=0,
+ doc="""Add a key+value pair to an existing dict.
+
+ Stack before: ... pydict key value
+ Stack after: ... pydict
+
+ where pydict has been modified via pydict[key] = value.
+ """),
+
+ I(name='SETITEMS',
+ code='u',
+ arg=None,
+ stack_before=[pydict, markobject, stackslice],
+ stack_after=[pydict],
+ proto=1,
+ doc="""Add an arbitrary number of key+value pairs to an existing dict.
+
+ The slice of the stack following the topmost markobject is taken as
+ an alternating sequence of keys and values, added to the dict
+ immediately under the topmost markobject. Everything at and after the
+ topmost markobject is popped, leaving the mutated dict at the top
+ of the stack.
+
+ Stack before: ... pydict markobject key_1 value_1 ... key_n value_n
+ Stack after: ... pydict
+
+ where pydict has been modified via pydict[key_i] = value_i for i in
+ 1, 2, ..., n, and in that order.
+ """),
+
+ # Stack manipulation.
+
+ I(name='POP',
+ code='0',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[],
+ proto=0,
+ doc="Discard the top stack item, shrinking the stack by one item."),
+
+ I(name='DUP',
+ code='2',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[anyobject, anyobject],
+ proto=0,
+ doc="Push the top stack item onto the stack again, duplicating it."),
+
+ I(name='MARK',
+ code='(',
+ arg=None,
+ stack_before=[],
+ stack_after=[markobject],
+ proto=0,
+ doc="""Push markobject onto the stack.
+
+ markobject is a unique object, used by other opcodes to identify a
+ region of the stack containing a variable number of objects for them
+ to work on. See markobject.doc for more detail.
+ """),
+
+ I(name='POP_MARK',
+ code='1',
+ arg=None,
+ stack_before=[markobject, stackslice],
+ stack_after=[],
+ proto=0,
+ doc="""Pop all the stack objects at and above the topmost markobject.
+
+ When an opcode using a variable number of stack objects is done,
+ POP_MARK is used to remove those objects, and to remove the markobject
+ that delimited their starting position on the stack.
+ """),
+
+ # Memo manipulation. There are really only two operations (get and put),
+ # each in all-text, "short binary", and "long binary" flavors.
+
+ I(name='GET',
+ code='g',
+ arg=decimalnl_short,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Read an object from the memo and push it on the stack.
+
+ The index of the memo object to push is given by the newline-teriminated
+ decimal string following. BINGET and LONG_BINGET are space-optimized
+ versions.
+ """),
+
+ I(name='BINGET',
+ code='h',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=1,
+ doc="""Read an object from the memo and push it on the stack.
+
+ The index of the memo object to push is given by the 1-byte unsigned
+ integer following.
+ """),
+
+ I(name='LONG_BINGET',
+ code='j',
+ arg=int4,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=1,
+ doc="""Read an object from the memo and push it on the stack.
+
+ The index of the memo object to push is given by the 4-byte signed
+ little-endian integer following.
+ """),
+
+ I(name='PUT',
+ code='p',
+ arg=decimalnl_short,
+ stack_before=[],
+ stack_after=[],
+ proto=0,
+ doc="""Store the stack top into the memo. The stack is not popped.
+
+ The index of the memo location to write into is given by the newline-
+ terminated decimal string following. BINPUT and LONG_BINPUT are
+ space-optimized versions.
+ """),
+
+ I(name='BINPUT',
+ code='q',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[],
+ proto=1,
+ doc="""Store the stack top into the memo. The stack is not popped.
+
+ The index of the memo location to write into is given by the 1-byte
+ unsigned integer following.
+ """),
+
+ I(name='LONG_BINPUT',
+ code='r',
+ arg=int4,
+ stack_before=[],
+ stack_after=[],
+ proto=1,
+ doc="""Store the stack top into the memo. The stack is not popped.
+
+ The index of the memo location to write into is given by the 4-byte
+ signed little-endian integer following.
+ """),
+
+ # Access the extension registry (predefined objects). Akin to the GET
+ # family.
+
+ I(name='EXT1',
+ code='\x82',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Extension code.
+
+ This code and the similar EXT2 and EXT4 allow using a registry
+ of popular objects that are pickled by name, typically classes.
+ It is envisioned that through a global negotiation and
+ registration process, third parties can set up a mapping between
+ ints and object names.
+
+ In order to guarantee pickle interchangeability, the extension
+ code registry ought to be global, although a range of codes may
+ be reserved for private use.
+
+ EXT1 has a 1-byte integer argument. This is used to index into the
+ extension registry, and the object at that index is pushed on the stack.
+ """),
+
+ I(name='EXT2',
+ code='\x83',
+ arg=uint2,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Extension code.
+
+ See EXT1. EXT2 has a two-byte integer argument.
+ """),
+
+ I(name='EXT4',
+ code='\x84',
+ arg=int4,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Extension code.
+
+ See EXT1. EXT4 has a four-byte integer argument.
+ """),
+
+ # Push a class object, or module function, on the stack, via its module
+ # and name.
+
+ I(name='GLOBAL',
+ code='c',
+ arg=stringnl_noescape_pair,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Push a global object (module.attr) on the stack.
+
+ Two newline-terminated strings follow the GLOBAL opcode. The first is
+ taken as a module name, and the second as a class name. The class
+ object module.class is pushed on the stack. More accurately, the
+ object returned by self.find_class(module, class) is pushed on the
+ stack, so unpickling subclasses can override this form of lookup.
+ """),
+
+ # Ways to build objects of classes pickle doesn't know about directly
+ # (user-defined classes). I despair of documenting this accurately
+ # and comprehensibly -- you really have to read the pickle code to
+ # find all the special cases.
+
+ I(name='REDUCE',
+ code='R',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Push an object built from a callable and an argument tuple.
+
+ The opcode is named to remind of the __reduce__() method.
+
+ Stack before: ... callable pytuple
+ Stack after: ... callable(*pytuple)
+
+ The callable and the argument tuple are the first two items returned
+ by a __reduce__ method. Applying the callable to the argtuple is
+ supposed to reproduce the original object, or at least get it started.
+ If the __reduce__ method returns a 3-tuple, the last component is an
+ argument to be passed to the object's __setstate__, and then the REDUCE
+ opcode is followed by code to create setstate's argument, and then a
+ BUILD opcode to apply __setstate__ to that argument.
+
+ If type(callable) is not ClassType, REDUCE complains unless the
+ callable has been registered with the copy_reg module's
+ safe_constructors dict, or the callable has a magic
+ '__safe_for_unpickling__' attribute with a true value. I'm not sure
+ why it does this, but I've sure seen this complaint often enough when
+ I didn't want to <wink>.
+ """),
+
+ I(name='BUILD',
+ code='b',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Finish building an object, via __setstate__ or dict update.
+
+ Stack before: ... anyobject argument
+ Stack after: ... anyobject
+
+ where anyobject may have been mutated, as follows:
+
+ If the object has a __setstate__ method,
+
+ anyobject.__setstate__(argument)
+
+ is called.
+
+ Else the argument must be a dict, the object must have a __dict__, and
+ the object is updated via
+
+ anyobject.__dict__.update(argument)
+
+ This may raise RuntimeError in restricted execution mode (which
+ disallows access to __dict__ directly); in that case, the object
+ is updated instead via
+
+ for k, v in argument.items():
+ anyobject[k] = v
+ """),
+
+ I(name='INST',
+ code='i',
+ arg=stringnl_noescape_pair,
+ stack_before=[markobject, stackslice],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Build a class instance.
+
+ This is the protocol 0 version of protocol 1's OBJ opcode.
+ INST is followed by two newline-terminated strings, giving a
+ module and class name, just as for the GLOBAL opcode (and see
+ GLOBAL for more details about that). self.find_class(module, name)
+ is used to get a class object.
+
+ In addition, all the objects on the stack following the topmost
+ markobject are gathered into a tuple and popped (along with the
+ topmost markobject), just as for the TUPLE opcode.
+
+ Now it gets complicated. If all of these are true:
+
+ + The argtuple is empty (markobject was at the top of the stack
+ at the start).
+
+ + It's an old-style class object (the type of the class object is
+ ClassType).
+
+ + The class object does not have a __getinitargs__ attribute.
+
+ then we want to create an old-style class instance without invoking
+ its __init__() method (pickle has waffled on this over the years; not
+ calling __init__() is current wisdom). In this case, an instance of
+ an old-style dummy class is created, and then we try to rebind its
+ __class__ attribute to the desired class object. If this succeeds,
+ the new instance object is pushed on the stack, and we're done. In
+ restricted execution mode it can fail (assignment to __class__ is
+ disallowed), and I'm not really sure what happens then -- it looks
+ like the code ends up calling the class object's __init__ anyway,
+ via falling into the next case.
+
+ Else (the argtuple is not empty, it's not an old-style class object,
+ or the class object does have a __getinitargs__ attribute), the code
+ first insists that the class object have a __safe_for_unpickling__
+ attribute. Unlike as for the __safe_for_unpickling__ check in REDUCE,
+ it doesn't matter whether this attribute has a true or false value, it
+ only matters whether it exists (XXX this is a bug; cPickle
+ requires the attribute to be true). If __safe_for_unpickling__
+ doesn't exist, UnpicklingError is raised.
+
+ Else (the class object does have a __safe_for_unpickling__ attr),
+ the class object obtained from INST's arguments is applied to the
+ argtuple obtained from the stack, and the resulting instance object
+ is pushed on the stack.
+
+ NOTE: checks for __safe_for_unpickling__ went away in Python 2.3.
+ """),
+
+ I(name='OBJ',
+ code='o',
+ arg=None,
+ stack_before=[markobject, anyobject, stackslice],
+ stack_after=[anyobject],
+ proto=1,
+ doc="""Build a class instance.
+
+ This is the protocol 1 version of protocol 0's INST opcode, and is
+ very much like it. The major difference is that the class object
+ is taken off the stack, allowing it to be retrieved from the memo
+ repeatedly if several instances of the same class are created. This
+ can be much more efficient (in both time and space) than repeatedly
+ embedding the module and class names in INST opcodes.
+
+ Unlike INST, OBJ takes no arguments from the opcode stream. Instead
+ the class object is taken off the stack, immediately above the
+ topmost markobject:
+
+ Stack before: ... markobject classobject stackslice
+ Stack after: ... new_instance_object
+
+ As for INST, the remainder of the stack above the markobject is
+ gathered into an argument tuple, and then the logic seems identical,
+ except that no __safe_for_unpickling__ check is done (XXX this is
+ a bug; cPickle does test __safe_for_unpickling__). See INST for
+ the gory details.
+
+ NOTE: In Python 2.3, INST and OBJ are identical except for how they
+ get the class object. That was always the intent; the implementations
+ had diverged for accidental reasons.
+ """),
+
+ I(name='NEWOBJ',
+ code='\x81',
+ arg=None,
+ stack_before=[anyobject, anyobject],
+ stack_after=[anyobject],
+ proto=2,
+ doc="""Build an object instance.
+
+ The stack before should be thought of as containing a class
+ object followed by an argument tuple (the tuple being the stack
+ top). Call these cls and args. They are popped off the stack,
+ and the value returned by cls.__new__(cls, *args) is pushed back
+ onto the stack.
+ """),
+
+ # Machine control.
+
+ I(name='PROTO',
+ code='\x80',
+ arg=uint1,
+ stack_before=[],
+ stack_after=[],
+ proto=2,
+ doc="""Protocol version indicator.
+
+ For protocol 2 and above, a pickle must start with this opcode.
+ The argument is the protocol version, an int in range(2, 256).
+ """),
+
+ I(name='STOP',
+ code='.',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[],
+ proto=0,
+ doc="""Stop the unpickling machine.
+
+ Every pickle ends with this opcode. The object at the top of the stack
+ is popped, and that's the result of unpickling. The stack should be
+ empty then.
+ """),
+
+ # Ways to deal with persistent IDs.
+
+ I(name='PERSID',
+ code='P',
+ arg=stringnl_noescape,
+ stack_before=[],
+ stack_after=[anyobject],
+ proto=0,
+ doc="""Push an object identified by a persistent ID.
+
+ The pickle module doesn't define what a persistent ID means. PERSID's
+ argument is a newline-terminated str-style (no embedded escapes, no
+ bracketing quote characters) string, which *is* "the persistent ID".
+ The unpickler passes this string to self.persistent_load(). Whatever
+ object that returns is pushed on the stack. There is no implementation
+ of persistent_load() in Python's unpickler: it must be supplied by an
+ unpickler subclass.
+ """),
+
+ I(name='BINPERSID',
+ code='Q',
+ arg=None,
+ stack_before=[anyobject],
+ stack_after=[anyobject],
+ proto=1,
+ doc="""Push an object identified by a persistent ID.
+
+ Like PERSID, except the persistent ID is popped off the stack (instead
+ of being a string embedded in the opcode bytestream). The persistent
+ ID is passed to self.persistent_load(), and whatever object that
+ returns is pushed on the stack. See PERSID for more detail.
+ """),
+]
+del I
+
# Verify uniqueness of .name and .code members.
name2i = {}
code2i = {}

for i, d in enumerate(opcodes):
    # Check (and then record) the name and the code through the same path,
    # so both kinds of duplicates produce a parallel error message.
    for mapping, key, what in ((name2i, d.name, "name"),
                               (code2i, d.code, "code")):
        if key in mapping:
            raise ValueError("repeated %s %r at indices %d and %d" %
                             (what, key, mapping[key], i))
        mapping[key] = i

del name2i, code2i, i, d, mapping, key, what
+
##############################################################################
# Build a code2op dict, mapping opcode characters to OpcodeInfo records.
# Also ensure we've got the same stuff as pickle.py, although the
# introspection here is dicey.

# Generator-expression variables don't leak into the module namespace,
# so no cleanup 'del' is needed afterwards.
code2op = dict((opcode.code, opcode) for opcode in opcodes)
+
def assure_pickle_consistency(verbose=False):
    """Cross-check code2op against the opcode constants in pickle.py.

    Every ALL-CAPS, single-character string constant exported by pickle
    must appear in code2op under the same name, and vice versa.  Any
    mismatch raises ValueError.  If 'verbose' is true, each name that is
    skipped or checked is reported as we go.
    """
    import pickle, re

    # Work on a copy so entries can be crossed off as they are matched.
    copy = code2op.copy()
    for name in pickle.__all__:
        if not re.match("[A-Z][A-Z0-9_]+$", name):
            if verbose:
                print "skipping %r: it doesn't look like an opcode name" % name
            continue
        picklecode = getattr(pickle, name)
        if not isinstance(picklecode, str) or len(picklecode) != 1:
            if verbose:
                print ("skipping %r: value %r doesn't look like a pickle "
                       "code" % (name, picklecode))
            continue
        if picklecode in copy:
            if verbose:
                print "checking name %r w/ code %r for consistency" % (
                      name, picklecode)
            d = copy[picklecode]
            if d.name != name:
                raise ValueError("for pickle code %r, pickle.py uses name %r "
                                 "but we're using name %r" % (picklecode,
                                                              name,
                                                              d.name))
            # Forget this one. Any left over in copy at the end are a problem
            # of a different kind.
            del copy[picklecode]
        else:
            raise ValueError("pickle.py appears to have a pickle opcode with "
                             "name %r and code %r, but we don't" %
                             (name, picklecode))
    if copy:
        msg = ["we appear to have pickle opcodes that pickle.py doesn't have:"]
        for code, d in copy.items():
            msg.append("    name %r with code %r" % (d.name, code))
        raise ValueError("\n".join(msg))

# Run the consistency check once at import time, then drop the helper
# from the module namespace.
assure_pickle_consistency()
del assure_pickle_consistency
+
+##############################################################################
+# A pickle opcode generator.
+
def genops(pickle):
    """Generate (opcode, arg, pos) triples for every opcode in a pickle.

    'pickle' is a file-like object, or string, containing the pickle.
    A string is wrapped in a cStringIO object first.  Generation starts
    at the current position and stops right after a STOP opcode is
    delivered.

    opcode is the OpcodeInfo record describing the current opcode; arg
    is the decoded value of the opcode's embedded argument, or None when
    the opcode takes no argument; pos is the stream offset at which the
    opcode started, or None when the underlying object has no tell()
    method and its position therefore cannot be queried.
    """

    import cStringIO as StringIO

    if isinstance(pickle, str):
        pickle = StringIO.StringIO(pickle)

    # Offsets can only be reported when the stream supports tell().
    tell = getattr(pickle, "tell", None)

    while True:
        pos = tell() if tell is not None else None
        code = pickle.read(1)
        opcode = code2op.get(code)
        if opcode is None:
            if code == "":
                raise ValueError("pickle exhausted before seeing STOP")
            else:
                raise ValueError("at position %s, opcode %r unknown" % (
                                 "<unknown>" if pos is None else pos,
                                 code))
        arg = None if opcode.arg is None else opcode.arg.reader(pickle)
        yield opcode, arg, pos
        if code == '.':
            assert opcode.name == 'STOP'
            break
+
+##############################################################################
+# A symbolic pickle disassembler.
+
def dis(pickle, out=None, memo=None, indentlevel=4):
    """Produce a symbolic disassembly of a pickle.

    'pickle' is a file-like object, or string, containing a (at least one)
    pickle.  The pickle is disassembled from the current position, through
    the first STOP opcode encountered.

    Optional arg 'out' is a file-like object to which the disassembly is
    printed.  It defaults to sys.stdout.

    Optional arg 'memo' is a Python dict, used as the pickle's memo.  It
    may be mutated by dis(), if the pickle contains PUT or BINPUT opcodes.
    Passing the same memo object to another dis() call then allows disassembly
    to proceed across multiple pickles that were all created by the same
    pickler with the same memo.  Ordinarily you don't need to worry about this.

    Optional arg indentlevel is the number of blanks by which to indent
    a new MARK level.  It defaults to 4.

    In addition to printing the disassembly, some sanity checks are made:

    + All embedded opcode arguments "make sense".

    + Explicit and implicit pop operations have enough items on the stack.

    + When an opcode implicitly refers to a markobject, a markobject is
      actually on the stack.

    + A memo entry isn't referenced before it's defined.

    + The markobject isn't stored in the memo.

    + A memo entry isn't redefined.
    """

    # Most of the hair here is for sanity checks, but most of it is needed
    # anyway to detect when a protocol 0 POP takes a MARK off the stack
    # (which in turn is needed to indent MARK blocks correctly).

    stack = []          # crude emulation of unpickler stack
    if memo is None:
        memo = {}       # crude emulation of unpicker memo
    maxproto = -1       # max protocol number seen
    markstack = []      # bytecode positions of MARK opcodes
    indentchunk = ' ' * indentlevel
    errormsg = None
    for opcode, arg, pos in genops(pickle):
        if pos is not None:
            print >> out, "%5d:" % pos,

        # The line text is built before markstack is popped/pushed below,
        # so a MARK opcode itself prints at the enclosing indent level.
        line = "%-4s %s%s" % (repr(opcode.code)[1:-1],
                              indentchunk * len(markstack),
                              opcode.name)

        maxproto = max(maxproto, opcode.proto)
        before = opcode.stack_before    # don't mutate
        after = opcode.stack_after      # don't mutate
        numtopop = len(before)

        # See whether a MARK should be popped.  Protocol 0's POP pops a
        # MARK only when a markobject happens to be the top of the stack.
        markmsg = None
        if markobject in before or (opcode.name == "POP" and
                                    stack and
                                    stack[-1] is markobject):
            assert markobject not in after
            if __debug__:
                if markobject in before:
                    assert before[-1] is stackslice
            if markstack:
                markpos = markstack.pop()
                if markpos is None:
                    markmsg = "(MARK at unknown opcode offset)"
                else:
                    markmsg = "(MARK at %d)" % markpos
                # Pop everything at and after the topmost markobject.
                while stack[-1] is not markobject:
                    stack.pop()
                stack.pop()
                # Stop later code from popping too much.
                try:
                    numtopop = before.index(markobject)
                except ValueError:
                    assert opcode.name == "POP"
                    numtopop = 0
            else:
                errormsg = markmsg = "no MARK exists on stack"

        # Check for correct memo usage.
        if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT"):
            assert arg is not None
            if arg in memo:
                errormsg = "memo key %r already defined" % arg
            elif not stack:
                errormsg = "stack is empty -- can't store into memo"
            elif stack[-1] is markobject:
                errormsg = "can't store markobject in the memo"
            else:
                memo[arg] = stack[-1]

        elif opcode.name in ("GET", "BINGET", "LONG_BINGET"):
            if arg in memo:
                assert len(after) == 1
                after = [memo[arg]]     # for better stack emulation
            else:
                errormsg = "memo key %r has never been stored into" % arg

        if arg is not None or markmsg:
            # make a mild effort to align arguments
            line += ' ' * (10 - len(opcode.name))
            if arg is not None:
                line += ' ' + repr(arg)
            if markmsg:
                line += ' ' + markmsg
        print >> out, line

        if errormsg:
            # Note that we delayed complaining until the offending opcode
            # was printed.
            raise ValueError(errormsg)

        # Emulate the stack effects.
        if len(stack) < numtopop:
            raise ValueError("tries to pop %d items from stack with "
                             "only %d items" % (numtopop, len(stack)))
        if numtopop:
            del stack[-numtopop:]
        if markobject in after:
            assert markobject not in before
            markstack.append(pos)

        stack.extend(after)

    print >> out, "highest protocol among opcodes =", maxproto
    if stack:
        raise ValueError("stack not empty after STOP: %r" % stack)
+
# For use in the doctest, simply as an example of a class to pickle.
class _Example:
    """Trivial pickleable class used only by the doctests below."""
    def __init__(self, value):
        # Arbitrary payload; the doctests pickle an _Example(42).
        self.value = value
+
# Doctest text exercising dis() on protocol 0, 1 and 2 pickles.  The
# expected output pins down dis()'s exact column alignment and MARK
# indentation, so any formatting change in dis() shows up here.
_dis_test = r"""
>>> import pickle
>>> x = [1, 2, (3, 4), {'abc': u"def"}]
>>> pkl = pickle.dumps(x, 0)
>>> dis(pkl)
    0: (    MARK
    1: l        LIST       (MARK at 0)
    2: p    PUT        0
    5: I    INT        1
    8: a    APPEND
    9: I    INT        2
   12: a    APPEND
   13: (    MARK
   14: I        INT        3
   17: I        INT        4
   20: t        TUPLE      (MARK at 13)
   21: p    PUT        1
   24: a    APPEND
   25: (    MARK
   26: d        DICT       (MARK at 25)
   27: p    PUT        2
   30: S    STRING     'abc'
   37: p    PUT        3
   40: V    UNICODE    u'def'
   45: p    PUT        4
   48: s    SETITEM
   49: a    APPEND
   50: .    STOP
highest protocol among opcodes = 0

Try again with a "binary" pickle.

>>> pkl = pickle.dumps(x, 1)
>>> dis(pkl)
    0: ]    EMPTY_LIST
    1: q    BINPUT     0
    3: (    MARK
    4: K        BININT1    1
    6: K        BININT1    2
    8: (        MARK
    9: K            BININT1    3
   11: K            BININT1    4
   13: t            TUPLE      (MARK at 8)
   14: q        BINPUT     1
   16: }        EMPTY_DICT
   17: q        BINPUT     2
   19: U        SHORT_BINSTRING 'abc'
   24: q        BINPUT     3
   26: X        BINUNICODE u'def'
   34: q        BINPUT     4
   36: s        SETITEM
   37: e        APPENDS    (MARK at 3)
   38: .    STOP
highest protocol among opcodes = 1

Exercise the INST/OBJ/BUILD family.

>>> dis(pickle.dumps(zip, 0))
    0: c    GLOBAL     '__builtin__ zip'
   17: p    PUT        0
   20: .    STOP
highest protocol among opcodes = 0

>>> from pickletools import _Example
>>> x = [_Example(42)] * 2
>>> dis(pickle.dumps(x, 0))
    0: (    MARK
    1: l        LIST       (MARK at 0)
    2: p    PUT        0
    5: (    MARK
    6: i        INST       'pickletools _Example' (MARK at 5)
   28: p    PUT        1
   31: (    MARK
   32: d        DICT       (MARK at 31)
   33: p    PUT        2
   36: S    STRING     'value'
   45: p    PUT        3
   48: I    INT        42
   52: s    SETITEM
   53: b    BUILD
   54: a    APPEND
   55: g    GET        1
   58: a    APPEND
   59: .    STOP
highest protocol among opcodes = 0

>>> dis(pickle.dumps(x, 1))
    0: ]    EMPTY_LIST
    1: q    BINPUT     0
    3: (    MARK
    4: (        MARK
    5: c            GLOBAL     'pickletools _Example'
   27: q            BINPUT     1
   29: o            OBJ        (MARK at 4)
   30: q        BINPUT     2
   32: }        EMPTY_DICT
   33: q        BINPUT     3
   35: U        SHORT_BINSTRING 'value'
   42: q        BINPUT     4
   44: K        BININT1    42
   46: s        SETITEM
   47: b        BUILD
   48: h        BINGET     2
   50: e        APPENDS    (MARK at 3)
   51: .    STOP
highest protocol among opcodes = 1

Try "the canonical" recursive-object test.

>>> L = []
>>> T = L,
>>> L.append(T)
>>> L[0] is T
True
>>> T[0] is L
True
>>> L[0][0] is L
True
>>> T[0][0] is T
True
>>> dis(pickle.dumps(L, 0))
    0: (    MARK
    1: l        LIST       (MARK at 0)
    2: p    PUT        0
    5: (    MARK
    6: g        GET        0
    9: t        TUPLE      (MARK at 5)
   10: p    PUT        1
   13: a    APPEND
   14: .    STOP
highest protocol among opcodes = 0

>>> dis(pickle.dumps(L, 1))
    0: ]    EMPTY_LIST
    1: q    BINPUT     0
    3: (    MARK
    4: h        BINGET     0
    6: t        TUPLE      (MARK at 3)
    7: q    BINPUT     1
    9: a    APPEND
   10: .    STOP
highest protocol among opcodes = 1

Note that, in the protocol 0 pickle of the recursive tuple, the disassembler
has to emulate the stack in order to realize that the POP opcode at 16 gets
rid of the MARK at 0.

>>> dis(pickle.dumps(T, 0))
    0: (    MARK
    1: (        MARK
    2: l            LIST       (MARK at 1)
    3: p        PUT        0
    6: (        MARK
    7: g            GET        0
   10: t            TUPLE      (MARK at 6)
   11: p        PUT        1
   14: a        APPEND
   15: 0        POP
   16: 0        POP        (MARK at 0)
   17: g    GET        1
   20: .    STOP
highest protocol among opcodes = 0

>>> dis(pickle.dumps(T, 1))
    0: (    MARK
    1: ]        EMPTY_LIST
    2: q        BINPUT     0
    4: (        MARK
    5: h            BINGET     0
    7: t            TUPLE      (MARK at 4)
    8: q        BINPUT     1
   10: a        APPEND
   11: 1        POP_MARK   (MARK at 0)
   12: h    BINGET     1
   14: .    STOP
highest protocol among opcodes = 1

Try protocol 2.

>>> dis(pickle.dumps(L, 2))
    0: \x80 PROTO      2
    2: ]    EMPTY_LIST
    3: q    BINPUT     0
    5: h    BINGET     0
    7: \x85 TUPLE1
    8: q    BINPUT     1
   10: a    APPEND
   11: .    STOP
highest protocol among opcodes = 2

>>> dis(pickle.dumps(T, 2))
    0: \x80 PROTO      2
    2: ]    EMPTY_LIST
    3: q    BINPUT     0
    5: h    BINGET     0
    7: \x85 TUPLE1
    8: q    BINPUT     1
   10: a    APPEND
   11: 0    POP
   12: h    BINGET     1
   14: .    STOP
highest protocol among opcodes = 2
"""

# Doctest text showing that an explicit memo dict can be shared between
# dis() calls to disassemble several pickles produced by one pickler.
_memo_test = r"""
>>> import pickle
>>> from StringIO import StringIO
>>> f = StringIO()
>>> p = pickle.Pickler(f, 2)
>>> x = [1, 2, 3]
>>> p.dump(x)
>>> p.dump(x)
>>> f.seek(0)
>>> memo = {}
>>> dis(f, memo=memo)
    0: \x80 PROTO      2
    2: ]    EMPTY_LIST
    3: q    BINPUT     0
    5: (    MARK
    6: K        BININT1    1
    8: K        BININT1    2
   10: K        BININT1    3
   12: e        APPENDS    (MARK at 5)
   13: .    STOP
highest protocol among opcodes = 2
>>> dis(f, memo=memo)
   14: \x80 PROTO      2
   16: h    BINGET     0
   18: .    STOP
highest protocol among opcodes = 2
"""

# doctest.testmod() picks these up via the module's __test__ dict.
__test__ = {'disassembler_test': _dis_test,
            'disassembler_memo_test': _memo_test,
            }
+
def _test():
    """Run this module's doctests; return doctest.testmod()'s result."""
    from doctest import testmod
    return testmod()

if __name__ == "__main__":
    _test()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/popen2.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/popen2.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,252 @@
+"""Spawn a command with pipes to its stdin, stdout, and optionally stderr.
+
+The normal os.popen(cmd, mode) call spawns a shell command and provides a
+file interface to just the input or output of the process depending on
+whether mode is 'r' or 'w'. This module provides the functions popen2(cmd)
+and popen3(cmd) which return two or three pipes to the spawned command.
+"""
+
import gc
import os
import sys
+
__all__ = ["popen2", "popen3", "popen4"]

try:
    # Upper bound on descriptors to close in the forked child; fall back
    # to a conservative guess if os.sysconf is missing or unsupported.
    MAXFD = os.sysconf('SC_OPEN_MAX')
except (AttributeError, ValueError):
    MAXFD = 256

# Popen3/Popen4 instances whose child has not yet been waited upon.
_active = []
+
def _cleanup():
    """Poll every remembered child and drop those that have exited."""
    for child in _active[:]:
        if child.poll(_deadstate=sys.maxint) >= 0:
            try:
                _active.remove(child)
            except ValueError:
                # Another thread got there first; already removed is fine.
                pass
+
class Popen3:
    """Class representing a child process.  Normally instances are created
    by the factory functions popen2() and popen3()."""

    sts = -1                    # Child not completed yet

    def __init__(self, cmd, capturestderr=False, bufsize=-1):
        """The parameter 'cmd' is the shell command to execute in a
        sub-process.  On UNIX, 'cmd' may be a sequence, in which case arguments
        will be passed directly to the program without shell intervention (as
        with os.spawnv()).  If 'cmd' is a string it will be passed to the shell
        (as with os.system()).  The 'capturestderr' flag, if true, specifies
        that the object should capture standard error output of the child
        process.  The default is false.  If the 'bufsize' parameter is
        specified, it specifies the size of the I/O buffers to/from the child
        process."""
        _cleanup()
        self.cmd = cmd
        p2cread, p2cwrite = os.pipe()
        c2pread, c2pwrite = os.pipe()
        if capturestderr:
            errout, errin = os.pipe()
        # PyPy modification: suspend finalizers around fork() so none run
        # in the freshly forked child.  NOTE(review): disable_finalizers()
        # / enable_finalizers() look PyPy-specific (not in CPython's gc
        # module) -- confirm against the target interpreter.
        gc.disable_finalizers()
        try:
            self.pid = os.fork()
            if self.pid == 0:
                # Child: wire the pipe ends onto stdin/stdout(/stderr),
                # then exec; _run_child never returns.
                os.dup2(p2cread, 0)
                os.dup2(c2pwrite, 1)
                if capturestderr:
                    os.dup2(errin, 2)
                self._run_child(cmd)
        finally:
            gc.enable_finalizers()
        # Parent: close the child's ends and wrap ours in file objects.
        os.close(p2cread)
        self.tochild = os.fdopen(p2cwrite, 'w', bufsize)
        os.close(c2pwrite)
        self.fromchild = os.fdopen(c2pread, 'r', bufsize)
        if capturestderr:
            os.close(errin)
            self.childerr = os.fdopen(errout, 'r', bufsize)
        else:
            self.childerr = None

    def __del__(self):
        # In case the child hasn't been waited on, check if it's done.
        self.poll(_deadstate=sys.maxint)
        if self.sts < 0:
            if _active is not None:
                # Child is still running, keep us alive until we can wait on it.
                _active.append(self)

    def _run_child(self, cmd):
        # Runs only in the forked child: close every inherited descriptor
        # above stderr, then replace the process image with 'cmd'.
        if isinstance(cmd, basestring):
            cmd = ['/bin/sh', '-c', cmd]
        for i in xrange(3, MAXFD):
            try:
                os.close(i)
            except OSError:
                pass
        try:
            os.execvp(cmd[0], cmd)
        finally:
            # execvp only returns on failure; exit hard so the child never
            # falls back into the parent's Python code.
            os._exit(1)

    def poll(self, _deadstate=None):
        """Return the exit status of the child process if it has finished,
        or -1 if it hasn't finished yet."""
        if self.sts < 0:
            try:
                pid, sts = os.waitpid(self.pid, os.WNOHANG)
                # pid will be 0 if self.pid hasn't terminated
                if pid == self.pid:
                    self.sts = sts
            except os.error:
                # waitpid failed (e.g. child already reaped); report
                # _deadstate if the caller supplied one.
                if _deadstate is not None:
                    self.sts = _deadstate
        return self.sts

    def wait(self):
        """Wait for and return the exit status of the child process."""
        if self.sts < 0:
            pid, sts = os.waitpid(self.pid, 0)
            # This used to be a test, but it is believed to be
            # always true, so I changed it to an assertion - mvl
            assert pid == self.pid
            self.sts = sts
        return self.sts
+
+
class Popen4(Popen3):
    """Like Popen3, but the child's stderr is redirected onto the same
    pipe as its stdout, so there is never a separate childerr stream."""

    childerr = None

    def __init__(self, cmd, bufsize=-1):
        _cleanup()
        self.cmd = cmd
        p2cread, p2cwrite = os.pipe()
        c2pread, c2pwrite = os.pipe()
        # PyPy modification: suspend finalizers around fork(); see the
        # matching note in Popen3.__init__.
        gc.disable_finalizers()
        try:
            self.pid = os.fork()
            if self.pid == 0:
                # Child: stdout and stderr both go to the c2p pipe.
                os.dup2(p2cread, 0)
                os.dup2(c2pwrite, 1)
                os.dup2(c2pwrite, 2)
                self._run_child(cmd)
        finally:
            gc.enable_finalizers()
        # Parent: close the child's ends and wrap ours in file objects.
        os.close(p2cread)
        self.tochild = os.fdopen(p2cwrite, 'w', bufsize)
        os.close(c2pwrite)
        self.fromchild = os.fdopen(c2pread, 'r', bufsize)
+
+
if sys.platform[:3] == "win" or sys.platform == "os2emx":
    # Some things don't make sense on non-Unix platforms.
    del Popen3, Popen4

    # On these platforms the functions delegate to os.popen2/3/4, which
    # return (child_stdin, child_stdout[, ...]); the pair is swapped so
    # this module's functions always return the read end first.  The
    # 'mode' argument is passed straight through to os.popen*.
    def popen2(cmd, bufsize=-1, mode='t'):
        """Execute the shell command 'cmd' in a sub-process.  On UNIX, 'cmd' may
        be a sequence, in which case arguments will be passed directly to the
        program without shell intervention (as with os.spawnv()).  If 'cmd' is a
        string it will be passed to the shell (as with os.system()).  If
        'bufsize' is specified, it sets the buffer size for the I/O pipes.  The
        file objects (child_stdout, child_stdin) are returned."""
        w, r = os.popen2(cmd, mode, bufsize)
        return r, w

    def popen3(cmd, bufsize=-1, mode='t'):
        """Execute the shell command 'cmd' in a sub-process.  On UNIX, 'cmd' may
        be a sequence, in which case arguments will be passed directly to the
        program without shell intervention (as with os.spawnv()).  If 'cmd' is a
        string it will be passed to the shell (as with os.system()).  If
        'bufsize' is specified, it sets the buffer size for the I/O pipes.  The
        file objects (child_stdout, child_stdin, child_stderr) are returned."""
        w, r, e = os.popen3(cmd, mode, bufsize)
        return r, w, e

    def popen4(cmd, bufsize=-1, mode='t'):
        """Execute the shell command 'cmd' in a sub-process.  On UNIX, 'cmd' may
        be a sequence, in which case arguments will be passed directly to the
        program without shell intervention (as with os.spawnv()).  If 'cmd' is a
        string it will be passed to the shell (as with os.system()).  If
        'bufsize' is specified, it sets the buffer size for the I/O pipes.  The
        file objects (child_stdout_stderr, child_stdin) are returned."""
        w, r = os.popen4(cmd, mode, bufsize)
        return r, w
else:
    # Unix: build on the fork/exec-based Popen3/Popen4 classes.  The
    # 'mode' argument is accepted for interface compatibility with the
    # Windows branch but is not used here.
    def popen2(cmd, bufsize=-1, mode='t'):
        """Execute the shell command 'cmd' in a sub-process.  On UNIX, 'cmd' may
        be a sequence, in which case arguments will be passed directly to the
        program without shell intervention (as with os.spawnv()).  If 'cmd' is a
        string it will be passed to the shell (as with os.system()).  If
        'bufsize' is specified, it sets the buffer size for the I/O pipes.  The
        file objects (child_stdout, child_stdin) are returned."""
        inst = Popen3(cmd, False, bufsize)
        return inst.fromchild, inst.tochild

    def popen3(cmd, bufsize=-1, mode='t'):
        """Execute the shell command 'cmd' in a sub-process.  On UNIX, 'cmd' may
        be a sequence, in which case arguments will be passed directly to the
        program without shell intervention (as with os.spawnv()).  If 'cmd' is a
        string it will be passed to the shell (as with os.system()).  If
        'bufsize' is specified, it sets the buffer size for the I/O pipes.  The
        file objects (child_stdout, child_stdin, child_stderr) are returned."""
        inst = Popen3(cmd, True, bufsize)
        return inst.fromchild, inst.tochild, inst.childerr

    def popen4(cmd, bufsize=-1, mode='t'):
        """Execute the shell command 'cmd' in a sub-process.  On UNIX, 'cmd' may
        be a sequence, in which case arguments will be passed directly to the
        program without shell intervention (as with os.spawnv()).  If 'cmd' is a
        string it will be passed to the shell (as with os.system()).  If
        'bufsize' is specified, it sets the buffer size for the I/O pipes.  The
        file objects (child_stdout_stderr, child_stdin) are returned."""
        inst = Popen4(cmd, bufsize)
        return inst.fromchild, inst.tochild

    __all__.extend(["Popen3", "Popen4"])
+
def _test():
    """Smoke-test popen2() and popen3() by piping a string through 'cat'
    (or 'more' on Windows) and checking it comes back unchanged."""
    # When the test runs, there shouldn't be any open pipes
    _cleanup()
    assert not _active, "Active pipes when test starts " + repr([c.cmd for c in _active])
    cmd = "cat"
    teststr = "ab cd\n"
    if os.name == "nt":
        cmd = "more"
    # "more" doesn't act the same way across Windows flavors,
    # sometimes adding an extra newline at the start or the
    # end.  So we strip whitespace off both ends for comparison.
    expected = teststr.strip()
    print "testing popen2..."
    r, w = popen2(cmd)
    w.write(teststr)
    w.close()
    got = r.read()
    if got.strip() != expected:
        raise ValueError("wrote %r read %r" % (teststr, got))
    print "testing popen3..."
    try:
        # Prefer the sequence form; fall back to the string (shell) form
        # on platforms where exec of a sequence isn't supported.
        # NOTE(review): bare except is deliberate best-effort here.
        r, w, e = popen3([cmd])
    except:
        r, w, e = popen3(cmd)
    w.write(teststr)
    w.close()
    got = r.read()
    if got.strip() != expected:
        raise ValueError("wrote %r read %r" % (teststr, got))
    got = e.read()
    if got:
        raise ValueError("unexpected %r on stderr" % (got,))
    # Reap the children started above so _active drains.
    for inst in _active[:]:
        inst.wait()
    _cleanup()
    if _active:
        raise ValueError("_active not empty")
    print "All OK"

if __name__ == '__main__':
    _test()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/pprint.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/pprint.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,315 @@
+# Author: Fred L. Drake, Jr.
+# fdrake at acm.org
+#
+# This is a simple little module I wrote to make life easier. I didn't
+# see anything quite like it in the library, though I may have overlooked
+# something. I wrote this when I was trying to read some heavily nested
+# tuples with fairly non-descriptive content. This is modeled very much
+# after Lisp/Scheme - style pretty-printing of lists. If you find it
+# useful, thank small children who sleep at night.
+
+"""Support to pretty-print lists, tuples, & dictionaries recursively.
+
+Very simple, but useful, especially in debugging data structures.
+
+Classes
+-------
+
+PrettyPrinter()
+ Handle pretty-printing operations onto a stream using a configured
+ set of formatting parameters.
+
+Functions
+---------
+
+pformat()
+ Format a Python object into a pretty-printed representation.
+
+pprint()
+ Pretty-print a Python object to a stream [default is sys.stdout].
+
+saferepr()
+ Generate a 'standard' repr()-like value, but protect against recursive
+ data structures.
+
+"""
+
+import sys as _sys
+
+from cStringIO import StringIO as _StringIO
+
__all__ = ["pprint","pformat","isreadable","isrecursive","saferepr",
           "PrettyPrinter"]

# cache these for faster access (local aliases avoid repeated global
# builtin lookups and shield against later shadowing):
_commajoin = ", ".join
_id = id
_len = len
_type = type
+
+
def pprint(object, stream=None, indent=1, width=80, depth=None):
    """Pretty-print a Python object to a stream [default is sys.stdout]."""
    PrettyPrinter(stream=stream, indent=indent,
                  width=width, depth=depth).pprint(object)
+
def pformat(object, indent=1, width=80, depth=None):
    """Format a Python object into a pretty-printed representation."""
    printer = PrettyPrinter(indent=indent, width=width, depth=depth)
    return printer.pformat(object)
+
def saferepr(object):
    """Version of repr() which can handle recursive data structures."""
    rep, _readable, _recursive = _safe_repr(object, {}, None, 0)
    return rep
+
def isreadable(object):
    """Determine if saferepr(object) is readable by eval()."""
    _rep, readable, _recursive = _safe_repr(object, {}, None, 0)
    return readable
+
def isrecursive(object):
    """Determine if object requires a recursive representation."""
    _rep, _readable, recursive = _safe_repr(object, {}, None, 0)
    return recursive
+
class PrettyPrinter:
    """Pretty printer with configurable indent, width, depth and stream."""

    def __init__(self, indent=1, width=80, depth=None, stream=None):
        """Handle pretty printing operations onto a stream using a set of
        configured parameters.

        indent
            Number of spaces to indent for each level of nesting.

        width
            Attempted maximum number of columns in the output.

        depth
            The maximum depth to print out nested structures.

        stream
            The desired output stream.  If omitted (or false), the standard
            output stream available at construction will be used.

        """
        indent = int(indent)
        width = int(width)
        assert indent >= 0, "indent must be >= 0"
        assert depth is None or depth > 0, "depth must be > 0"
        assert width, "width must be != 0"
        self._depth = depth
        self._indent_per_level = indent
        self._width = width
        if stream is not None:
            self._stream = stream
        else:
            # Bind stdout at construction time, as documented above.
            self._stream = _sys.stdout

    def pprint(self, object):
        # Write the formatted object plus a trailing newline to the
        # configured stream.
        self._format(object, self._stream, 0, 0, {}, 0)
        self._stream.write("\n")

    def pformat(self, object):
        # Same as pprint(), but collect the output into a string.
        sio = _StringIO()
        self._format(object, sio, 0, 0, {}, 0)
        return sio.getvalue()

    def isrecursive(self, object):
        # Third element of format()'s triple is the recursion flag.
        return self.format(object, {}, 0, 0)[2]

    def isreadable(self, object):
        s, readable, recursive = self.format(object, {}, 0, 0)
        return readable and not recursive

    def _format(self, object, stream, indent, allowance, context, level):
        # 'context' maps ids of objects on the current recursion path;
        # hitting one again means the structure is recursive.
        level = level + 1
        objid = _id(object)
        if objid in context:
            # _recursion() is defined at module level, outside this class.
            stream.write(_recursion(object))
            # NOTE(review): _recursive/_readable are only ever assigned
            # here and in _repr(); they are not initialized in __init__.
            self._recursive = True
            self._readable = False
            return
        rep = self._repr(object, context, level - 1)
        typ = _type(object)
        # One line is enough only if the one-line repr fits in the
        # remaining width; otherwise split the container across lines.
        sepLines = _len(rep) > (self._width - 1 - indent - allowance)
        write = stream.write

        if sepLines:
            # Only use the multi-line layout for exact dict/list/tuple
            # reprs; subclasses with a custom __repr__ fall through.
            r = getattr(typ, "__repr__", None)
            if issubclass(typ, dict) and r == dict.__repr__:
                write('{')
                if self._indent_per_level > 1:
                    write((self._indent_per_level - 1) * ' ')
                length = _len(object)
                if length:
                    context[objid] = 1
                    indent = indent + self._indent_per_level
                    # Entries are emitted sorted by key.
                    items = object.items()
                    items.sort()
                    key, ent = items[0]
                    rep = self._repr(key, context, level)
                    write(rep)
                    write(': ')
                    self._format(ent, stream, indent + _len(rep) + 2,
                                 allowance + 1, context, level)
                    if length > 1:
                        for key, ent in items[1:]:
                            rep = self._repr(key, context, level)
                            write(',\n%s%s: ' % (' '*indent, rep))
                            self._format(ent, stream, indent + _len(rep) + 2,
                                         allowance + 1, context, level)
                    indent = indent - self._indent_per_level
                    del context[objid]
                write('}')
                return

            if (issubclass(typ, list) and r == list.__repr__) or \
               (issubclass(typ, tuple) and r == tuple.__repr__):
                if issubclass(typ, list):
                    write('[')
                    endchar = ']'
                else:
                    write('(')
                    endchar = ')'
                if self._indent_per_level > 1:
                    write((self._indent_per_level - 1) * ' ')
                length = _len(object)
                if length:
                    context[objid] = 1
                    indent = indent + self._indent_per_level
                    self._format(object[0], stream, indent, allowance + 1,
                                 context, level)
                    if length > 1:
                        for ent in object[1:]:
                            write(',\n' + ' '*indent)
                            self._format(ent, stream, indent,
                                         allowance + 1, context, level)
                    indent = indent - self._indent_per_level
                    del context[objid]
                if issubclass(typ, tuple) and length == 1:
                    # Keep the trailing comma so 1-tuples stay tuples.
                    write(',')
                write(endchar)
                return

        write(rep)

    def _repr(self, object, context, level):
        # One-line repr; also folds readability/recursion results into
        # the instance flags used by _format().
        repr, readable, recursive = self.format(object, context.copy(),
                                                self._depth, level)
        if not readable:
            self._readable = False
        if recursive:
            self._recursive = True
        return repr

    def format(self, object, context, maxlevels, level):
        """Format object for a specific context, returning a string
        and flags indicating whether the representation is 'readable'
        and whether the object represents a recursive construct.
        """
        return _safe_repr(object, context, maxlevels, level)
+
+
+# Return triple (repr_string, isreadable, isrecursive).
+
+def _safe_repr(object, context, maxlevels, level):
+ typ = _type(object)
+ if typ is str:
+ if 'locale' not in _sys.modules:
+ return repr(object), True, False
+ if "'" in object and '"' not in object:
+ closure = '"'
+ quotes = {'"': '\\"'}
+ else:
+ closure = "'"
+ quotes = {"'": "\\'"}
+ qget = quotes.get
+ sio = _StringIO()
+ write = sio.write
+ for char in object:
+ if char.isalpha():
+ write(char)
+ else:
+ write(qget(char, repr(char)[1:-1]))
+ return ("%s%s%s" % (closure, sio.getvalue(), closure)), True, False
+
+ r = getattr(typ, "__repr__", None)
+ if issubclass(typ, dict) and r == dict.__repr__:
+ if not object:
+ return "{}", True, False
+ objid = _id(object)
+ if maxlevels and level > maxlevels:
+ return "{...}", False, objid in context
+ if objid in context:
+ return _recursion(object), False, True
+ context[objid] = 1
+ readable = True
+ recursive = False
+ components = []
+ append = components.append
+ level += 1
+ saferepr = _safe_repr
+ for k, v in sorted(object.items()):
+ krepr, kreadable, krecur = saferepr(k, context, maxlevels, level)
+ vrepr, vreadable, vrecur = saferepr(v, context, maxlevels, level)
+ append("%s: %s" % (krepr, vrepr))
+ readable = readable and kreadable and vreadable
+ if krecur or vrecur:
+ recursive = True
+ del context[objid]
+ return "{%s}" % _commajoin(components), readable, recursive
+
+ if (issubclass(typ, list) and r == list.__repr__) or \
+ (issubclass(typ, tuple) and r == tuple.__repr__):
+ if issubclass(typ, list):
+ if not object:
+ return "[]", True, False
+ format = "[%s]"
+ elif _len(object) == 1:
+ format = "(%s,)"
+ else:
+ if not object:
+ return "()", True, False
+ format = "(%s)"
+ objid = _id(object)
+ if maxlevels and level > maxlevels:
+ return format % "...", False, objid in context
+ if objid in context:
+ return _recursion(object), False, True
+ context[objid] = 1
+ readable = True
+ recursive = False
+ components = []
+ append = components.append
+ level += 1
+ for o in object:
+ orepr, oreadable, orecur = _safe_repr(o, context, maxlevels, level)
+ append(orepr)
+ if not oreadable:
+ readable = False
+ if orecur:
+ recursive = True
+ del context[objid]
+ return format % _commajoin(components), readable, recursive
+
+ rep = repr(object)
+ return rep, (rep and not rep.startswith('<')), False
+
+
+def _recursion(object):
+ return ("<Recursion on %s with id=%s>"
+ % (_type(object).__name__, _id(object)))
+
+
+def _perfcheck(object=None):
+ import time
+ if object is None:
+ object = [("string", (1, 2), [3, 4], {5: 6, 7: 8})] * 100000
+ p = PrettyPrinter()
+ t1 = time.time()
+ _safe_repr(object, {}, None, 0)
+ t2 = time.time()
+ p.pformat(object)
+ t3 = time.time()
+ print "_safe_repr:", t2 - t1
+ print "pformat:", t3 - t2
+
+if __name__ == "__main__":
+ _perfcheck()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/pydoc.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/pydoc.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,2259 @@
+#!/usr/bin/env python
+# -*- coding: Latin-1 -*-
+"""Generate Python documentation in HTML or text for interactive use.
+
+In the Python interpreter, do "from pydoc import help" to provide online
+help. Calling help(thing) on a Python object documents the object.
+
+Or, at the shell command line outside of Python:
+
+Run "pydoc <name>" to show documentation on something. <name> may be
+the name of a function, module, package, or a dotted reference to a
+class or function within a module or module in a package. If the
+argument contains a path segment delimiter (e.g. slash on Unix,
+backslash on Windows) it is treated as the path to a Python source file.
+
+Run "pydoc -k <keyword>" to search for a keyword in the synopsis lines
+of all available modules.
+
+Run "pydoc -p <port>" to start an HTTP server on a given port on the
+local machine to generate documentation web pages.
+
+For platforms without a command line, "pydoc -g" starts the HTTP server
+and also pops up a little window for controlling it.
+
+Run "pydoc -w <name>" to write out the HTML documentation for a module
+to a file named "<name>.html".
+
+Module docs for core modules are assumed to be in
+
+ http://www.python.org/doc/current/lib/
+
+This can be overridden by setting the PYTHONDOCS environment variable
+to a different URL or to a local directory containing the Library
+Reference Manual pages.
+"""
+
+__author__ = "Ka-Ping Yee <ping at lfw.org>"
+__date__ = "26 February 2001"
+
+__version__ = "$Revision: 54366 $"
+__credits__ = """Guido van Rossum, for an excellent programming language.
+Tommy Burnette, the original creator of manpy.
+Paul Prescod, for all his work on onlinehelp.
+Richard Chamberlain, for the first implementation of textdoc.
+"""
+
+# Known bugs that can't be fixed here:
+# - imp.load_module() cannot be prevented from clobbering existing
+# loaded modules, so calling synopsis() on a binary module file
+# changes the contents of any existing module with the same name.
+# - If the __file__ attribute on a module is a relative path and
+# the current directory is changed with os.chdir(), an incorrect
+# path will be displayed.
+
+import sys, imp, os, re, types, inspect, __builtin__, pkgutil
+from repr import Repr
+from string import expandtabs, find, join, lower, split, strip, rfind, rstrip
+try:
+ from collections import deque
+except ImportError:
+ # Python 2.3 compatibility
+ class deque(list):
+ def popleft(self):
+ return self.pop(0)
+
+# --------------------------------------------------------- common routines
+
+def pathdirs():
+ """Convert sys.path into a list of absolute, existing, unique paths."""
+ dirs = []
+ normdirs = []
+ for dir in sys.path:
+ dir = os.path.abspath(dir or '.')
+ normdir = os.path.normcase(dir)
+ if normdir not in normdirs and os.path.isdir(dir):
+ dirs.append(dir)
+ normdirs.append(normdir)
+ return dirs
+
+def getdoc(object):
+ """Get the doc string or comments for an object."""
+ result = inspect.getdoc(object) or inspect.getcomments(object)
+ return result and re.sub('^ *\n', '', rstrip(result)) or ''
+
+def splitdoc(doc):
+ """Split a doc string into a synopsis line (if any) and the rest."""
+ lines = split(strip(doc), '\n')
+ if len(lines) == 1:
+ return lines[0], ''
+ elif len(lines) >= 2 and not rstrip(lines[1]):
+ return lines[0], join(lines[2:], '\n')
+ return '', join(lines, '\n')
+
+def classname(object, modname):
+ """Get a class name and qualify it with a module name if necessary."""
+ name = object.__name__
+ if object.__module__ != modname:
+ name = object.__module__ + '.' + name
+ return name
+
+def isdata(object):
+ """Check if an object is of a type that probably means it's data."""
+ return not (inspect.ismodule(object) or inspect.isclass(object) or
+ inspect.isroutine(object) or inspect.isframe(object) or
+ inspect.istraceback(object) or inspect.iscode(object))
+
+def replace(text, *pairs):
+ """Do a series of global replacements on a string."""
+ while pairs:
+ text = join(split(text, pairs[0]), pairs[1])
+ pairs = pairs[2:]
+ return text
+
+def cram(text, maxlen):
+ """Omit part of a string if needed to make it fit in a maximum length."""
+ if len(text) > maxlen:
+ pre = max(0, (maxlen-3)//2)
+ post = max(0, maxlen-3-pre)
+ return text[:pre] + '...' + text[len(text)-post:]
+ return text
+
+_re_stripid = re.compile(r' at 0x[0-9a-f]{6,16}(>+)$', re.IGNORECASE)
+def stripid(text):
+ """Remove the hexadecimal id from a Python object representation."""
+ # The behaviour of %p is implementation-dependent in terms of case.
+ if _re_stripid.search(repr(Exception)):
+ return _re_stripid.sub(r'\1', text)
+ return text
+
+def _is_some_method(obj):
+ return inspect.ismethod(obj) or inspect.ismethoddescriptor(obj)
+
+def allmethods(cl):
+ methods = {}
+ for key, value in inspect.getmembers(cl, _is_some_method):
+ methods[key] = 1
+ for base in cl.__bases__:
+ methods.update(allmethods(base)) # all your base are belong to us
+ for key in methods.keys():
+ methods[key] = getattr(cl, key)
+ return methods
+
+def _split_list(s, predicate):
+ """Split sequence s via predicate, and return pair ([true], [false]).
+
+ The return value is a 2-tuple of lists,
+ ([x for x in s if predicate(x)],
+ [x for x in s if not predicate(x)])
+ """
+
+ yes = []
+ no = []
+ for x in s:
+ if predicate(x):
+ yes.append(x)
+ else:
+ no.append(x)
+ return yes, no
+
+def visiblename(name, all=None):
+ """Decide whether to show documentation on a variable."""
+ # Certain special names are redundant.
+ if name in ('__builtins__', '__doc__', '__file__', '__path__',
+ '__module__', '__name__', '__slots__'): return 0
+ # Private names are hidden, but special names are displayed.
+ if name.startswith('__') and name.endswith('__'): return 1
+ if all is not None:
+ # only document that which the programmer exported in __all__
+ return name in all
+ else:
+ return not name.startswith('_')
+
+def classify_class_attrs(object):
+ """Wrap inspect.classify_class_attrs, with fixup for data descriptors."""
+ def fixup((name, kind, cls, value)):
+ if inspect.isdatadescriptor(value):
+ kind = 'data descriptor'
+ return name, kind, cls, value
+ return map(fixup, inspect.classify_class_attrs(object))
+
+# ----------------------------------------------------- module manipulation
+
+def ispackage(path):
+ """Guess whether a path refers to a package directory."""
+ if os.path.isdir(path):
+ for ext in ('.py', '.pyc', '.pyo'):
+ if os.path.isfile(os.path.join(path, '__init__' + ext)):
+ return True
+ return False
+
+def source_synopsis(file):
+ line = file.readline()
+ while line[:1] == '#' or not strip(line):
+ line = file.readline()
+ if not line: break
+ line = strip(line)
+ if line[:4] == 'r"""': line = line[1:]
+ if line[:3] == '"""':
+ line = line[3:]
+ if line[-1:] == '\\': line = line[:-1]
+ while not strip(line):
+ line = file.readline()
+ if not line: break
+ result = strip(split(line, '"""')[0])
+ else: result = None
+ return result
+
+def synopsis(filename, cache={}):
+ """Get the one-line summary out of a module file."""
+ mtime = os.stat(filename).st_mtime
+ lastupdate, result = cache.get(filename, (0, None))
+ if lastupdate < mtime:
+ info = inspect.getmoduleinfo(filename)
+ try:
+ file = open(filename)
+ except IOError:
+ # module can't be opened, so skip it
+ return None
+ if info and 'b' in info[2]: # binary modules have to be imported
+ try: module = imp.load_module('__temp__', file, filename, info[1:])
+ except: return None
+ result = (module.__doc__ or '').splitlines()[0]
+ del sys.modules['__temp__']
+ else: # text modules can be directly examined
+ result = source_synopsis(file)
+ file.close()
+ cache[filename] = (mtime, result)
+ return result
+
+class ErrorDuringImport(Exception):
+ """Errors that occurred while trying to import something to document it."""
+ def __init__(self, filename, (exc, value, tb)):
+ self.filename = filename
+ self.exc = exc
+ self.value = value
+ self.tb = tb
+
+ def __str__(self):
+ exc = self.exc
+ if type(exc) is types.ClassType:
+ exc = exc.__name__
+ return 'problem in %s - %s: %s' % (self.filename, exc, self.value)
+
+def importfile(path):
+ """Import a Python source file or compiled file given its path."""
+ magic = imp.get_magic()
+ file = open(path, 'r')
+ if file.read(len(magic)) == magic:
+ kind = imp.PY_COMPILED
+ else:
+ kind = imp.PY_SOURCE
+ file.close()
+ filename = os.path.basename(path)
+ name, ext = os.path.splitext(filename)
+ file = open(path, 'r')
+ try:
+ module = imp.load_module(name, file, path, (ext, 'r', kind))
+ except:
+ raise ErrorDuringImport(path, sys.exc_info())
+ file.close()
+ return module
+
+def safeimport(path, forceload=0, cache={}):
+ """Import a module; handle errors; return None if the module isn't found.
+
+ If the module *is* found but an exception occurs, it's wrapped in an
+ ErrorDuringImport exception and reraised. Unlike __import__, if a
+ package path is specified, the module at the end of the path is returned,
+ not the package at the beginning. If the optional 'forceload' argument
+ is 1, we reload the module from disk (unless it's a dynamic extension)."""
+ try:
+ # If forceload is 1 and the module has been previously loaded from
+ # disk, we always have to reload the module. Checking the file's
+ # mtime isn't good enough (e.g. the module could contain a class
+ # that inherits from another module that has changed).
+ if forceload and path in sys.modules:
+ if path not in sys.builtin_module_names:
+ # Avoid simply calling reload() because it leaves names in
+ # the currently loaded module lying around if they're not
+ # defined in the new source file. Instead, remove the
+ # module from sys.modules and re-import. Also remove any
+ # submodules because they won't appear in the newly loaded
+ # module's namespace if they're already in sys.modules.
+ subs = [m for m in sys.modules if m.startswith(path + '.')]
+ for key in [path] + subs:
+ # Prevent garbage collection.
+ cache[key] = sys.modules[key]
+ del sys.modules[key]
+ module = __import__(path)
+ except:
+ # Did the error occur before or after the module was found?
+ (exc, value, tb) = info = sys.exc_info()
+ if path in sys.modules:
+ # An error occurred while executing the imported module.
+ raise ErrorDuringImport(sys.modules[path].__file__, info)
+ elif exc is SyntaxError:
+ # A SyntaxError occurred before we could execute the module.
+ raise ErrorDuringImport(value.filename, info)
+ elif exc is ImportError and \
+ split(lower(str(value)))[:2] == ['no', 'module']:
+ # The module was not found.
+ return None
+ else:
+ # Some other error occurred during the importing process.
+ raise ErrorDuringImport(path, sys.exc_info())
+ for part in split(path, '.')[1:]:
+ try: module = getattr(module, part)
+ except AttributeError: return None
+ return module
+
+# ---------------------------------------------------- formatter base class
+
+class Doc:
+ def document(self, object, name=None, *args):
+ """Generate documentation for an object."""
+ args = (object, name) + args
+ # 'try' clause is to attempt to handle the possibility that inspect
+ # identifies something in a way that pydoc itself has issues handling;
+ # think 'super' and how it is a descriptor (which raises the exception
+ # by lacking a __name__ attribute) and an instance.
+ if inspect.isgetsetdescriptor(object): return self.docdata(*args)
+ if inspect.ismemberdescriptor(object): return self.docdata(*args)
+ try:
+ if inspect.ismodule(object): return self.docmodule(*args)
+ if inspect.isclass(object): return self.docclass(*args)
+ if inspect.isroutine(object): return self.docroutine(*args)
+ except AttributeError:
+ pass
+ if isinstance(object, property): return self.docproperty(*args)
+ return self.docother(*args)
+
+ def fail(self, object, name=None, *args):
+ """Raise an exception for unimplemented types."""
+ message = "don't know how to document object%s of type %s" % (
+ name and ' ' + repr(name), type(object).__name__)
+ raise TypeError, message
+
+ docmodule = docclass = docroutine = docother = docproperty = docdata = fail
+
+ def getdocloc(self, object):
+ """Return the location of module docs or None"""
+
+ try:
+ file = inspect.getabsfile(object)
+ except TypeError:
+ file = '(built-in)'
+
+ docloc = os.environ.get("PYTHONDOCS",
+ "http://www.python.org/doc/current/lib")
+ basedir = os.path.join(sys.exec_prefix, "lib",
+ "python"+sys.version[0:3])
+ if (isinstance(object, type(os)) and
+ (object.__name__ in ('errno', 'exceptions', 'gc', 'imp',
+ 'marshal', 'posix', 'signal', 'sys',
+ 'thread', 'zipimport') or
+ (file.startswith(basedir) and
+ not file.startswith(os.path.join(basedir, 'site-packages'))))):
+ htmlfile = "module-%s.html" % object.__name__
+ if docloc.startswith("http://"):
+ docloc = "%s/%s" % (docloc.rstrip("/"), htmlfile)
+ else:
+ docloc = os.path.join(docloc, htmlfile)
+ else:
+ docloc = None
+ return docloc
+
+# -------------------------------------------- HTML documentation generator
+
+class HTMLRepr(Repr):
+ """Class for safely making an HTML representation of a Python object."""
+ def __init__(self):
+ Repr.__init__(self)
+ self.maxlist = self.maxtuple = 20
+ self.maxdict = 10
+ self.maxstring = self.maxother = 100
+
+ def escape(self, text):
+ return replace(text, '&', '&', '<', '<', '>', '>')
+
+ def repr(self, object):
+ return Repr.repr(self, object)
+
+ def repr1(self, x, level):
+ if hasattr(type(x), '__name__'):
+ methodname = 'repr_' + join(split(type(x).__name__), '_')
+ if hasattr(self, methodname):
+ return getattr(self, methodname)(x, level)
+ return self.escape(cram(stripid(repr(x)), self.maxother))
+
+ def repr_string(self, x, level):
+ test = cram(x, self.maxstring)
+ testrepr = repr(test)
+ if '\\' in test and '\\' not in replace(testrepr, r'\\', ''):
+ # Backslashes are only literal in the string and are never
+ # needed to make any special characters, so show a raw string.
+ return 'r' + testrepr[0] + self.escape(test) + testrepr[0]
+ return re.sub(r'((\\[\\abfnrtv\'"]|\\[0-9]..|\\x..|\\u....)+)',
+ r'<font color="#c040c0">\1</font>',
+ self.escape(testrepr))
+
+ repr_str = repr_string
+
+ def repr_instance(self, x, level):
+ try:
+ return self.escape(cram(stripid(repr(x)), self.maxstring))
+ except:
+ return self.escape('<%s instance>' % x.__class__.__name__)
+
+ repr_unicode = repr_string
+
+class HTMLDoc(Doc):
+ """Formatter class for HTML documentation."""
+
+ # ------------------------------------------- HTML formatting utilities
+
+ _repr_instance = HTMLRepr()
+ repr = _repr_instance.repr
+ escape = _repr_instance.escape
+
+ def page(self, title, contents):
+ """Format an HTML page."""
+ return '''
+<!doctype html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<html><head><title>Python: %s</title>
+</head><body bgcolor="#f0f0f8">
+%s
+</body></html>''' % (title, contents)
+
+ def heading(self, title, fgcol, bgcol, extras=''):
+ """Format a page heading."""
+ return '''
+<table width="100%%" cellspacing=0 cellpadding=2 border=0 summary="heading">
+<tr bgcolor="%s">
+<td valign=bottom> <br>
+<font color="%s" face="helvetica, arial"> <br>%s</font></td
+><td align=right valign=bottom
+><font color="%s" face="helvetica, arial">%s</font></td></tr></table>
+ ''' % (bgcol, fgcol, title, fgcol, extras or ' ')
+
+ def section(self, title, fgcol, bgcol, contents, width=6,
+ prelude='', marginalia=None, gap=' '):
+ """Format a section with a heading."""
+ if marginalia is None:
+ marginalia = '<tt>' + ' ' * width + '</tt>'
+ result = '''<p>
+<table width="100%%" cellspacing=0 cellpadding=2 border=0 summary="section">
+<tr bgcolor="%s">
+<td colspan=3 valign=bottom> <br>
+<font color="%s" face="helvetica, arial">%s</font></td></tr>
+ ''' % (bgcol, fgcol, title)
+ if prelude:
+ result = result + '''
+<tr bgcolor="%s"><td rowspan=2>%s</td>
+<td colspan=2>%s</td></tr>
+<tr><td>%s</td>''' % (bgcol, marginalia, prelude, gap)
+ else:
+ result = result + '''
+<tr><td bgcolor="%s">%s</td><td>%s</td>''' % (bgcol, marginalia, gap)
+
+ return result + '\n<td width="100%%">%s</td></tr></table>' % contents
+
+ def bigsection(self, title, *args):
+ """Format a section with a big heading."""
+ title = '<big><strong>%s</strong></big>' % title
+ return self.section(title, *args)
+
+ def preformat(self, text):
+ """Format literal preformatted text."""
+ text = self.escape(expandtabs(text))
+ return replace(text, '\n\n', '\n \n', '\n\n', '\n \n',
+ ' ', ' ', '\n', '<br>\n')
+
+ def multicolumn(self, list, format, cols=4):
+ """Format a list of items into a multi-column list."""
+ result = ''
+ rows = (len(list)+cols-1)/cols
+ for col in range(cols):
+ result = result + '<td width="%d%%" valign=top>' % (100/cols)
+ for i in range(rows*col, rows*col+rows):
+ if i < len(list):
+ result = result + format(list[i]) + '<br>\n'
+ result = result + '</td>'
+ return '<table width="100%%" summary="list"><tr>%s</tr></table>' % result
+
+ def grey(self, text): return '<font color="#909090">%s</font>' % text
+
+ def namelink(self, name, *dicts):
+ """Make a link for an identifier, given name-to-URL mappings."""
+ for dict in dicts:
+ if name in dict:
+ return '<a href="%s">%s</a>' % (dict[name], name)
+ return name
+
+ def classlink(self, object, modname):
+ """Make a link for a class."""
+ name, module = object.__name__, sys.modules.get(object.__module__)
+ if hasattr(module, name) and getattr(module, name) is object:
+ return '<a href="%s.html#%s">%s</a>' % (
+ module.__name__, name, classname(object, modname))
+ return classname(object, modname)
+
+ def modulelink(self, object):
+ """Make a link for a module."""
+ return '<a href="%s.html">%s</a>' % (object.__name__, object.__name__)
+
+ def modpkglink(self, (name, path, ispackage, shadowed)):
+ """Make a link for a module or package to display in an index."""
+ if shadowed:
+ return self.grey(name)
+ if path:
+ url = '%s.%s.html' % (path, name)
+ else:
+ url = '%s.html' % name
+ if ispackage:
+ text = '<strong>%s</strong> (package)' % name
+ else:
+ text = name
+ return '<a href="%s">%s</a>' % (url, text)
+
+ def markup(self, text, escape=None, funcs={}, classes={}, methods={}):
+ """Mark up some plain text, given a context of symbols to look for.
+ Each context dictionary maps object names to anchor names."""
+ escape = escape or self.escape
+ results = []
+ here = 0
+ pattern = re.compile(r'\b((http|ftp)://\S+[\w/]|'
+ r'RFC[- ]?(\d+)|'
+ r'PEP[- ]?(\d+)|'
+ r'(self\.)?(\w+))')
+ while True:
+ match = pattern.search(text, here)
+ if not match: break
+ start, end = match.span()
+ results.append(escape(text[here:start]))
+
+ all, scheme, rfc, pep, selfdot, name = match.groups()
+ if scheme:
+ url = escape(all).replace('"', '"')
+ results.append('<a href="%s">%s</a>' % (url, url))
+ elif rfc:
+ url = 'http://www.rfc-editor.org/rfc/rfc%d.txt' % int(rfc)
+ results.append('<a href="%s">%s</a>' % (url, escape(all)))
+ elif pep:
+ url = 'http://www.python.org/peps/pep-%04d.html' % int(pep)
+ results.append('<a href="%s">%s</a>' % (url, escape(all)))
+ elif text[end:end+1] == '(':
+ results.append(self.namelink(name, methods, funcs, classes))
+ elif selfdot:
+ results.append('self.<strong>%s</strong>' % name)
+ else:
+ results.append(self.namelink(name, classes))
+ here = end
+ results.append(escape(text[here:]))
+ return join(results, '')
+
+ # ---------------------------------------------- type-specific routines
+
+ def formattree(self, tree, modname, parent=None):
+ """Produce HTML for a class tree as given by inspect.getclasstree()."""
+ result = ''
+ for entry in tree:
+ if type(entry) is type(()):
+ c, bases = entry
+ result = result + '<dt><font face="helvetica, arial">'
+ result = result + self.classlink(c, modname)
+ if bases and bases != (parent,):
+ parents = []
+ for base in bases:
+ parents.append(self.classlink(base, modname))
+ result = result + '(' + join(parents, ', ') + ')'
+ result = result + '\n</font></dt>'
+ elif type(entry) is type([]):
+ result = result + '<dd>\n%s</dd>\n' % self.formattree(
+ entry, modname, c)
+ return '<dl>\n%s</dl>\n' % result
+
+ def docmodule(self, object, name=None, mod=None, *ignored):
+ """Produce HTML documentation for a module object."""
+ name = object.__name__ # ignore the passed-in name
+ try:
+ all = object.__all__
+ except AttributeError:
+ all = None
+ parts = split(name, '.')
+ links = []
+ for i in range(len(parts)-1):
+ links.append(
+ '<a href="%s.html"><font color="#ffffff">%s</font></a>' %
+ (join(parts[:i+1], '.'), parts[i]))
+ linkedname = join(links + parts[-1:], '.')
+ head = '<big><big><strong>%s</strong></big></big>' % linkedname
+ try:
+ path = inspect.getabsfile(object)
+ url = path
+ if sys.platform == 'win32':
+ import nturl2path
+ url = nturl2path.pathname2url(path)
+ filelink = '<a href="file:%s">%s</a>' % (url, path)
+ except TypeError:
+ filelink = '(built-in)'
+ info = []
+ if hasattr(object, '__version__'):
+ version = str(object.__version__)
+ if version[:11] == '$' + 'Revision: ' and version[-1:] == '$':
+ version = strip(version[11:-1])
+ info.append('version %s' % self.escape(version))
+ if hasattr(object, '__date__'):
+ info.append(self.escape(str(object.__date__)))
+ if info:
+ head = head + ' (%s)' % join(info, ', ')
+ docloc = self.getdocloc(object)
+ if docloc is not None:
+ docloc = '<br><a href="%(docloc)s">Module Docs</a>' % locals()
+ else:
+ docloc = ''
+ result = self.heading(
+ head, '#ffffff', '#7799ee',
+ '<a href=".">index</a><br>' + filelink + docloc)
+
+ modules = inspect.getmembers(object, inspect.ismodule)
+
+ classes, cdict = [], {}
+ for key, value in inspect.getmembers(object, inspect.isclass):
+ # if __all__ exists, believe it. Otherwise use old heuristic.
+ if (all is not None or
+ (inspect.getmodule(value) or object) is object):
+ if visiblename(key, all):
+ classes.append((key, value))
+ cdict[key] = cdict[value] = '#' + key
+ for key, value in classes:
+ for base in value.__bases__:
+ key, modname = base.__name__, base.__module__
+ module = sys.modules.get(modname)
+ if modname != name and module and hasattr(module, key):
+ if getattr(module, key) is base:
+ if not key in cdict:
+ cdict[key] = cdict[base] = modname + '.html#' + key
+ funcs, fdict = [], {}
+ for key, value in inspect.getmembers(object, inspect.isroutine):
+ # if __all__ exists, believe it. Otherwise use old heuristic.
+ if (all is not None or
+ inspect.isbuiltin(value) or inspect.getmodule(value) is object):
+ if visiblename(key, all):
+ funcs.append((key, value))
+ fdict[key] = '#-' + key
+ if inspect.isfunction(value): fdict[value] = fdict[key]
+ data = []
+ for key, value in inspect.getmembers(object, isdata):
+ if visiblename(key, all):
+ data.append((key, value))
+
+ doc = self.markup(getdoc(object), self.preformat, fdict, cdict)
+ doc = doc and '<tt>%s</tt>' % doc
+ result = result + '<p>%s</p>\n' % doc
+
+ if hasattr(object, '__path__'):
+ modpkgs = []
+ for importer, modname, ispkg in pkgutil.iter_modules(object.__path__):
+ modpkgs.append((modname, name, ispkg, 0))
+ modpkgs.sort()
+ contents = self.multicolumn(modpkgs, self.modpkglink)
+ result = result + self.bigsection(
+ 'Package Contents', '#ffffff', '#aa55cc', contents)
+ elif modules:
+ contents = self.multicolumn(
+ modules, lambda (key, value), s=self: s.modulelink(value))
+ result = result + self.bigsection(
+ 'Modules', '#fffff', '#aa55cc', contents)
+
+ if classes:
+ classlist = map(lambda (key, value): value, classes)
+ contents = [
+ self.formattree(inspect.getclasstree(classlist, 1), name)]
+ for key, value in classes:
+ contents.append(self.document(value, key, name, fdict, cdict))
+ result = result + self.bigsection(
+ 'Classes', '#ffffff', '#ee77aa', join(contents))
+ if funcs:
+ contents = []
+ for key, value in funcs:
+ contents.append(self.document(value, key, name, fdict, cdict))
+ result = result + self.bigsection(
+ 'Functions', '#ffffff', '#eeaa77', join(contents))
+ if data:
+ contents = []
+ for key, value in data:
+ contents.append(self.document(value, key))
+ result = result + self.bigsection(
+ 'Data', '#ffffff', '#55aa55', join(contents, '<br>\n'))
+ if hasattr(object, '__author__'):
+ contents = self.markup(str(object.__author__), self.preformat)
+ result = result + self.bigsection(
+ 'Author', '#ffffff', '#7799ee', contents)
+ if hasattr(object, '__credits__'):
+ contents = self.markup(str(object.__credits__), self.preformat)
+ result = result + self.bigsection(
+ 'Credits', '#ffffff', '#7799ee', contents)
+
+ return result
+
+ def docclass(self, object, name=None, mod=None, funcs={}, classes={},
+ *ignored):
+ """Produce HTML documentation for a class object."""
+ realname = object.__name__
+ name = name or realname
+ bases = object.__bases__
+
+ contents = []
+ push = contents.append
+
+ # Cute little class to pump out a horizontal rule between sections.
+ class HorizontalRule:
+ def __init__(self):
+ self.needone = 0
+ def maybe(self):
+ if self.needone:
+ push('<hr>\n')
+ self.needone = 1
+ hr = HorizontalRule()
+
+ # List the mro, if non-trivial.
+ mro = deque(inspect.getmro(object))
+ if len(mro) > 2:
+ hr.maybe()
+ push('<dl><dt>Method resolution order:</dt>\n')
+ for base in mro:
+ push('<dd>%s</dd>\n' % self.classlink(base,
+ object.__module__))
+ push('</dl>\n')
+
+ def spill(msg, attrs, predicate):
+ ok, attrs = _split_list(attrs, predicate)
+ if ok:
+ hr.maybe()
+ push(msg)
+ for name, kind, homecls, value in ok:
+ push(self.document(getattr(object, name), name, mod,
+ funcs, classes, mdict, object))
+ push('\n')
+ return attrs
+
+ def spilldescriptors(msg, attrs, predicate):
+ ok, attrs = _split_list(attrs, predicate)
+ if ok:
+ hr.maybe()
+ push(msg)
+ for name, kind, homecls, value in ok:
+ push(self._docdescriptor(name, value, mod))
+ return attrs
+
+ def spilldata(msg, attrs, predicate):
+ ok, attrs = _split_list(attrs, predicate)
+ if ok:
+ hr.maybe()
+ push(msg)
+ for name, kind, homecls, value in ok:
+ base = self.docother(getattr(object, name), name, mod)
+ if callable(value) or inspect.isdatadescriptor(value):
+ doc = getattr(value, "__doc__", None)
+ else:
+ doc = None
+ if doc is None:
+ push('<dl><dt>%s</dl>\n' % base)
+ else:
+ doc = self.markup(getdoc(value), self.preformat,
+ funcs, classes, mdict)
+ doc = '<dd><tt>%s</tt>' % doc
+ push('<dl><dt>%s%s</dl>\n' % (base, doc))
+ push('\n')
+ return attrs
+
+ attrs = filter(lambda (name, kind, cls, value): visiblename(name),
+ classify_class_attrs(object))
+ mdict = {}
+ for key, kind, homecls, value in attrs:
+ mdict[key] = anchor = '#' + name + '-' + key
+ value = getattr(object, key)
+ try:
+ # The value may not be hashable (e.g., a data attr with
+ # a dict or list value).
+ mdict[value] = anchor
+ except TypeError:
+ pass
+
+ while attrs:
+ if mro:
+ thisclass = mro.popleft()
+ else:
+ thisclass = attrs[0][2]
+ attrs, inherited = _split_list(attrs, lambda t: t[2] is thisclass)
+
+ if thisclass is __builtin__.object:
+ attrs = inherited
+ continue
+ elif thisclass is object:
+ tag = 'defined here'
+ else:
+ tag = 'inherited from %s' % self.classlink(thisclass,
+ object.__module__)
+ tag += ':<br>\n'
+
+ # Sort attrs by name.
+ try:
+ attrs.sort(key=lambda t: t[0])
+ except TypeError:
+ attrs.sort(lambda t1, t2: cmp(t1[0], t2[0])) # 2.3 compat
+
+ # Pump out the attrs, segregated by kind.
+ attrs = spill('Methods %s' % tag, attrs,
+ lambda t: t[1] == 'method')
+ attrs = spill('Class methods %s' % tag, attrs,
+ lambda t: t[1] == 'class method')
+ attrs = spill('Static methods %s' % tag, attrs,
+ lambda t: t[1] == 'static method')
+ attrs = spilldescriptors('Data descriptors %s' % tag, attrs,
+ lambda t: t[1] == 'data descriptor')
+ attrs = spilldata('Data and other attributes %s' % tag, attrs,
+ lambda t: t[1] == 'data')
+ assert attrs == []
+ attrs = inherited
+
+ contents = ''.join(contents)
+
+ if name == realname:
+ title = '<a name="%s">class <strong>%s</strong></a>' % (
+ name, realname)
+ else:
+ title = '<strong>%s</strong> = <a name="%s">class %s</a>' % (
+ name, name, realname)
+ if bases:
+ parents = []
+ for base in bases:
+ parents.append(self.classlink(base, object.__module__))
+ title = title + '(%s)' % join(parents, ', ')
+ doc = self.markup(getdoc(object), self.preformat, funcs, classes, mdict)
+ doc = doc and '<tt>%s<br> </tt>' % doc
+
+ return self.section(title, '#000000', '#ffc8d8', contents, 3, doc)
+
    def formatvalue(self, object):
        """Format an argument default value as text."""
        # Grey the "=default" out so it reads as secondary in the signature.
        return self.grey('=' + self.repr(object))
+
    def docroutine(self, object, name=None, mod=None,
                   funcs={}, classes={}, methods={}, cl=None):
        """Produce HTML documentation for a function or method object.

        name is the name to display (defaults to the object's own name),
        mod the module for cross-links, cl the class being documented (if
        any); funcs/classes/methods are markup cross-reference tables.
        """
        realname = object.__name__
        name = name or realname
        anchor = (cl and cl.__name__ or '') + '-' + name
        note = ''
        skipdocs = 0
        if inspect.ismethod(object):
            imclass = object.im_class
            if cl:
                if imclass is not cl:
                    # Method inherited from another class: say where from.
                    note = ' from ' + self.classlink(imclass, mod)
            else:
                if object.im_self is not None:
                    note = ' method of %s instance' % self.classlink(
                        object.im_self.__class__, mod)
                else:
                    note = ' unbound %s method' % self.classlink(imclass,mod)
            # Document the underlying function from here on.
            object = object.im_func

        if name == realname:
            title = '<a name="%s"><strong>%s</strong></a>' % (anchor, realname)
        else:
            if (cl and realname in cl.__dict__ and
                cl.__dict__[realname] is object):
                # The object also lives under its real name on the class:
                # link there and skip repeating the docs.
                reallink = '<a href="#%s">%s</a>' % (
                    cl.__name__ + '-' + realname, realname)
                skipdocs = 1
            else:
                reallink = realname
            title = '<a name="%s"><strong>%s</strong></a> = %s' % (
                anchor, name, reallink)
        if inspect.isfunction(object) or (
            inspect.isbuiltin(object) and hasattr(object, 'func_code')):
            # PyPy extension: the code below works for built-in functions too
            args, varargs, varkw, defaults = inspect.getargspec(object)
            argspec = inspect.formatargspec(
                args, varargs, varkw, defaults, formatvalue=self.formatvalue)
            if realname == '<lambda>':
                title = '<strong>%s</strong> <em>lambda</em> ' % name
                argspec = argspec[1:-1] # remove parentheses
        else:
            # No introspectable signature available.
            argspec = '(...)'

        decl = title + argspec + (note and self.grey(
            '<font face="helvetica, arial">%s</font>' % note))

        if skipdocs:
            return '<dl><dt>%s</dt></dl>\n' % decl
        else:
            doc = self.markup(
                getdoc(object), self.preformat, funcs, classes, methods)
            doc = doc and '<dd><tt>%s</tt></dd>' % doc
            return '<dl><dt>%s</dt>%s</dl>\n' % (decl, doc)
+
    def _docdescriptor(self, name, value, mod):
        # Shared renderer for properties and data descriptors: a <dl>
        # holding the bold name and, when present, the value's docstring.
        results = []
        push = results.append

        if name:
            push('<dl><dt><strong>%s</strong></dt>\n' % name)
        if value.__doc__ is not None:
            doc = self.markup(getdoc(value), self.preformat)
            push('<dd><tt>%s</tt></dd>\n' % doc)
        push('</dl>\n')

        return ''.join(results)
+
    def docproperty(self, object, name=None, mod=None, cl=None):
        """Produce html documentation for a property."""
        # cl is accepted for interface symmetry with docroutine but unused.
        return self._docdescriptor(name, object, mod)
+
    def docother(self, object, name=None, mod=None, *ignored):
        """Produce HTML documentation for a data object."""
        # Renders "name = repr(object)"; extra positional args are ignored
        # so this can be called through the generic document() dispatcher.
        lhs = name and '<strong>%s</strong> = ' % name or ''
        return lhs + self.repr(object)
+
    def docdata(self, object, name=None, mod=None, cl=None):
        """Produce html documentation for a data descriptor."""
        return self._docdescriptor(name, object, mod)
+
    def index(self, dir, shadowed=None):
        """Generate an HTML index for a directory of modules.

        shadowed maps module names already seen on earlier paths; entries
        found again here are flagged so their links render as shadowed.
        """
        modpkgs = []
        if shadowed is None: shadowed = {}
        for importer, name, ispkg in pkgutil.iter_modules([dir]):
            modpkgs.append((name, '', ispkg, name in shadowed))
            shadowed[name] = 1

        modpkgs.sort()
        contents = self.multicolumn(modpkgs, self.modpkglink)
        return self.bigsection(dir, '#ffffff', '#ee77aa', contents)
+
+# -------------------------------------------- text documentation generator
+
class TextRepr(Repr):
    """Class for safely making a text representation of a Python object."""
    def __init__(self):
        Repr.__init__(self)
        # Tighter limits than Repr's defaults keep paged output readable.
        self.maxlist = self.maxtuple = 20
        self.maxdict = 10
        self.maxstring = self.maxother = 100

    def repr1(self, x, level):
        # Dispatch to a repr_<typename> method when one exists; otherwise
        # fall back to a cropped, address-stripped builtin repr().
        if hasattr(type(x), '__name__'):
            methodname = 'repr_' + join(split(type(x).__name__), '_')
            if hasattr(self, methodname):
                return getattr(self, methodname)(x, level)
        return cram(stripid(repr(x)), self.maxother)

    def repr_string(self, x, level):
        test = cram(x, self.maxstring)
        testrepr = repr(test)
        if '\\' in test and '\\' not in replace(testrepr, r'\\', ''):
            # Backslashes are only literal in the string and are never
            # needed to make any special characters, so show a raw string.
            return 'r' + testrepr[0] + test + testrepr[0]
        return testrepr

    repr_str = repr_string

    def repr_instance(self, x, level):
        try:
            return cram(stripid(repr(x)), self.maxstring)
        except:
            # A broken __repr__ may raise anything; degrade gracefully.
            return '<%s instance>' % x.__class__.__name__
+
class TextDoc(Doc):
    """Formatter class for text documentation."""

    # ------------------------------------------- text formatting utilities

    # Shared (class-level) repr helper for rendering values safely.
    _repr_instance = TextRepr()
    repr = _repr_instance.repr

    def bold(self, text):
        """Format a string in bold by overstriking."""
        # Terminal-style bold: each char becomes "X\bX".
        return join(map(lambda ch: ch + '\b' + ch, text), '')

    def indent(self, text, prefix=' '):
        """Indent text by prepending a given prefix to each line."""
        if not text: return ''
        lines = split(text, '\n')
        lines = map(lambda line, prefix=prefix: prefix + line, lines)
        # Avoid trailing whitespace on the final line.
        if lines: lines[-1] = rstrip(lines[-1])
        return join(lines, '\n')

    def section(self, title, contents):
        """Format a section with a given heading."""
        return self.bold(title) + '\n' + rstrip(self.indent(contents)) + '\n\n'

    # ---------------------------------------------- type-specific routines

    def formattree(self, tree, modname, parent=None, prefix=''):
        """Render in text a class tree as returned by inspect.getclasstree()."""
        # The tree alternates (class, bases) tuples with nested lists of
        # subclasses; recursion deepens the prefix for each level.
        result = ''
        for entry in tree:
            if type(entry) is type(()):
                c, bases = entry
                result = result + prefix + classname(c, modname)
                if bases and bases != (parent,):
                    parents = map(lambda c, m=modname: classname(c, m), bases)
                    result = result + '(%s)' % join(parents, ', ')
                result = result + '\n'
            elif type(entry) is type([]):
                result = result + self.formattree(
                    entry, modname, c, prefix + ' ')
        return result

    def docmodule(self, object, name=None, mod=None):
        """Produce text documentation for a given module object."""
        name = object.__name__ # ignore the passed-in name
        synop, desc = splitdoc(getdoc(object))
        result = self.section('NAME', name + (synop and ' - ' + synop))

        try:
            all = object.__all__
        except AttributeError:
            all = None

        try:
            file = inspect.getabsfile(object)
        except TypeError:
            file = '(built-in)'
        result = result + self.section('FILE', file)

        docloc = self.getdocloc(object)
        if docloc is not None:
            result = result + self.section('MODULE DOCS', docloc)

        if desc:
            result = result + self.section('DESCRIPTION', desc)

        classes = []
        for key, value in inspect.getmembers(object, inspect.isclass):
            # if __all__ exists, believe it. Otherwise use old heuristic.
            if (all is not None
                or (inspect.getmodule(value) or object) is object):
                if visiblename(key, all):
                    classes.append((key, value))
        funcs = []
        for key, value in inspect.getmembers(object, inspect.isroutine):
            # if __all__ exists, believe it. Otherwise use old heuristic.
            if (all is not None or
                inspect.isbuiltin(value) or inspect.getmodule(value) is object):
                if visiblename(key, all):
                    funcs.append((key, value))
        data = []
        for key, value in inspect.getmembers(object, isdata):
            if visiblename(key, all):
                data.append((key, value))

        if hasattr(object, '__path__'):
            # Packages get a listing of their contained modules.
            modpkgs = []
            for importer, modname, ispkg in pkgutil.iter_modules(object.__path__):
                if ispkg:
                    modpkgs.append(modname + ' (package)')
                else:
                    modpkgs.append(modname)

            modpkgs.sort()
            result = result + self.section(
                'PACKAGE CONTENTS', join(modpkgs, '\n'))

        if classes:
            classlist = map(lambda (key, value): value, classes)
            contents = [self.formattree(
                inspect.getclasstree(classlist, 1), name)]
            for key, value in classes:
                contents.append(self.document(value, key, name))
            result = result + self.section('CLASSES', join(contents, '\n'))

        if funcs:
            contents = []
            for key, value in funcs:
                contents.append(self.document(value, key, name))
            result = result + self.section('FUNCTIONS', join(contents, '\n'))

        if data:
            contents = []
            for key, value in data:
                contents.append(self.docother(value, key, name, maxlen=70))
            result = result + self.section('DATA', join(contents, '\n'))

        if hasattr(object, '__version__'):
            version = str(object.__version__)
            # Strip CVS/SVN "$Revision: ... $" keyword wrappers.
            if version[:11] == '$' + 'Revision: ' and version[-1:] == '$':
                version = strip(version[11:-1])
            result = result + self.section('VERSION', version)
        if hasattr(object, '__date__'):
            result = result + self.section('DATE', str(object.__date__))
        if hasattr(object, '__author__'):
            result = result + self.section('AUTHOR', str(object.__author__))
        if hasattr(object, '__credits__'):
            result = result + self.section('CREDITS', str(object.__credits__))
        return result

    def docclass(self, object, name=None, mod=None):
        """Produce text documentation for a given class object."""
        realname = object.__name__
        name = name or realname
        bases = object.__bases__

        def makename(c, m=object.__module__):
            return classname(c, m)

        if name == realname:
            title = 'class ' + self.bold(realname)
        else:
            title = self.bold(name) + ' = class ' + realname
        if bases:
            parents = map(makename, bases)
            title = title + '(%s)' % join(parents, ', ')

        doc = getdoc(object)
        contents = doc and [doc + '\n'] or []
        push = contents.append

        # List the mro, if non-trivial.
        mro = deque(inspect.getmro(object))
        if len(mro) > 2:
            push("Method resolution order:")
            for base in mro:
                push(' ' + makename(base))
            push('')

        # Cute little class to pump out a horizontal rule between sections.
        class HorizontalRule:
            def __init__(self):
                self.needone = 0
            def maybe(self):
                if self.needone:
                    push('-' * 70)
                self.needone = 1
        hr = HorizontalRule()

        def spill(msg, attrs, predicate):
            # Emit full documentation for attrs matching predicate;
            # return the attrs that did not match.
            ok, attrs = _split_list(attrs, predicate)
            if ok:
                hr.maybe()
                push(msg)
                for name, kind, homecls, value in ok:
                    push(self.document(getattr(object, name),
                                       name, mod, object))
            return attrs

        def spilldescriptors(msg, attrs, predicate):
            # Same as spill, but through the descriptor renderer.
            ok, attrs = _split_list(attrs, predicate)
            if ok:
                hr.maybe()
                push(msg)
                for name, kind, homecls, value in ok:
                    push(self._docdescriptor(name, value, mod))
            return attrs

        def spilldata(msg, attrs, predicate):
            # Same as spill, for plain data attributes (value + optional doc).
            ok, attrs = _split_list(attrs, predicate)
            if ok:
                hr.maybe()
                push(msg)
                for name, kind, homecls, value in ok:
                    if callable(value) or inspect.isdatadescriptor(value):
                        doc = getdoc(value)
                    else:
                        doc = None
                    push(self.docother(getattr(object, name),
                                       name, mod, maxlen=70, doc=doc) + '\n')
            return attrs

        attrs = filter(lambda (name, kind, cls, value): visiblename(name),
                       classify_class_attrs(object))
        while attrs:
            # Peel attributes off one MRO class at a time, most derived first.
            if mro:
                thisclass = mro.popleft()
            else:
                thisclass = attrs[0][2]
            attrs, inherited = _split_list(attrs, lambda t: t[2] is thisclass)

            if thisclass is __builtin__.object:
                attrs = inherited
                continue
            elif thisclass is object:
                tag = "defined here"
            else:
                tag = "inherited from %s" % classname(thisclass,
                                                      object.__module__)
            # NOTE(review): result of this filter() is discarded, so the
            # statement is a no-op as written — kept for fidelity.
            filter(lambda t: not t[0].startswith('_'), attrs)

            # Sort attrs by name.
            attrs.sort()

            # Pump out the attrs, segregated by kind.
            attrs = spill("Methods %s:\n" % tag, attrs,
                          lambda t: t[1] == 'method')
            attrs = spill("Class methods %s:\n" % tag, attrs,
                          lambda t: t[1] == 'class method')
            attrs = spill("Static methods %s:\n" % tag, attrs,
                          lambda t: t[1] == 'static method')
            attrs = spilldescriptors("Data descriptors %s:\n" % tag, attrs,
                                     lambda t: t[1] == 'data descriptor')
            attrs = spilldata("Data and other attributes %s:\n" % tag, attrs,
                              lambda t: t[1] == 'data')
            assert attrs == []
            attrs = inherited

        contents = '\n'.join(contents)
        if not contents:
            return title + '\n'
        return title + '\n' + self.indent(rstrip(contents), ' | ') + '\n'

    def formatvalue(self, object):
        """Format an argument default value as text."""
        return '=' + self.repr(object)

    def docroutine(self, object, name=None, mod=None, cl=None):
        """Produce text documentation for a function or method object."""
        realname = object.__name__
        name = name or realname
        note = ''
        skipdocs = 0
        if inspect.ismethod(object):
            imclass = object.im_class
            if cl:
                if imclass is not cl:
                    note = ' from ' + classname(imclass, mod)
            else:
                if object.im_self is not None:
                    note = ' method of %s instance' % classname(
                        object.im_self.__class__, mod)
                else:
                    note = ' unbound %s method' % classname(imclass,mod)
            # Document the underlying function from here on.
            object = object.im_func

        if name == realname:
            title = self.bold(realname)
        else:
            if (cl and realname in cl.__dict__ and
                cl.__dict__[realname] is object):
                # Same object also bound under its real name: skip the docs.
                skipdocs = 1
            title = self.bold(name) + ' = ' + realname
        if (inspect.isfunction(object) or
            inspect.isbuiltin(object) and hasattr(object, 'func_code')):
            # PyPy extension: the code below works for built-in functions too
            args, varargs, varkw, defaults = inspect.getargspec(object)
            argspec = inspect.formatargspec(
                args, varargs, varkw, defaults, formatvalue=self.formatvalue)
            if realname == '<lambda>':
                title = self.bold(name) + ' lambda '
                argspec = argspec[1:-1] # remove parentheses
        else:
            argspec = '(...)'
        decl = title + argspec + note

        if skipdocs:
            return decl + '\n'
        else:
            doc = getdoc(object) or ''
            return decl + '\n' + (doc and rstrip(self.indent(doc)) + '\n')

    def _docdescriptor(self, name, value, mod):
        # Shared renderer for properties and data descriptors.
        results = []
        push = results.append

        if name:
            push(self.bold(name))
            push('\n')
        doc = getdoc(value) or ''
        if doc:
            push(self.indent(doc))
            push('\n')
        return ''.join(results)

    def docproperty(self, object, name=None, mod=None, cl=None):
        """Produce text documentation for a property."""
        return self._docdescriptor(name, object, mod)

    def docdata(self, object, name=None, mod=None, cl=None):
        """Produce text documentation for a data descriptor."""
        return self._docdescriptor(name, object, mod)

    def docother(self, object, name=None, mod=None, parent=None, maxlen=None, doc=None):
        """Produce text documentation for a data object."""
        repr = self.repr(object)
        if maxlen:
            # Crop the repr so "name = value" fits within maxlen columns.
            line = (name and name + ' = ' or '') + repr
            chop = maxlen - len(line)
            if chop < 0: repr = repr[:chop] + '...'
        line = (name and self.bold(name) + ' = ' or '') + repr
        if doc is not None:
            line += '\n' + self.indent(str(doc))
        return line
+
+# --------------------------------------------------------- user interfaces
+
def pager(text):
    """The first time this is called, determine what kind of pager to use."""
    # Self-replacing stub: rebinds the module-level name 'pager' to the
    # concrete pager chosen by getpager(), so detection happens only once.
    global pager
    pager = getpager()
    pager(text)
+
def getpager():
    """Decide what method to use for paging through text."""
    # Fall back to plain printing when stdout is not a real interactive tty.
    if type(sys.stdout) is not types.FileType:
        return plainpager
    if not sys.stdin.isatty() or not sys.stdout.isatty():
        return plainpager
    if 'PAGER' in os.environ:
        if sys.platform == 'win32': # pipes completely broken in Windows
            return lambda text: tempfilepager(plain(text), os.environ['PAGER'])
        elif os.environ.get('TERM') in ('dumb', 'emacs'):
            # Dumb terminals can't render overstruck bold; strip it first.
            return lambda text: pipepager(plain(text), os.environ['PAGER'])
        else:
            return lambda text: pipepager(text, os.environ['PAGER'])
    if os.environ.get('TERM') in ('dumb', 'emacs'):
        return plainpager
    if sys.platform == 'win32' or sys.platform.startswith('os2'):
        return lambda text: tempfilepager(plain(text), 'more <')
    if hasattr(os, 'system') and os.system('(less) 2>/dev/null') == 0:
        return lambda text: pipepager(text, 'less')

    # Last resort: probe for "more" by running it on an empty temp file.
    import tempfile
    (fd, filename) = tempfile.mkstemp()
    os.close(fd)
    try:
        if hasattr(os, 'system') and os.system('more %s' % filename) == 0:
            return lambda text: pipepager(text, 'more')
        else:
            return ttypager
    finally:
        os.unlink(filename)
+
def plain(text):
    """Remove boldface formatting from text.

    Boldface is encoded terminal-style as "X<backspace>X"; dropping every
    character-plus-backspace pair leaves the plain character behind.
    """
    overstrike = re.compile('.\b')
    return overstrike.sub('', text)
+
def pipepager(text, cmd):
    """Page through text by feeding it to another program."""
    pipe = os.popen(cmd, 'w')
    try:
        pipe.write(text)
        pipe.close()
    except IOError:
        pass # Ignore broken pipes caused by quitting the pager program.
+
def tempfilepager(text, cmd):
    """Page through text by invoking a program on a temporary file."""
    import tempfile
    # NOTE(review): mktemp is race-prone (name may be reused before open);
    # kept as-is for fidelity with the 2.5 stdlib version.
    filename = tempfile.mktemp()
    file = open(filename, 'w')
    file.write(text)
    file.close()
    try:
        os.system(cmd + ' ' + filename)
    finally:
        os.unlink(filename)
+
def ttypager(text):
    """Page through text on a text terminal.

    Reads single keypresses in cbreak mode when the tty module is usable;
    otherwise falls back to line-buffered input. 'q' quits, Enter advances
    one line, 'b'/ESC go back a page, anything else advances a page.
    """
    lines = split(plain(text), '\n')
    try:
        import tty
        fd = sys.stdin.fileno()
        old = tty.tcgetattr(fd)
        tty.setcbreak(fd)
        getchar = lambda: sys.stdin.read(1)
    except (ImportError, AttributeError):
        # No termios support (e.g. not a POSIX tty): read whole lines and
        # use the first character typed.
        tty = None
        getchar = lambda: sys.stdin.readline()[:-1][:1]

    try:
        # BUG FIX: environment variables are strings, so the original
        # "os.environ.get('LINES', 25) - 1" raised TypeError whenever the
        # LINES variable was actually set; coerce with int() first.
        r = inc = int(os.environ.get('LINES', 25)) - 1
        sys.stdout.write(join(lines[:inc], '\n') + '\n')
        while lines[r:]:
            sys.stdout.write('-- more --')
            sys.stdout.flush()
            c = getchar()

            if c in ('q', 'Q'):
                sys.stdout.write('\r \r')
                break
            elif c in ('\r', '\n'):
                # Advance a single line, overwriting the "-- more --" prompt.
                sys.stdout.write('\r \r' + lines[r] + '\n')
                r = r + 1
                continue
            if c in ('b', 'B', '\x1b'):
                # Back up one full page (net effect with the +inc below).
                r = r - inc - inc
                if r < 0: r = 0
            sys.stdout.write('\n' + join(lines[r:r+inc], '\n') + '\n')
            r = r + inc

    finally:
        if tty:
            # Restore the terminal mode we saved before setcbreak.
            tty.tcsetattr(fd, tty.TCSAFLUSH, old)
+
def plainpager(text):
    """Simply print unformatted text. This is the ultimate fallback."""
    sys.stdout.write(plain(text))
+
def describe(thing):
    """Produce a short description of the given thing."""
    # Modules get the most specific label first: built-in, package, module.
    if inspect.ismodule(thing):
        modname = thing.__name__
        if modname in sys.builtin_module_names:
            return 'built-in module ' + modname
        if hasattr(thing, '__path__'):
            return 'package ' + modname
        return 'module ' + modname
    if inspect.isbuiltin(thing):
        return 'built-in function ' + thing.__name__
    # Descriptors are qualified by the class that owns them.
    for check, label in ((inspect.isgetsetdescriptor, 'getset descriptor'),
                         (inspect.ismemberdescriptor, 'member descriptor')):
        if check(thing):
            return '%s %s.%s.%s' % (label,
                                    thing.__objclass__.__module__,
                                    thing.__objclass__.__name__,
                                    thing.__name__)
    for check, label in ((inspect.isclass, 'class '),
                         (inspect.isfunction, 'function '),
                         (inspect.ismethod, 'method ')):
        if check(thing):
            return label + thing.__name__
    if type(thing) is types.InstanceType:
        return 'instance of ' + thing.__class__.__name__
    return type(thing).__name__
+
def locate(path, forceload=0):
    """Locate an object by name or dotted path, importing as necessary."""
    parts = [part for part in split(path, '.') if part]
    module, n = None, 0
    # Import the longest importable prefix of the dotted path...
    while n < len(parts):
        nextmodule = safeimport(join(parts[:n+1], '.'), forceload)
        if nextmodule: module, n = nextmodule, n + 1
        else: break
    if module:
        # ...then walk the remaining parts as attribute lookups.
        object = module
        for part in parts[n:]:
            try: object = getattr(object, part)
            except AttributeError: return None
        return object
    else:
        # Nothing importable: try builtins; implicitly returns None on miss.
        if hasattr(__builtin__, path):
            return getattr(__builtin__, path)
+
+# --------------------------------------- interactive interpreter interface
+
# Module-level formatter singletons shared by doc(), writedoc(), apropos(), etc.
text = TextDoc()
html = HTMLDoc()
+
def resolve(thing, forceload=0):
    """Given an object or a path to an object, get the object and its name."""
    if isinstance(thing, str):
        # A string is treated as a dotted path to import/locate.
        object = locate(thing, forceload)
        if not object:
            raise ImportError, 'no Python documentation found for %r' % thing
        return object, thing
    else:
        return thing, getattr(thing, '__name__', None)
+
def doc(thing, title='Python Library Documentation: %s', forceload=0):
    """Display text documentation, given an object or a path to an object."""
    try:
        object, name = resolve(thing, forceload)
        desc = describe(object)
        module = inspect.getmodule(object)
        if name and '.' in name:
            desc += ' in ' + name[:name.rfind('.')]
        elif module and module is not object:
            desc += ' in module ' + module.__name__
        if not (inspect.ismodule(object) or
                inspect.isclass(object) or
                inspect.isroutine(object) or
                inspect.isgetsetdescriptor(object) or
                inspect.ismemberdescriptor(object) or
                isinstance(object, property)):
            # If the passed object is a piece of data or an instance,
            # document its available methods instead of its value.
            object = type(object)
            desc += ' object'
        pager(title % desc + '\n\n' + text.document(object, name))
    except (ImportError, ErrorDuringImport), value:
        # Import failures are reported, not raised, in interactive use.
        print value
+
def writedoc(thing, forceload=0):
    """Write HTML documentation to a file in the current directory."""
    try:
        object, name = resolve(thing, forceload)
        page = html.page(describe(object), html.document(object, name))
        file = open(name + '.html', 'w')
        file.write(page)
        file.close()
        print 'wrote', name + '.html'
    except (ImportError, ErrorDuringImport), value:
        print value
+
def writedocs(dir, pkgpath='', done=None):
    """Write out HTML documentation for all modules in a directory tree."""
    # 'done' is kept for backward compatibility but is no longer consulted:
    # pkgutil.walk_packages already yields each module at most once.
    if done is None: done = {}
    for importer, modname, ispkg in pkgutil.walk_packages([dir], pkgpath):
        writedoc(modname)
    return
+
+class Helper:
+ keywords = {
+ 'and': 'BOOLEAN',
+ 'as': 'with',
+ 'assert': ('ref/assert', ''),
+ 'break': ('ref/break', 'while for'),
+ 'class': ('ref/class', 'CLASSES SPECIALMETHODS'),
+ 'continue': ('ref/continue', 'while for'),
+ 'def': ('ref/function', ''),
+ 'del': ('ref/del', 'BASICMETHODS'),
+ 'elif': 'if',
+ 'else': ('ref/if', 'while for'),
+ 'except': 'try',
+ 'exec': ('ref/exec', ''),
+ 'finally': 'try',
+ 'for': ('ref/for', 'break continue while'),
+ 'from': 'import',
+ 'global': ('ref/global', 'NAMESPACES'),
+ 'if': ('ref/if', 'TRUTHVALUE'),
+ 'import': ('ref/import', 'MODULES'),
+ 'in': ('ref/comparisons', 'SEQUENCEMETHODS2'),
+ 'is': 'COMPARISON',
+ 'lambda': ('ref/lambdas', 'FUNCTIONS'),
+ 'not': 'BOOLEAN',
+ 'or': 'BOOLEAN',
+ 'pass': ('ref/pass', ''),
+ 'print': ('ref/print', ''),
+ 'raise': ('ref/raise', 'EXCEPTIONS'),
+ 'return': ('ref/return', 'FUNCTIONS'),
+ 'try': ('ref/try', 'EXCEPTIONS'),
+ 'while': ('ref/while', 'break continue if TRUTHVALUE'),
+ 'with': ('ref/with', 'CONTEXTMANAGERS EXCEPTIONS yield'),
+ 'yield': ('ref/yield', ''),
+ }
+
+ topics = {
+ 'TYPES': ('ref/types', 'STRINGS UNICODE NUMBERS SEQUENCES MAPPINGS FUNCTIONS CLASSES MODULES FILES inspect'),
+ 'STRINGS': ('ref/strings', 'str UNICODE SEQUENCES STRINGMETHODS FORMATTING TYPES'),
+ 'STRINGMETHODS': ('lib/string-methods', 'STRINGS FORMATTING'),
+ 'FORMATTING': ('lib/typesseq-strings', 'OPERATORS'),
+ 'UNICODE': ('ref/strings', 'encodings unicode SEQUENCES STRINGMETHODS FORMATTING TYPES'),
+ 'NUMBERS': ('ref/numbers', 'INTEGER FLOAT COMPLEX TYPES'),
+ 'INTEGER': ('ref/integers', 'int range'),
+ 'FLOAT': ('ref/floating', 'float math'),
+ 'COMPLEX': ('ref/imaginary', 'complex cmath'),
+ 'SEQUENCES': ('lib/typesseq', 'STRINGMETHODS FORMATTING xrange LISTS'),
+ 'MAPPINGS': 'DICTIONARIES',
+ 'FUNCTIONS': ('lib/typesfunctions', 'def TYPES'),
+ 'METHODS': ('lib/typesmethods', 'class def CLASSES TYPES'),
+ 'CODEOBJECTS': ('lib/bltin-code-objects', 'compile FUNCTIONS TYPES'),
+ 'TYPEOBJECTS': ('lib/bltin-type-objects', 'types TYPES'),
+ 'FRAMEOBJECTS': 'TYPES',
+ 'TRACEBACKS': 'TYPES',
+ 'NONE': ('lib/bltin-null-object', ''),
+ 'ELLIPSIS': ('lib/bltin-ellipsis-object', 'SLICINGS'),
+ 'FILES': ('lib/bltin-file-objects', ''),
+ 'SPECIALATTRIBUTES': ('lib/specialattrs', ''),
+ 'CLASSES': ('ref/types', 'class SPECIALMETHODS PRIVATENAMES'),
+ 'MODULES': ('lib/typesmodules', 'import'),
+ 'PACKAGES': 'import',
+ 'EXPRESSIONS': ('ref/summary', 'lambda or and not in is BOOLEAN COMPARISON BITWISE SHIFTING BINARY FORMATTING POWER UNARY ATTRIBUTES SUBSCRIPTS SLICINGS CALLS TUPLES LISTS DICTIONARIES BACKQUOTES'),
+ 'OPERATORS': 'EXPRESSIONS',
+ 'PRECEDENCE': 'EXPRESSIONS',
+ 'OBJECTS': ('ref/objects', 'TYPES'),
+ 'SPECIALMETHODS': ('ref/specialnames', 'BASICMETHODS ATTRIBUTEMETHODS CALLABLEMETHODS SEQUENCEMETHODS1 MAPPINGMETHODS SEQUENCEMETHODS2 NUMBERMETHODS CLASSES'),
+ 'BASICMETHODS': ('ref/customization', 'cmp hash repr str SPECIALMETHODS'),
+ 'ATTRIBUTEMETHODS': ('ref/attribute-access', 'ATTRIBUTES SPECIALMETHODS'),
+ 'CALLABLEMETHODS': ('ref/callable-types', 'CALLS SPECIALMETHODS'),
+ 'SEQUENCEMETHODS1': ('ref/sequence-types', 'SEQUENCES SEQUENCEMETHODS2 SPECIALMETHODS'),
+ 'SEQUENCEMETHODS2': ('ref/sequence-methods', 'SEQUENCES SEQUENCEMETHODS1 SPECIALMETHODS'),
+ 'MAPPINGMETHODS': ('ref/sequence-types', 'MAPPINGS SPECIALMETHODS'),
+ 'NUMBERMETHODS': ('ref/numeric-types', 'NUMBERS AUGMENTEDASSIGNMENT SPECIALMETHODS'),
+ 'EXECUTION': ('ref/execmodel', 'NAMESPACES DYNAMICFEATURES EXCEPTIONS'),
+ 'NAMESPACES': ('ref/naming', 'global ASSIGNMENT DELETION DYNAMICFEATURES'),
+ 'DYNAMICFEATURES': ('ref/dynamic-features', ''),
+ 'SCOPING': 'NAMESPACES',
+ 'FRAMES': 'NAMESPACES',
+ 'EXCEPTIONS': ('ref/exceptions', 'try except finally raise'),
+ 'COERCIONS': ('ref/coercion-rules','CONVERSIONS'),
+ 'CONVERSIONS': ('ref/conversions', 'COERCIONS'),
+ 'IDENTIFIERS': ('ref/identifiers', 'keywords SPECIALIDENTIFIERS'),
+ 'SPECIALIDENTIFIERS': ('ref/id-classes', ''),
+ 'PRIVATENAMES': ('ref/atom-identifiers', ''),
+ 'LITERALS': ('ref/atom-literals', 'STRINGS BACKQUOTES NUMBERS TUPLELITERALS LISTLITERALS DICTIONARYLITERALS'),
+ 'TUPLES': 'SEQUENCES',
+ 'TUPLELITERALS': ('ref/exprlists', 'TUPLES LITERALS'),
+ 'LISTS': ('lib/typesseq-mutable', 'LISTLITERALS'),
+ 'LISTLITERALS': ('ref/lists', 'LISTS LITERALS'),
+ 'DICTIONARIES': ('lib/typesmapping', 'DICTIONARYLITERALS'),
+ 'DICTIONARYLITERALS': ('ref/dict', 'DICTIONARIES LITERALS'),
+ 'BACKQUOTES': ('ref/string-conversions', 'repr str STRINGS LITERALS'),
+ 'ATTRIBUTES': ('ref/attribute-references', 'getattr hasattr setattr ATTRIBUTEMETHODS'),
+ 'SUBSCRIPTS': ('ref/subscriptions', 'SEQUENCEMETHODS1'),
+ 'SLICINGS': ('ref/slicings', 'SEQUENCEMETHODS2'),
+ 'CALLS': ('ref/calls', 'EXPRESSIONS'),
+ 'POWER': ('ref/power', 'EXPRESSIONS'),
+ 'UNARY': ('ref/unary', 'EXPRESSIONS'),
+ 'BINARY': ('ref/binary', 'EXPRESSIONS'),
+ 'SHIFTING': ('ref/shifting', 'EXPRESSIONS'),
+ 'BITWISE': ('ref/bitwise', 'EXPRESSIONS'),
+ 'COMPARISON': ('ref/comparisons', 'EXPRESSIONS BASICMETHODS'),
+ 'BOOLEAN': ('ref/Booleans', 'EXPRESSIONS TRUTHVALUE'),
+ 'ASSERTION': 'assert',
+ 'ASSIGNMENT': ('ref/assignment', 'AUGMENTEDASSIGNMENT'),
+ 'AUGMENTEDASSIGNMENT': ('ref/augassign', 'NUMBERMETHODS'),
+ 'DELETION': 'del',
+ 'PRINTING': 'print',
+ 'RETURNING': 'return',
+ 'IMPORTING': 'import',
+ 'CONDITIONAL': 'if',
+ 'LOOPING': ('ref/compound', 'for while break continue'),
+ 'TRUTHVALUE': ('lib/truth', 'if while and or not BASICMETHODS'),
+ 'DEBUGGING': ('lib/module-pdb', 'pdb'),
+ 'CONTEXTMANAGERS': ('ref/context-managers', 'with'),
+ }
+
+ def __init__(self, input, output):
+ self.input = input
+ self.output = output
+ self.docdir = None
+ execdir = os.path.dirname(sys.executable)
+ homedir = os.environ.get('PYTHONHOME')
+ for dir in [os.environ.get('PYTHONDOCS'),
+ homedir and os.path.join(homedir, 'doc'),
+ os.path.join(execdir, 'doc'),
+ '/usr/doc/python-docs-' + split(sys.version)[0],
+ '/usr/doc/python-' + split(sys.version)[0],
+ '/usr/doc/python-docs-' + sys.version[:3],
+ '/usr/doc/python-' + sys.version[:3],
+ os.path.join(sys.prefix, 'Resources/English.lproj/Documentation')]:
+ if dir and os.path.isdir(os.path.join(dir, 'lib')):
+ self.docdir = dir
+
+ def __repr__(self):
+ if inspect.stack()[1][3] == '?':
+ self()
+ return ''
+ return '<pydoc.Helper instance>'
+
+ def __call__(self, request=None):
+ if request is not None:
+ self.help(request)
+ else:
+ self.intro()
+ self.interact()
+ self.output.write('''
+You are now leaving help and returning to the Python interpreter.
+If you want to ask for help on a particular object directly from the
+interpreter, you can type "help(object)". Executing "help('string')"
+has the same effect as typing a particular string at the help> prompt.
+''')
+
+ def interact(self):
+ self.output.write('\n')
+ while True:
+ try:
+ request = self.getline('help> ')
+ if not request: break
+ except (KeyboardInterrupt, EOFError):
+ break
+ request = strip(replace(request, '"', '', "'", ''))
+ if lower(request) in ('q', 'quit'): break
+ self.help(request)
+
+ def getline(self, prompt):
+ """Read one line, using raw_input when available."""
+ if self.input is sys.stdin:
+ return raw_input(prompt)
+ else:
+ self.output.write(prompt)
+ self.output.flush()
+ return self.input.readline()
+
+ def help(self, request):
+ if type(request) is type(''):
+ if request == 'help': self.intro()
+ elif request == 'keywords': self.listkeywords()
+ elif request == 'topics': self.listtopics()
+ elif request == 'modules': self.listmodules()
+ elif request[:8] == 'modules ':
+ self.listmodules(split(request)[1])
+ elif request in self.keywords: self.showtopic(request)
+ elif request in self.topics: self.showtopic(request)
+ elif request: doc(request, 'Help on %s:')
+ elif isinstance(request, Helper): self()
+ else: doc(request, 'Help on %s:')
+ self.output.write('\n')
+
+ def intro(self):
+ self.output.write('''
+Welcome to Python %s! This is the online help utility.
+
+If this is your first time using Python, you should definitely check out
+the tutorial on the Internet at http://www.python.org/doc/tut/.
+
+Enter the name of any module, keyword, or topic to get help on writing
+Python programs and using Python modules. To quit this help utility and
+return to the interpreter, just type "quit".
+
+To get a list of available modules, keywords, or topics, type "modules",
+"keywords", or "topics". Each module also comes with a one-line summary
+of what it does; to list the modules whose summaries contain a given word
+such as "spam", type "modules spam".
+''' % sys.version[:3])
+
+ def list(self, items, columns=4, width=80):
+ items = items[:]
+ items.sort()
+ colw = width / columns
+ rows = (len(items) + columns - 1) / columns
+ for row in range(rows):
+ for col in range(columns):
+ i = col * rows + row
+ if i < len(items):
+ self.output.write(items[i])
+ if col < columns - 1:
+ self.output.write(' ' + ' ' * (colw-1 - len(items[i])))
+ self.output.write('\n')
+
+ def listkeywords(self):
+ self.output.write('''
+Here is a list of the Python keywords. Enter any keyword to get more help.
+
+''')
+ self.list(self.keywords.keys())
+
+ def listtopics(self):
+ self.output.write('''
+Here is a list of available topics. Enter any topic name to get more help.
+
+''')
+ self.list(self.topics.keys())
+
+    def showtopic(self, topic):
+        if not self.docdir:
+            self.output.write('''
+Sorry, topic and keyword documentation is not available because the Python
+HTML documentation files could not be found.  If you have installed them,
+please set the environment variable PYTHONDOCS to indicate their location.
+
+On the Microsoft Windows operating system, the files can be built by
+running "hh -decompile . PythonNN.chm" in the C:\PythonNN\Doc> directory.
+''')
+            return
+        target = self.topics.get(topic, self.keywords.get(topic))  # alias string or (filename, xrefs) pair
+        if not target:
+            self.output.write('no documentation found for %s\n' % repr(topic))
+            return
+        if type(target) is type(''):
+            return self.showtopic(target)  # string target is an alias to another topic
+
+        filename, xrefs = target
+        filename = self.docdir + '/' + filename + '.html'
+        try:
+            file = open(filename)
+        except:
+            self.output.write('could not read docs from %s\n' % filename)
+            return
+
+        divpat = re.compile('<div[^>]*navigat.*?</div.*?>', re.I | re.S)   # navigation bars
+        addrpat = re.compile('<address.*?>.*?</address.*?>', re.I | re.S)  # page footers
+        document = re.sub(addrpat, '', re.sub(divpat, '', file.read()))
+        file.close()
+
+        import htmllib, formatter, StringIO
+        buffer = StringIO.StringIO()
+        parser = htmllib.HTMLParser(
+            formatter.AbstractFormatter(formatter.DumbWriter(buffer)))
+        parser.start_table = parser.do_p
+        parser.end_table = lambda parser=parser: parser.do_p({})
+        parser.start_tr = parser.do_br
+        parser.start_td = parser.start_th = lambda a, b=buffer: b.write('\t')
+        parser.feed(document)
+        buffer = replace(buffer.getvalue(), '\xa0', ' ', '\n', '\n ')  # nbsp->space; indent each line
+        pager(' ' + strip(buffer) + '\n')
+        if xrefs:
+            buffer = StringIO.StringIO()
+            formatter.DumbWriter(buffer).send_flowing_data(
+                'Related help topics: ' + join(split(xrefs), ', ') + '\n')
+            self.output.write('\n%s\n' % buffer.getvalue())
+
+    def listmodules(self, key=''):
+        if key:
+            self.output.write('''
+Here is a list of matching modules.  Enter any module name to get more help.
+
+''')
+            apropos(key)                # keyword search over module synopses
+        else:
+            self.output.write('''
+Please wait a moment while I gather a list of all available modules...
+
+''')
+            modules = {}
+            def callback(path, modname, desc, modules=modules):
+                if modname and modname[-9:] == '.__init__':
+                    modname = modname[:-9] + ' (package)'
+                if find(modname, '.') < 0:  # only list top-level modules/packages
+                    modules[modname] = 1
+            ModuleScanner().run(callback)
+            self.list(modules.keys())
+            self.output.write('''
+Enter any module name to get more help.  Or, type "modules spam" to search
+for modules whose descriptions contain the word "spam".
+''')
+
+help = Helper(sys.stdin, sys.stdout)    # module-level instance backing the interactive help()
+
+class Scanner:
+    """A generic tree iterator."""
+    def __init__(self, roots, children, descendp):
+        self.roots = roots[:]           # copy: roots are consumed by next()
+        self.state = []                 # stack of (node, remaining children)
+        self.children = children        # callable: node -> list of children
+        self.descendp = descendp        # callable: node -> whether to descend into it
+
+    def next(self):                     # depth-first; returns None when exhausted
+        if not self.state:
+            if not self.roots:
+                return None
+            root = self.roots.pop(0)
+            self.state = [(root, self.children(root))]
+        node, children = self.state[-1]
+        if not children:
+            self.state.pop()
+            return self.next()
+        child = children.pop(0)
+        if self.descendp(child):
+            self.state.append((child, self.children(child)))
+        return child
+
+
+class ModuleScanner:
+ """An interruptible scanner that searches module synopses."""
+
+ def run(self, callback, key=None, completer=None):
+ if key: key = lower(key)
+ self.quit = False
+ seen = {}
+
+ for modname in sys.builtin_module_names:
+ if modname != '__main__':
+ seen[modname] = 1
+ if key is None:
+ callback(None, modname, '')
+ else:
+ desc = split(__import__(modname).__doc__ or '', '\n')[0]
+ if find(lower(modname + ' - ' + desc), key) >= 0:
+ callback(None, modname, desc)
+
+ for importer, modname, ispkg in pkgutil.walk_packages():
+ if self.quit:
+ break
+ if key is None:
+ callback(None, modname, '')
+ else:
+ loader = importer.find_module(modname)
+ if hasattr(loader,'get_source'):
+ import StringIO
+ desc = source_synopsis(
+ StringIO.StringIO(loader.get_source(modname))
+ ) or ''
+ if hasattr(loader,'get_filename'):
+ path = loader.get_filename(modname)
+ else:
+ path = None
+ else:
+ module = loader.load_module(modname)
+ desc = (module.__doc__ or '').splitlines()[0]
+ path = getattr(module,'__file__',None)
+ if find(lower(modname + ' - ' + desc), key) >= 0:
+ callback(path, modname, desc)
+
+ if completer:
+ completer()
+
+def apropos(key):
+    """Print all the one-line module summaries that contain a substring."""
+    def callback(path, modname, desc):
+        if modname[-9:] == '.__init__':
+            modname = modname[:-9] + ' (package)'
+        print modname, desc and '- ' + desc  # omit the dash when there is no description
+    try: import warnings
+    except ImportError: pass
+    else: warnings.filterwarnings('ignore') # ignore problems during import
+    ModuleScanner().run(callback, key)
+
+# --------------------------------------------------- web browser interface
+
+def serve(port, callback=None, completer=None):  # HTTP server for pydoc HTML; callback(server) once up, completer() on exit
+    import BaseHTTPServer, mimetools, select
+
+    # Patch up mimetools.Message so it doesn't break if rfc822 is reloaded.
+    class Message(mimetools.Message):
+        def __init__(self, fp, seekable=1):
+            Message = self.__class__
+            Message.__bases__[0].__bases__[0].__init__(self, fp, seekable)
+            self.encodingheader = self.getheader('content-transfer-encoding')
+            self.typeheader = self.getheader('content-type')
+            self.parsetype()
+            self.parseplist()
+
+    class DocHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+        def send_document(self, title, contents):
+            try:
+                self.send_response(200)
+                self.send_header('Content-Type', 'text/html')
+                self.end_headers()
+                self.wfile.write(html.page(title, contents))
+            except IOError: pass        # client hung up; nothing to do
+
+        def do_GET(self):
+            path = self.path
+            if path[-5:] == '.html': path = path[:-5]
+            if path[:1] == '/': path = path[1:]
+            if path and path != '.':    # a specific object was requested
+                try:
+                    obj = locate(path, forceload=1)
+                except ErrorDuringImport, value:
+                    self.send_document(path, html.escape(str(value)))
+                    return
+                if obj:
+                    self.send_document(describe(obj), html.document(obj, path))
+                else:
+                    self.send_document(path,
+'no Python documentation found for %s' % repr(path))
+            else:                       # no path: serve the module index page
+                heading = html.heading(
+'<big><big><strong>Python: Index of Modules</strong></big></big>',
+'#ffffff', '#7799ee')
+                def bltinlink(name):
+                    return '<a href="%s.html">%s</a>' % (name, name)
+                names = filter(lambda x: x != '__main__',
+                               sys.builtin_module_names)
+                contents = html.multicolumn(names, bltinlink)
+                indices = ['<p>' + html.bigsection(
+                    'Built-in Modules', '#ffffff', '#ee77aa', contents)]
+
+                seen = {}
+                for dir in sys.path:
+                    indices.append(html.index(dir, seen))
+                contents = heading + join(indices) + '''<p align=right>
+<font color="#909090" face="helvetica, arial"><strong>
+pydoc</strong> by Ka-Ping Yee <ping at lfw.org></font>'''
+                self.send_document('Index of Modules', contents)
+
+        def log_message(self, *args): pass  # suppress per-request logging
+
+    class DocServer(BaseHTTPServer.HTTPServer):
+        def __init__(self, port, callback):
+            host = (sys.platform == 'mac') and '127.0.0.1' or 'localhost'
+            self.address = ('', port)
+            self.url = 'http://%s:%d/' % (host, port)
+            self.callback = callback
+            self.base.__init__(self, self.address, self.handler)
+
+        def serve_until_quit(self):
+            import select
+            self.quit = False           # external code sets this True to stop serving
+            while not self.quit:
+                rd, wr, ex = select.select([self.socket.fileno()], [], [], 1)  # 1s timeout so quit flag is polled
+                if rd: self.handle_request()
+
+        def server_activate(self):
+            self.base.server_activate(self)
+            if self.callback: self.callback(self)
+
+    DocServer.base = BaseHTTPServer.HTTPServer
+    DocServer.handler = DocHandler
+    DocHandler.MessageClass = Message
+    try:
+        try:
+            DocServer(port, callback).serve_until_quit()
+        except (KeyboardInterrupt, select.error):
+            pass
+    finally:
+        if completer: completer()
+
+# ----------------------------------------------------- graphical interface
+
+def gui():
+    """Graphical interface (starts web server and pops up a control window)."""
+    class GUI:
+        def __init__(self, window, port=7464):
+            self.window = window
+            self.server = None          # set by ready() once serve() is up
+            self.scanner = None         # ModuleScanner while a search is running
+
+            import Tkinter
+            self.server_frm = Tkinter.Frame(window)
+            self.title_lbl = Tkinter.Label(self.server_frm,
+                text='Starting server...\n ')
+            self.open_btn = Tkinter.Button(self.server_frm,
+                text='open browser', command=self.open, state='disabled')
+            self.quit_btn = Tkinter.Button(self.server_frm,
+                text='quit serving', command=self.quit, state='disabled')
+
+            self.search_frm = Tkinter.Frame(window)
+            self.search_lbl = Tkinter.Label(self.search_frm, text='Search for')
+            self.search_ent = Tkinter.Entry(self.search_frm)
+            self.search_ent.bind('<Return>', self.search)
+            self.stop_btn = Tkinter.Button(self.search_frm,
+                text='stop', pady=0, command=self.stop, state='disabled')
+            if sys.platform == 'win32':
+                # Trying to hide and show this button crashes under Windows.
+                self.stop_btn.pack(side='right')
+
+            self.window.title('pydoc')
+            self.window.protocol('WM_DELETE_WINDOW', self.quit)
+            self.title_lbl.pack(side='top', fill='x')
+            self.open_btn.pack(side='left', fill='x', expand=1)
+            self.quit_btn.pack(side='right', fill='x', expand=1)
+            self.server_frm.pack(side='top', fill='x')
+
+            self.search_lbl.pack(side='left')
+            self.search_ent.pack(side='right', fill='x', expand=1)
+            self.search_frm.pack(side='top', fill='x')
+            self.search_ent.focus_set()
+
+            font = ('helvetica', sys.platform == 'win32' and 8 or 10)
+            self.result_lst = Tkinter.Listbox(window, font=font, height=6)
+            self.result_lst.bind('<Button-1>', self.select)
+            self.result_lst.bind('<Double-Button-1>', self.goto)
+            self.result_scr = Tkinter.Scrollbar(window,
+                orient='vertical', command=self.result_lst.yview)
+            self.result_lst.config(yscrollcommand=self.result_scr.set)
+
+            self.result_frm = Tkinter.Frame(window)
+            self.goto_btn = Tkinter.Button(self.result_frm,
+                text='go to selected', command=self.goto)
+            self.hide_btn = Tkinter.Button(self.result_frm,
+                text='hide results', command=self.hide)
+            self.goto_btn.pack(side='left', fill='x', expand=1)
+            self.hide_btn.pack(side='right', fill='x', expand=1)
+
+            self.window.update()
+            self.minwidth = self.window.winfo_width()
+            self.minheight = self.window.winfo_height()
+            self.bigminheight = (self.server_frm.winfo_reqheight() +
+                                 self.search_frm.winfo_reqheight() +
+                                 self.result_lst.winfo_reqheight() +
+                                 self.result_frm.winfo_reqheight())
+            self.bigwidth, self.bigheight = self.minwidth, self.bigminheight
+            self.expanded = 0           # whether the result list is shown
+            self.window.wm_geometry('%dx%d' % (self.minwidth, self.minheight))
+            self.window.wm_minsize(self.minwidth, self.minheight)
+            self.window.tk.willdispatch()  # allow Tk calls from the server thread
+
+            import threading
+            threading.Thread(
+                target=serve, args=(port, self.ready, self.quit)).start()
+
+        def ready(self, server):        # invoked by serve() once the HTTP server is up
+            self.server = server
+            self.title_lbl.config(
+                text='Python documentation server at\n' + server.url)
+            self.open_btn.config(state='normal')
+            self.quit_btn.config(state='normal')
+
+        def open(self, event=None, url=None):
+            url = url or self.server.url
+            try:
+                import webbrowser
+                webbrowser.open(url)
+            except ImportError: # pre-webbrowser.py compatibility
+                if sys.platform == 'win32':
+                    os.system('start "%s"' % url)
+                elif sys.platform == 'mac':
+                    try: import ic
+                    except ImportError: pass
+                    else: ic.launchurl(url)
+                else:
+                    rc = os.system('netscape -remote "openURL(%s)" &' % url)
+                    if rc: os.system('netscape "%s" &' % url)
+
+        def quit(self, event=None):
+            if self.server:
+                self.server.quit = 1    # serve_until_quit polls this flag
+            self.window.quit()
+
+        def search(self, event=None):
+            key = self.search_ent.get()
+            self.stop_btn.pack(side='right')
+            self.stop_btn.config(state='normal')
+            self.search_lbl.config(text='Searching for "%s"...' % key)
+            self.search_ent.forget()
+            self.search_lbl.pack(side='left')
+            self.result_lst.delete(0, 'end')
+            self.goto_btn.config(state='disabled')
+            self.expand()
+
+            import threading
+            if self.scanner:
+                self.scanner.quit = 1   # interrupt any scan still running
+            self.scanner = ModuleScanner()
+            threading.Thread(target=self.scanner.run,
+                             args=(self.update, key, self.done)).start()
+
+        def update(self, path, modname, desc):  # scanner callback: append one result row
+            if modname[-9:] == '.__init__':
+                modname = modname[:-9] + ' (package)'
+            self.result_lst.insert('end',
+                modname + ' - ' + (desc or '(no description)'))
+
+        def stop(self, event=None):
+            if self.scanner:
+                self.scanner.quit = 1
+                self.scanner = None
+
+        def done(self):                 # scanner callback: restore the search widgets
+            self.scanner = None
+            self.search_lbl.config(text='Search for')
+            self.search_lbl.pack(side='left')
+            self.search_ent.pack(side='right', fill='x', expand=1)
+            if sys.platform != 'win32': self.stop_btn.forget()
+            self.stop_btn.config(state='disabled')
+
+        def select(self, event=None):
+            self.goto_btn.config(state='normal')
+
+        def goto(self, event=None):
+            selection = self.result_lst.curselection()
+            if selection:
+                modname = split(self.result_lst.get(selection[0]))[0]
+                self.open(url=self.server.url + modname + '.html')
+
+        def collapse(self):             # hide the result list, shrink the window
+            if not self.expanded: return
+            self.result_frm.forget()
+            self.result_scr.forget()
+            self.result_lst.forget()
+            self.bigwidth = self.window.winfo_width()
+            self.bigheight = self.window.winfo_height()
+            self.window.wm_geometry('%dx%d' % (self.minwidth, self.minheight))
+            self.window.wm_minsize(self.minwidth, self.minheight)
+            self.expanded = 0
+
+        def expand(self):               # show the result list, grow the window
+            if self.expanded: return
+            self.result_frm.pack(side='bottom', fill='x')
+            self.result_scr.pack(side='right', fill='y')
+            self.result_lst.pack(side='top', fill='both', expand=1)
+            self.window.wm_geometry('%dx%d' % (self.bigwidth, self.bigheight))
+            self.window.wm_minsize(self.minwidth, self.bigminheight)
+            self.expanded = 1
+
+        def hide(self, event=None):
+            self.stop()
+            self.collapse()
+
+    import Tkinter
+    try:
+        root = Tkinter.Tk()
+        # Tk will crash if pythonw.exe has an XP .manifest
+        # file and the root is not destroyed explicitly.
+        # If the problem is ever fixed in Tk, the explicit
+        # destroy can go.
+        try:
+            gui = GUI(root)
+            root.mainloop()
+        finally:
+            root.destroy()
+    except KeyboardInterrupt:
+        pass
+
+# -------------------------------------------------- command-line interface
+
+def ispath(x):
+    return isinstance(x, str) and find(x, os.sep) >= 0  # true when x looks like a filesystem path
+
+def cli():
+    """Command-line interface (looks at sys.argv to decide what to do)."""
+    import getopt
+    class BadUsage: pass                # sentinel exception: print usage and exit
+
+    # Scripts don't get the current directory in their path by default.
+    scriptdir = os.path.dirname(sys.argv[0])
+    if scriptdir in sys.path:
+        sys.path.remove(scriptdir)
+    sys.path.insert(0, '.')
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], 'gk:p:w')
+        writing = 0
+
+        for opt, val in opts:
+            if opt == '-g':             # graphical interface
+                gui()
+                return
+            if opt == '-k':             # keyword search
+                apropos(val)
+                return
+            if opt == '-p':             # HTTP server on the given port
+                try:
+                    port = int(val)
+                except ValueError:
+                    raise BadUsage
+                def ready(server):
+                    print 'pydoc server ready at %s' % server.url
+                def stopped():
+                    print 'pydoc server stopped'
+                serve(port, ready, stopped)
+                return
+            if opt == '-w':             # write HTML instead of paging text
+                writing = 1
+
+        if not args: raise BadUsage
+        for arg in args:
+            if ispath(arg) and not os.path.exists(arg):
+                print 'file %r does not exist' % arg
+                break
+            try:
+                if ispath(arg) and os.path.isfile(arg):
+                    arg = importfile(arg)  # document a source file given by path
+                if writing:
+                    if ispath(arg) and os.path.isdir(arg):
+                        writedocs(arg)
+                    else:
+                        writedoc(arg)
+                else:
+                    help.help(arg)
+            except ErrorDuringImport, value:
+                print value
+
+    except (getopt.error, BadUsage):
+        cmd = os.path.basename(sys.argv[0])
+        print """pydoc - the Python documentation tool
+
+%s <name> ...
+    Show text documentation on something.  <name> may be the name of a
+    Python keyword, topic, function, module, or package, or a dotted
+    reference to a class or function within a module or module in a
+    package.  If <name> contains a '%s', it is used as the path to a
+    Python source file to document. If name is 'keywords', 'topics',
+    or 'modules', a listing of these things is displayed.
+
+%s -k <keyword>
+    Search for a keyword in the synopsis lines of all available modules.
+
+%s -p <port>
+    Start an HTTP server on the given port on the local machine.
+
+%s -g
+    Pop up a graphical interface for finding and serving documentation.
+
+%s -w <name> ...
+    Write out the HTML documentation for a module to a file in the current
+    directory.  If <name> contains a '%s', it is treated as a filename; if
+    it names a directory, documentation is written for all the contents.
+""" % (cmd, os.sep, cmd, cmd, cmd, cmd, os.sep)
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/site.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/site.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,437 @@
+"""Append module search paths for third-party packages to sys.path.
+
+****************************************************************
+* This module is automatically imported during initialization. *
+****************************************************************
+
+In earlier versions of Python (up to 1.5a3), scripts or modules that
+needed to use site-specific modules would place ``import site''
+somewhere near the top of their code. Because of the automatic
+import, this is no longer necessary (but code that does it still
+works).
+
+This will append site-specific paths to the module search path. On
+Unix (including Mac OSX), it starts with sys.prefix and
+sys.exec_prefix (if different) and appends
+lib/python<version>/site-packages as well as lib/site-python.
+On other platforms (such as Windows), it tries each of the
+prefixes directly, as well as with lib/site-packages appended. The
+resulting directories, if they exist, are appended to sys.path, and
+also inspected for path configuration files.
+
+A path configuration file is a file whose name has the form
+<package>.pth; its contents are additional directories (one per line)
+to be added to sys.path. Non-existing directories (or
+non-directories) are never added to sys.path; no directory is added to
+sys.path more than once. Blank lines and lines beginning with
+'#' are skipped. Lines starting with 'import' are executed.
+
+For example, suppose sys.prefix and sys.exec_prefix are set to
+/usr/local and there is a directory /usr/local/lib/python2.5/site-packages
+with three subdirectories, foo, bar and spam, and two path
+configuration files, foo.pth and bar.pth. Assume foo.pth contains the
+following:
+
+ # foo package configuration
+ foo
+ bar
+ bletch
+
+and bar.pth contains:
+
+ # bar package configuration
+ bar
+
+Then the following directories are added to sys.path, in this order:
+
+ /usr/local/lib/python2.5/site-packages/bar
+ /usr/local/lib/python2.5/site-packages/foo
+
+Note that bletch is omitted because it doesn't exist; bar precedes foo
+because bar.pth comes alphabetically before foo.pth; and spam is
+omitted because it is not mentioned in either path configuration file.
+
+After these path manipulations, an attempt is made to import a module
+named sitecustomize, which can perform arbitrary additional
+site-specific customizations. If this import fails with an
+ImportError exception, it is silently ignored.
+
+"""
+
+import sys
+import os
+import __builtin__
+
+
+def makepath(*paths):
+ dir = os.path.abspath(os.path.join(*paths))
+ return dir, os.path.normcase(dir)
+
+def abs__file__():
+    """Set all modules' __file__ attribute to an absolute path"""
+    for m in sys.modules.values():
+        if hasattr(m, '__loader__'):
+            continue   # don't mess with a PEP 302-supplied __file__
+        try:
+            prev = m.__file__
+            new = os.path.abspath(m.__file__)
+            if prev != new:
+                m.__file__ = new
+        except AttributeError:
+            continue                    # some modules (e.g. builtins) have no __file__
+
+def removeduppaths():
+    """ Remove duplicate entries from sys.path along with making them
+    absolute"""
+    # This ensures that the initial path provided by the interpreter contains
+    # only absolute pathnames, even if we're running from the build directory.
+    L = []
+    known_paths = set()
+    for dir in sys.path:
+        # Filter out duplicate paths (on case-insensitive file systems also
+        # if they only differ in case); turn relative paths into absolute
+        # paths.
+        dir, dircase = makepath(dir)
+        if not dircase in known_paths:
+            L.append(dir)
+            known_paths.add(dircase)
+    sys.path[:] = L                     # slice-assign so existing references see the change
+    return known_paths
+
+# XXX This should not be part of site.py, since it is needed even when
+# using the -S option for Python. See http://www.python.org/sf/586680
+def addbuilddir():
+    """Append ./build/lib.<platform> in case we're running in the build dir
+    (especially for Guido :-)"""
+    from distutils.util import get_platform
+    s = "build/lib.%s-%.3s" % (get_platform(), sys.version)  # e.g. build/lib.linux-i686-2.5
+    s = os.path.join(os.path.dirname(sys.path[-1]), s)
+    sys.path.append(s)                  # appended unconditionally; existence is not checked
+
+def _init_pathinfo():
+    """Return a set containing all existing directory entries from sys.path"""
+    d = set()
+    for dir in sys.path:
+        try:
+            if os.path.isdir(dir):
+                dir, dircase = makepath(dir)
+                d.add(dircase)
+        except TypeError:               # sys.path may contain non-string entries
+            continue
+    return d
+
+def addpackage(sitedir, name, known_paths):
+ """Add a new path to known_paths by combining sitedir and 'name' or execute
+ sitedir if it starts with 'import'"""
+ if known_paths is None:
+ _init_pathinfo()
+ reset = 1
+ else:
+ reset = 0
+ fullname = os.path.join(sitedir, name)
+ try:
+ f = open(fullname, "rU")
+ except IOError:
+ return
+ try:
+ for line in f:
+ if line.startswith("#"):
+ continue
+ if line.startswith("import"):
+ exec line
+ continue
+ line = line.rstrip()
+ dir, dircase = makepath(sitedir, line)
+ if not dircase in known_paths and os.path.exists(dir):
+ sys.path.append(dir)
+ known_paths.add(dircase)
+ finally:
+ f.close()
+ if reset:
+ known_paths = None
+ return known_paths
+
+def addsitedir(sitedir, known_paths=None):
+    """Add 'sitedir' argument to sys.path if missing and handle .pth files in
+    'sitedir'"""
+    if known_paths is None:
+        known_paths = _init_pathinfo()
+        reset = 1
+    else:
+        reset = 0
+    sitedir, sitedircase = makepath(sitedir)
+    if not sitedircase in known_paths:
+        sys.path.append(sitedir)        # Add path component
+    try:
+        names = os.listdir(sitedir)
+    except os.error:                    # unreadable or missing directory: nothing to add
+        return
+    names.sort()                        # deterministic .pth processing order
+    for name in names:
+        if name.endswith(os.extsep + "pth"):
+            addpackage(sitedir, name, known_paths)
+    if reset:
+        known_paths = None
+    return known_paths
+
+def addsitepackages(known_paths):
+    """Add site-packages (and possibly site-python) to sys.path"""
+    prefixes = [sys.prefix]
+    if sys.exec_prefix != sys.prefix:
+        prefixes.append(sys.exec_prefix)
+    for prefix in prefixes:
+        if prefix:
+            if sys.platform in ('os2emx', 'riscos'):
+                sitedirs = [os.path.join(prefix, "Lib", "site-packages")]
+            elif os.sep == '/':         # Unix-style layout
+                sitedirs = [os.path.join(prefix,
+                                         "lib",
+                                         "python" + sys.version[:3],
+                                         "site-packages"),
+                            os.path.join(prefix, "lib", "site-python")]
+            else:                       # Windows and other flat layouts
+                sitedirs = [prefix, os.path.join(prefix, "lib", "site-packages")]
+            if sys.platform == 'darwin':
+                # for framework builds *only* we add the standard Apple
+                # locations. Currently only per-user, but /Library and
+                # /Network/Library could be added too
+                if 'Python.framework' in prefix:
+                    home = os.environ.get('HOME')
+                    if home:
+                        sitedirs.append(
+                            os.path.join(home,
+                                         'Library',
+                                         'Python',
+                                         sys.version[:3],
+                                         'site-packages'))
+            for sitedir in sitedirs:
+                if os.path.isdir(sitedir):  # only existing directories are added
+                    addsitedir(sitedir, known_paths)
+    return None
+
+
+def setBEGINLIBPATH():
+    """The OS/2 EMX port has optional extension modules that do double duty
+    as DLLs (and must use the .DLL file extension) for other extensions.
+    The library search path needs to be amended so these will be found
+    during module import.  Use BEGINLIBPATH so that these are at the start
+    of the library search path.
+
+    """
+    dllpath = os.path.join(sys.prefix, "Lib", "lib-dynload")
+    libpath = os.environ['BEGINLIBPATH'].split(';')
+    if libpath[-1]:                     # non-empty tail: append a new entry
+        libpath.append(dllpath)
+    else:                               # trailing ';' left an empty slot: fill it
+        libpath[-1] = dllpath
+    os.environ['BEGINLIBPATH'] = ';'.join(libpath)
+
+
+def setquit():
+    """Define new built-ins 'quit' and 'exit'.
+    Their repr displays a hint on how to exit; calling them exits.
+
+    """
+    if os.sep == ':':                   # classic Mac OS
+        eof = 'Cmd-Q'
+    elif os.sep == '\\':                # Windows
+        eof = 'Ctrl-Z plus Return'
+    else:                               # Unix
+        eof = 'Ctrl-D (i.e. EOF)'
+
+    class Quitter(object):
+        def __init__(self, name):
+            self.name = name
+        def __repr__(self):
+            return 'Use %s() or %s to exit' % (self.name, eof)
+        def __call__(self, code=None):
+            # Shells like IDLE catch the SystemExit, but listen when their
+            # stdin wrapper is closed.
+            try:
+                sys.stdin.close()
+            except:
+                pass
+            raise SystemExit(code)
+    __builtin__.quit = Quitter('quit')
+    __builtin__.exit = Quitter('exit')
+
+
+class _Printer(object):
+    """interactive prompt objects for printing the license text, a list of
+    contributors and the copyright notice."""
+
+    MAXLINES = 23                       # screenful shown per prompt in __call__
+
+    def __init__(self, name, data, files=(), dirs=()):
+        self.__name = name
+        self.__data = data              # fallback text when no file can be read
+        self.__files = files
+        self.__dirs = dirs
+        self.__lines = None             # lazily filled by __setup
+
+    def __setup(self):
+        if self.__lines:
+            return
+        data = None
+        for dir in self.__dirs:         # first readable file in dirs x files wins
+            for filename in self.__files:
+                filename = os.path.join(dir, filename)
+                try:
+                    fp = file(filename, "rU")
+                    data = fp.read()
+                    fp.close()
+                    break
+                except IOError:
+                    pass
+            if data:
+                break
+        if not data:
+            data = self.__data
+        self.__lines = data.split('\n')
+        self.__linecnt = len(self.__lines)
+
+    def __repr__(self):
+        self.__setup()
+        if len(self.__lines) <= self.MAXLINES:  # short text: show it directly
+            return "\n".join(self.__lines)
+        else:
+            return "Type %s() to see the full %s text" % ((self.__name,)*2)
+
+    def __call__(self):
+        self.__setup()
+        prompt = 'Hit Return for more, or q (and Return) to quit: '
+        lineno = 0
+        while 1:
+            try:
+                for i in range(lineno, lineno + self.MAXLINES):
+                    print self.__lines[i]
+            except IndexError:          # ran off the end of the text
+                break
+            else:
+                lineno += self.MAXLINES
+                key = None
+                while key is None:
+                    key = raw_input(prompt)
+                    if key not in ('', 'q'):
+                        key = None      # re-prompt on any other input
+                if key == 'q':
+                    break
+
+##def setcopyright():
+## """Set 'copyright' and 'credits' in __builtin__"""
+## __builtin__.copyright = _Printer("copyright", sys.copyright)
+## if sys.platform[:4] == 'java':
+## __builtin__.credits = _Printer(
+## "credits",
+## "Jython is maintained by the Jython developers (www.jython.org).")
+## else:
+## __builtin__.credits = _Printer("credits", """\
+## Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands
+## for supporting Python development. See www.python.org for more information.""")
+## here = os.path.dirname(os.__file__)
+## __builtin__.license = _Printer(
+## "license", "See http://www.python.org/%.3s/license.html" % sys.version,
+## ["LICENSE.txt", "LICENSE"],
+## [os.path.join(here, os.pardir), here, os.curdir])
+
+def setcopyright():                     # despite the name, installs only 'credits' and 'license'
+    # XXX this is the PyPy-specific version. Should be unified with the above.
+    __builtin__.credits = _Printer(
+        "credits",
+        "PyPy is maintained by the PyPy developers: http://codespeak.net/pypy")
+    __builtin__.license = _Printer(
+        "license",
+        "See http://codespeak.net/svn/pypy/dist/LICENSE")
+
+
+
+class _Helper(object):
+    """Define the built-in 'help'.
+    This is a wrapper around pydoc.help (with a twist).
+
+    """
+
+    def __repr__(self):
+        return "Type help() for interactive help, " \
+               "or help(object) for help about object."
+    def __call__(self, *args, **kwds):
+        import pydoc                    # deferred: pydoc is expensive to import at startup
+        return pydoc.help(*args, **kwds)
+
+def sethelper():
+    __builtin__.help = _Helper()        # install the interactive 'help' built-in
+
+def aliasmbcs():
+    """On Windows, some default encodings are not provided by Python,
+    while they are always available as "mbcs" in each locale. Make
+    them usable by aliasing to "mbcs" in such a case."""
+    if sys.platform == 'win32':
+        import locale, codecs
+        enc = locale.getdefaultlocale()[1]
+        if enc is not None and enc.startswith('cp'):            # "cp***" ?
+            try:
+                codecs.lookup(enc)
+            except LookupError:         # codec missing: route it through mbcs
+                import encodings
+                encodings._cache[enc] = encodings._unknown
+                encodings.aliases.aliases[enc] = 'mbcs'
+
+def setencoding():
+    """Set the string encoding used by the Unicode implementation.  The
+    default is 'ascii', but if you're willing to experiment, you can
+    change this."""
+    encoding = "ascii" # Default value set by _PyUnicode_Init()
+    if 0:                               # dead by default; flip to 1 to experiment
+        # Enable to support locale aware default string encodings.
+        import locale
+        loc = locale.getdefaultlocale()
+        if loc[1]:
+            encoding = loc[1]
+    if 0:                               # dead by default; flip to 1 to experiment
+        # Enable to switch off string to Unicode coercion and implicit
+        # Unicode to string conversion.
+        encoding = "undefined"
+    if encoding != "ascii":
+        # On Non-Unicode builds this will raise an AttributeError...
+        sys.setdefaultencoding(encoding) # Needs Python Unicode build !
+
+
+def execsitecustomize():
+    """Run custom site specific code, if available."""
+    try:
+        import sitecustomize            # importing it runs the customization
+    except ImportError:
+        pass                            # absence of sitecustomize is not an error
+
+
+def main():
+    abs__file__()                       # absolutize __file__ of already-imported modules
+    paths_in_sys = removeduppaths()
+    if (os.name == "posix" and sys.path and
+        os.path.basename(sys.path[-1]) == "Modules"):
+        addbuilddir()                   # we're running from the build directory
+    paths_in_sys = addsitepackages(paths_in_sys)
+    if sys.platform == 'os2emx':
+        setBEGINLIBPATH()
+    setquit()
+    setcopyright()
+    sethelper()
+    aliasmbcs()
+    setencoding()
+    execsitecustomize()
+    # Remove sys.setdefaultencoding() so that users cannot change the
+    # encoding after initialization. The test for presence is needed when
+    # this module is run as a script, because this code is executed twice.
+    if hasattr(sys, "setdefaultencoding"):
+        del sys.setdefaultencoding
+
+main()                                  # executed at import time: site is imported automatically at startup
+
+def _test():
+    print "sys.path = ["
+    for dir in sys.path:
+        print "    %r," % (dir,)
+    print "]"
+
+if __name__ == '__main__':
+    _test()                             # running site.py directly just dumps the resulting sys.path
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/socket.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/socket.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,428 @@
# Wrapper module for _socket, providing some additional facilities
# implemented in Python.

"""\
This module provides socket operations and some related functions.
On Unix, it supports IP (Internet Protocol) and Unix domain sockets.
On other systems, it only supports IP. Functions specific for a
socket are available as methods of the socket object.

Functions:

socket() -- create a new socket object
socketpair() -- create a pair of new socket objects [*]
fromfd() -- create a socket object from an open file descriptor [*]
gethostname() -- return the current hostname
gethostbyname() -- map a hostname to its IP number
gethostbyaddr() -- map an IP number or hostname to DNS info
getservbyname() -- map a service name and a protocol name to a port number
getprotobyname() -- map a protocol name (e.g. 'tcp') to a number
ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order
htons(), htonl() -- convert 16, 32 bit int from host to network byte order
inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format
inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89)
ssl() -- secure socket layer support (only available if configured)
socket.getdefaulttimeout() -- get the default timeout value
socket.setdefaulttimeout() -- set the default timeout value

 [*] not available on all platforms!

Special objects:

SocketType -- type object for socket objects
error -- exception raised for I/O errors
has_ipv6 -- boolean value indicating if IPv6 is supported

Integer constants:

AF_INET, AF_UNIX -- socket domains (first argument to socket() call)
SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument)

Many other constants may be defined; these may be used in calls to
the setsockopt() and getsockopt() methods.
"""

import _socket
from _socket import *

# SSL support is optional: _ssl may not have been built.
_have_ssl = False
try:
    import _ssl
    from _ssl import *
    _have_ssl = True
except ImportError:
    pass

import os, sys

try:
    from errno import EBADF
except ImportError:
    EBADF = 9  # POSIX value, used when the errno module is unavailable

__all__ = ["getfqdn"]
__all__.extend(os._get_exports_list(_socket))
if _have_ssl:
    __all__.extend(os._get_exports_list(_ssl))

# Keep references to the C-level implementations before we shadow them
# with the Python-level wrappers defined below.
_realsocket = socket
if _have_ssl:
    _realssl = ssl
    def ssl(sock, keyfile=None, certfile=None):
        # Unwrap the Python-level socket wrapper before handing the
        # underlying _socket object to the C-level ssl().
        if hasattr(sock, "_sock"):
            sock = sock._sock
        return _realssl(sock, keyfile, certfile)

# WSA error codes, used to give friendlier messages on Windows.
if sys.platform.lower().startswith("win"):
    errorTab = {}
    errorTab[10004] = "The operation was interrupted."
    errorTab[10009] = "A bad file handle was passed."
    errorTab[10013] = "Permission denied."
    errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT
    errorTab[10022] = "An invalid operation was attempted."
    errorTab[10035] = "The socket operation would block"
    errorTab[10036] = "A blocking operation is already in progress."
    errorTab[10048] = "The network address is in use."
    errorTab[10054] = "The connection has been reset."
    errorTab[10058] = "The network has been shut down."
    errorTab[10060] = "The operation timed out."
    errorTab[10061] = "Connection refused."
    errorTab[10063] = "The name is too long."
    errorTab[10064] = "The host is down."
    errorTab[10065] = "The host is unreachable."
    __all__.append("errorTab")
+
+
+
def getfqdn(name=''):
    """Get fully qualified domain name from name.

    An empty argument is interpreted as meaning the local host.

    First the hostname returned by gethostbyaddr() is checked, then
    possibly existing aliases. In case no FQDN is available, hostname
    from gethostname() is returned.
    """
    name = name.strip()
    if not name or name == '0.0.0.0':
        name = gethostname()
    try:
        hostname, aliases, ipaddrs = gethostbyaddr(name)
    except error:
        # DNS lookup failed; fall through and return the input unchanged.
        pass
    else:
        aliases.insert(0, hostname)
        # A name containing a dot is taken to be fully qualified.
        for name in aliases:
            if '.' in name:
                break
        else:
            name = hostname
    return name
+
+
# Method names forwarded verbatim from _socketobject to the real socket
# (via the generated pass-through methods at the bottom of the class).
_socketmethods = (
    'bind', 'connect', 'connect_ex', 'fileno', 'listen',
    'getpeername', 'getsockname', 'getsockopt', 'setsockopt',
    'sendall', 'setblocking',
    'settimeout', 'gettimeout', 'shutdown')

if sys.platform == "riscos":
    _socketmethods = _socketmethods + ('sleeptaskw',)

# All the method names that must be delegated to either the real socket
# object or the _closedsocket object.
_delegate_methods = ("recv", "recvfrom", "recv_into", "recvfrom_into",
                     "send", "sendto")

class _closedsocket(object):
    # Stand-in installed by _socketobject.close(); every socket
    # operation on it raises EBADF.
    __slots__ = []
    def _dummy(*args):
        raise error(EBADF, 'Bad file descriptor')
    # NOTE(review): _drop/_reuse appear to mirror the handle-management
    # hooks on PyPy's _socket objects; on a closed socket they are no-ops.
    def _drop(self):
        pass
    def _reuse(self):
        pass
    # All _delegate_methods must also be initialized here.
    send = recv = recv_into = sendto = recvfrom = recvfrom_into = _dummy
    __getattr__ = _dummy
+
class _socketobject(object):
    # Python-level wrapper around the C-level _socket.socket, adding
    # makefile()/dup() semantics and safe behavior after close().

    __doc__ = _realsocket.__doc__

    __slots__ = ["_sock", "__weakref__"] + list(_delegate_methods)

    def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, _sock=None):
        # _sock is an internal hook used by accept()/dup() to wrap an
        # already-existing low-level socket.
        if _sock is None:
            _sock = _realsocket(family, type, proto)
        self._sock = _sock
        # Bind the hot-path I/O methods directly to the real socket.
        for method in _delegate_methods:
            setattr(self, method, getattr(_sock, method))

    def __del__(self):
        self.close()

    def close(self):
        # _drop() releases the underlying handle (PyPy hook); afterwards
        # every delegated method raises EBADF via _closedsocket.
        self._sock._drop()
        self._sock = _closedsocket()
        dummy = self._sock._dummy
        for method in _delegate_methods:
            setattr(self, method, dummy)
    close.__doc__ = _realsocket.close.__doc__

    def accept(self):
        sock, addr = self._sock.accept()
        return _socketobject(_sock=sock), addr
    accept.__doc__ = _realsocket.accept.__doc__

    def dup(self):
        """dup() -> socket object

        Return a new socket object connected to the same system resource."""
        self._sock._reuse()
        return _socketobject(_sock=self._sock)

    def makefile(self, mode='r', bufsize=-1):
        """makefile([mode[, bufsize]]) -> file object

        Return a regular file object corresponding to the socket. The mode
        and bufsize arguments are as for the built-in open() function."""
        self._sock._reuse()
        return _fileobject(self._sock, mode, bufsize)

    family = property(lambda self: self._sock.family, doc="the socket family")
    type = property(lambda self: self._sock.type, doc="the socket type")
    proto = property(lambda self: self._sock.proto, doc="the socket protocol")

    # Generate simple pass-through methods for everything in
    # _socketmethods, each inheriting the real socket's docstring.
    _s = ("def %s(self, *args): return self._sock.%s(*args)\n\n"
          "%s.__doc__ = _realsocket.%s.__doc__\n")
    for _m in _socketmethods:
        exec _s % (_m, _m, _m, _m)
    del _m, _s

socket = SocketType = _socketobject
+
class _fileobject(object):
    """Faux file object attached to a socket object."""

    default_bufsize = 8192
    name = "<socket>"

    __slots__ = ["mode", "bufsize", "softspace",
                 # "closed" is a property, see below
                 "_sock", "_rbufsize", "_wbufsize", "_rbuf", "_wbuf",
                 "_close"]

    def __init__(self, sock, mode='rb', bufsize=-1, close=False):
        self._sock = sock
        self.mode = mode # Not actually used in this version
        if bufsize < 0:
            bufsize = self.default_bufsize
        self.bufsize = bufsize
        self.softspace = False
        # bufsize 0 means unbuffered (read 1 byte at a time); bufsize 1
        # means line-buffered writes but fully buffered reads.
        if bufsize == 0:
            self._rbufsize = 1
        elif bufsize == 1:
            self._rbufsize = self.default_bufsize
        else:
            self._rbufsize = bufsize
        self._wbufsize = bufsize
        self._rbuf = "" # A string
        self._wbuf = [] # A list of strings
        self._close = close

    def _getclosed(self):
        return self._sock is None
    closed = property(_getclosed, doc="True if the file is closed")

    def close(self):
        # Flush pending writes, then detach from the socket; _drop()
        # releases the reference taken by makefile() via _reuse().
        if self._sock:
            try:
                self.flush()
            finally:
                if self._sock:
                    s = self._sock
                    self._sock = None
                    s._drop()

    def __del__(self):
        try:
            self.close()
        except:
            # close() may fail if __init__ didn't complete
            pass

    def flush(self):
        if self._wbuf:
            buffer = "".join(self._wbuf)
            self._wbuf = []
            self._sock.sendall(buffer)

    def fileno(self):
        return self._sock.fileno()

    def write(self, data):
        data = str(data) # XXX Should really reject non-string non-buffers
        if not data:
            return
        self._wbuf.append(data)
        # Flush when unbuffered, when line-buffered and a newline was
        # written, or when the buffer has grown past the limit.
        if (self._wbufsize == 0 or
            self._wbufsize == 1 and '\n' in data or
            self._get_wbuf_len() >= self._wbufsize):
            self.flush()

    def writelines(self, list):
        # XXX We could do better here for very long lists
        # XXX Should really reject non-string non-buffers
        self._wbuf.extend(filter(None, map(str, list)))
        if (self._wbufsize <= 1 or
            self._get_wbuf_len() >= self._wbufsize):
            self.flush()

    def _get_wbuf_len(self):
        # Total number of pending bytes in the write buffer.
        buf_len = 0
        for x in self._wbuf:
            buf_len += len(x)
        return buf_len

    def read(self, size=-1):
        data = self._rbuf
        if size < 0:
            # Read until EOF
            buffers = []
            if data:
                buffers.append(data)
            self._rbuf = ""
            if self._rbufsize <= 1:
                recv_size = self.default_bufsize
            else:
                recv_size = self._rbufsize
            while True:
                data = self._sock.recv(recv_size)
                if not data:
                    break
                buffers.append(data)
            return "".join(buffers)
        else:
            # Read until size bytes or EOF seen, whichever comes first
            buf_len = len(data)
            if buf_len >= size:
                # Satisfied entirely from the read buffer.
                self._rbuf = data[size:]
                return data[:size]
            buffers = []
            if data:
                buffers.append(data)
            self._rbuf = ""
            while True:
                left = size - buf_len
                recv_size = max(self._rbufsize, left)
                data = self._sock.recv(recv_size)
                if not data:
                    break
                buffers.append(data)
                n = len(data)
                if n >= left:
                    # Keep the surplus for the next read.
                    self._rbuf = data[left:]
                    buffers[-1] = data[:left]
                    break
                buf_len += n
            return "".join(buffers)

    def readline(self, size=-1):
        data = self._rbuf
        if size < 0:
            # Read until \n or EOF, whichever comes first
            if self._rbufsize <= 1:
                # Speed up unbuffered case
                assert data == ""
                buffers = []
                recv = self._sock.recv
                while data != "\n":
                    data = recv(1)
                    if not data:
                        break
                    buffers.append(data)
                return "".join(buffers)
            nl = data.find('\n')
            if nl >= 0:
                nl += 1
                self._rbuf = data[nl:]
                return data[:nl]
            buffers = []
            if data:
                buffers.append(data)
            self._rbuf = ""
            while True:
                data = self._sock.recv(self._rbufsize)
                if not data:
                    break
                buffers.append(data)
                nl = data.find('\n')
                if nl >= 0:
                    nl += 1
                    self._rbuf = data[nl:]
                    buffers[-1] = data[:nl]
                    break
            return "".join(buffers)
        else:
            # Read until size bytes or \n or EOF seen, whichever comes first
            nl = data.find('\n', 0, size)
            if nl >= 0:
                # Whole line already buffered.
                nl += 1
                self._rbuf = data[nl:]
                return data[:nl]
            buf_len = len(data)
            if buf_len >= size:
                self._rbuf = data[size:]
                return data[:size]
            buffers = []
            if data:
                buffers.append(data)
            self._rbuf = ""
            while True:
                data = self._sock.recv(self._rbufsize)
                if not data:
                    break
                buffers.append(data)
                left = size - buf_len
                nl = data.find('\n', 0, left)
                if nl >= 0:
                    nl += 1
                    self._rbuf = data[nl:]
                    buffers[-1] = data[:nl]
                    break
                n = len(data)
                if n >= left:
                    self._rbuf = data[left:]
                    buffers[-1] = data[:left]
                    break
                buf_len += n
            return "".join(buffers)

    def readlines(self, sizehint=0):
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list

    # Iterator protocols

    def __iter__(self):
        return self

    def next(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/sre_compile.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/sre_compile.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,532 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# convert template to internal format
+#
+# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
+#
+# See the sre.py file for information on usage and redistribution.
+#
+
+"""Internal support module for sre"""
+
+import _sre, sys
+
+from sre_constants import *
+
+# XXX see PyPy hack in sre_constants to support both the 2.3 and 2.4 _sre.c implementation.
+#assert _sre.MAGIC == MAGIC, "SRE module mismatch"
+
# Largest value representable in one code word of the target engine
# (16-bit vs 32-bit code units, depending on how _sre was built).
if _sre.CODESIZE == 2:
    MAXCODE = 65535
else:
    MAXCODE = 0xFFFFFFFFL

def _identityfunction(x):
    # Default no-op "fixup" used when case folding is not requested.
    return x
+
def set(seq):
    """Compatibility shim: build a dict-based pseudo-set mapping each
    element of *seq* to 1 (deliberately shadows the builtin)."""
    members = {}
    for member in seq:
        members[member] = 1
    return members
+
# Opcode groupings used by _compile's dispatch below.
_LITERAL_CODES = set([LITERAL, NOT_LITERAL])
_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT])
_SUCCESS_CODES = set([SUCCESS, FAILURE])
_ASSERT_CODES = set([ASSERT, ASSERT_NOT])
+
def _compile(code, pattern, flags):
    # internal: compile a (sub)pattern
    # Appends opcodes to `code` for every (op, av) node in `pattern`.
    # Forward jump offsets are back-patched: a placeholder 0 is emitted
    # at `skip`, then overwritten once the target length is known.
    emit = code.append
    _len = len
    LITERAL_CODES = _LITERAL_CODES
    REPEATING_CODES = _REPEATING_CODES
    SUCCESS_CODES = _SUCCESS_CODES
    ASSERT_CODES = _ASSERT_CODES
    for op, av in pattern:
        if op in LITERAL_CODES:
            if flags & SRE_FLAG_IGNORECASE:
                emit(OPCODES[OP_IGNORE[op]])
                emit(_sre.getlower(av, flags))
            else:
                emit(OPCODES[op])
                emit(av)
        elif op is IN:
            if flags & SRE_FLAG_IGNORECASE:
                emit(OPCODES[OP_IGNORE[op]])
                def fixup(literal, flags=flags):
                    return _sre.getlower(literal, flags)
            else:
                emit(OPCODES[op])
                fixup = _identityfunction
            skip = _len(code); emit(0)
            _compile_charset(av, flags, code, fixup)
            code[skip] = _len(code) - skip
        elif op is ANY:
            if flags & SRE_FLAG_DOTALL:
                emit(OPCODES[ANY_ALL])
            else:
                emit(OPCODES[ANY])
        elif op in REPEATING_CODES:
            if flags & SRE_FLAG_TEMPLATE:
                raise error, "internal: unsupported template operator"
                # NOTE(review): the emits below are unreachable (after the
                # raise) but are kept verbatim from upstream CPython.
                emit(OPCODES[REPEAT])
                skip = _len(code); emit(0)
                emit(av[0])
                emit(av[1])
                _compile(code, av[2], flags)
                emit(OPCODES[SUCCESS])
                code[skip] = _len(code) - skip
            elif _simple(av) and op is not REPEAT:
                # Single-width body: use the cheaper REPEAT_ONE forms.
                if op is MAX_REPEAT:
                    emit(OPCODES[REPEAT_ONE])
                else:
                    emit(OPCODES[MIN_REPEAT_ONE])
                skip = _len(code); emit(0)
                emit(av[0])
                emit(av[1])
                _compile(code, av[2], flags)
                emit(OPCODES[SUCCESS])
                code[skip] = _len(code) - skip
            else:
                emit(OPCODES[REPEAT])
                skip = _len(code); emit(0)
                emit(av[0])
                emit(av[1])
                _compile(code, av[2], flags)
                code[skip] = _len(code) - skip
                if op is MAX_REPEAT:
                    emit(OPCODES[MAX_UNTIL])
                else:
                    emit(OPCODES[MIN_UNTIL])
        elif op is SUBPATTERN:
            # av[0] is the group number (0 = non-capturing).
            if av[0]:
                emit(OPCODES[MARK])
                emit((av[0]-1)*2)
            # _compile_info(code, av[1], flags)
            _compile(code, av[1], flags)
            if av[0]:
                emit(OPCODES[MARK])
                emit((av[0]-1)*2+1)
        elif op in SUCCESS_CODES:
            emit(OPCODES[op])
        elif op in ASSERT_CODES:
            emit(OPCODES[op])
            skip = _len(code); emit(0)
            if av[0] >= 0:
                emit(0) # look ahead
            else:
                lo, hi = av[1].getwidth()
                if lo != hi:
                    raise error, "look-behind requires fixed-width pattern"
                emit(lo) # look behind
            _compile(code, av[1], flags)
            emit(OPCODES[SUCCESS])
            code[skip] = _len(code) - skip
        elif op is CALL:
            emit(OPCODES[op])
            skip = _len(code); emit(0)
            _compile(code, av, flags)
            emit(OPCODES[SUCCESS])
            code[skip] = _len(code) - skip
        elif op is AT:
            emit(OPCODES[op])
            if flags & SRE_FLAG_MULTILINE:
                av = AT_MULTILINE.get(av, av)
            if flags & SRE_FLAG_LOCALE:
                av = AT_LOCALE.get(av, av)
            elif flags & SRE_FLAG_UNICODE:
                av = AT_UNICODE.get(av, av)
            emit(ATCODES[av])
        elif op is BRANCH:
            emit(OPCODES[op])
            tail = []
            tailappend = tail.append
            for av in av[1]:
                skip = _len(code); emit(0)
                # _compile_info(code, av, flags)
                _compile(code, av, flags)
                emit(OPCODES[JUMP])
                tailappend(_len(code)); emit(0)
                code[skip] = _len(code) - skip
            emit(0) # end of branch
            # Patch every alternative's JUMP to land past the branch.
            for tail in tail:
                code[tail] = _len(code) - tail
        elif op is CATEGORY:
            emit(OPCODES[op])
            if flags & SRE_FLAG_LOCALE:
                av = CH_LOCALE[av]
            elif flags & SRE_FLAG_UNICODE:
                av = CH_UNICODE[av]
            emit(CHCODES[av])
        elif op is GROUPREF:
            if flags & SRE_FLAG_IGNORECASE:
                emit(OPCODES[OP_IGNORE[op]])
            else:
                emit(OPCODES[op])
            emit(av-1)
        elif op is GROUPREF_EXISTS:
            # Conditional pattern (?(group)yes|no).
            emit(OPCODES[op])
            emit(av[0]-1)
            skipyes = _len(code); emit(0)
            _compile(code, av[1], flags)
            if av[2]:
                emit(OPCODES[JUMP])
                skipno = _len(code); emit(0)
                code[skipyes] = _len(code) - skipyes + 1
                _compile(code, av[2], flags)
                code[skipno] = _len(code) - skipno
            else:
                code[skipyes] = _len(code) - skipyes + 1
        else:
            raise ValueError, ("unsupported operand type", op)
+
def _compile_charset(charset, flags, code, fixup=None):
    # compile charset subprogram
    # `fixup` maps literals (e.g. to lower case for IGNORECASE);
    # the emitted subprogram is terminated by FAILURE.
    emit = code.append
    if fixup is None:
        fixup = _identityfunction
    for op, av in _optimize_charset(charset, fixup):
        emit(OPCODES[op])
        if op is NEGATE:
            pass
        elif op is LITERAL:
            emit(fixup(av))
        elif op is RANGE:
            emit(fixup(av[0]))
            emit(fixup(av[1]))
        elif op is CHARSET:
            code.extend(av)
        elif op is BIGCHARSET:
            code.extend(av)
        elif op is CATEGORY:
            if flags & SRE_FLAG_LOCALE:
                emit(CHCODES[CH_LOCALE[av]])
            elif flags & SRE_FLAG_UNICODE:
                emit(CHCODES[CH_UNICODE[av]])
            else:
                emit(CHCODES[av])
        else:
            raise error, "internal: unsupported set operator"
    emit(OPCODES[FAILURE])
+
def _optimize_charset(charset, fixup):
    # internal: optimize character set
    # Builds a 256-entry bitmap of the set; small sets collapse to
    # LITERAL/RANGE items, larger ones to a single CHARSET bitmap.
    out = []
    outappend = out.append
    charmap = [0]*256
    try:
        for op, av in charset:
            if op is NEGATE:
                outappend((op, av))
            elif op is LITERAL:
                charmap[fixup(av)] = 1
            elif op is RANGE:
                for i in range(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op is CATEGORY:
                # XXX: could append to charmap tail
                return charset # cannot compress
    except IndexError:
        # character set contains unicode characters (codepoint > 255)
        return _optimize_unicode(charset, fixup)
    # compress character map: find runs of consecutive set bits
    i = p = n = 0
    runs = []
    runsappend = runs.append
    for c in charmap:
        if c:
            if n == 0:
                p = i
            n = n + 1
        elif n:
            runsappend((p, n))
            n = 0
        i = i + 1
    if n:
        runsappend((p, n))
    if len(runs) <= 2:
        # use literal/range
        for p, n in runs:
            if n == 1:
                outappend((LITERAL, p))
            else:
                outappend((RANGE, (p, p+n-1)))
        if len(out) < len(charset):
            return out
    else:
        # use bitmap
        data = _mk_bitmap(charmap)
        outappend((CHARSET, data))
        return out
    # Optimization did not pay off; keep the original set.
    return charset
+
def _mk_bitmap(bits):
    # Pack a sequence of 0/1 flags into a list of code words, least
    # significant bit first, one word per CODESIZE*8 bits.
    data = []
    dataappend = data.append
    if _sre.CODESIZE == 2:
        start = (1, 0)
    else:
        start = (1L, 0L)
    m, v = start
    for c in bits:
        if c:
            v = v + m
        m = m + m
        if m > MAXCODE:
            # Word is full; flush it and start the next one.
            dataappend(v)
            m, v = start
    return data
+
+# To represent a big charset, first a bitmap of all characters in the
+# set is constructed. Then, this bitmap is sliced into chunks of 256
+# characters, duplicate chunks are eliminated, and each chunk is
+# given a number. In the compiled expression, the charset is
+# represented by a 16-bit word sequence, consisting of one word for
+# the number of different chunks, a sequence of 256 bytes (128 words)
+# of chunk numbers indexed by their original chunk position, and a
+# sequence of chunks (16 words each).
+
+# Compression is normally good: in a typical charset, large ranges of
+# Unicode will be either completely excluded (e.g. if only cyrillic
+# letters are to be matched), or completely included (e.g. if large
+# subranges of Kanji match). These ranges will be represented by
+# chunks of all one-bits or all zero-bits.
+
+# Matching can be also done efficiently: the more significant byte of
+# the Unicode character is an index into the chunk number, and the
+# less significant byte is a bit index in the chunk (just like the
+# CHARSET matching).
+
+# In UCS-4 mode, the BIGCHARSET opcode still supports only subsets
+# of the basic multilingual plane; an efficient representation
+# for all of UTF-16 has not yet been developed. This means,
+# in particular, that negated charsets cannot be represented as
+# bigcharsets.
+
def _optimize_unicode(charset, fixup):
    # Build a BIGCHARSET (see the block comment above) for sets that
    # contain codepoints above 255; returns the set unchanged when it
    # cannot be compressed.
    try:
        import array
    except ImportError:
        return charset
    charmap = [0]*65536
    negate = 0
    try:
        for op, av in charset:
            if op is NEGATE:
                negate = 1
            elif op is LITERAL:
                charmap[fixup(av)] = 1
            elif op is RANGE:
                for i in xrange(fixup(av[0]), fixup(av[1])+1):
                    charmap[i] = 1
            elif op is CATEGORY:
                # XXX: could expand category
                return charset # cannot compress
    except IndexError:
        # non-BMP characters
        return charset
    if negate:
        if sys.maxunicode != 65535:
            # XXX: negation does not work with big charsets
            return charset
        for i in xrange(65536):
            charmap[i] = not charmap[i]
    # Deduplicate 256-char chunks; `mapping` indexes chunks by their
    # original position, `data` holds each distinct chunk's bitmap.
    comps = {}
    mapping = [0]*256
    block = 0
    data = []
    for i in xrange(256):
        chunk = tuple(charmap[i*256:(i+1)*256])
        new = comps.setdefault(chunk, block)
        mapping[i] = new
        if new == block:
            block = block + 1
            data = data + _mk_bitmap(chunk)
    header = [block]
    if _sre.CODESIZE == 2:
        code = 'H'
    else:
        code = 'I'
    # Convert block indices to byte array of 256 bytes
    mapping = array.array('b', mapping).tostring()
    # Convert byte array to word array
    mapping = array.array(code, mapping)
    assert mapping.itemsize == _sre.CODESIZE
    header = header + mapping.tolist()
    data[0:0] = header
    return [(BIGCHARSET, data)]
+
def _simple(av):
    # check if av is a "simple" operator
    # i.e. its body matches exactly one character and contains no
    # capturing subpattern (so REPEAT_ONE can be used).
    lo, hi = av[2].getwidth()
    if lo == 0 and hi == MAXREPEAT:
        raise error, "nothing to repeat"
    return lo == hi == 1 and av[2][0][0] != SUBPATTERN
+
def _compile_info(code, pattern, flags):
    # internal: compile an info block. in the current version,
    # this contains min/max pattern width, and an optional literal
    # prefix or a character map
    lo, hi = pattern.getwidth()
    if lo == 0:
        return # not worth it
    # look for a literal prefix
    prefix = []
    prefixappend = prefix.append
    prefix_skip = 0
    charset = [] # not used
    charsetappend = charset.append
    if not (flags & SRE_FLAG_IGNORECASE):
        # look for literal prefix
        for op, av in pattern.data:
            if op is LITERAL:
                if len(prefix) == prefix_skip:
                    prefix_skip = prefix_skip + 1
                prefixappend(av)
            elif op is SUBPATTERN and len(av[1]) == 1:
                op, av = av[1][0]
                if op is LITERAL:
                    prefixappend(av)
                else:
                    break
            else:
                break
        # if no prefix, look for charset prefix
        if not prefix and pattern.data:
            op, av = pattern.data[0]
            if op is SUBPATTERN and av[1]:
                op, av = av[1][0]
                if op is LITERAL:
                    charsetappend((op, av))
                elif op is BRANCH:
                    c = []
                    cappend = c.append
                    for p in av[1]:
                        if not p:
                            break
                        op, av = p[0]
                        if op is LITERAL:
                            cappend((op, av))
                        else:
                            break
                    else:
                        charset = c
            elif op is BRANCH:
                c = []
                cappend = c.append
                for p in av[1]:
                    if not p:
                        break
                    op, av = p[0]
                    if op is LITERAL:
                        cappend((op, av))
                    else:
                        break
                else:
                    charset = c
            elif op is IN:
                charset = av
##     if prefix:
##         print "*** PREFIX", prefix, prefix_skip
##     if charset:
##         print "*** CHARSET", charset
    # add an info block
    emit = code.append
    emit(OPCODES[INFO])
    skip = len(code); emit(0)
    # literal flag
    mask = 0
    if prefix:
        mask = SRE_INFO_PREFIX
        if len(prefix) == prefix_skip == len(pattern.data):
            mask = mask + SRE_INFO_LITERAL
    elif charset:
        mask = mask + SRE_INFO_CHARSET
    emit(mask)
    # pattern length
    if lo < MAXCODE:
        emit(lo)
    else:
        emit(MAXCODE)
        prefix = prefix[:MAXCODE]
    if hi < MAXCODE:
        emit(hi)
    else:
        emit(0)  # 0 here means "maximum width unknown/unbounded"
    # add literal prefix
    if prefix:
        emit(len(prefix)) # length
        emit(prefix_skip) # skip
        code.extend(prefix)
        # generate overlap table (KMP-style failure function over the prefix)
        table = [-1] + ([0]*len(prefix))
        for i in xrange(len(prefix)):
            table[i+1] = table[i]+1
            while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
                table[i+1] = table[table[i+1]-1]+1
        code.extend(table[1:]) # don't store first entry
    elif charset:
        _compile_charset(charset, flags, code)
    code[skip] = len(code) - skip
+
# On Unicode builds accept both str and unicode patterns.
try:
    unicode
except NameError:
    STRING_TYPES = (type(""),)
else:
    STRING_TYPES = (type(""), type(unicode("")))

def isstring(obj):
    # True if obj is one of the supported string types.
    for tp in STRING_TYPES:
        if isinstance(obj, tp):
            return 1
    return 0
+
def _code(p, flags):
    # Lower a parsed pattern `p` (sre_parse.SubPattern) to the flat
    # opcode list consumed by _sre.compile().

    flags = p.pattern.flags | flags
    code = []

    # compile info block
    _compile_info(code, p, flags)

    # compile the pattern
    _compile(code, p.data, flags)

    code.append(OPCODES[SUCCESS])

    return code
+
def compile(p, flags=0):
    # internal: convert pattern list to internal format
    # `p` may be a pattern string (parsed here) or an already-parsed
    # sre_parse.SubPattern.

    if isstring(p):
        import sre_parse
        pattern = p
        p = sre_parse.parse(p, flags)
    else:
        pattern = None

    code = _code(p, flags)

    # print code

    # XXX: <fl> get rid of this limitation!
    if p.pattern.groups > 100:
        raise AssertionError(
            "sorry, but this version only supports 100 named groups"
            )

    # map in either direction
    groupindex = p.pattern.groupdict
    indexgroup = [None] * p.pattern.groups
    for k, i in groupindex.items():
        indexgroup[i] = k

    return _sre.compile(
        pattern, flags, code,
        p.pattern.groups-1,
        groupindex, indexgroup
        )
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/sre_constants.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/sre_constants.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,268 @@
+#
+# Secret Labs' Regular Expression Engine
+#
+# various symbols used by the regular expression engine.
+# run this script to update the _sre include files!
+#
+# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
+#
+# See the sre.py file for information on usage and redistribution.
+#
+
+"""Internal support module for sre"""
+
# update when constants are added or removed

MAGIC = 20031017

# max code word in this release
MAXREPEAT = 65535

# SRE standard exception (access as sre.error)
# should this really be here?

class error(Exception):
    # Raised for malformed patterns and internal compiler errors.
    pass
+
# operators
# Each opcode is a plain string; the OPCODES list below fixes their
# order, and makedict() later turns them into integer codes.

FAILURE = "failure"
SUCCESS = "success"

ANY = "any"
ANY_ALL = "any_all"
ASSERT = "assert"
ASSERT_NOT = "assert_not"
AT = "at"
BIGCHARSET = "bigcharset"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
CHARSET = "charset"
GROUPREF = "groupref"
GROUPREF_IGNORE = "groupref_ignore"
GROUPREF_EXISTS = "groupref_exists"
IN = "in"
IN_IGNORE = "in_ignore"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
RANGE = "range"
REPEAT = "repeat"
REPEAT_ONE = "repeat_one"
SUBPATTERN = "subpattern"
MIN_REPEAT_ONE = "min_repeat_one"
+
# positions (anchors such as ^, $, \b)
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_BEGINNING_STRING = "at_beginning_string"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
AT_LOC_BOUNDARY = "at_loc_boundary"
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
AT_UNI_BOUNDARY = "at_uni_boundary"
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"

# categories (character classes such as \d, \s, \w)
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
CATEGORY_UNI_DIGIT = "category_uni_digit"
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
CATEGORY_UNI_SPACE = "category_uni_space"
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
CATEGORY_UNI_WORD = "category_uni_word"
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
+
# Order matters: an opcode's position in this list is its integer code
# in the compiled program (see makedict() below).
OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE

]

# PyPy hack to make the sre_*.py files from 2.4.1 work on the _sre
# engine of 2.3.
import _sre
if _sre.MAGIC < 20031017:
    # Old engine has no GROUPREF_EXISTS; removing it keeps the
    # remaining opcode numbering compatible.
    OPCODES.remove(GROUPREF_EXISTS)
del _sre
+
# Order fixes the integer codes for anchors and categories, same as OPCODES.
ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
    AT_UNI_NON_BOUNDARY
]

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK
]
+
def makedict(list):
    """Map each item of *list* to its position (first item -> 0)."""
    positions = {}
    for index, item in enumerate(list):
        positions[item] = index
    return positions
+
# Replace the name lists with name -> integer-code mappings.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)
+
# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}

# anchor substitutions applied under the MULTILINE flag
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

# \b / \B substitutions under the LOCALE flag
AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

# \b / \B substitutions under the UNICODE flag
AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

# category substitutions under the LOCALE flag
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

# category substitutions under the UNICODE flag
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags (bit values of re.I, re.L, re.M, ...)
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode locale
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
SRE_FLAG_DEBUG = 128 # debugging

# flags for INFO primitive
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set
+
# Run this module as a script to regenerate sre_constants.h for _sre.c.
if __name__ == "__main__":
    def dump(f, d, prefix):
        # Emit one #define per entry, sorted by integer code.
        items = d.items()
        items.sort(key=lambda a: a[1])
        for k, v in items:
            f.write("#define %s_%s %s\n" % (prefix, k.upper(), v))
    f = open("sre_constants.h", "w")
    f.write("""\
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * NOTE: This file is generated by sre_constants.py. If you need
 * to change anything in here, edit sre_constants.py and run it.
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

""")

    f.write("#define SRE_MAGIC %d\n" % MAGIC)

    dump(f, OPCODES, "SRE_OP")
    dump(f, ATCODES, "SRE")
    dump(f, CHCODES, "SRE")

    f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
    f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
    f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
    f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
    f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
    f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
    f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)

    f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
    f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
    f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)

    f.close()
    print "done"
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/subprocess.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/subprocess.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,1253 @@
+# subprocess - Subprocesses with accessible I/O streams
+#
+# For more information about this module, see PEP 324.
+#
+# This module should remain compatible with Python 2.2, see PEP 291.
+#
+# Copyright (c) 2003-2005 by Peter Astrand <astrand at lysator.liu.se>
+#
+# Licensed to PSF under a Contributor Agreement.
+# See http://www.python.org/2.4/license for licensing details.
+
+r"""subprocess - Subprocesses with accessible I/O streams
+
+This module allows you to spawn processes, connect to their
+input/output/error pipes, and obtain their return codes. This module
+intends to replace several other, older modules and functions, like:
+
+os.system
+os.spawn*
+os.popen*
+popen2.*
+commands.*
+
+Information about how the subprocess module can be used to replace these
+modules and functions can be found below.
+
+
+
+Using the subprocess module
+===========================
+This module defines one class called Popen:
+
+class Popen(args, bufsize=0, executable=None,
+ stdin=None, stdout=None, stderr=None,
+ preexec_fn=None, close_fds=False, shell=False,
+ cwd=None, env=None, universal_newlines=False,
+ startupinfo=None, creationflags=0):
+
+
+Arguments are:
+
+args should be a string, or a sequence of program arguments. The
+program to execute is normally the first item in the args sequence or
+string, but can be explicitly set by using the executable argument.
+
+On UNIX, with shell=False (default): In this case, the Popen class
+uses os.execvp() to execute the child program. args should normally
+be a sequence. A string will be treated as a sequence with the string
+as the only item (the program to execute).
+
+On UNIX, with shell=True: If args is a string, it specifies the
+command string to execute through the shell. If args is a sequence,
+the first item specifies the command string, and any additional items
+will be treated as additional shell arguments.
+
+On Windows: the Popen class uses CreateProcess() to execute the child
+program, which operates on strings. If args is a sequence, it will be
+converted to a string using the list2cmdline method. Please note that
+not all MS Windows applications interpret the command line the same
+way: The list2cmdline is designed for applications using the same
+rules as the MS C runtime.
+
+bufsize, if given, has the same meaning as the corresponding argument
+to the built-in open() function: 0 means unbuffered, 1 means line
+buffered, any other positive value means use a buffer of
+(approximately) that size. A negative bufsize means to use the system
+default, which usually means fully buffered. The default value for
+bufsize is 0 (unbuffered).
+
+stdin, stdout and stderr specify the executed programs' standard
+input, standard output and standard error file handles, respectively.
+Valid values are PIPE, an existing file descriptor (a positive
+integer), an existing file object, and None. PIPE indicates that a
+new pipe to the child should be created. With None, no redirection
+will occur; the child's file handles will be inherited from the
+parent. Additionally, stderr can be STDOUT, which indicates that the
+stderr data from the applications should be captured into the same
+file handle as for stdout.
+
+If preexec_fn is set to a callable object, this object will be called
+in the child process just before the child is executed.
+
+If close_fds is true, all file descriptors except 0, 1 and 2 will be
+closed before the child process is executed.
+
+if shell is true, the specified command will be executed through the
+shell.
+
+If cwd is not None, the current directory will be changed to cwd
+before the child is executed.
+
+If env is not None, it defines the environment variables for the new
+process.
+
+If universal_newlines is true, the file objects stdout and stderr are
+opened as text files, but lines may be terminated by any of '\n',
+the Unix end-of-line convention, '\r', the Macintosh convention or
+'\r\n', the Windows convention. All of these external representations
+are seen as '\n' by the Python program. Note: This feature is only
+available if Python is built with universal newline support (the
+default). Also, the newlines attribute of the file objects stdout,
+stdin and stderr are not updated by the communicate() method.
+
+The startupinfo and creationflags, if given, will be passed to the
+underlying CreateProcess() function. They can specify things such as
+appearance of the main window and priority for the new process.
+(Windows only)
+
+
+This module also defines two shortcut functions:
+
+call(*popenargs, **kwargs):
+ Run command with arguments. Wait for command to complete, then
+ return the returncode attribute.
+
+ The arguments are the same as for the Popen constructor. Example:
+
+ retcode = call(["ls", "-l"])
+
+check_call(*popenargs, **kwargs):
+ Run command with arguments. Wait for command to complete. If the
+ exit code was zero then return, otherwise raise
+ CalledProcessError. The CalledProcessError object will have the
+ return code in the returncode attribute.
+
+ The arguments are the same as for the Popen constructor. Example:
+
+ check_call(["ls", "-l"])
+
+Exceptions
+----------
+Exceptions raised in the child process, before the new program has
+started to execute, will be re-raised in the parent. Additionally,
+the exception object will have one extra attribute called
+'child_traceback', which is a string containing traceback information
+from the childs point of view.
+
+The most common exception raised is OSError. This occurs, for
+example, when trying to execute a non-existent file. Applications
+should prepare for OSErrors.
+
+A ValueError will be raised if Popen is called with invalid arguments.
+
+check_call() will raise CalledProcessError, if the called process
+returns a non-zero return code.
+
+
+Security
+--------
+Unlike some other popen functions, this implementation will never call
+/bin/sh implicitly. This means that all characters, including shell
+metacharacters, can safely be passed to child processes.
+
+
+Popen objects
+=============
+Instances of the Popen class have the following methods:
+
+poll()
+ Check if child process has terminated. Returns returncode
+ attribute.
+
+wait()
+ Wait for child process to terminate. Returns returncode attribute.
+
+communicate(input=None)
+ Interact with process: Send data to stdin. Read data from stdout
+ and stderr, until end-of-file is reached. Wait for process to
+ terminate. The optional input argument should be a string to be
+ sent to the child process, or None, if no data should be sent to
+ the child.
+
+ communicate() returns a tuple (stdout, stderr).
+
+ Note: The data read is buffered in memory, so do not use this
+ method if the data size is large or unlimited.
+
+The following attributes are also available:
+
+stdin
+ If the stdin argument is PIPE, this attribute is a file object
+ that provides input to the child process. Otherwise, it is None.
+
+stdout
+ If the stdout argument is PIPE, this attribute is a file object
+ that provides output from the child process. Otherwise, it is
+ None.
+
+stderr
+ If the stderr argument is PIPE, this attribute is file object that
+ provides error output from the child process. Otherwise, it is
+ None.
+
+pid
+ The process ID of the child process.
+
+returncode
+ The child return code. A None value indicates that the process
+ hasn't terminated yet. A negative value -N indicates that the
+ child was terminated by signal N (UNIX only).
+
+
+Replacing older functions with the subprocess module
+====================================================
+In this section, "a ==> b" means that b can be used as a replacement
+for a.
+
+Note: All functions in this section fail (more or less) silently if
+the executed program cannot be found; this module raises an OSError
+exception.
+
+In the following examples, we assume that the subprocess module is
+imported with "from subprocess import *".
+
+
+Replacing /bin/sh shell backquote
+---------------------------------
+output=`mycmd myarg`
+==>
+output = Popen(["mycmd", "myarg"], stdout=PIPE).communicate()[0]
+
+
+Replacing shell pipe line
+-------------------------
+output=`dmesg | grep hda`
+==>
+p1 = Popen(["dmesg"], stdout=PIPE)
+p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE)
+output = p2.communicate()[0]
+
+
+Replacing os.system()
+---------------------
+sts = os.system("mycmd" + " myarg")
+==>
+p = Popen("mycmd" + " myarg", shell=True)
+pid, sts = os.waitpid(p.pid, 0)
+
+Note:
+
+* Calling the program through the shell is usually not required.
+
+* It's easier to look at the returncode attribute than the
+ exitstatus.
+
+A more real-world example would look like this:
+
+try:
+ retcode = call("mycmd" + " myarg", shell=True)
+ if retcode < 0:
+ print >>sys.stderr, "Child was terminated by signal", -retcode
+ else:
+ print >>sys.stderr, "Child returned", retcode
+except OSError, e:
+ print >>sys.stderr, "Execution failed:", e
+
+
+Replacing os.spawn*
+-------------------
+P_NOWAIT example:
+
+pid = os.spawnlp(os.P_NOWAIT, "/bin/mycmd", "mycmd", "myarg")
+==>
+pid = Popen(["/bin/mycmd", "myarg"]).pid
+
+
+P_WAIT example:
+
+retcode = os.spawnlp(os.P_WAIT, "/bin/mycmd", "mycmd", "myarg")
+==>
+retcode = call(["/bin/mycmd", "myarg"])
+
+
+Vector example:
+
+os.spawnvp(os.P_NOWAIT, path, args)
+==>
+Popen([path] + args[1:])
+
+
+Environment example:
+
+os.spawnlpe(os.P_NOWAIT, "/bin/mycmd", "mycmd", "myarg", env)
+==>
+Popen(["/bin/mycmd", "myarg"], env={"PATH": "/usr/bin"})
+
+
+Replacing os.popen*
+-------------------
+pipe = os.popen(cmd, mode='r', bufsize)
+==>
+pipe = Popen(cmd, shell=True, bufsize=bufsize, stdout=PIPE).stdout
+
+pipe = os.popen(cmd, mode='w', bufsize)
+==>
+pipe = Popen(cmd, shell=True, bufsize=bufsize, stdin=PIPE).stdin
+
+
+(child_stdin, child_stdout) = os.popen2(cmd, mode, bufsize)
+==>
+p = Popen(cmd, shell=True, bufsize=bufsize,
+ stdin=PIPE, stdout=PIPE, close_fds=True)
+(child_stdin, child_stdout) = (p.stdin, p.stdout)
+
+
+(child_stdin,
+ child_stdout,
+ child_stderr) = os.popen3(cmd, mode, bufsize)
+==>
+p = Popen(cmd, shell=True, bufsize=bufsize,
+ stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True)
+(child_stdin,
+ child_stdout,
+ child_stderr) = (p.stdin, p.stdout, p.stderr)
+
+
+(child_stdin, child_stdout_and_stderr) = os.popen4(cmd, mode, bufsize)
+==>
+p = Popen(cmd, shell=True, bufsize=bufsize,
+ stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
+(child_stdin, child_stdout_and_stderr) = (p.stdin, p.stdout)
+
+
+Replacing popen2.*
+------------------
+Note: If the cmd argument to popen2 functions is a string, the command
+is executed through /bin/sh. If it is a list, the command is directly
+executed.
+
+(child_stdout, child_stdin) = popen2.popen2("somestring", bufsize, mode)
+==>
+p = Popen(["somestring"], shell=True, bufsize=bufsize,
+ stdin=PIPE, stdout=PIPE, close_fds=True)
+(child_stdout, child_stdin) = (p.stdout, p.stdin)
+
+
+(child_stdout, child_stdin) = popen2.popen2(["mycmd", "myarg"], bufsize, mode)
+==>
+p = Popen(["mycmd", "myarg"], bufsize=bufsize,
+ stdin=PIPE, stdout=PIPE, close_fds=True)
+(child_stdout, child_stdin) = (p.stdout, p.stdin)
+
+The popen2.Popen3 and popen2.Popen4 basically work as subprocess.Popen,
+except that:
+
+* subprocess.Popen raises an exception if the execution fails
+* the capturestderr argument is replaced with the stderr argument.
+* stdin=PIPE and stdout=PIPE must be specified.
+* popen2 closes all filedescriptors by default, but you have to specify
+ close_fds=True with subprocess.Popen.
+
+
+"""
+
+import sys
+mswindows = (sys.platform == "win32")
+
import gc
import os
import traceback
import types
+
+# Exception classes used by this module.
class CalledProcessError(Exception):
    """Raised by check_call() when a process exits with a non-zero status.

    The failing command is available as the .cmd attribute and its exit
    status as the .returncode attribute.
    """

    def __init__(self, returncode, cmd):
        self.returncode = returncode
        self.cmd = cmd

    def __str__(self):
        return "Command '%s' returned non-zero exit status %d" % (
            self.cmd, self.returncode)
+
+
# Platform-specific imports and shims.  On Windows we either use pywin32
# (disabled by default) or the bundled _subprocess driver; on POSIX we need
# select/fcntl for pipe plumbing and pickle to ship exec failures from the
# child back to the parent.
if mswindows:
    import threading
    import msvcrt
    if 0: # <-- change this to use pywin32 instead of the _subprocess driver
        import pywintypes
        from win32api import GetStdHandle, STD_INPUT_HANDLE, \
             STD_OUTPUT_HANDLE, STD_ERROR_HANDLE
        from win32api import GetCurrentProcess, DuplicateHandle, \
             GetModuleFileName, GetVersion
        from win32con import DUPLICATE_SAME_ACCESS, SW_HIDE
        from win32pipe import CreatePipe
        from win32process import CreateProcess, STARTUPINFO, \
             GetExitCodeProcess, STARTF_USESTDHANDLES, \
             STARTF_USESHOWWINDOW, CREATE_NEW_CONSOLE
        from win32event import WaitForSingleObject, INFINITE, WAIT_OBJECT_0
    else:
        from _subprocess import *
        # Minimal stand-ins for the pywin32 names used below when running
        # on the _subprocess driver instead of pywin32.
        class STARTUPINFO:
            dwFlags = 0
            hStdInput = None
            hStdOutput = None
            hStdError = None
            wShowWindow = 0
        class pywintypes:
            error = IOError
else:
    import select
    import errno
    import fcntl
    import pickle
+
+__all__ = ["Popen", "PIPE", "STDOUT", "call", "check_call", "CalledProcessError"]
+
# Highest file descriptor number the platform allows; used by _close_fds()
# as the upper bound when closing inherited descriptors in the child.
try:
    MAXFD = os.sysconf("SC_OPEN_MAX")
except (AttributeError, ValueError):
    # os.sysconf may be absent (non-POSIX) or not know the name; fall back
    # to a conservative traditional limit.  The original bare "except:"
    # would also have swallowed KeyboardInterrupt/SystemExit.
    MAXFD = 256
+
# True/False does not exist on 2.2.0; define integer stand-ins there.
try:
    False
except NameError:
    False = 0
    True = 1

# Popen instances that were garbage-collected while their child was still
# running; _cleanup() reaps them lazily on the next Popen() construction.
_active = []

def _cleanup():
    # Reap any finished children parked in _active by Popen.__del__().
    for inst in _active[:]:
        # NOTE(review): a child killed by a signal gets a negative
        # returncode, which fails this >= 0 test, so such an instance is
        # never removed from _active -- presumably tolerated here, but
        # confirm against later stdlib revisions.
        if inst.poll(_deadstate=sys.maxint) >= 0:
            try:
                _active.remove(inst)
            except ValueError:
                # This can happen if two threads create a new Popen instance.
                # It's harmless that it was already removed, so ignore.
                pass

# Sentinels for Popen's stdin/stdout/stderr arguments: PIPE requests a new
# pipe; STDOUT (valid for stderr only) merges stderr into stdout.
PIPE = -1
STDOUT = -2
+
+
def call(*popenargs, **kwargs):
    """Run a command with arguments and wait for it to complete.

    Returns the returncode attribute of the finished process.  The
    arguments are the same as for the Popen constructor.  Example:

    retcode = call(["ls", "-l"])
    """
    process = Popen(*popenargs, **kwargs)
    return process.wait()
+
+
def check_call(*popenargs, **kwargs):
    """Run a command with arguments and wait for it to complete.

    Returns silently if the exit code was zero, otherwise raises
    CalledProcessError with the return code stored in its returncode
    attribute.  The arguments are the same as for the Popen constructor.
    Example:

    check_call(["ls", "-l"])
    """
    retcode = call(*popenargs, **kwargs)
    if retcode:
        # Report the command as given: either the "args" keyword or the
        # first positional argument.
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = popenargs[0]
        raise CalledProcessError(retcode, cmd)
    return retcode
+
+
def list2cmdline(seq):
    """
    Translate a sequence of arguments into a command line string using
    the quoting rules of the MS C runtime:

    1) Arguments are delimited by white space (space or tab).

    2) A string surrounded by double quotation marks is interpreted as
       a single argument, regardless of white space contained within.

    3) A double quotation mark preceded by a backslash is interpreted
       as a literal double quotation mark.

    4) Backslashes are interpreted literally, unless they immediately
       precede a double quotation mark.

    5) If backslashes immediately precede a double quotation mark,
       every pair is interpreted as a literal backslash; an odd count
       makes the last backslash escape the quotation mark as in rule 3.
    """
    # See
    # http://msdn.microsoft.com/library/en-us/vccelng/htm/progs_12.asp
    parts = []
    for arg in seq:
        # Separate this argument from the previous one.
        if parts:
            parts.append(' ')

        must_quote = (" " in arg) or ("\t" in arg) or arg == ""
        if must_quote:
            parts.append('"')

        # Backslashes are deferred: their meaning depends on whether a
        # double quote follows them.
        pending = 0
        for ch in arg:
            if ch == '\\':
                pending += 1
            elif ch == '"':
                # Double the deferred backslashes, then escape the quote.
                parts.append('\\' * (pending * 2))
                pending = 0
                parts.append('\\"')
            else:
                # Literal character: flush deferred backslashes as-is.
                if pending:
                    parts.append('\\' * pending)
                    pending = 0
                parts.append(ch)

        # Flush any backslashes left at the end of the argument.
        if pending:
            parts.append('\\' * pending)

        if must_quote:
            # Trailing backslashes must be doubled so they do not escape
            # the closing quote.
            if pending:
                parts.append('\\' * pending)
            parts.append('"')

    return ''.join(parts)
+
+
+class Popen(object):
    def __init__(self, args, bufsize=0, executable=None,
                 stdin=None, stdout=None, stderr=None,
                 preexec_fn=None, close_fds=False, shell=False,
                 cwd=None, env=None, universal_newlines=False,
                 startupinfo=None, creationflags=0):
        """Create new Popen instance.

        args -- program to run: a string or a sequence of argument strings.
        bufsize -- buffering for the pipe file objects, as for open().
        executable -- program to execute, overriding args[0] if given.
        stdin/stdout/stderr -- None (inherit), PIPE, a file descriptor, or
            a file object; stderr may additionally be STDOUT.
        preexec_fn -- callable run in the child before exec (POSIX only).
        close_fds -- close descriptors above 2 in the child (POSIX only).
        shell -- run the command through the shell.
        cwd/env -- working directory and environment for the child.
        universal_newlines -- open stdout/stderr in 'U' text mode.
        startupinfo/creationflags -- CreateProcess options (Windows only).

        Raises TypeError/ValueError for bad arguments; exec failures in
        the child are re-raised here (usually as OSError).
        """
        # Reap children left over from garbage-collected Popen instances.
        _cleanup()

        self._child_created = False
        if not isinstance(bufsize, (int, long)):
            raise TypeError("bufsize must be an integer")

        # Reject arguments that only make sense on the other platform.
        if mswindows:
            if preexec_fn is not None:
                raise ValueError("preexec_fn is not supported on Windows "
                                 "platforms")
            if close_fds:
                raise ValueError("close_fds is not supported on Windows "
                                 "platforms")
        else:
            # POSIX
            if startupinfo is not None:
                raise ValueError("startupinfo is only supported on Windows "
                                 "platforms")
            if creationflags != 0:
                raise ValueError("creationflags is only supported on Windows "
                                 "platforms")

        self.stdin = None
        self.stdout = None
        self.stderr = None
        self.pid = None
        self.returncode = None
        self.universal_newlines = universal_newlines

        # Input and output objects.  The general principle is like
        # this:
        #
        # Parent                   Child
        # ------                   -----
        # p2cwrite   ---stdin--->  p2cread
        # c2pread    <--stdout---  c2pwrite
        # errread    <--stderr---  errwrite
        #
        # On POSIX, the child objects are file descriptors.  On
        # Windows, these are Windows file handles.  The parent objects
        # are file descriptors on both platforms.  The parent objects
        # are None when not using PIPEs.  The child objects are None
        # when not redirecting.

        (p2cread, p2cwrite,
         c2pread, c2pwrite,
         errread, errwrite) = self._get_handles(stdin, stdout, stderr)

        self._execute_child(args, executable, preexec_fn, close_fds,
                            cwd, env, universal_newlines,
                            startupinfo, creationflags, shell,
                            p2cread, p2cwrite,
                            c2pread, c2pwrite,
                            errread, errwrite)

        # On Windows, you cannot just redirect one or two handles: You
        # either have to redirect all three or none.  If the subprocess
        # user has only redirected one or two handles, we are
        # automatically creating PIPEs for the rest.  We should close
        # these after the process is started.  See bug #1124861.
        if mswindows:
            if stdin is None and p2cwrite is not None:
                os.close(p2cwrite)
                p2cwrite = None
            if stdout is None and c2pread is not None:
                os.close(c2pread)
                c2pread = None
            if stderr is None and errread is not None:
                os.close(errread)
                errread = None

        # Wrap the parent-side pipe descriptors in file objects.
        # NOTE(review): these truthiness tests would also skip a
        # descriptor equal to 0 -- presumably the parent ends can never
        # be fd 0 here, but confirm.
        if p2cwrite:
            self.stdin = os.fdopen(p2cwrite, 'wb', bufsize)
        if c2pread:
            if universal_newlines:
                self.stdout = os.fdopen(c2pread, 'rU', bufsize)
            else:
                self.stdout = os.fdopen(c2pread, 'rb', bufsize)
        if errread:
            if universal_newlines:
                self.stderr = os.fdopen(errread, 'rU', bufsize)
            else:
                self.stderr = os.fdopen(errread, 'rb', bufsize)
+
+
+ def _translate_newlines(self, data):
+ data = data.replace("\r\n", "\n")
+ data = data.replace("\r", "\n")
+ return data
+
+
    def __del__(self):
        # Keep zombie children reapable: a collected Popen whose child is
        # still running parks itself on the module-level _active list.
        if not self._child_created:
            # We didn't get to successfully create a child process.
            return
        # In case the child hasn't been waited on, check if it's done.
        self.poll(_deadstate=sys.maxint)
        if self.returncode is None and _active is not None:
            # Child is still running, keep us alive until we can wait on it.
            _active.append(self)
+
+
+ def communicate(self, input=None):
+ """Interact with process: Send data to stdin. Read data from
+ stdout and stderr, until end-of-file is reached. Wait for
+ process to terminate. The optional input argument should be a
+ string to be sent to the child process, or None, if no data
+ should be sent to the child.
+
+ communicate() returns a tuple (stdout, stderr)."""
+
+ # Optimization: If we are only using one pipe, or no pipe at
+ # all, using select() or threads is unnecessary.
+ if [self.stdin, self.stdout, self.stderr].count(None) >= 2:
+ stdout = None
+ stderr = None
+ if self.stdin:
+ if input:
+ self.stdin.write(input)
+ self.stdin.close()
+ elif self.stdout:
+ stdout = self.stdout.read()
+ elif self.stderr:
+ stderr = self.stderr.read()
+ self.wait()
+ return (stdout, stderr)
+
+ return self._communicate(input)
+
+
+ if mswindows:
+ #
+ # Windows methods
+ #
        def _get_handles(self, stdin, stdout, stderr):
            """Construct and return tupel with IO objects:
            p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite
            """
            # Nothing redirected: let the child inherit everything.
            if stdin is None and stdout is None and stderr is None:
                return (None, None, None, None, None, None)

            p2cread, p2cwrite = None, None
            c2pread, c2pwrite = None, None
            errread, errwrite = None, None

            # stdin: try to reuse the console handle; if there is no usable
            # standard handle (GetStdHandle returned None), fall through to
            # creating a pipe even though stdin was None.
            if stdin is None:
                p2cread = GetStdHandle(STD_INPUT_HANDLE)
            if p2cread is not None:
                pass
            elif stdin is None or stdin == PIPE:
                p2cread, p2cwrite = CreatePipe(None, 0)
                # Detach and turn into fd
                p2cwrite = p2cwrite.Detach()
                p2cwrite = msvcrt.open_osfhandle(p2cwrite, 0)
            elif isinstance(stdin, int):
                p2cread = msvcrt.get_osfhandle(stdin)
            else:
                # Assuming file-like object
                p2cread = msvcrt.get_osfhandle(stdin.fileno())
            # Handles passed to the child must be inheritable duplicates.
            p2cread = self._make_inheritable(p2cread)

            # stdout: same pattern as stdin.
            if stdout is None:
                c2pwrite = GetStdHandle(STD_OUTPUT_HANDLE)
            if c2pwrite is not None:
                pass
            elif stdout is None or stdout == PIPE:
                c2pread, c2pwrite = CreatePipe(None, 0)
                # Detach and turn into fd
                c2pread = c2pread.Detach()
                c2pread = msvcrt.open_osfhandle(c2pread, 0)
            elif isinstance(stdout, int):
                c2pwrite = msvcrt.get_osfhandle(stdout)
            else:
                # Assuming file-like object
                c2pwrite = msvcrt.get_osfhandle(stdout.fileno())
            c2pwrite = self._make_inheritable(c2pwrite)

            # stderr: same pattern, plus STDOUT means "share the stdout
            # write handle".
            if stderr is None:
                errwrite = GetStdHandle(STD_ERROR_HANDLE)
            if errwrite is not None:
                pass
            elif stderr is None or stderr == PIPE:
                errread, errwrite = CreatePipe(None, 0)
                # Detach and turn into fd
                errread = errread.Detach()
                errread = msvcrt.open_osfhandle(errread, 0)
            elif stderr == STDOUT:
                errwrite = c2pwrite
            elif isinstance(stderr, int):
                errwrite = msvcrt.get_osfhandle(stderr)
            else:
                # Assuming file-like object
                errwrite = msvcrt.get_osfhandle(stderr.fileno())
            errwrite = self._make_inheritable(errwrite)

            return (p2cread, p2cwrite,
                    c2pread, c2pwrite,
                    errread, errwrite)
+
+
        def _make_inheritable(self, handle):
            """Return a duplicate of handle, which is inheritable"""
            # DuplicateHandle(..., bInheritHandle=1) so the child created by
            # CreateProcess can receive the handle.
            return DuplicateHandle(GetCurrentProcess(), handle,
                                   GetCurrentProcess(), 0, 1,
                                   DUPLICATE_SAME_ACCESS)
+
+
+ def _find_w9xpopen(self):
+ """Find and return absolut path to w9xpopen.exe"""
+ w9xpopen = os.path.join(os.path.dirname(GetModuleFileName(0)),
+ "w9xpopen.exe")
+ if not os.path.exists(w9xpopen):
+ # Eeek - file-not-found - possibly an embedding
+ # situation - see if we can locate it in sys.exec_prefix
+ w9xpopen = os.path.join(os.path.dirname(sys.exec_prefix),
+ "w9xpopen.exe")
+ if not os.path.exists(w9xpopen):
+ raise RuntimeError("Cannot locate w9xpopen.exe, which is "
+ "needed for Popen to work with your "
+ "shell or platform.")
+ return w9xpopen
+
+
        def _execute_child(self, args, executable, preexec_fn, close_fds,
                           cwd, env, universal_newlines,
                           startupinfo, creationflags, shell,
                           p2cread, p2cwrite,
                           c2pread, c2pwrite,
                           errread, errwrite):
            """Execute program (MS Windows version)"""

            # CreateProcess takes a single command-line string.
            if not isinstance(args, types.StringTypes):
                args = list2cmdline(args)

            # Process startup details
            if startupinfo is None:
                startupinfo = STARTUPINFO()
            if None not in (p2cread, c2pwrite, errwrite):
                # All three handles are redirected: hand them to the child.
                startupinfo.dwFlags |= STARTF_USESTDHANDLES
                startupinfo.hStdInput = p2cread
                startupinfo.hStdOutput = c2pwrite
                startupinfo.hStdError = errwrite

            if shell:
                # Run through the command interpreter, with its window hidden.
                startupinfo.dwFlags |= STARTF_USESHOWWINDOW
                startupinfo.wShowWindow = SW_HIDE
                comspec = os.environ.get("COMSPEC", "cmd.exe")
                args = comspec + " /c " + args
                if (GetVersion() >= 0x80000000L or
                    os.path.basename(comspec).lower() == "command.com"):
                    # Win9x, or using command.com on NT.  We need to
                    # use the w9xpopen intermediate program.  For more
                    # information, see KB Q150956
                    # (http://web.archive.org/web/20011105084002/http://support.microsoft.com/support/kb/articles/Q150/9/56.asp)
                    w9xpopen = self._find_w9xpopen()
                    args = '"%s" %s' % (w9xpopen, args)
                    # Not passing CREATE_NEW_CONSOLE has been known to
                    # cause random failures on win9x.  Specifically a
                    # dialog: "Your program accessed mem currently in
                    # use at xxx" and a hopeful warning about the
                    # stability of your system.  Cost is Ctrl+C wont
                    # kill children.
                    creationflags |= CREATE_NEW_CONSOLE

            # Start the process
            try:
                hp, ht, pid, tid = CreateProcess(executable, args,
                                         # no special security
                                         None, None,
                                         # must inherit handles to pass std
                                         # handles
                                         1,
                                         creationflags,
                                         env,
                                         cwd,
                                         startupinfo)
            except pywintypes.error, e:
                # Translate pywintypes.error to WindowsError, which is
                # a subclass of OSError.  FIXME: We should really
                # translate errno using _sys_errlist (or similar), but
                # how can this be done from Python?
                raise WindowsError(*e.args)

            # Retain the process handle, but close the thread handle
            self._child_created = True
            self._handle = hp
            self.pid = pid
            ht.Close()

            # Child is launched.  Close the parent's copy of those pipe
            # handles that only the child should have open.  You need
            # to make sure that no handles to the write end of the
            # output pipe are maintained in this process or else the
            # pipe will not close when the child process exits and the
            # ReadFile will hang.
            if p2cread is not None:
                p2cread.Close()
            if c2pwrite is not None:
                c2pwrite.Close()
            if errwrite is not None:
                errwrite.Close()
+
+
+ def poll(self, _deadstate=None):
+ """Check if child process has terminated. Returns returncode
+ attribute."""
+ if self.returncode is None:
+ if WaitForSingleObject(self._handle, 0) == WAIT_OBJECT_0:
+ self.returncode = GetExitCodeProcess(self._handle)
+ return self.returncode
+
+
+ def wait(self):
+ """Wait for child process to terminate. Returns returncode
+ attribute."""
+ if self.returncode is None:
+ obj = WaitForSingleObject(self._handle, INFINITE)
+ self.returncode = GetExitCodeProcess(self._handle)
+ return self.returncode
+
+
        def _readerthread(self, fh, buffer):
            # Worker-thread helper: drain *fh* to end-of-file and park the
            # result in *buffer* (a one-element list, since threads cannot
            # return values directly).
            buffer.append(fh.read())
+
+
        def _communicate(self, input):
            # Windows cannot select() on pipe handles, so stdout/stderr are
            # drained by daemon worker threads while this thread feeds stdin.
            stdout = None # Return
            stderr = None # Return

            if self.stdout:
                stdout = []
                stdout_thread = threading.Thread(target=self._readerthread,
                                                 args=(self.stdout, stdout))
                stdout_thread.setDaemon(True)
                stdout_thread.start()
            if self.stderr:
                stderr = []
                stderr_thread = threading.Thread(target=self._readerthread,
                                                 args=(self.stderr, stderr))
                stderr_thread.setDaemon(True)
                stderr_thread.start()

            if self.stdin:
                if input is not None:
                    self.stdin.write(input)
                self.stdin.close()

            # Wait for the reader threads to hit end-of-file.
            if self.stdout:
                stdout_thread.join()
            if self.stderr:
                stderr_thread.join()

            # All data exchanged.  Translate lists into strings.
            if stdout is not None:
                stdout = stdout[0]
            if stderr is not None:
                stderr = stderr[0]

            # Translate newlines, if requested.  We cannot let the file
            # object do the translation: It is based on stdio, which is
            # impossible to combine with select (unless forcing no
            # buffering).
            if self.universal_newlines and hasattr(file, 'newlines'):
                if stdout:
                    stdout = self._translate_newlines(stdout)
                if stderr:
                    stderr = self._translate_newlines(stderr)

            self.wait()
            return (stdout, stderr)
+
+ else:
+ #
+ # POSIX methods
+ #
+ def _get_handles(self, stdin, stdout, stderr):
+ """Construct and return tupel with IO objects:
+ p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite
+ """
+ p2cread, p2cwrite = None, None
+ c2pread, c2pwrite = None, None
+ errread, errwrite = None, None
+
+ if stdin is None:
+ pass
+ elif stdin == PIPE:
+ p2cread, p2cwrite = os.pipe()
+ elif isinstance(stdin, int):
+ p2cread = stdin
+ else:
+ # Assuming file-like object
+ p2cread = stdin.fileno()
+
+ if stdout is None:
+ pass
+ elif stdout == PIPE:
+ c2pread, c2pwrite = os.pipe()
+ elif isinstance(stdout, int):
+ c2pwrite = stdout
+ else:
+ # Assuming file-like object
+ c2pwrite = stdout.fileno()
+
+ if stderr is None:
+ pass
+ elif stderr == PIPE:
+ errread, errwrite = os.pipe()
+ elif stderr == STDOUT:
+ errwrite = c2pwrite
+ elif isinstance(stderr, int):
+ errwrite = stderr
+ else:
+ # Assuming file-like object
+ errwrite = stderr.fileno()
+
+ return (p2cread, p2cwrite,
+ c2pread, c2pwrite,
+ errread, errwrite)
+
+
+ def _set_cloexec_flag(self, fd):
+ try:
+ cloexec_flag = fcntl.FD_CLOEXEC
+ except AttributeError:
+ cloexec_flag = 1
+
+ old = fcntl.fcntl(fd, fcntl.F_GETFD)
+ fcntl.fcntl(fd, fcntl.F_SETFD, old | cloexec_flag)
+
+
+ def _close_fds(self, but):
+ for i in xrange(3, MAXFD):
+ if i == but:
+ continue
+ try:
+ os.close(i)
+ except:
+ pass
+
+
        def _execute_child(self, args, executable, preexec_fn, close_fds,
                           cwd, env, universal_newlines,
                           startupinfo, creationflags, shell,
                           p2cread, p2cwrite,
                           c2pread, c2pwrite,
                           errread, errwrite):
            """Execute program (POSIX version)"""

            if isinstance(args, types.StringTypes):
                args = [args]
            else:
                args = list(args)

            if shell:
                args = ["/bin/sh", "-c"] + args

            if executable is None:
                executable = args[0]

            # For transferring possible exec failure from child to parent
            # The first char specifies the exception type: 0 means
            # OSError, 1 means some other error.
            errpipe_read, errpipe_write = os.pipe()
            # Close-on-exec so the parent reads EOF iff exec succeeded.
            self._set_cloexec_flag(errpipe_write)

            # PyPy modification: suspend finalizers around fork() so that
            # __del__ methods do not also run in the freshly forked child.
            # NOTE(review): disable_finalizers() is PyPy-specific and `gc`
            # is not imported in the visible part of this module -- confirm
            # the import and API against PyPy's gc module.
            in_child = False
            gc.disable_finalizers()
            try:
                self.pid = os.fork()
                in_child = (self.pid == 0)
            finally:
                if not in_child:
                    gc.enable_finalizers()
            self._child_created = True
            if self.pid == 0:
                # Child
                try:
                    # Close parent's pipe ends
                    if p2cwrite:
                        os.close(p2cwrite)
                    if c2pread:
                        os.close(c2pread)
                    if errread:
                        os.close(errread)
                    os.close(errpipe_read)

                    # Dup fds for child
                    if p2cread:
                        os.dup2(p2cread, 0)
                    if c2pwrite:
                        os.dup2(c2pwrite, 1)
                    if errwrite:
                        os.dup2(errwrite, 2)

                    # Close pipe fds.  Make sure we don't close the same
                    # fd more than once, or standard fds.
                    if p2cread and p2cread not in (0,):
                        os.close(p2cread)
                    if c2pwrite and c2pwrite not in (p2cread, 1):
                        os.close(c2pwrite)
                    if errwrite and errwrite not in (p2cread, c2pwrite, 2):
                        os.close(errwrite)

                    # Close all other fds, if asked for
                    if close_fds:
                        self._close_fds(but=errpipe_write)

                    if cwd is not None:
                        os.chdir(cwd)

                    if preexec_fn:
                        apply(preexec_fn)

                    if env is None:
                        os.execvp(executable, args)
                    else:
                        os.execvpe(executable, args, env)

                except:
                    # Anything that went wrong before or during exec:
                    # pickle the exception (with a formatted traceback
                    # attached) down the error pipe for the parent.
                    exc_type, exc_value, tb = sys.exc_info()
                    # Save the traceback and attach it to the exception object
                    exc_lines = traceback.format_exception(exc_type,
                                                           exc_value,
                                                           tb)
                    exc_value.child_traceback = ''.join(exc_lines)
                    os.write(errpipe_write, pickle.dumps(exc_value))

                    # This exitcode won't be reported to applications, so it
                    # really doesn't matter what we return.
                    os._exit(255)

            # Parent
            os.close(errpipe_write)
            # Drop the parent's copies of the child-side pipe ends.
            if p2cread and p2cwrite:
                os.close(p2cread)
            if c2pwrite and c2pread:
                os.close(c2pwrite)
            if errwrite and errread:
                os.close(errwrite)

            # Wait for exec to fail or succeed; possibly raising exception
            data = os.read(errpipe_read, 1048576) # Exceptions limited to 1 MB
            os.close(errpipe_read)
            if data != "":
                # The child reported a failure: reap it, then re-raise the
                # pickled exception here in the parent.
                os.waitpid(self.pid, 0)
                child_exception = pickle.loads(data)
                raise child_exception
+
+
+ def _handle_exitstatus(self, sts):
+ if os.WIFSIGNALED(sts):
+ self.returncode = -os.WTERMSIG(sts)
+ elif os.WIFEXITED(sts):
+ self.returncode = os.WEXITSTATUS(sts)
+ else:
+ # Should never happen
+ raise RuntimeError("Unknown child exit status!")
+
+
+ def poll(self, _deadstate=None):
+ """Check if child process has terminated. Returns returncode
+ attribute."""
+ if self.returncode is None:
+ try:
+ pid, sts = os.waitpid(self.pid, os.WNOHANG)
+ if pid == self.pid:
+ self._handle_exitstatus(sts)
+ except os.error:
+ if _deadstate is not None:
+ self.returncode = _deadstate
+ return self.returncode
+
+
+ def wait(self):
+ """Wait for child process to terminate. Returns returncode
+ attribute."""
+ if self.returncode is None:
+ pid, sts = os.waitpid(self.pid, 0)
+ self._handle_exitstatus(sts)
+ return self.returncode
+
+
        def _communicate(self, input):
            # Multiplex all open pipes with select() so the child cannot
            # deadlock us by filling one pipe while we block on another.
            read_set = []
            write_set = []
            stdout = None # Return
            stderr = None # Return

            if self.stdin:
                # Flush stdio buffer.  This might block, if the user has
                # been writing to .stdin in an uncontrolled fashion.
                self.stdin.flush()
                if input:
                    write_set.append(self.stdin)
                else:
                    self.stdin.close()
            if self.stdout:
                read_set.append(self.stdout)
                stdout = []
            if self.stderr:
                read_set.append(self.stderr)
                stderr = []

            input_offset = 0
            while read_set or write_set:
                rlist, wlist, xlist = select.select(read_set, write_set, [])

                if self.stdin in wlist:
                    # When select has indicated that the file is writable,
                    # we can write up to PIPE_BUF bytes without risk
                    # blocking.  POSIX defines PIPE_BUF >= 512
                    bytes_written = os.write(self.stdin.fileno(), buffer(input, input_offset, 512))
                    input_offset += bytes_written
                    if input_offset >= len(input):
                        self.stdin.close()
                        write_set.remove(self.stdin)

                if self.stdout in rlist:
                    data = os.read(self.stdout.fileno(), 1024)
                    if data == "":
                        # EOF: the child closed its end of the pipe.
                        self.stdout.close()
                        read_set.remove(self.stdout)
                    stdout.append(data)

                if self.stderr in rlist:
                    data = os.read(self.stderr.fileno(), 1024)
                    if data == "":
                        self.stderr.close()
                        read_set.remove(self.stderr)
                    stderr.append(data)

            # All data exchanged.  Translate lists into strings.
            if stdout is not None:
                stdout = ''.join(stdout)
            if stderr is not None:
                stderr = ''.join(stderr)

            # Translate newlines, if requested.  We cannot let the file
            # object do the translation: It is based on stdio, which is
            # impossible to combine with select (unless forcing no
            # buffering).
            if self.universal_newlines and hasattr(file, 'newlines'):
                if stdout:
                    stdout = self._translate_newlines(stdout)
                if stderr:
                    stderr = self._translate_newlines(stderr)

            self.wait()
            return (stdout, stderr)
+
+
def _demo_posix():
    # Demonstrate the POSIX code paths: pipe capture, preexec_fn,
    # chained pipelines, and the child-exec failure report.
    #
    # Example 1: Simple redirection: Get process list
    #
    plist = Popen(["ps"], stdout=PIPE).communicate()[0]
    print "Process list:"
    print plist

    #
    # Example 2: Change uid before executing child
    #
    if os.getuid() == 0:
        p = Popen(["id"], preexec_fn=lambda: os.setuid(100))
        p.wait()

    #
    # Example 3: Connecting several subprocesses
    #
    print "Looking for 'hda'..."
    p1 = Popen(["dmesg"], stdout=PIPE)
    p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE)
    print repr(p2.communicate()[0])

    #
    # Example 4: Catch execution error
    #
    print
    print "Trying a weird file..."
    try:
        print Popen(["/this/path/does/not/exist"]).communicate()
    except OSError, e:
        if e.errno == errno.ENOENT:
            print "The file didn't exist.  I thought so..."
            print "Child traceback:"
            print e.child_traceback
        else:
            print "Error", e.errno
    else:
        print >>sys.stderr, "Gosh.  No error."
+
+
def _demo_windows():
    # Demonstrate the Windows code paths: shell pipelines and plain
    # program execution.
    #
    # Example 1: Connecting several subprocesses
    #
    print "Looking for 'PROMPT' in set output..."
    p1 = Popen("set", stdout=PIPE, shell=True)
    p2 = Popen('find "PROMPT"', stdin=p1.stdout, stdout=PIPE)
    print repr(p2.communicate()[0])

    #
    # Example 2: Simple execution of program
    #
    print "Executing calc..."
    p = Popen("calc")
    p.wait()
+
+
if __name__ == "__main__":
    # Run the demo appropriate for the current platform when this module
    # is executed as a script.
    if mswindows:
        _demo_windows()
    else:
        _demo_posix()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/tarfile.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/tarfile.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,2176 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+#-------------------------------------------------------------------
+# tarfile.py
+#-------------------------------------------------------------------
+# Copyright (C) 2002 Lars Gustäbel <lars at gustaebel.de>
+# All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+"""Read from and write to tar format archives.
+"""
+
+__version__ = "$Revision: 53162 $"
+# $Source$
+
+version = "0.8.0"
+__author__ = "Lars Gustäbel (lars at gustaebel.de)"
+__date__ = "$Date: 2006-12-27 21:36:58 +1100 (Wed, 27 Dec 2006) $"
+__cvsid__ = "$Id: tarfile.py 53162 2006-12-27 10:36:58Z lars.gustaebel $"
+__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
+
+#---------
+# Imports
+#---------
+import sys
+import os
+import shutil
+import stat
+import errno
+import time
+import struct
+import copy
+
+if sys.platform == 'mac':
+ # This module needs work for MacOS9, especially in the area of pathname
+ # handling. In many places it is assumed a simple substitution of / by the
+ # local os.path.sep is good enough to convert pathnames, but this does not
+ # work with the mac rooted:path:name versus :nonrooted:path:name syntax
+ raise ImportError, "tarfile does not work for platform==mac"
+
+try:
+ import grp, pwd
+except ImportError:
+ grp = pwd = None
+
+# from tarfile import *
+__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
+
+#---------------------------------------------------------
+# tar constants
+#---------------------------------------------------------
+NUL = "\0" # the null character
+BLOCKSIZE = 512 # length of processing blocks
+RECORDSIZE = BLOCKSIZE * 20 # length of records
+MAGIC = "ustar" # magic tar string
+VERSION = "00" # version number
+
+LENGTH_NAME = 100 # maximum length of a filename
+LENGTH_LINK = 100 # maximum length of a linkname
+LENGTH_PREFIX = 155 # maximum length of the prefix field
+MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits)
+
+REGTYPE = "0" # regular file
+AREGTYPE = "\0" # regular file
+LNKTYPE = "1" # link (inside tarfile)
+SYMTYPE = "2" # symbolic link
+CHRTYPE = "3" # character special device
+BLKTYPE = "4" # block special device
+DIRTYPE = "5" # directory
+FIFOTYPE = "6" # fifo special device
+CONTTYPE = "7" # contiguous file
+
+GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
+GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
+GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
+
+#---------------------------------------------------------
+# tarfile constants
+#---------------------------------------------------------
+SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
+ SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
+ CONTTYPE, CHRTYPE, BLKTYPE,
+ GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
+ GNUTYPE_SPARSE)
+
+REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
+ CONTTYPE, GNUTYPE_SPARSE) # represent regular files
+
+#---------------------------------------------------------
+# Bits used in the mode field, values in octal.
+#---------------------------------------------------------
+S_IFLNK = 0120000 # symbolic link
+S_IFREG = 0100000 # regular file
+S_IFBLK = 0060000 # block device
+S_IFDIR = 0040000 # directory
+S_IFCHR = 0020000 # character device
+S_IFIFO = 0010000 # fifo
+
+TSUID = 04000 # set UID on execution
+TSGID = 02000 # set GID on execution
+TSVTX = 01000 # reserved
+
+TUREAD = 0400 # read by owner
+TUWRITE = 0200 # write by owner
+TUEXEC = 0100 # execute/search by owner
+TGREAD = 0040 # read by group
+TGWRITE = 0020 # write by group
+TGEXEC = 0010 # execute/search by group
+TOREAD = 0004 # read by other
+TOWRITE = 0002 # write by other
+TOEXEC = 0001 # execute/search by other
+
+#---------------------------------------------------------
+# Some useful functions
+#---------------------------------------------------------
+
def stn(s, length):
    """Convert a python string to a null-terminated string buffer.

    The result is exactly `length` bytes: a longer string is truncated,
    a shorter one is padded with NUL bytes.
    """
    padding = NUL * (length - len(s))
    return s[:length] + padding
+
def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] != chr(0200):
        # POSIX encoding: ASCII octal digits, possibly terminated
        # by NUL and/or space; an all-NUL field counts as zero.
        n = int(s.rstrip(NUL + " ") or "0", 8)
    else:
        # GNU base-256 encoding: a leading 0200 byte followed by the
        # value as a big-endian binary number.
        n = 0L
        for i in xrange(len(s) - 1):
            n <<= 8
            n += ord(s[i + 1])
    return n
+
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field.

    Returns a string of exactly `digits` bytes.  Raises ValueError if
    posix is true and the value does not fit in octal.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        s = "%0*o" % (digits - 1, n) + NUL
    else:
        if posix:
            raise ValueError("overflow in number field")

        if n < 0:
            # XXX We mimic GNU tar's behaviour with negative numbers,
            # this could raise OverflowError.
            n = struct.unpack("L", struct.pack("l", n))[0]

        # Emit the GNU base-256 form: value bytes big-endian, with a
        # leading 0200 marker byte (see nti() above).
        s = ""
        for i in xrange(digits - 1):
            s = chr(n & 0377) + s
            n >>= 8
        s = chr(0200) + s
    return s
+
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # Bytes 148..155 hold the chksum field itself; it is skipped and
    # accounted for as eight spaces (8 * 0x20 == 256).
    unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
    signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
    return unsigned_chksum, signed_chksum
+
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, the entire remaining content of src is copied.
    Raises IOError if src is exhausted before length bytes were read.
    """
    if length == 0:
        return
    if length is None:
        # Unbounded copy: let the standard library do the work.
        shutil.copyfileobj(src, dst)
        return

    # Bounded copy in 16 KiB chunks so huge members never have to be
    # held in memory at once.
    bufsize = 16 * 1024
    remaining = length
    while remaining > 0:
        want = min(bufsize, remaining)
        buf = src.read(want)
        if len(buf) < want:
            raise IOError("end of file reached")
        dst.write(buf)
        remaining -= want
    return
+
# Lookup table used by filemode() below.  One inner tuple per output
# character position ("-rwxrwxrwx"): the first matching (bit, char)
# pair in each group wins, "-" is emitted if none match.  The combined
# entries (e.g. TUEXEC|TSUID -> "s") must come before their parts.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
+
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for group in filemode_table:
        # First matching bit pattern in the group decides the
        # character; "-" when nothing matches.
        ch = "-"
        for bit, char in group:
            if mode & bit == bit:
                ch = char
                break
        chars.append(ch)
    return "".join(chars)
+
# The tar format always uses "/" as path separator; on platforms with a
# different native separator (e.g. Windows) convert after normalizing.
if os.sep != "/":
    normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
else:
    normpath = os.path.normpath
+
class TarError(Exception):
    """Base exception."""
    pass
class ExtractError(TarError):
    """General exception for extract errors."""
    pass
class ReadError(TarError):
    """Exception for unreadable tar archives."""
    pass
class CompressionError(TarError):
    """Exception for unavailable compression methods."""
    pass
class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
    pass
+
+#---------------------------
+# internal stream interface
+#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        # Translate the one-letter mode into os.open() flags; any other
        # mode is a programming error and raises KeyError.
        flags = {"r": os.O_RDONLY,
                 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC}[mode]
        if hasattr(os, "O_BINARY"):
            # Needed on platforms (e.g. Windows) that distinguish text
            # and binary mode at the descriptor level.
            flags |= os.O_BINARY
        self.fd = os.open(name, flags)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)
+
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""            # raw (compressed) byte buffer
        self.pos = 0L            # logical (uncompressed) position
        self.closed = False

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("")
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""   # decompressed byte buffer
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # The hasattr() guard protects against __init__ having failed
        # before self.closed was assigned.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", long(time.time()))
        # gzip header: magic (\037\213), deflate method (\010), FNAME
        # flag set (\010), 4-byte mtime, max-compression XFL (\002),
        # OS byte \377 (unknown).
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        # Accumulate and flush in bufsize-sized chunks; a partial chunk
        # stays buffered until close().
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # The native zlib crc is an unsigned 32-bit integer, but
                # the Python wrapper implicitly casts that to a signed C
                # long.  So, on a 32-bit box self.crc may "look negative",
                # while the same crc on a 64-bit box may "look positive".
                # To avoid irksome warnings from the `struct` module, force
                # it to look positive on all boxes.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)           # skip mtime, XFL and OS fields

        if flag & 4:
            # FEXTRA: skip the length-prefixed extra field.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            # FNAME: skip the null-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: skip the null-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: skip the header CRC16.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        # Forward seeking is emulated by reading and discarding data.
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in xrange(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Compressed stream: feed raw chunks to the decompressor until
        # enough decompressed data (self.dbuf) has accumulated.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            buf = self.cmp.decompress(buf)
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
+
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        # Read one block up front so getcomptype() can sniff the magic
        # bytes without consuming data from the caller's point of view.
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # First call returns the sniffed block; afterwards read() is
        # rebound to the underlying file's read method.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # NOTE(review): "BZh91" only matches bzip2 streams written with
        # blocksize 9 ("BZh" + level digit + pi block magic) -- streams
        # compressed with a lower level would be treated as plain tar.
        # Verify whether this restriction is intended.
        if self.buf.startswith("BZh91"):
            return "bz2"
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
+
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        self.fileobj = fileobj
        self.mode = mode
        self.init()

    def init(self):
        """(Re)initialize the (de)compressor; also used by seek()
           to rewind for backwards seeking.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        """Return up to size decompressed bytes."""
        b = [self.buf]
        x = len(self.buf)
        while x < size:
            try:
                raw = self.fileobj.read(self.blocksize)
                if not raw:
                    # Bug fix: on a truncated bz2 stream the underlying
                    # file is exhausted and read() returns "".
                    # decompress("") yields "" *without* raising
                    # EOFError, so without this break the loop would
                    # spin forever.
                    break
                data = self.bz2obj.decompress(raw)
                b.append(data)
            except EOFError:
                # End of the bz2 logical stream.
                break
            x += len(data)
        self.buf = "".join(b)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        """Seek to pos; backwards seeking restarts decompression
           from the beginning and reads forward.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        return self.pos

    def write(self, data):
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        if self.mode == "w":
            # Flush any data still held inside the compressor.
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
        self.fileobj.close()
# class _BZ2Proxy
+
+#------------------------
+# Extraction file object
+#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        self.fileobj = fileobj    # underlying archive file
        self.offset = offset      # start of this member's data in fileobj
        self.size = size          # logical size of the member
        self.sparse = sparse      # sparse-section map, or None for regular files
        self.position = 0         # current logical read position

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.
        """
        # Clamp the request so we never read past the member's end.
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        if self.sparse is None:
            return self.readnormal(size)
        else:
            return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        data = []
        while size > 0:
            buf = self.readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        # NOTE(review): self.sparse is presumably the sparse-map type
        # defined elsewhere in this module (see _data); its find()
        # returns the section covering self.position, or None past EOF.
        section = self.sparse.find(self.position)

        if section is None:
            return ""

        size = min(size, section.offset + section.size - self.position)

        if isinstance(section, _data):
            # A data section: the bytes are physically present in the
            # archive at section.realpos.
            realpos = section.realpos + self.position - section.offset
            self.fileobj.seek(self.offset + realpos)
            self.position += size
            return self.fileobj.read(size)
        else:
            # A hole: synthesize NUL bytes.
            self.position += size
            return NUL * size
#class _FileInFile
+
+
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    blocksize = 1024    # read granularity used by readline()

    def __init__(self, tarfile, tarinfo):
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0    # logical file position
        self.buffer = ""     # read-ahead buffer used by readline()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        buf = ""
        # Serve from the readline() buffer first so interleaved read()
        # and readline() calls stay consistent.
        if self.buffer:
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Fill the buffer blockwise until a newline (or EOF) shows up.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        if size != -1:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # All three modes clamp the result into [0, self.size].
        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Any seek invalidates the readline() buffer.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
+
+#------------------
+# Exported Classes
+#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name (dirnames must end with '/')
        self.mode = 0666        # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "user"     # user name
        self.gname = "group"    # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises ValueError on a truncated, empty or corrupt header.
        """
        if len(buf) != BLOCKSIZE:
            raise ValueError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise ValueError("empty header")

        # Field offsets follow the POSIX ustar header layout.
        tarinfo = cls()
        tarinfo.buf = buf
        tarinfo.name = buf[0:100].rstrip(NUL)
        tarinfo.mode = nti(buf[100:108])
        tarinfo.uid = nti(buf[108:116])
        tarinfo.gid = nti(buf[116:124])
        tarinfo.size = nti(buf[124:136])
        tarinfo.mtime = nti(buf[136:148])
        tarinfo.chksum = nti(buf[148:156])
        tarinfo.type = buf[156:157]
        tarinfo.linkname = buf[157:257].rstrip(NUL)
        tarinfo.uname = buf[265:297].rstrip(NUL)
        tarinfo.gname = buf[297:329].rstrip(NUL)
        tarinfo.devmajor = nti(buf[329:337])
        tarinfo.devminor = nti(buf[337:345])
        prefix = buf[345:500].rstrip(NUL)

        # The ustar prefix field extends names beyond 100 chars; for
        # GNU sparse members that field holds sparse data instead.
        if prefix and not tarinfo.issparse():
            tarinfo.name = prefix + "/" + tarinfo.name

        if tarinfo.chksum not in calc_chksums(buf):
            raise ValueError("invalid header")
        return tarinfo

    def tobuf(self, posix=False):
        """Return a tar header as a string of 512 byte blocks.

           If posix is true, strict POSIX.1-1988 headers are written
           (ValueError on overlong names or oversized files); otherwise
           GNU longname/longlink extension blocks may be prepended.
        """
        buf = ""
        type = self.type
        prefix = ""

        if self.name.endswith("/"):
            type = DIRTYPE

        if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            # Prevent "././@LongLink" from being normalized.
            name = self.name
        else:
            name = normpath(self.name)

        if type == DIRTYPE:
            # directories should end with '/'
            name += "/"

        linkname = self.linkname
        if linkname:
            # if linkname is empty we end up with a '.'
            linkname = normpath(linkname)

        if posix:
            if self.size > MAXSIZE_MEMBER:
                raise ValueError("file is too large (>= 8 GB)")

            if len(self.linkname) > LENGTH_LINK:
                raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))

            if len(name) > LENGTH_NAME:
                # Split the name at a '/' into (prefix, name) so that
                # both halves fit their respective header fields.
                prefix = name[:LENGTH_PREFIX + 1]
                while prefix and prefix[-1] != "/":
                    prefix = prefix[:-1]

                name = name[len(prefix):]
                prefix = prefix[:-1]

                if not prefix or len(name) > LENGTH_NAME:
                    raise ValueError("name is too long")

        else:
            if len(self.linkname) > LENGTH_LINK:
                buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)

            if len(name) > LENGTH_NAME:
                buf += self._create_gnulong(name, GNUTYPE_LONGNAME)

        parts = [
            stn(name, 100),
            itn(self.mode & 07777, 8, posix),
            itn(self.uid, 8, posix),
            itn(self.gid, 8, posix),
            itn(self.size, 12, posix),
            itn(self.mtime, 12, posix),
            "        ", # checksum field
            type,
            stn(self.linkname, 100),
            stn(MAGIC, 6),
            stn(VERSION, 2),
            stn(self.uname, 32),
            stn(self.gname, 32),
            itn(self.devmajor, 8, posix),
            itn(self.devminor, 8, posix),
            stn(prefix, 155)
        ]

        buf += struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        # Patch the real checksum into the placeholder field: bytes
        # 148..154 of the last block get "%06o\0", byte 155 keeps the
        # trailing space (512-364 == 148, 512-357 == 155).
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        self.buf = buf
        return buf

    def _create_gnulong(self, name, type):
        """Create a GNU longname/longlink header from name.
           It consists of an extended tar header, with the length
           of the longname as size, followed by data blocks,
           which contain the longname as a null terminated string.
        """
        name += NUL

        tarinfo = self.__class__()
        tarinfo.name = "././@LongLink"
        tarinfo.type = type
        tarinfo.mode = 0
        tarinfo.size = len(name)

        # create extended header
        buf = tarinfo.tobuf()
        # create name blocks
        buf += name
        # Pad the name data to a whole number of 512-byte blocks.
        blocks, remainder = divmod(len(name), BLOCKSIZE)
        if remainder > 0:
            buf += (BLOCKSIZE - remainder) * NUL
        return buf

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
+
+class TarFile(object):
+ """The TarFile Class provides an interface to tar archives.
+ """
+
+ debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
+
+ dereference = False # If true, add content of linked file to the
+ # tar file, else the link.
+
+ ignore_zeros = False # If true, skips empty or invalid blocks and
+ # continues processing.
+
+ errorlevel = 0 # If 0, fatal errors only appear in debug
+ # messages (if debug >= 0). If > 0, errors
+ # are passed to the caller as exceptions.
+
+ posix = False # If True, generates POSIX.1-1990-compliant
+ # archives (no GNU extensions!)
+
+ fileobject = ExFileObject
+
+ def __init__(self, name=None, mode="r", fileobj=None):
+ """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
+ read from an existing archive, 'a' to append data to an existing
+ file or 'w' to create a new file overwriting an existing one. `mode'
+ defaults to 'r'.
+ If `fileobj' is given, it is used for reading or writing data. If it
+ can be determined, `mode' is overridden by `fileobj's mode.
+ `fileobj' is not closed, when TarFile is closed.
+ """
+ self.name = os.path.abspath(name)
+
+ if len(mode) > 1 or mode not in "raw":
+ raise ValueError("mode must be 'r', 'a' or 'w'")
+ self._mode = mode
+ self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
+
+ if not fileobj:
+ fileobj = file(self.name, self.mode)
+ self._extfileobj = False
+ else:
+ if self.name is None and hasattr(fileobj, "name"):
+ self.name = os.path.abspath(fileobj.name)
+ if hasattr(fileobj, "mode"):
+ self.mode = fileobj.mode
+ self._extfileobj = True
+ self.fileobj = fileobj
+
+ # Init datastructures
+ self.closed = False
+ self.members = [] # list of members as TarInfo objects
+ self._loaded = False # flag if all members have been read
+ self.offset = 0L # current position in the archive file
+ self.inodes = {} # dictionary caching the inodes of
+ # archive members already added
+
+ if self._mode == "r":
+ self.firstmember = None
+ self.firstmember = self.next()
+
+ if self._mode == "a":
+ # Move to the end of the archive,
+ # before the first empty block.
+ self.firstmember = None
+ while True:
+ try:
+ tarinfo = self.next()
+ except ReadError:
+ self.fileobj.seek(0)
+ break
+ if tarinfo is None:
+ self.fileobj.seek(- BLOCKSIZE, 1)
+ break
+
+ if self._mode in "aw":
+ self._loaded = True
+
+ #--------------------------------------------------------------------------
+ # Below are the classmethods which act as alternate constructors to the
+ # TarFile class. The open() method is the only one that is needed for
+ # public use; it is the "super"-constructor and is able to select an
+ # adequate "sub"-constructor for a particular compression using the mapping
+ # from OPEN_METH.
+ #
+ # This concept allows one to subclass TarFile without losing the comfort of
+ # the super-constructor. A sub-constructor is registered and made available
+ # by adding it to the mapping in OPEN_METH.
+
    @classmethod
    def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
        """Open a tar archive for reading, writing or appending. Return
           an appropriate TarFile class.

           mode:
           'r' or 'r:*' open for reading with transparent compression
           'r:'         open for reading exclusively uncompressed
           'r:gz'       open for reading with gzip compression
           'r:bz2'      open for reading with bzip2 compression
           'a' or 'a:'  open for appending
           'w' or 'w:'  open for writing without compression
           'w:gz'       open for writing with gzip compression
           'w:bz2'      open for writing with bzip2 compression

           'r|*'        open a stream of tar blocks with transparent compression
           'r|'         open an uncompressed stream of tar blocks for reading
           'r|gz'       open a gzip compressed stream of tar blocks
           'r|bz2'      open a bzip2 compressed stream of tar blocks
           'w|'         open an uncompressed stream for writing
           'w|gz'       open a gzip compressed stream for writing
           'w|bz2'      open a bzip2 compressed stream for writing
        """

        if not name and not fileobj:
            raise ValueError("nothing to open")

        if mode in ("r", "r:*"):
            # Find out which *open() is appropriate for opening the file.
            # Each candidate is tried in turn; the file position is
            # restored after a failed attempt so the next one starts
            # from the same place.
            for comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
                if fileobj is not None:
                    saved_pos = fileobj.tell()
                try:
                    return func(name, "r", fileobj)
                except (ReadError, CompressionError):
                    if fileobj is not None:
                        fileobj.seek(saved_pos)
                    continue
            raise ReadError("file could not be opened successfully")

        elif ":" in mode:
            # Random-access mode with explicit compression, e.g. "r:gz".
            filemode, comptype = mode.split(":", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            # Select the *open() function according to
            # given compression.
            if comptype in cls.OPEN_METH:
                func = getattr(cls, cls.OPEN_METH[comptype])
            else:
                raise CompressionError("unknown compression type %r" % comptype)
            return func(name, filemode, fileobj)

        elif "|" in mode:
            # Stream mode (non-seekable), e.g. "r|gz".
            filemode, comptype = mode.split("|", 1)
            filemode = filemode or "r"
            comptype = comptype or "tar"

            if filemode not in "rw":
                raise ValueError("mode must be 'r' or 'w'")

            t = cls(name, filemode,
                    _Stream(name, filemode, comptype, fileobj, bufsize))
            # The _Stream is owned by the TarFile and closed with it.
            t._extfileobj = False
            return t

        elif mode in "aw":
            return cls.taropen(name, mode, fileobj)

        raise ValueError("undiscernible mode")
+
    @classmethod
    def taropen(cls, name, mode="r", fileobj=None):
        """Open uncompressed tar archive name for reading or writing.

           Raises ValueError for any mode other than 'r', 'a' or 'w'.
        """
        if len(mode) > 1 or mode not in "raw":
            raise ValueError("mode must be 'r', 'a' or 'w'")
        return cls(name, mode, fileobj)
+
    @classmethod
    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
        """Open gzip compressed tar archive name for reading or writing.
        Appending is not allowed.
        """
        if len(mode) > 1 or mode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        try:
            import gzip
            # gzip can be importable yet unusable when zlib is missing;
            # touching GzipFile detects that case.
            gzip.GzipFile
        except (ImportError, AttributeError):
            raise CompressionError("gzip module is not available")

        if fileobj is None:
            fileobj = file(name, mode + "b")

        try:
            t = cls.taropen(name, mode,
                gzip.GzipFile(name, mode, compresslevel, fileobj))
        except IOError:
            # The gzip/tar header could not be read.
            # NOTE(review): a fileobj opened above is not closed on this
            # path -- confirm against later upstream fixes.
            raise ReadError("not a gzip file")
        t._extfileobj = False
        return t
+
    @classmethod
    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
        """Open bzip2 compressed tar archive name for reading or writing.
        Appending is not allowed.
        """
        if len(mode) > 1 or mode not in "rw":
            raise ValueError("mode must be 'r' or 'w'.")

        try:
            import bz2
        except ImportError:
            raise CompressionError("bz2 module is not available")

        if fileobj is not None:
            # bz2.BZ2File cannot wrap an arbitrary file object, so an
            # adapter class is used for externally supplied ones.
            fileobj = _BZ2Proxy(fileobj, mode)
        else:
            fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

        try:
            t = cls.taropen(name, mode, fileobj)
        except IOError:
            # The first header block failed to decompress or parse.
            raise ReadError("not a bzip2 file")
        t._extfileobj = False
        return t
+
    # All *open() methods are registered here.
    # open() iterates over this mapping for transparent ("r"/"r:*")
    # compression detection; dict order is arbitrary in Python 2, so
    # every *open() must raise ReadError/CompressionError on mismatch.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
+
+ #--------------------------------------------------------------------------
+ # The public methods which TarFile provides:
+
    def close(self):
        """Close the TarFile. In write-mode, two finishing zero blocks are
        appended to the archive.
        """
        if self.closed:
            return

        if self._mode in "aw":
            # The end-of-archive marker is two zero-filled blocks.
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            blocks, remainder = divmod(self.offset, RECORDSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))

        if not self._extfileobj:
            # Only close file objects we opened ourselves.
            self.fileobj.close()
        self.closed = True
+
+ def getmember(self, name):
+ """Return a TarInfo object for member `name'. If `name' can not be
+ found in the archive, KeyError is raised. If a member occurs more
+ than once in the archive, its last occurence is assumed to be the
+ most up-to-date version.
+ """
+ tarinfo = self._getmember(name)
+ if tarinfo is None:
+ raise KeyError("filename %r not found" % name)
+ return tarinfo
+
+ def getmembers(self):
+ """Return the members of the archive as a list of TarInfo objects. The
+ list has the same order as the members in the archive.
+ """
+ self._check()
+ if not self._loaded: # if we want to obtain a list of
+ self._load() # all members, we first have to
+ # scan the whole archive.
+ return self.members
+
+ def getnames(self):
+ """Return the members of the archive as a list of their names. It has
+ the same order as the list returned by getmembers().
+ """
+ return [tarinfo.name for tarinfo in self.getmembers()]
+
    def gettarinfo(self, name=None, arcname=None, fileobj=None):
        """Create a TarInfo object for either the file `name' or the file
        object `fileobj' (using os.fstat on its file descriptor). You can
        modify some of the TarInfo's attributes before you add it using
        addfile(). If given, `arcname' specifies an alternative name for the
        file in the archive.
        """
        self._check("aw")

        # When fileobj is given, replace name by
        # fileobj's real name.
        if fileobj is not None:
            name = fileobj.name

        # Building the name of the member in the archive.
        # Backward slashes are converted to forward slashes,
        # Absolute paths are turned to relative paths.
        if arcname is None:
            arcname = name
        arcname = normpath(arcname)
        drv, arcname = os.path.splitdrive(arcname)
        while arcname[0:1] == "/":
            arcname = arcname[1:]

        # Now, fill the TarInfo object with
        # information specific for the file.
        tarinfo = TarInfo()

        # Use os.stat or os.lstat, depending on platform
        # and if symlinks shall be resolved.
        if fileobj is None:
            if hasattr(os, "lstat") and not self.dereference:
                statres = os.lstat(name)
            else:
                statres = os.stat(name)
        else:
            statres = os.fstat(fileobj.fileno())
        linkname = ""

        # NOTE: the local name `type' shadows the builtin; kept as-is
        # for fidelity with the upstream source.
        stmd = statres.st_mode
        if stat.S_ISREG(stmd):
            inode = (statres.st_ino, statres.st_dev)
            if not self.dereference and \
                    statres.st_nlink > 1 and inode in self.inodes:
                # Is it a hardlink to an already
                # archived file?
                type = LNKTYPE
                linkname = self.inodes[inode]
            else:
                # The inode is added only if its valid.
                # For win32 it is always 0.
                type = REGTYPE
                if inode[0]:
                    self.inodes[inode] = arcname
        elif stat.S_ISDIR(stmd):
            type = DIRTYPE
            if arcname[-1:] != "/":
                arcname += "/"
        elif stat.S_ISFIFO(stmd):
            type = FIFOTYPE
        elif stat.S_ISLNK(stmd):
            type = SYMTYPE
            linkname = os.readlink(name)
        elif stat.S_ISCHR(stmd):
            type = CHRTYPE
        elif stat.S_ISBLK(stmd):
            type = BLKTYPE
        else:
            # Unsupported file type (e.g. socket): signalled by None.
            return None

        # Fill the TarInfo object with all
        # information we can get.
        tarinfo.name = arcname
        tarinfo.mode = stmd
        tarinfo.uid = statres.st_uid
        tarinfo.gid = statres.st_gid
        if stat.S_ISREG(stmd):
            tarinfo.size = statres.st_size
        else:
            # Only regular files carry data blocks.
            tarinfo.size = 0L
        tarinfo.mtime = statres.st_mtime
        tarinfo.type = type
        tarinfo.linkname = linkname
        if pwd:
            try:
                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
            except KeyError:
                pass
        if grp:
            try:
                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
            except KeyError:
                pass

        if type in (CHRTYPE, BLKTYPE):
            if hasattr(os, "major") and hasattr(os, "minor"):
                tarinfo.devmajor = os.major(statres.st_rdev)
                tarinfo.devminor = os.minor(statres.st_rdev)
        return tarinfo
+
    def list(self, verbose=True):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
        the names of the members are printed. If it is True, an `ls -l'-like
        output is produced.
        """
        self._check()

        for tarinfo in self:
            if verbose:
                print filemode(tarinfo.mode),
                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                                 tarinfo.gname or tarinfo.gid),
                if tarinfo.ischr() or tarinfo.isblk():
                    # Device members show "major,minor" instead of a size.
                    print "%10s" % ("%d,%d" \
                                    % (tarinfo.devmajor, tarinfo.devminor)),
                else:
                    print "%10d" % tarinfo.size,
                print "%d-%02d-%02d %02d:%02d:%02d" \
                      % time.localtime(tarinfo.mtime)[:6],

            print tarinfo.name,

            if verbose:
                if tarinfo.issym():
                    print "->", tarinfo.linkname,
                if tarinfo.islnk():
                    print "link to", tarinfo.linkname,
            # Terminate the line started by the trailing-comma prints above.
            print
+
    def add(self, name, arcname=None, recursive=True):
        """Add the file `name' to the archive. `name' may be any type of file
        (directory, fifo, symbolic link, etc.). If given, `arcname'
        specifies an alternative name for the file in the archive.
        Directories are added recursively by default. This can be avoided by
        setting `recursive' to False.
        """
        self._check("aw")

        if arcname is None:
            arcname = name

        # Skip if somebody tries to archive the archive...
        if self.name is not None and os.path.abspath(name) == self.name:
            self._dbg(2, "tarfile: Skipped %r" % name)
            return

        # Special case: The user wants to add the current
        # working directory.
        if name == ".":
            if recursive:
                if arcname == ".":
                    arcname = ""
                for f in os.listdir("."):
                    self.add(f, os.path.join(arcname, f))
            return

        self._dbg(1, name)

        # Create a TarInfo object from the file.
        tarinfo = self.gettarinfo(name, arcname)

        if tarinfo is None:
            self._dbg(1, "tarfile: Unsupported type %r" % name)
            return

        # Append the tar header and data to the archive.
        if tarinfo.isreg():
            f = file(name, "rb")
            self.addfile(tarinfo, f)
            f.close()

        elif tarinfo.isdir():
            self.addfile(tarinfo)
            if recursive:
                for f in os.listdir(name):
                    self.add(os.path.join(name, f), os.path.join(arcname, f))

        else:
            # Fifo, device or (sym)link: header only, no data blocks.
            self.addfile(tarinfo)
+
    def addfile(self, tarinfo, fileobj=None):
        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
        given, tarinfo.size bytes are read from it and added to the archive.
        You can create TarInfo objects using gettarinfo().
        On Windows platforms, `fileobj' should always be opened with mode
        'rb' to avoid irritation about the file size.
        """
        self._check("aw")

        # Work on a copy so the caller's object is never mutated.
        tarinfo = copy.copy(tarinfo)

        buf = tarinfo.tobuf(self.posix)
        self.fileobj.write(buf)
        self.offset += len(buf)

        # If there's data to follow, append it.
        if fileobj is not None:
            copyfileobj(fileobj, self.fileobj, tarinfo.size)
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
            if remainder > 0:
                # Pad the last data block up to a full BLOCKSIZE with NULs.
                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
                blocks += 1
            self.offset += blocks * BLOCKSIZE

        self.members.append(tarinfo)
+
+ def extractall(self, path=".", members=None):
+ """Extract all members from the archive to the current working
+ directory and set owner, modification time and permissions on
+ directories afterwards. `path' specifies a different directory
+ to extract to. `members' is optional and must be a subset of the
+ list returned by getmembers().
+ """
+ directories = []
+
+ if members is None:
+ members = self
+
+ for tarinfo in members:
+ if tarinfo.isdir():
+ # Extract directory with a safe mode, so that
+ # all files below can be extracted as well.
+ try:
+ os.makedirs(os.path.join(path, tarinfo.name), 0777)
+ except EnvironmentError:
+ pass
+ directories.append(tarinfo)
+ else:
+ self.extract(tarinfo, path)
+
+ # Reverse sort directories.
+ directories.sort(lambda a, b: cmp(a.name, b.name))
+ directories.reverse()
+
+ # Set correct owner, mtime and filemode on directories.
+ for tarinfo in directories:
+ path = os.path.join(path, tarinfo.name)
+ try:
+ self.chown(tarinfo, path)
+ self.utime(tarinfo, path)
+ self.chmod(tarinfo, path)
+ except ExtractError, e:
+ if self.errorlevel > 1:
+ raise
+ else:
+ self._dbg(1, "tarfile: %s" % e)
+
    def extract(self, member, path=""):
        """Extract a member from the archive to the current working directory,
        using its full name. Its file information is extracted as accurately
        as possible. `member' may be a filename or a TarInfo object. You can
        specify a different directory using `path'.
        """
        self._check("r")

        if isinstance(member, TarInfo):
            tarinfo = member
        else:
            tarinfo = self.getmember(member)

        # Prepare the link target for makelink().
        if tarinfo.islnk():
            tarinfo._link_target = os.path.join(path, tarinfo.linkname)

        # NOTE(review): tarinfo.name is joined to `path' unchecked; a
        # member containing ".." components can escape the target
        # directory -- known limitation of this tarfile version.
        try:
            self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
        except EnvironmentError, e:
            # errorlevel 0: OS errors are only logged, not raised.
            if self.errorlevel > 0:
                raise
            else:
                if e.filename is None:
                    self._dbg(1, "tarfile: %s" % e.strerror)
                else:
                    self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
        except ExtractError, e:
            # errorlevel <= 1: non-fatal extraction problems are logged.
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
+
    def extractfile(self, member):
        """Extract a member from the archive as a file object. `member' may be
        a filename or a TarInfo object. If `member' is a regular file, a
        file-like object is returned. If `member' is a link, a file-like
        object is constructed from the link's target. If `member' is none of
        the above, None is returned.
        The file-like object is read-only and provides the following
        methods: read(), readline(), readlines(), seek() and tell()
        """
        self._check("r")

        if isinstance(member, TarInfo):
            tarinfo = member
        else:
            tarinfo = self.getmember(member)

        if tarinfo.isreg():
            return self.fileobject(self, tarinfo)

        elif tarinfo.type not in SUPPORTED_TYPES:
            # If a member's type is unknown, it is treated as a
            # regular file.
            return self.fileobject(self, tarinfo)

        elif tarinfo.islnk() or tarinfo.issym():
            if isinstance(self.fileobj, _Stream):
                # A small but ugly workaround for the case that someone tries
                # to extract a (sym)link as a file-object from a non-seekable
                # stream of tar blocks.
                raise StreamError("cannot extract (sym)link as file object")
            else:
                # A (sym)link's file object is its target's file object.
                return self.extractfile(self._getmember(tarinfo.linkname,
                                                        tarinfo))
        else:
            # If there's no data associated with the member (directory, chrdev,
            # blkdev, etc.), return None instead of a file object.
            return None
+
    def _extract_member(self, tarinfo, targetpath):
        """Extract the TarInfo object tarinfo to a physical
        file called targetpath.
        """
        # Fetch the TarInfo object for the given name
        # and build the destination pathname, replacing
        # forward slashes to platform specific separators.
        if targetpath[-1:] == "/":
            targetpath = targetpath[:-1]
        targetpath = os.path.normpath(targetpath)

        # Create all upper directories.
        upperdirs = os.path.dirname(targetpath)
        if upperdirs and not os.path.exists(upperdirs):
            # Fabricate a directory member so that missing parents
            # inherit this member's ownership and times.
            ti = TarInfo()
            ti.name = upperdirs
            ti.type = DIRTYPE
            ti.mode = 0777
            ti.mtime = tarinfo.mtime
            ti.uid = tarinfo.uid
            ti.gid = tarinfo.gid
            ti.uname = tarinfo.uname
            ti.gname = tarinfo.gname
            try:
                self._extract_member(ti, ti.name)
            except:
                # HACK: bare except deliberately swallows any failure;
                # the make*() call below will surface real problems.
                pass

        if tarinfo.islnk() or tarinfo.issym():
            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
        else:
            self._dbg(1, tarinfo.name)

        # Dispatch on member type; unknown types fall back to makeunknown().
        if tarinfo.isreg():
            self.makefile(tarinfo, targetpath)
        elif tarinfo.isdir():
            self.makedir(tarinfo, targetpath)
        elif tarinfo.isfifo():
            self.makefifo(tarinfo, targetpath)
        elif tarinfo.ischr() or tarinfo.isblk():
            self.makedev(tarinfo, targetpath)
        elif tarinfo.islnk() or tarinfo.issym():
            self.makelink(tarinfo, targetpath)
        elif tarinfo.type not in SUPPORTED_TYPES:
            self.makeunknown(tarinfo, targetpath)
        else:
            self.makefile(tarinfo, targetpath)

        self.chown(tarinfo, targetpath)
        if not tarinfo.issym():
            # chmod/utime would follow a symlink and alter its target.
            self.chmod(tarinfo, targetpath)
            self.utime(tarinfo, targetpath)
+
+ #--------------------------------------------------------------------------
+ # Below are the different file methods. They are called via
+ # _extract_member() when extract() is called. They can be replaced in a
+ # subclass to implement other functionality.
+
+ def makedir(self, tarinfo, targetpath):
+ """Make a directory called targetpath.
+ """
+ try:
+ os.mkdir(targetpath)
+ except EnvironmentError, e:
+ if e.errno != errno.EEXIST:
+ raise
+
+ def makefile(self, tarinfo, targetpath):
+ """Make a file called targetpath.
+ """
+ source = self.extractfile(tarinfo)
+ target = file(targetpath, "wb")
+ copyfileobj(source, target)
+ source.close()
+ target.close()
+
+ def makeunknown(self, tarinfo, targetpath):
+ """Make a file from a TarInfo object with an unknown type
+ at targetpath.
+ """
+ self.makefile(tarinfo, targetpath)
+ self._dbg(1, "tarfile: Unknown file type %r, " \
+ "extracted as regular file." % tarinfo.type)
+
+ def makefifo(self, tarinfo, targetpath):
+ """Make a fifo called targetpath.
+ """
+ if hasattr(os, "mkfifo"):
+ os.mkfifo(targetpath)
+ else:
+ raise ExtractError("fifo not supported by system")
+
+ def makedev(self, tarinfo, targetpath):
+ """Make a character or block device called targetpath.
+ """
+ if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
+ raise ExtractError("special devices not supported by system")
+
+ mode = tarinfo.mode
+ if tarinfo.isblk():
+ mode |= stat.S_IFBLK
+ else:
+ mode |= stat.S_IFCHR
+
+ os.mknod(targetpath, mode,
+ os.makedev(tarinfo.devmajor, tarinfo.devminor))
+
    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
        (platform limitation), we try to make a copy of the referenced file
        instead of a link.
        """
        linkpath = tarinfo.linkname
        try:
            if tarinfo.issym():
                os.symlink(linkpath, targetpath)
            else:
                # See extract().
                os.link(tarinfo._link_target, targetpath)
        except AttributeError:
            # os.symlink/os.link are missing (e.g. on Windows): emulate
            # the link by extracting or copying the referenced file.
            if tarinfo.issym():
                # A symlink target is relative to the link's directory.
                linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                        linkpath)
                linkpath = normpath(linkpath)

            try:
                # Try to extract the link's target from the archive itself.
                self._extract_member(self.getmember(linkpath), targetpath)
            except (EnvironmentError, KeyError), e:
                # Fall back to copying an already extracted file.
                linkpath = os.path.normpath(linkpath)
                try:
                    shutil.copy2(linkpath, targetpath)
                except EnvironmentError, e:
                    raise IOError("link could not be created")
+
    def chown(self, tarinfo, targetpath):
        """Set owner of targetpath according to tarinfo.
        """
        if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
            # We have to be root to do so.
            # Prefer the symbolic names stored in the archive, then the
            # numeric ids, finally our own gid/uid.
            # NOTE(review): only `pwd' is checked above although `grp'
            # is used too -- presumably both import or fail together.
            try:
                g = grp.getgrnam(tarinfo.gname)[2]
            except KeyError:
                try:
                    g = grp.getgrgid(tarinfo.gid)[2]
                except KeyError:
                    g = os.getgid()
            try:
                u = pwd.getpwnam(tarinfo.uname)[2]
            except KeyError:
                try:
                    u = pwd.getpwuid(tarinfo.uid)[2]
                except KeyError:
                    u = os.getuid()
            try:
                if tarinfo.issym() and hasattr(os, "lchown"):
                    # Change the link itself, not its target.
                    os.lchown(targetpath, u, g)
                else:
                    if sys.platform != "os2emx":
                        os.chown(targetpath, u, g)
            except EnvironmentError, e:
                raise ExtractError("could not change owner")
+
+ def chmod(self, tarinfo, targetpath):
+ """Set file permissions of targetpath according to tarinfo.
+ """
+ if hasattr(os, 'chmod'):
+ try:
+ os.chmod(targetpath, tarinfo.mode)
+ except EnvironmentError, e:
+ raise ExtractError("could not change mode")
+
+ def utime(self, tarinfo, targetpath):
+ """Set modification time of targetpath according to tarinfo.
+ """
+ if not hasattr(os, 'utime'):
+ return
+ if sys.platform == "win32" and tarinfo.isdir():
+ # According to msdn.microsoft.com, it is an error (EACCES)
+ # to use utime() on directories.
+ return
+ try:
+ os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
+ except EnvironmentError, e:
+ raise ExtractError("could not change modification time")
+
+ #--------------------------------------------------------------------------
    def next(self):
        """Return the next member of the archive as a TarInfo object, when
        TarFile is opened for reading. Return None if there is no more
        available.
        """
        self._check("ra")
        if self.firstmember is not None:
            # A member was read ahead; hand it out before touching
            # the file again.
            m = self.firstmember
            self.firstmember = None
            return m

        # Read the next block.
        self.fileobj.seek(self.offset)
        while True:
            buf = self.fileobj.read(BLOCKSIZE)
            if not buf:
                # Physical end of file.
                return None

            try:
                tarinfo = TarInfo.frombuf(buf)

                # Set the TarInfo object's offset to the current position of the
                # TarFile and set self.offset to the position where the data blocks
                # should begin.
                tarinfo.offset = self.offset
                self.offset += BLOCKSIZE

                tarinfo = self.proc_member(tarinfo)

            except ValueError, e:
                # frombuf()/proc_member() raise ValueError on a bad header.
                if self.ignore_zeros:
                    self._dbg(2, "0x%X: empty or invalid block: %s" %
                              (self.offset, e))
                    self.offset += BLOCKSIZE
                    continue
                else:
                    if self.offset == 0:
                        # Nothing readable at all: probably not a tar file.
                        raise ReadError("empty, unreadable or compressed "
                                        "file: %s" % e)
                    return None
            break

        # Some old tar programs represent a directory as a regular
        # file with a trailing slash.
        if tarinfo.isreg() and tarinfo.name.endswith("/"):
            tarinfo.type = DIRTYPE

        # Directory names should have a '/' at the end.
        if tarinfo.isdir():
            tarinfo.name += "/"

        self.members.append(tarinfo)
        return tarinfo
+
+ #--------------------------------------------------------------------------
+ # The following are methods that are called depending on the type of a
+ # member. The entry point is proc_member() which is called with a TarInfo
+ # object created from the header block from the current offset. The
+ # proc_member() method can be overridden in a subclass to add custom
+ # proc_*() methods. A proc_*() method MUST implement the following
+ # operations:
+ # 1. Set tarinfo.offset_data to the position where the data blocks begin,
+ # if there is data that follows.
+ # 2. Set self.offset to the position where the next member's header will
+ # begin.
+ # 3. Return tarinfo or another valid TarInfo object.
+ def proc_member(self, tarinfo):
+ """Choose the right processing method for tarinfo depending
+ on its type and call it.
+ """
+ if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
+ return self.proc_gnulong(tarinfo)
+ elif tarinfo.type == GNUTYPE_SPARSE:
+ return self.proc_sparse(tarinfo)
+ else:
+ return self.proc_builtin(tarinfo)
+
+ def proc_builtin(self, tarinfo):
+ """Process a builtin type member or an unknown member
+ which will be treated as a regular file.
+ """
+ tarinfo.offset_data = self.offset
+ if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
+ # Skip the following data blocks.
+ self.offset += self._block(tarinfo.size)
+ return tarinfo
+
    def proc_gnulong(self, tarinfo):
        """Process the blocks that hold a GNU longname
        or longlink member.
        """
        # The data blocks of this pseudo-member carry the long
        # name/linkname of the real member that follows.
        buf = ""
        count = tarinfo.size
        while count > 0:
            block = self.fileobj.read(BLOCKSIZE)
            buf += block
            self.offset += BLOCKSIZE
            count -= BLOCKSIZE

        # Fetch the next header and process it.
        b = self.fileobj.read(BLOCKSIZE)
        t = TarInfo.frombuf(b)
        t.offset = self.offset
        self.offset += BLOCKSIZE
        # NOTE: the local `next' shadows the builtin; kept for fidelity
        # with the upstream source.
        next = self.proc_member(t)

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = tarinfo.offset
        if tarinfo.type == GNUTYPE_LONGNAME:
            next.name = buf.rstrip(NUL)
        elif tarinfo.type == GNUTYPE_LONGLINK:
            next.linkname = buf.rstrip(NUL)

        return next
+
    def proc_sparse(self, tarinfo):
        """Process a GNU sparse header plus extra headers.
        """
        buf = tarinfo.buf
        sp = _ringbuffer()
        # Offset 386 is where the sparse structs start inside a GNU
        # header block; each struct is 12 bytes offset + 12 bytes size.
        pos = 386
        lastpos = 0L
        realpos = 0L
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                # Gap between data sections: record a hole.
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        # Byte 482 holds the isextended flag, bytes 483..494 the real
        # (uncompacted) size of the sparse file.
        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = self.fileobj.read(BLOCKSIZE)
            self.offset += BLOCKSIZE
            pos = 0
            # An extension block holds up to 21 sparse structs.
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = ord(buf[504])

        if lastpos < origsize:
            # Trailing hole up to the file's real size.
            sp.append(_hole(lastpos, origsize - lastpos))

        tarinfo.sparse = sp

        tarinfo.offset_data = self.offset
        self.offset += self._block(tarinfo.size)
        # Report the real size, not the compacted on-archive size.
        tarinfo.size = origsize

        return tarinfo
+
+ #--------------------------------------------------------------------------
+ # Little helper methods:
+
+ def _block(self, count):
+ """Round up a byte count by BLOCKSIZE and return it,
+ e.g. _block(834) => 1024.
+ """
+ blocks, remainder = divmod(count, BLOCKSIZE)
+ if remainder:
+ blocks += 1
+ return blocks * BLOCKSIZE
+
+ def _getmember(self, name, tarinfo=None):
+ """Find an archive member by name from bottom to top.
+ If tarinfo is given, it is used as the starting point.
+ """
+ # Ensure that all members have been loaded.
+ members = self.getmembers()
+
+ if tarinfo is None:
+ end = len(members)
+ else:
+ end = members.index(tarinfo)
+
+ for i in xrange(end - 1, -1, -1):
+ if name == members[i].name:
+ return members[i]
+
+ def _load(self):
+ """Read through the entire archive file and look for readable
+ members.
+ """
+ while True:
+ tarinfo = self.next()
+ if tarinfo is None:
+ break
+ self._loaded = True
+
+ def _check(self, mode=None):
+ """Check if TarFile is still open, and if the operation's mode
+ corresponds to TarFile's mode.
+ """
+ if self.closed:
+ raise IOError("%s is closed" % self.__class__.__name__)
+ if mode is not None and self._mode not in mode:
+ raise IOError("bad operation for mode %r" % self._mode)
+
+ def __iter__(self):
+ """Provide an iterator object.
+ """
+ if self._loaded:
+ return iter(self.members)
+ else:
+ return TarIter(self)
+
+ def _dbg(self, level, msg):
+ """Write debugging output to sys.stderr.
+ """
+ if level <= self.debug:
+ print >> sys.stderr, msg
+# class TarFile
+
+class TarIter:
+ """Iterator Class.
+
+ for tarinfo in TarFile(...):
+ suite...
+ """
+
+ def __init__(self, tarfile):
+ """Construct a TarIter object.
+ """
+ self.tarfile = tarfile
+ self.index = 0
+ def __iter__(self):
+ """Return iterator object.
+ """
+ return self
+ def next(self):
+ """Return the next item using TarFile's next() method.
+ When all members have been read, set TarFile as _loaded.
+ """
+ # Fix for SF #1100429: Under rare circumstances it can
+ # happen that getmembers() is called during iteration,
+ # which will cause TarIter to stop prematurely.
+ if not self.tarfile._loaded:
+ tarinfo = self.tarfile.next()
+ if not tarinfo:
+ self.tarfile._loaded = True
+ raise StopIteration
+ else:
+ try:
+ tarinfo = self.tarfile.members[self.index]
+ except IndexError:
+ raise StopIteration
+ self.index += 1
+ return tarinfo
+
+# Helper classes for sparse file support
+class _section:
+ """Base class for _data and _hole.
+ """
+ def __init__(self, offset, size):
+ self.offset = offset
+ self.size = size
+ def __contains__(self, offset):
+ return self.offset <= offset < self.offset + self.size
+
+class _data(_section):
+ """Represent a data section in a sparse file.
+ """
+ def __init__(self, offset, size, realpos):
+ _section.__init__(self, offset, size)
+ self.realpos = realpos
+
+class _hole(_section):
+ """Represent a hole section in a sparse file.
+ """
+ pass
+
+class _ringbuffer(list):
+ """Ringbuffer class which increases performance
+ over a regular list.
+ """
+ def __init__(self):
+ self.idx = 0
+ def find(self, offset):
+ idx = self.idx
+ while True:
+ item = self[idx]
+ if offset in item:
+ break
+ idx += 1
+ if idx == len(self):
+ idx = 0
+ if idx == self.idx:
+ # End of File
+ return None
+ self.idx = idx
+ return item
+
#---------------------------------------------
# zipfile compatible TarFile class
#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
    ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        # `file' mirrors zipfile's parameter name (and shadows the builtin).
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Mirror each member's attributes under zipfile's names.
            members = self.tarfile.getmembers()
            for m in members:
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        return map(lambda m: m.name, self.infolist())
    def infolist(self):
        # Only file-like members, matching zipfile's infolist().
        return filter(lambda m: m.type in REGULAR_TYPES,
                      self.tarfile.getmembers())
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        # Tar archives carry no per-member CRC, so there is nothing to test.
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        # Translate zipfile attribute names back to tarfile's.
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))
    def close(self):
        self.tarfile.close()
#class TarFileCompat
+
+#--------------------
+# exported functions
+#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
    are able to handle, else return False.
    """
    try:
        archive = open(name)
        archive.close()
        return True
    except TarError:
        return False
+
# Re-export the TarFile.open factory as the module-level open().
# NOTE: this shadows the builtin open() within this module.
open = TarFile.open
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/traceback.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/traceback.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,312 @@
+"""Extract, format and print information about Python stack traces."""
+
+import linecache
+import sys
+import types
+
+__all__ = ['extract_stack', 'extract_tb', 'format_exception',
+ 'format_exception_only', 'format_list', 'format_stack',
+ 'format_tb', 'print_exc', 'format_exc', 'print_exception',
+ 'print_last', 'print_stack', 'print_tb', 'tb_lineno']
+
+def _print(file, str='', terminator='\n'):
+ file.write(str+terminator)
+
+
def print_list(extracted_list, file=None):
    """Print the list of tuples as returned by extract_tb() or
    extract_stack() as a formatted stack trace to the given file
    (sys.stderr by default)."""
    out = sys.stderr if file is None else file
    for filename, lineno, name, line in extracted_list:
        _print(out,
               ' File "%s", line %d, in %s' % (filename, lineno, name))
        if line:
            _print(out, ' %s' % line.strip())
+
def format_list(extracted_list):
    """Format a list of traceback entry tuples for printing.

    Given a list of tuples as returned by extract_tb() or
    extract_stack(), return a list of strings ready for printing.
    Each string in the resulting list corresponds to the item with the
    same index in the argument list. Each string ends in a newline;
    the strings may contain internal newlines as well, for those items
    whose source text line is not None.
    """
    return [
        (' File "%s", line %d, in %s\n' % (filename, lineno, name)) +
        ((' %s\n' % line.strip()) if line else '')
        for filename, lineno, name, line in extracted_list
    ]
+
+
def print_tb(tb, limit=None, file=None):
    """Print up to 'limit' stack trace entries from the traceback 'tb'.

    If 'limit' is omitted or None, all entries are printed (subject to
    sys.tracebacklimit if set). If 'file' is omitted or None, the
    output goes to sys.stderr; otherwise 'file' should be an open file
    or file-like object with a write() method.
    """
    out = sys.stderr if file is None else file
    if limit is None:
        limit = getattr(sys, 'tracebacklimit', None)
    count = 0
    while tb is not None:
        if limit is not None and count >= limit:
            break
        frame = tb.tb_frame
        lineno = tb.tb_lineno
        code = frame.f_code
        _print(out,
               ' File "%s", line %d, in %s'
               % (code.co_filename, lineno, code.co_name))
        # Refresh the cache in case the source changed, then show the line.
        linecache.checkcache(code.co_filename)
        src = linecache.getline(code.co_filename, lineno, frame.f_globals)
        if src:
            _print(out, ' ' + src.strip())
        tb = tb.tb_next
        count += 1
+
def format_tb(tb, limit = None):
    """A shorthand for 'format_list(extract_tb(tb, limit))'."""
    return format_list(extract_tb(tb, limit))
+
def extract_tb(tb, limit=None):
    """Return list of up to limit pre-processed entries from traceback.

    This is useful for alternate formatting of stack traces. If
    'limit' is omitted or None, all entries are extracted. A
    pre-processed stack trace entry is a quadruple (filename, line
    number, function name, text) representing the information that is
    usually printed for a stack trace. The text is a string with
    leading and trailing whitespace stripped; if the source is not
    available it is None.
    """
    if limit is None:
        limit = getattr(sys, 'tracebacklimit', None)
    entries = []
    count = 0
    while tb is not None and (limit is None or count < limit):
        frame = tb.tb_frame
        lineno = tb.tb_lineno
        code = frame.f_code
        filename = code.co_filename
        linecache.checkcache(filename)
        src = linecache.getline(filename, lineno, frame.f_globals)
        entries.append((filename, lineno, code.co_name,
                        src.strip() if src else None))
        tb = tb.tb_next
        count += 1
    return entries
+
+
def print_exception(etype, value, tb, limit=None, file=None):
    """Print exception up to 'limit' stack trace entries from 'tb' to 'file'.

    This differs from print_tb() in the following ways: (1) if
    traceback is not None, it prints a header "Traceback (most recent
    call last):"; (2) it prints the exception type and value after the
    stack trace; (3) if type is SyntaxError and value has the
    appropriate format, it prints the line where the syntax error
    occurred with a caret on the next line indicating the approximate
    position of the error.
    """
    out = sys.stderr if file is None else file
    if tb:
        _print(out, 'Traceback (most recent call last):')
        print_tb(tb, limit, out)
    lines = format_exception_only(etype, value)
    # All but the last line get a trailing space (pre-newline join
    # style); the final line is written without a terminator.
    for line in lines[:-1]:
        _print(out, line, ' ')
    _print(out, lines[-1], '')
+
def format_exception(etype, value, tb, limit=None):
    """Format a stack trace and the exception information.

    The arguments have the same meaning as the corresponding arguments
    to print_exception(). The return value is a list of strings, each
    ending in a newline and some containing internal newlines. When
    these lines are concatenated and printed, exactly the same text is
    printed as does print_exception().
    """
    result = []
    if tb:
        result.append('Traceback (most recent call last):\n')
        result.extend(format_tb(tb, limit))
    result.extend(format_exception_only(etype, value))
    return result
+
def format_exception_only(etype, value):
    """Format the exception part of a traceback.

    The arguments are the exception type and value such as given by
    sys.last_type and sys.last_value. The return value is a list of
    strings, each ending in a newline.

    Normally, the list contains a single string; however, for
    SyntaxError exceptions, it contains several lines that (when
    printed) display detailed information about where the syntax
    error occurred.

    The message indicating which exception occurred is always the last
    string in the list.

    """

    # An instance should not have a meaningful value parameter, but
    # sometimes does, particularly for string exceptions, such as
    # >>> raise string1, string2 # deprecated
    #
    # Clear these out first because issubtype(string1, SyntaxError)
    # would throw another exception and mask the original problem.
    #
    # 'etype' may be an exception instance, an old-style class instance,
    # None, or a plain string (Python 2 string exceptions); none of those
    # is a class we can inspect, so emit just the final line.
    if (isinstance(etype, BaseException) or
        isinstance(etype, types.InstanceType) or
        etype is None or type(etype) is str):
        return [_format_final_exc_line(etype, value)]

    stype = etype.__name__

    # Anything other than a SyntaxError gets the plain "Type: message" line.
    if not issubclass(etype, SyntaxError):
        return [_format_final_exc_line(stype, value)]

    # It was a syntax error; show exactly where the problem was found.
    lines = []
    try:
        # SyntaxError values unpack as (msg, (filename, lineno, offset, text)).
        msg, (filename, lineno, offset, badline) = value
    except Exception:
        # Value did not have the expected shape; format it like any
        # other exception value below.
        pass
    else:
        filename = filename or "<string>"
        lines.append(' File "%s", line %d\n' % (filename, lineno))
        if badline is not None:
            lines.append(' %s\n' % badline.strip())
            if offset is not None:
                caretspace = badline[:offset].lstrip()
                # non-space whitespace (like tabs) must be kept for alignment
                caretspace = ((c.isspace() and c or ' ') for c in caretspace)
                # only three spaces to account for offset 1 == pos 0
                lines.append(' %s^\n' % ''.join(caretspace))
        value = msg

    lines.append(_format_final_exc_line(stype, value))
    return lines
+
def _format_final_exc_line(etype, value):
    """Return the single final line -- normal case for format_exception_only.

    Returns a string (not a list): "Type\n" when the value is empty,
    otherwise "Type: value\n".
    """
    valuestr = _some_str(value)
    if value is None or not valuestr:
        line = "%s\n" % etype
    else:
        line = "%s: %s\n" % (etype, valuestr)
    return line
+
+def _some_str(value):
+ try:
+ return str(value)
+ except:
+ return '<unprintable %s object>' % type(value).__name__
+
+
def print_exc(limit=None, file=None):
    """Shorthand for 'print_exception(sys.exc_type, sys.exc_value, sys.exc_traceback, limit, file)'.
    (In fact, it uses sys.exc_info() to retrieve the same information
    in a thread-safe way.)"""
    out = sys.stderr if file is None else file
    try:
        etype, value, tb = sys.exc_info()
        print_exception(etype, value, tb, limit, out)
    finally:
        # Break the frame -> traceback reference cycle.
        etype = value = tb = None
+
+
def format_exc(limit=None):
    """Like print_exc() but return a string."""
    try:
        etype, value, tb = sys.exc_info()
        parts = format_exception(etype, value, tb, limit)
        return ''.join(parts)
    finally:
        # Break the frame -> traceback reference cycle.
        etype = value = tb = None
+
+
def print_last(limit=None, file=None):
    """This is a shorthand for 'print_exception(sys.last_type,
    sys.last_value, sys.last_traceback, limit, file)'."""
    out = sys.stderr if file is None else file
    print_exception(sys.last_type, sys.last_value, sys.last_traceback,
                    limit, out)
+
+
def print_stack(f=None, limit=None, file=None):
    """Print a stack trace from its invocation point.

    The optional 'f' argument can be used to specify an alternate
    stack frame at which to start. The optional 'limit' and 'file'
    arguments have the same meaning as for print_exception().
    """
    frame = f
    if frame is None:
        # Raise-and-catch to obtain the caller's frame without
        # depending on sys._getframe().
        try:
            raise ZeroDivisionError
        except ZeroDivisionError:
            frame = sys.exc_info()[2].tb_frame.f_back
    print_list(extract_stack(frame, limit), file)
+
def format_stack(f=None, limit=None):
    """Shorthand for 'format_list(extract_stack(f, limit))'."""
    frame = f
    if frame is None:
        # Raise-and-catch to obtain the caller's frame without
        # depending on sys._getframe().
        try:
            raise ZeroDivisionError
        except ZeroDivisionError:
            frame = sys.exc_info()[2].tb_frame.f_back
    return format_list(extract_stack(frame, limit))
+
def extract_stack(f=None, limit=None):
    """Extract the raw traceback from the current stack frame.

    The return value has the same format as for extract_tb(). The
    optional 'f' and 'limit' arguments have the same meaning as for
    print_stack(). Each item in the list is a quadruple (filename,
    line number, function name, text), and the entries are in order
    from oldest to newest stack frame.
    """
    frame = f
    if frame is None:
        # Raise-and-catch to obtain the caller's frame without
        # depending on sys._getframe().
        try:
            raise ZeroDivisionError
        except ZeroDivisionError:
            frame = sys.exc_info()[2].tb_frame.f_back
    if limit is None:
        limit = getattr(sys, 'tracebacklimit', None)
    stack = []
    count = 0
    while frame is not None and (limit is None or count < limit):
        code = frame.f_code
        lineno = frame.f_lineno
        filename = code.co_filename
        linecache.checkcache(filename)
        src = linecache.getline(filename, lineno, frame.f_globals)
        stack.append((filename, lineno, code.co_name,
                      src.strip() if src else None))
        frame = frame.f_back
        count += 1
    # Walked newest-to-oldest; callers expect oldest first.
    stack.reverse()
    return stack
+
def tb_lineno(tb):
    """Calculate correct line number of traceback given in tb.

    Obsolete in 2.3: tb.tb_lineno is already correct there.
    """
    lineno = tb.tb_lineno
    return lineno
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/types.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/types.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,101 @@
+"""Define names for all type symbols known in the standard interpreter.
+
+Types that are part of optional modules (e.g. array) are not listed.
+"""
+import sys
+
+# Iterators in Python aren't a matter of type but of protocol. A large
+# and changing number of builtin types implement *some* flavor of
+# iterator. Don't check the type! Use hasattr to check for both
+# "__iter__" and "next" attributes instead.
+
# Aliases for the fundamental types.
NoneType = type(None)
TypeType = type
ObjectType = object

# Numeric types.
IntType = int
LongType = long
FloatType = float
BooleanType = bool
try:
    ComplexType = complex
except NameError:
    # Interpreter built without complex number support.
    pass

StringType = str

# StringTypes is already outdated. Instead of writing "type(x) in
# types.StringTypes", you should use "isinstance(x, basestring)". But
# we keep it around for compatibility with Python 2.2.
try:
    UnicodeType = unicode
    StringTypes = (StringType, UnicodeType)
except NameError:
    # No unicode support in this build.
    StringTypes = (StringType,)

BufferType = buffer

# Container types.
TupleType = tuple
ListType = list
DictType = DictionaryType = dict

# Derive function/code types from a throwaway function.
def _f(): pass
FunctionType = type(_f)
LambdaType = type(lambda: None)         # Same as FunctionType
try:
    CodeType = type(_f.func_code)
except RuntimeError:
    # Execution in restricted environment
    pass

# Derive the generator type from a throwaway generator function.
def _g():
    yield 1
GeneratorType = type(_g())

# Old-style class, instance and method types.
class _C:
    def _m(self): pass
ClassType = _classobj                   # PyPy-specific, from __builtin__
UnboundMethodType = type(_C._m)         # Same as MethodType
_x = _C()
InstanceType = _instance                # PyPy-specific, from __builtin__
MethodType = type(_x._m)

BuiltinFunctionType = type(len)
BuiltinMethodType = type([].append)     # Same as BuiltinFunctionType

ModuleType = type(sys)
FileType = file
XRangeType = xrange

# Raise and catch an exception to obtain a live traceback object, from
# which the traceback and frame types can be read.
try:
    raise TypeError
except TypeError:
    try:
        tb = sys.exc_info()[2]
        TracebackType = type(tb)
        FrameType = type(tb.tb_frame)
    except AttributeError:
        # In the restricted environment, exc_info returns (None, None,
        # None). Then, tb.tb_frame gives an attribute error
        pass
    tb = None; del tb

SliceType = slice
EllipsisType = type(Ellipsis)

DictProxyType = type(TypeType.__dict__)
NotImplementedType = type(NotImplemented)

# Extension types defined in a C helper module. XXX There may be no
# equivalent in implementations other than CPython, so it seems better to
# leave them undefined than to set them to e.g. None.
try:
    import _types
except ImportError:
    pass
else:
    GetSetDescriptorType = type(_types.Helper.getter)
    MemberDescriptorType = type(_types.Helper.member)
    del _types

del sys, _f, _g, _C, _x                 # Not for export
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/uu.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/uu.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,186 @@
+#! /usr/bin/env python
+
+# Copyright 1994 by Lance Ellinghouse
+# Cathedral City, California Republic, United States of America.
+# All Rights Reserved
+# Permission to use, copy, modify, and distribute this software and its
+# documentation for any purpose and without fee is hereby granted,
+# provided that the above copyright notice appear in all copies and that
+# both that copyright notice and this permission notice appear in
+# supporting documentation, and that the name of Lance Ellinghouse
+# not be used in advertising or publicity pertaining to distribution
+# of the software without specific, written prior permission.
+# LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO
+# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+# FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE
+# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+# Modified by Jack Jansen, CWI, July 1995:
+# - Use binascii module to do the actual line-by-line conversion
+# between ascii and binary. This results in a 1000-fold speedup. The C
+# version is still 5 times faster, though.
+# - Arguments more compliant with python standard
+
+"""Implementation of the UUencode and UUdecode functions.
+
+encode(in_file, out_file [,name, mode])
+decode(in_file [, out_file, mode])
+"""
+
+import binascii
+import os
+import sys
+
+__all__ = ["Error", "encode", "decode"]
+
class Error(Exception):
    """Base exception raised for uuencode/uudecode failures."""
    pass
+
def encode(in_file, out_file, name=None, mode=None):
    """Uuencode file.

    in_file and out_file may be open file objects or pathnames; '-'
    means stdin/stdout.  'name' and 'mode' default to the input file's
    basename and permission bits when in_file is a pathname.
    """
    #
    # If in_file is a pathname open it and change defaults
    #
    if in_file == '-':
        in_file = sys.stdin
    elif isinstance(in_file, basestring):
        if name is None:
            name = os.path.basename(in_file)
        if mode is None:
            try:
                mode = os.stat(in_file).st_mode
            except AttributeError:
                # os.stat result lacks st_mode on this platform; keep default.
                pass
        in_file = open(in_file, 'rb')
    #
    # Open out_file if it is a pathname
    #
    if out_file == '-':
        out_file = sys.stdout
    elif isinstance(out_file, basestring):
        out_file = open(out_file, 'w')
    #
    # Set defaults for name and mode
    #
    if name is None:
        name = '-'
    if mode is None:
        mode = 0666
    #
    # Write the data
    #
    # The header records only the permission bits of the mode.
    out_file.write('begin %o %s\n' % ((mode&0777),name))
    # binascii.b2a_uu encodes at most 45 raw bytes per output line.
    data = in_file.read(45)
    while len(data) > 0:
        out_file.write(binascii.b2a_uu(data))
        data = in_file.read(45)
    out_file.write(' \nend\n')
+
+
def decode(in_file, out_file=None, mode=None, quiet=0):
    """Decode uuencoded file.

    in_file/out_file may be open file objects or pathnames ('-' means
    stdin/stdout); out_file and mode default to the values recorded in
    the 'begin' header line.  Raises Error on a missing header, a
    truncated body, or an attempt to overwrite an existing file.
    """
    #
    # Open the input file, if needed.
    #
    if in_file == '-':
        in_file = sys.stdin
    elif isinstance(in_file, basestring):
        in_file = open(in_file)
    #
    # Read until a begin is encountered or we've exhausted the file
    #
    while True:
        hdr = in_file.readline()
        if not hdr:
            raise Error('No valid begin line found in input file')
        if not hdr.startswith('begin'):
            continue
        hdrfields = hdr.split(' ', 2)
        if len(hdrfields) == 3 and hdrfields[0] == 'begin':
            try:
                # A valid header carries an octal mode in field 1.
                int(hdrfields[1], 8)
                break
            except ValueError:
                pass
    if out_file is None:
        # Default to the name recorded in the header.
        out_file = hdrfields[2].rstrip()
        if os.path.exists(out_file):
            raise Error('Cannot overwrite existing file: %s' % out_file)
    if mode is None:
        mode = int(hdrfields[1], 8)
    #
    # Open the output file
    #
    opened = False
    if out_file == '-':
        out_file = sys.stdout
    elif isinstance(out_file, basestring):
        fp = open(out_file, 'wb')
        try:
            os.chmod(out_file, mode)
        except AttributeError:
            # os.chmod unavailable on this platform; skip setting the mode.
            pass
        out_file = fp
        opened = True
    #
    # Main decoding loop
    #
    s = in_file.readline()
    while s and s.strip() != 'end':
        try:
            data = binascii.a2b_uu(s)
        except binascii.Error, v:
            # Workaround for broken uuencoders by /Fredrik Lundh
            nbytes = (((ord(s[0])-32) & 63) * 4 + 5) // 3
            data = binascii.a2b_uu(s[:nbytes])
            if not quiet:
                sys.stderr.write("Warning: %s\n" % v)
        out_file.write(data)
        s = in_file.readline()
    if not s:
        raise Error('Truncated input file')
    # Only close streams this function opened itself.
    if opened:
        out_file.close()
+
def test():
    """uuencode/uudecode main program"""

    import optparse
    parser = optparse.OptionParser(usage='usage: %prog [-d] [-t] [input [output]]')
    parser.add_option('-d', '--decode', dest='decode', help='Decode (instead of encode)?', default=False, action='store_true')
    parser.add_option('-t', '--text', dest='text', help='data is text, encoded format unix-compatible text?', default=False, action='store_true')

    (options, args) = parser.parse_args()
    if len(args) > 2:
        # parser.error() already exits; sys.exit is a belt-and-braces fallback.
        parser.error('incorrect number of arguments')
        sys.exit(1)

    # Positional arguments override the stdin/stdout defaults.
    input = sys.stdin
    output = sys.stdout
    if len(args) > 0:
        input = args[0]
    if len(args) > 1:
        output = args[1]

    if options.decode:
        if options.text:
            # Text mode needs a real file to reopen in text translation mode.
            if isinstance(output, basestring):
                output = open(output, 'w')
            else:
                print sys.argv[0], ': cannot do -t to stdout'
                sys.exit(1)
        decode(input, output)
    else:
        if options.text:
            if isinstance(input, basestring):
                input = open(input, 'r')
            else:
                print sys.argv[0], ': cannot do -t from stdin'
                sys.exit(1)
        encode(input, output)

if __name__ == '__main__':
    test()
Added: pypy/branch/2.5-features/lib-python/modified-2.5.1/warnings.py
==============================================================================
--- (empty file)
+++ pypy/branch/2.5-features/lib-python/modified-2.5.1/warnings.py Mon Aug 11 22:10:30 2008
@@ -0,0 +1,264 @@
+"""Python part of the warnings subsystem."""
+
+# Note: function level imports should *not* be used
+# in this module as it may cause import lock deadlock.
+# See bug 683658.
+import sys, types
+import linecache
+
+__all__ = ["warn", "showwarning", "formatwarning", "filterwarnings",
+ "resetwarnings"]
+
# filters contains a sequence of filter 5-tuples
# The components of the 5-tuple are:
# - an action: error, ignore, always, default, module, or once
# - a compiled regex that must match the warning message
# - a class representing the warning category
# - a compiled regex that must match the module that is being warned
# - a line number for the line being warned about, or 0 to mean any line
# If either of the compiled regexes is None, match anything.
filters = []
# Action applied when no filter matches.
defaultaction = "default"
# Registry of (text, category) pairs already emitted under the "once" action.
onceregistry = {}
+
def warn(message, category=None, stacklevel=1):
    """Issue a warning, or maybe ignore it or raise an exception.

    'message' may be a string or a Warning instance (in which case its
    class becomes the category).  'stacklevel' selects which caller
    frame the warning is attributed to.
    """
    # Check if message is already a Warning object
    if isinstance(message, Warning):
        category = message.__class__
    # Check category argument
    if category is None:
        category = UserWarning
    assert issubclass(category, Warning)
    # Get context information
    try:
        caller = sys._getframe(stacklevel)
    except ValueError:
        # Stack not deep enough; attribute the warning to sys itself.
        globals = sys.__dict__
        lineno = 1
    else:
        globals = caller.f_globals
        lineno = caller.f_lineno
    if '__name__' in globals:
        module = globals['__name__']
    else:
        module = "<string>"
    filename = globals.get('__file__')
    if filename:
        fnl = filename.lower()
        if fnl.endswith((".pyc", ".pyo")):
            # Report the .py source file, not the compiled file.
            filename = filename[:-1]
    else:
        if module == "__main__":
            try:
                filename = sys.argv[0]
            except AttributeError:
                # embedded interpreters don't have sys.argv, see bug #839151
                filename = '__main__'
        if not filename:
            filename = module
    # Per-module registry of already-issued warnings, stored in the
    # caller's globals.
    registry = globals.setdefault("__warningregistry__", {})
    warn_explicit(message, category, filename, lineno, module, registry,
                  globals)
+
def warn_explicit(message, category, filename, lineno,
                  module=None, registry=None, module_globals=None):
    """Low-level warning issuer with explicit context.

    Matches (message text, category, module, lineno) against the filter
    list and performs the selected action; 'registry' records which
    warnings were already issued for suppression.
    """
    if module is None:
        module = filename or "<unknown>"
        if module[-3:].lower() == ".py":
            module = module[:-3] # XXX What about leading pathname?
    if registry is None:
        registry = {}
    if isinstance(message, Warning):
        text = str(message)
        category = message.__class__
    else:
        text = message
        message = category(message)
    key = (text, category, lineno)
    # Quick test for common case
    if registry.get(key):
        return
    # Search the filters
    for item in filters:
        action, msg, cat, mod, ln = item
        if ((msg is None or msg.match(text)) and
            issubclass(category, cat) and
            (mod is None or mod.match(module)) and
            (ln == 0 or lineno == ln)):
            break
    else:
        # No filter matched; fall back to the module default action.
        action = defaultaction
    # Early exit actions
    if action == "ignore":
        registry[key] = 1
        return

    # Prime the linecache for formatting, in case the
    # "file" is actually in a zipfile or something.
    linecache.getlines(filename, module_globals)

    if action == "error":
        raise message
    # Other actions
    if action == "once":
        # Suppress repeats globally, regardless of location.
        registry[key] = 1
        oncekey = (text, category)
        if onceregistry.get(oncekey):
            return
        onceregistry[oncekey] = 1
    elif action == "always":
        pass
    elif action == "module":
        # Suppress repeats per module (lineno ignored via altkey's 0).
        registry[key] = 1
        altkey = (text, category, 0)
        if registry.get(altkey):
            return
        registry[altkey] = 1
    elif action == "default":
        registry[key] = 1
    else:
        # Unrecognized actions are errors
        raise RuntimeError(
              "Unrecognized action (%r) in warnings.filters:\n %s" %
              (action, item))
    # Print message and context
    showwarning(message, category, filename, lineno)
+
def showwarning(message, category, filename, lineno, file=None):
    """Hook to write a warning to a file; replace if you like."""
    out = sys.stderr if file is None else file
    try:
        out.write(formatwarning(message, category, filename, lineno))
    except IOError:
        # The stream (probably stderr) is invalid - this warning gets lost.
        pass
+
def formatwarning(message, category, filename, lineno):
    """Function to format a warning the standard way.

    Produces "filename:lineno: Category: message\n", followed by the
    source line when linecache can find it.
    """
    result = "%s:%s: %s: %s\n" % (filename, lineno, category.__name__, message)
    src = linecache.getline(filename, lineno).strip()
    if src:
        result += " " + src + "\n"
    return result
+
def filterwarnings(action, message="", category=Warning, module="", lineno=0,
                   append=0):
    """Insert an entry into the list of warnings filters (at the front).

    Use assertions to check that all arguments have the right type."""
    import re
    assert action in ("error", "ignore", "always", "default", "module",
                      "once"), "invalid action: %r" % (action,)
    assert isinstance(message, basestring), "message must be a string"
    assert issubclass(category, Warning), "category must be a Warning subclass"
    assert isinstance(module, basestring), "module must be a string"
    assert isinstance(lineno, int) and lineno >= 0, \
           "lineno must be an int >= 0"
    # Message matching is case-insensitive; module matching is not.
    entry = (action, re.compile(message, re.I), category,
             re.compile(module), lineno)
    if append:
        filters.append(entry)
    else:
        filters.insert(0, entry)
+
def simplefilter(action, category=Warning, lineno=0, append=0):
    """Insert a simple entry into the list of warnings filters (at the front).

    A simple filter matches all modules and messages.
    """
    assert action in ("error", "ignore", "always", "default", "module",
                      "once"), "invalid action: %r" % (action,)
    assert isinstance(lineno, int) and lineno >= 0, \
           "lineno must be an int >= 0"
    # None regexes mean "match anything" (see the filters comment above).
    entry = (action, None, category, None, lineno)
    if append:
        filters.append(entry)
    else:
        filters.insert(0, entry)
+
def resetwarnings():
    """Clear the list of warning filters, so that no filters are active."""
    del filters[:]
+
class _OptionError(Exception):
    """Exception used by option processing helpers."""
    pass
+
+# Helper to process -W options passed via sys.warnoptions
+def _processoptions(args):
+ for arg in args:
+ try:
+ _setoption(arg)
+ except _OptionError, msg:
+ print >>sys.stderr, "Invalid -W option ignored:", msg
+
+# Helper for _processoptions()
+def _setoption(arg):
+ import re
+ parts = arg.split(':')
+ if len(parts) > 5:
+ raise _OptionError("too many fields (max 5): %r" % (arg,))
+ while len(parts) < 5:
+ parts.append('')
+ action, message, category, module, lineno = [s.strip()
+ for s in parts]
+ action = _getaction(action)
+ message = re.escape(message)
+ category = _getcategory(category)
+ module = re.escape(module)
+ if module:
+ module = module + '$'
+ if lineno:
+ try:
+ lineno = int(lineno)
+ if lineno < 0:
+ raise ValueError
+ except (ValueError, OverflowError):
+ raise _OptionError("invalid lineno %r" % (lineno,))
+ else:
+ lineno = 0
+ filterwarnings(action, message, category, module, lineno)
+
+# Helper for _setoption()
+def _getaction(action):
+ if not action:
+ return "default"
+ if action == "all": return "always" # Alias
+ for a in ('default', 'always', 'ignore', 'module', 'once', 'error'):
+ if a.startswith(action):
+ return a
+ raise _OptionError("invalid action: %r" % (action,))
+
+# Helper for _setoption()
+def _getcategory(category):
+ import re
+ if not category:
+ return Warning
+ if re.match("^[a-zA-Z0-9_]+$", category):
+ try:
+ cat = eval(category)
+ except NameError:
+ raise _OptionError("unknown warning category: %r" % (category,))
+ else:
+ i = category.rfind(".")
+ module = category[:i]
+ klass = category[i+1:]
+ try:
+ m = __import__(module, None, None, [klass])
+ except ImportError:
+ raise _OptionError("invalid module name: %r" % (module,))
+ try:
+ cat = getattr(m, klass)
+ except AttributeError:
+ raise _OptionError("unknown warning category: %r" % (category,))
+ if not issubclass(cat, Warning):
+ raise _OptionError("invalid warning category: %r" % (category,))
+ return cat
+
# Module initialization: install filters for any -W options given on the
# command line (sys.warnoptions).
_processoptions(sys.warnoptions)
# Appended (not prepended) ignore filters, so the -W filters installed
# above take precedence.  NOTE(review): silencing these two categories by
# default appears to be an implementation-specific choice - confirm.
simplefilter("ignore", category=PendingDeprecationWarning, append=1)
simplefilter("ignore", category=ImportWarning, append=1)
More information about the Pypy-commit
mailing list