[Python-checkins] r88469 - in python/branches/release32-maint: Lib/collections.py Lib/configparser.py Misc/NEWS

raymond.hettinger python-checkins at python.org
Mon Feb 21 20:38:54 CET 2011


Author: raymond.hettinger
Date: Mon Feb 21 20:38:53 2011
New Revision: 88469

Log:
Issue #11089: Fix performance issue limiting the use of ConfigParser()
with large config files. 



Modified:
   python/branches/release32-maint/Lib/collections.py
   python/branches/release32-maint/Lib/configparser.py
   python/branches/release32-maint/Misc/NEWS

Modified: python/branches/release32-maint/Lib/collections.py
==============================================================================
--- python/branches/release32-maint/Lib/collections.py	(original)
+++ python/branches/release32-maint/Lib/collections.py	Mon Feb 21 20:38:53 2011
@@ -631,6 +631,97 @@
         return result
 
 
+########################################################################
+###  ChainMap (helper for configparser)
+########################################################################
+
+class _ChainMap(MutableMapping):
+    ''' A ChainMap groups multiple dicts (or other mappings) together
+    to create a single, updateable view.
+
+    The underlying mappings are stored in a list.  That list is public and can
+    be accessed or updated using the *maps* attribute.  There is no other state.
+
+    Lookups search the underlying mappings successively until a key is found.
+    In contrast, writes, updates, and deletions only operate on the first
+    mapping.
+
+    '''
+
+    def __init__(self, *maps):
+        '''Initialize a ChainMap by setting *maps* to the given mappings.
+        If no mappings are provided, a single empty dictionary is used.
+
+        '''
+        self.maps = list(maps) or [{}]          # always at least one map
+
+    def __missing__(self, key):
+        raise KeyError(key)
+
+    def __getitem__(self, key):
+        for mapping in self.maps:
+            try:
+                return mapping[key]             # can't use 'key in mapping' with defaultdict
+            except KeyError:
+                pass
+        return self.__missing__(key)            # support subclasses that define __missing__
+
+    def get(self, key, default=None):
+        return self[key] if key in self else default
+
+    def __len__(self):
+        return len(set().union(*self.maps))     # reuses stored hash values if possible
+
+    def __iter__(self):
+        return iter(set().union(*self.maps))
+
+    def __contains__(self, key):
+        return any(key in m for m in self.maps)
+
+    @_recursive_repr()
+    def __repr__(self):
+        return '{0.__class__.__name__}({1})'.format(
+            self, ', '.join(map(repr, self.maps)))
+
+    @classmethod
+    def fromkeys(cls, iterable, *args):
+        'Create a ChainMap with a single dict created from the iterable.'
+        return cls(dict.fromkeys(iterable, *args))
+
+    def copy(self):
+        'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
+        return self.__class__(self.maps[0].copy(), *self.maps[1:])
+
+    __copy__ = copy
+
+    def __setitem__(self, key, value):
+        self.maps[0][key] = value
+
+    def __delitem__(self, key):
+        try:
+            del self.maps[0][key]
+        except KeyError:
+            raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+    def popitem(self):
+        'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
+        try:
+            return self.maps[0].popitem()
+        except KeyError:
+            raise KeyError('No keys found in the first mapping.')
+
+    def pop(self, key, *args):
+        'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
+        try:
+            return self.maps[0].pop(key, *args)
+        except KeyError:
+            raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+    def clear(self):
+        'Clear maps[0], leaving maps[1:] intact.'
+        self.maps[0].clear()
+
+
 ################################################################################
 ### UserDict
 ################################################################################

Modified: python/branches/release32-maint/Lib/configparser.py
==============================================================================
--- python/branches/release32-maint/Lib/configparser.py	(original)
+++ python/branches/release32-maint/Lib/configparser.py	Mon Feb 21 20:38:53 2011
@@ -119,7 +119,7 @@
         between keys and values are surrounded by spaces.
 """
 
-from collections import MutableMapping, OrderedDict as _default_dict
+from collections import MutableMapping, OrderedDict as _default_dict, _ChainMap
 import functools
 import io
 import itertools
@@ -1099,23 +1099,24 @@
         return exc
 
     def _unify_values(self, section, vars):
-        """Create a copy of the DEFAULTSECT with values from a specific
-        `section' and the `vars' dictionary. If provided, values in `vars'
-        take precendence.
+        """Create a sequence of lookups with 'vars' taking priority over
+        the 'section' which takes priority over the DEFAULTSECT.
+
         """
-        d = self._defaults.copy()
+        sectiondict = {}
         try:
-            d.update(self._sections[section])
+            sectiondict = self._sections[section]
         except KeyError:
             if section != self.default_section:
                 raise NoSectionError(section)
         # Update with the entry specific variables
+        vardict = {}
         if vars:
             for key, value in vars.items():
                 if value is not None:
                     value = str(value)
-                d[self.optionxform(key)] = value
-        return d
+                vardict[self.optionxform(key)] = value
+        return _ChainMap(vardict, sectiondict, self._defaults)
 
     def _convert_to_boolean(self, value):
         """Return a boolean value translating from other types if necessary.

Modified: python/branches/release32-maint/Misc/NEWS
==============================================================================
--- python/branches/release32-maint/Misc/NEWS	(original)
+++ python/branches/release32-maint/Misc/NEWS	Mon Feb 21 20:38:53 2011
@@ -15,6 +15,9 @@
 Library
 -------
 
+- Issue #11089: Fix performance issue limiting the use of ConfigParser()
+  with large config files. 
+
 - Issue #10276: Fix the results of zlib.crc32() and zlib.adler32() on buffers
   larger than 4GB.  Patch by Nadeem Vawda.
 


More information about the Python-checkins mailing list