[Python-checkins] r83220 - in python/branches/py3k: Doc/library/configparser.rst Lib/configparser.py Lib/test/cfgparser.3 Lib/test/test_cfgparser.py Misc/NEWS

georg.brandl python-checkins at python.org
Thu Jul 29 14:17:40 CEST 2010


Author: georg.brandl
Date: Thu Jul 29 14:17:40 2010
New Revision: 83220

Log:
#9411: Allow selecting an encoding when reading configparser files.  Also add a new test config file (cfgparser.3) to exercise special cases.

Added:
   python/branches/py3k/Lib/test/cfgparser.3
Modified:
   python/branches/py3k/Doc/library/configparser.rst
   python/branches/py3k/Lib/configparser.py
   python/branches/py3k/Lib/test/test_cfgparser.py
   python/branches/py3k/Misc/NEWS

Modified: python/branches/py3k/Doc/library/configparser.rst
==============================================================================
--- python/branches/py3k/Doc/library/configparser.rst	(original)
+++ python/branches/py3k/Doc/library/configparser.rst	Thu Jul 29 14:17:40 2010
@@ -286,25 +286,29 @@
    :const:`True`; otherwise return :const:`False`.
 
 
-.. method:: RawConfigParser.read(filenames)
+.. method:: RawConfigParser.read(filenames, encoding=None)
 
    Attempt to read and parse a list of filenames, returning a list of filenames
-   which were successfully parsed.  If *filenames* is a string,
-   it is treated as a single filename. If a file named in *filenames* cannot be
-   opened, that file will be ignored.  This is designed so that you can specify a
-   list of potential configuration file locations (for example, the current
-   directory, the user's home directory, and some system-wide directory), and all
-   existing configuration files in the list will be read.  If none of the named
-   files exist, the :class:`ConfigParser` instance will contain an empty dataset.
-   An application which requires initial values to be loaded from a file should
-   load the required file or files using :meth:`readfp` before calling :meth:`read`
-   for any optional files::
+   which were successfully parsed.  If *filenames* is a string, it is treated as
+   a single filename.  If a file named in *filenames* cannot be opened, that
+   file will be ignored.  This is designed so that you can specify a list of
+   potential configuration file locations (for example, the current directory,
+   the user's home directory, and some system-wide directory), and all existing
+   configuration files in the list will be read.  If none of the named files
+   exist, the :class:`ConfigParser` instance will contain an empty dataset.  An
+   application which requires initial values to be loaded from a file should
+   load the required file or files using :meth:`readfp` before calling
+   :meth:`read` for any optional files::
 
       import configparser, os
 
       config = configparser.ConfigParser()
       config.readfp(open('defaults.cfg'))
-      config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')])
+      config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')], encoding='cp1250')
+
+   .. versionchanged:: 3.2
+      Added the *encoding* parameter.  Previously, all files were read using
+      the default encoding for :func:`open`.
 
 
 .. method:: RawConfigParser.readfp(fp, filename=None)

Modified: python/branches/py3k/Lib/configparser.py
==============================================================================
--- python/branches/py3k/Lib/configparser.py	(original)
+++ python/branches/py3k/Lib/configparser.py	Thu Jul 29 14:17:40 2010
@@ -61,7 +61,7 @@
     options(section)
         Return list of configuration options for the named section.
 
-    read(filenames)
+    read(filenames, encoding=None)
         Read and parse the list of named configuration files, given by
         name.  A single filename is also allowed.  Non-existing files
         are ignored.  Return list of successfully read files.
@@ -369,7 +369,7 @@
             del opts['__name__']
         return list(opts.keys())
 
-    def read(self, filenames):
+    def read(self, filenames, encoding=None):
         """Read and parse a filename or a list of filenames.
 
         Files that cannot be opened are silently ignored; this is
@@ -386,7 +386,7 @@
         read_ok = []
         for filename in filenames:
             try:
-                fp = open(filename)
+                fp = open(filename, encoding=encoding)
             except IOError:
                 continue
             self._read(fp, filename)

Added: python/branches/py3k/Lib/test/cfgparser.3
==============================================================================
--- (empty file)
+++ python/branches/py3k/Lib/test/cfgparser.3	Thu Jul 29 14:17:40 2010
@@ -0,0 +1,69 @@
+  # INI with as many tricky parts as possible
+  # Most of them could not be used before 3.2
+     
+  # This will be parsed with the following options
+    # delimiters = {'='}
+    # comment_prefixes = {'#'}
+    # allow_no_value = True
+
+[DEFAULT]
+go = %(interpolate)s
+
+[strange]
+  values = that are indented # and end with hash comments
+  other = that do continue
+    in     # and still have
+    other  # comments mixed
+    lines  # with the values
+
+
+
+
+
+[corruption]
+  value = that is 
+
+
+    actually still here
+
+
+      and holds all these weird newlines
+
+
+        # but not for the lines that are comments
+        nor the indentation
+
+  another value = # empty string
+  yet another # None!
+
+  [yeah, sections can be indented as well]
+  and that does not mean = anything
+  are they subsections = False
+  if you want subsections = use XML
+  lets use some Unicode = 片仮名
+
+  [another one!]
+ even if values are indented like this = seriously
+yes, this still applies to = section "another one!"
+this too = are there people with configurations broken as this? 
+ beware, this is going to be a continuation
+ of the value for
+ key "this too"
+ even if it has a = character
+ this is still the continuation
+ your editor probably highlights it wrong
+ but that's life
+# let's set this value so there is no error
+# when getting all items for this section:
+interpolate = anything will do
+
+[no values here]
+# but there's this `go` in DEFAULT
+
+    [tricky interpolation]
+      interpolate = do this
+      lets = %(go)s
+      
+    [more interpolation]
+      interpolate = go shopping
+      lets = %(go)s

Modified: python/branches/py3k/Lib/test/test_cfgparser.py
==============================================================================
--- python/branches/py3k/Lib/test/test_cfgparser.py	(original)
+++ python/branches/py3k/Lib/test/test_cfgparser.py	Thu Jul 29 14:17:40 2010
@@ -533,7 +533,7 @@
         smbconf = support.findfile("cfgparser.2")
         # check when we pass a mix of readable and non-readable files:
         cf = self.newconfig()
-        parsed_files = cf.read([smbconf, "nonexistent-file"])
+        parsed_files = cf.read([smbconf, "nonexistent-file"], encoding='utf-8')
         self.assertEqual(parsed_files, [smbconf])
         sections = ['global', 'homes', 'printers',
                     'print$', 'pdf-generator', 'tmp', 'Agustin']
@@ -600,6 +600,46 @@
 class SafeConfigParserTestCaseNoValue(SafeConfigParserTestCase):
     allow_no_value = True
 
+class SafeConfigParserTestCaseTrickyFile(CfgParserTestCaseClass):
+    config_class = configparser.SafeConfigParser
+    delimiters = {'='}
+    comment_prefixes = {'#'}
+    allow_no_value = True
+
+    def test_cfgparser_dot_3(self):
+        tricky = support.findfile("cfgparser.3")
+        cf = self.newconfig()
+        self.assertEqual(len(cf.read(tricky, encoding='utf-8')), 1)
+        self.assertEqual(cf.sections(), ['strange',
+                                         'corruption',
+                                         'yeah, sections can be '
+                                         'indented as well',
+                                         'another one!',
+                                         'no values here',
+                                         'tricky interpolation',
+                                         'more interpolation'])
+        #self.assertEqual(cf.getint('DEFAULT', 'go', vars={'interpolate': '-1'}),
+        #                 -1)
+        self.assertEqual(len(cf.get('strange', 'other').split('\n')), 4)
+        self.assertEqual(len(cf.get('corruption', 'value').split('\n')), 10)
+        longname = 'yeah, sections can be indented as well'
+        self.assertFalse(cf.getboolean(longname, 'are they subsections'))
+        self.assertEquals(cf.get(longname, 'lets use some Unicode'),
+                                           '片仮名')
+        self.assertEqual(len(cf.items('another one!')), 5) # 4 in section and
+                                                           # `go` from DEFAULT
+        with self.assertRaises(configparser.InterpolationMissingOptionError):
+            cf.items('no values here')
+        self.assertEqual(cf.get('tricky interpolation', 'lets'), 'do this')
+        self.assertEqual(cf.get('tricky interpolation', 'lets'),
+                         cf.get('tricky interpolation', 'go'))
+        self.assertEqual(cf.get('more interpolation', 'lets'), 'go shopping')
+
+    def test_unicode_failure(self):
+        tricky = support.findfile("cfgparser.3")
+        cf = self.newconfig()
+        with self.assertRaises(UnicodeDecodeError):
+            cf.read(tricky, encoding='ascii')
 
 class SortedTestCase(RawConfigParserTestCase):
     dict_type = SortedDict
@@ -635,10 +675,13 @@
         foo: bar # not a comment!
         # but this is a comment
         ; another comment
+        quirk: this;is not a comment
+        ; a space must precede a comment character
         """)
         cf = self.fromstring(config_string)
         self.assertEqual(cf.get('Commented Bar', 'foo'), 'bar # not a comment!')
         self.assertEqual(cf.get('Commented Bar', 'baz'), 'qwe')
+        self.assertEqual(cf.get('Commented Bar', 'quirk'), 'this;is not a comment')
 
 
 def test_main():
@@ -652,6 +695,7 @@
         SafeConfigParserTestCase,
         SafeConfigParserTestCaseNonStandardDelimiters,
         SafeConfigParserTestCaseNoValue,
+        SafeConfigParserTestCaseTrickyFile,
         SortedTestCase,
         CompatibleTestCase,
         )

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Thu Jul 29 14:17:40 2010
@@ -475,6 +475,9 @@
 Library
 -------
 
+- Issue #9411: Allow specifying an encoding for config files in the
+  configparser module.
+
 - Issue #1682942: Improvements to configparser: support alternate
   delimiters, alternate comment prefixes and empty lines in values.
 


More information about the Python-checkins mailing list