[Python-checkins] r65483 - in doctools/branches/0.4.x: doc/markup/code.rst sphinx/directives/code.py sphinx/highlighting.py tests/root/contents.txt tests/root/includes.txt tests/root/literal.inc tests/root/subdir/include.inc tests/root/wrongenc.inc tests/test_build.py tests/util.py

Mon Aug 4 11:01:40 CEST 2008

Author: georg.brandl
Date: Mon Aug  4 11:01:40 2008
New Revision: 65483

Log:
Add an "encoding" option to literalinclude.

Add tests for include directives.


Added:
   doctools/branches/0.4.x/tests/root/includes.txt   (contents, props changed)
   doctools/branches/0.4.x/tests/root/literal.inc   (contents, props changed)
   doctools/branches/0.4.x/tests/root/subdir/include.inc   (contents, props changed)
   doctools/branches/0.4.x/tests/root/wrongenc.inc   (contents, props changed)
Modified:
   doctools/branches/0.4.x/doc/markup/code.rst
   doctools/branches/0.4.x/sphinx/directives/code.py
   doctools/branches/0.4.x/sphinx/highlighting.py
   doctools/branches/0.4.x/tests/root/contents.txt
   doctools/branches/0.4.x/tests/test_build.py
   doctools/branches/0.4.x/tests/util.py

Modified: doctools/branches/0.4.x/doc/markup/code.rst
==============================================================================

--- doctools/branches/0.4.x/doc/markup/code.rst	(original)
+++ doctools/branches/0.4.x/doc/markup/code.rst	Mon Aug  4 11:01:40 2008
@@ -100,6 +100,15 @@
          :language: ruby
          :linenos:
 
+   Include files are assumed to be encoded in UTF-8.  If the file has a different
+   encoding, you can specify it with the ``encoding`` option:
+
+      .. literalinclude:: example.py
+         :encoding: latin-1
+
+   .. versionadded:: 0.4.3
+      The ``encoding`` option.
+
 
 .. rubric:: Footnotes
 

Modified: doctools/branches/0.4.x/sphinx/directives/code.py
==============================================================================
--- doctools/branches/0.4.x/sphinx/directives/code.py	(original)
+++ doctools/branches/0.4.x/sphinx/directives/code.py	Mon Aug  4 11:01:40 2008
@@ -8,6 +8,7 @@
 """
 
 import sys
+import codecs
 from os import path
 
 from docutils import nodes
@@ -67,13 +68,19 @@
         lineno - state_machine.input_offset - 1)))
     fn = path.normpath(path.join(source_dir, rel_fn))
 
+    encoding = options.get('encoding', 'utf-8')
     try:
-        f = open(fn)
+        f = codecs.open(fn, 'r', encoding)
         text = f.read()
         f.close()
     except (IOError, OSError):
         retnode = state.document.reporter.warning(
             'Include file %r not found or reading it failed' % arguments[0], line=lineno)
+    except UnicodeError:
+        retnode = state.document.reporter.warning(
+            'Encoding %r used for reading included file %r seems to '
+            'be wrong, try giving an :encoding: option' %
+            (encoding, arguments[0]))
     else:
         retnode = nodes.literal_block(text, text, source=fn)
         retnode.line = 1
@@ -85,7 +92,8 @@
     return [retnode]
 
 literalinclude_directive.options = {'linenos': directives.flag,
-                                    'language': directives.unchanged}
+                                    'language': directives.unchanged,
+                                    'encoding': directives.encoding}
 literalinclude_directive.content = 0
 literalinclude_directive.arguments = (1, 0, 0)
 directives.register_directive('literalinclude', literalinclude_directive)

Modified: doctools/branches/0.4.x/sphinx/highlighting.py
==============================================================================
--- doctools/branches/0.4.x/sphinx/highlighting.py	(original)
+++ doctools/branches/0.4.x/sphinx/highlighting.py	Mon Aug  4 11:01:40 2008
@@ -128,6 +128,13 @@
                 if sys.version_info >= (2, 5):
                     src = 'from __future__ import with_statement\n' + src
 
+                if isinstance(src, unicode):
+                    # Non-ASCII chars will only occur in string literals
+                    # and comments.  If we wanted to give them to the parser
+                    # correctly, we'd have to find out the correct source
+                    # encoding.  Since it may not even be given in a snippet,
+                    # just replace all non-ASCII characters.
+                    src = src.encode('ascii', 'replace')
                 try:
                     parser.suite(src)
                 except parsing_exceptions:

Modified: doctools/branches/0.4.x/tests/root/contents.txt
==============================================================================
--- doctools/branches/0.4.x/tests/root/contents.txt	(original)
+++ doctools/branches/0.4.x/tests/root/contents.txt	Mon Aug  4 11:01:40 2008
@@ -11,6 +11,7 @@
    :maxdepth: 2
 
    images
+   includes
 
 Indices and tables
 ==================
@@ -18,4 +19,3 @@
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
-

Added: doctools/branches/0.4.x/tests/root/includes.txt
==============================================================================
--- (empty file)
+++ doctools/branches/0.4.x/tests/root/includes.txt	Mon Aug  4 11:01:40 2008
@@ -0,0 +1,16 @@
+Test file and literal inclusion
+===============================
+
+.. include:: subdir/include.inc
+
+.. literalinclude:: literal.inc
+   :language: python
+
+.. should give a warning
+.. literalinclude:: wrongenc.inc
+
+.. should succeed
+.. literalinclude:: wrongenc.inc
+   :encoding: latin-1
+.. include:: wrongenc.inc
+   :encoding: latin-1

Added: doctools/branches/0.4.x/tests/root/literal.inc
==============================================================================
--- (empty file)
+++ doctools/branches/0.4.x/tests/root/literal.inc	Mon Aug  4 11:01:40 2008
@@ -0,0 +1,4 @@
+# Literally included file using Python highlighting
+# -*- coding: utf-8 -*-
+
+foo = u"Including Unicode characters: üöä"

Added: doctools/branches/0.4.x/tests/root/subdir/include.inc
==============================================================================
--- (empty file)
+++ doctools/branches/0.4.x/tests/root/subdir/include.inc	Mon Aug  4 11:01:40 2008
@@ -0,0 +1,5 @@
+.. This file is included by contents.txt.
+
+.. Paths in included files are relative to the file that
+   includes them
+.. image:: ../root/img.png

Added: doctools/branches/0.4.x/tests/root/wrongenc.inc
==============================================================================
--- (empty file)
+++ doctools/branches/0.4.x/tests/root/wrongenc.inc	Mon Aug  4 11:01:40 2008
@@ -0,0 +1,3 @@
+This file is encoded in latin-1 but at first read as utf-8.
+
+Max Strauß aß in München eine Leberkässemmel.

Modified: doctools/branches/0.4.x/tests/test_build.py
==============================================================================
--- doctools/branches/0.4.x/tests/test_build.py	(original)
+++ doctools/branches/0.4.x/tests/test_build.py	Mon Aug  4 11:01:40 2008
@@ -25,6 +25,7 @@
 ENV_WARNINGS = """\
 WARNING: %(root)s/images.txt:9: Image file not readable: foo.png
 WARNING: %(root)s/images.txt:20: Nonlocal image URI found: http://www.python.org/logo.png
+WARNING: %(root)s/includes.txt:: (WARNING/2) Encoding 'utf-8' used for reading included file u'wrongenc.inc' seems to be wrong, try giving an :encoding: option
 """
 
 HTML_WARNINGS = ENV_WARNINGS + """\
@@ -39,10 +40,16 @@
     'images.html': {
         ".//img[@src='_images/img.png']": '',
         ".//img[@src='_images/img1.png']": '',
-    }
+    },
+    'includes.html': {
+        ".//pre/span[@class='s']": u'üöä',
+        ".//pre": u'Max Strauß',
+    },
 }
 
 class NslessParser(ET.XMLParser):
+    """XMLParser that throws away namespaces in tag names."""
+
     def _fixname(self, key):
         try:
             return self._names[key]
@@ -63,15 +70,22 @@
 
     if not ET:
         return
-    parser = NslessParser()
-    parser.entity.update(htmlentitydefs.entitydefs)
     for fname, paths in HTML_XPATH.iteritems():
+        parser = NslessParser()
+        parser.entity.update(htmlentitydefs.entitydefs)
         etree = ET.parse(app.outdir / fname, parser)
         for path, text in paths.iteritems():
             nodes = list(etree.findall(path))
             assert nodes != []
+            if not text:
+                # only check for node presence
+                continue
             for node in nodes:
-                if text: assert text in node.text
+                if node.text and text in node.text:
+                    break
+            else:
+                assert False, ('%r not found in any node matching '
+                               'path %s in %s' % (text, path, fname))
 
 
 @with_testapp(buildername='latex', warning=latex_warnfile)

Modified: doctools/branches/0.4.x/tests/util.py
==============================================================================
--- doctools/branches/0.4.x/tests/util.py	(original)
+++ doctools/branches/0.4.x/tests/util.py	Mon Aug  4 11:01:40 2008
@@ -130,10 +130,10 @@
 
     def cleanup(self):
         trees = [self.outdir, self.doctreedir]
-        if self.made_builddir:
-            trees.append(self.builddir)
-        for tree in trees:
-            shutil.rmtree(tree, True)
+        #f self.made_builddir:
+        #    trees.append(self.builddir)
+        #for tree in trees:
+        #    shutil.rmtree(tree, True)
 
 
 def with_testapp(*args, **kwargs):