[Python-checkins] r46844 - in python/trunk: Doc/lib/libtextwrap.tex Lib/test/test_textwrap.py Lib/textwrap.py Misc/NEWS

greg.ward python-checkins at python.org
Sun Jun 11 02:40:50 CEST 2006


Author: greg.ward
Date: Sun Jun 11 02:40:49 2006
New Revision: 46844

Modified:
   python/trunk/Doc/lib/libtextwrap.tex
   python/trunk/Lib/test/test_textwrap.py
   python/trunk/Lib/textwrap.py
   python/trunk/Misc/NEWS
Log:
Bug #1361643: fix textwrap.dedent() so it handles tabs appropriately,
i.e. do *not* expand tabs, but treat them as whitespace that is not
equivalent to spaces.  Add a couple of test cases.  Clarify docs.


Modified: python/trunk/Doc/lib/libtextwrap.tex
==============================================================================
--- python/trunk/Doc/lib/libtextwrap.tex	(original)
+++ python/trunk/Doc/lib/libtextwrap.tex	Sun Jun 11 02:40:49 2006
@@ -47,12 +47,17 @@
 left of the text.
 
 \begin{funcdesc}{dedent}{text} 
-Remove any whitespace that can be uniformly removed from the left
-of every line in \var{text}.
+Remove any common leading whitespace from every line in \var{text}.
 
-This is typically used to make triple-quoted strings line up with
-the left edge of screen/whatever, while still presenting it in the
-source code in indented form. 
+This can be used to make triple-quoted strings line up with the left
+edge of the display, while still presenting them in the source code
+in indented form.
+
+Note that tabs and spaces are both treated as whitespace, but they are
+not equal: the lines \code{" {} hello"} and \code{"\textbackslash{}thello"}
+are considered to have no common leading whitespace.  (This behaviour is
+new in Python 2.5; older versions of this module incorrectly expanded
+tabs before searching for common leading whitespace.)
 
 For example:
 \begin{verbatim}

Modified: python/trunk/Lib/test/test_textwrap.py
==============================================================================
--- python/trunk/Lib/test/test_textwrap.py	(original)
+++ python/trunk/Lib/test/test_textwrap.py	Sun Jun 11 02:40:49 2006
@@ -460,38 +460,42 @@
 # of IndentTestCase!
 class DedentTestCase(unittest.TestCase):
 
+    def assertUnchanged(self, text):
+        """assert that dedent() has no effect on 'text'"""
+        self.assertEquals(text, dedent(text))
+
     def test_dedent_nomargin(self):
         # No lines indented.
         text = "Hello there.\nHow are you?\nOh good, I'm glad."
-        self.assertEquals(dedent(text), text)
+        self.assertUnchanged(text)
 
         # Similar, with a blank line.
         text = "Hello there.\n\nBoo!"
-        self.assertEquals(dedent(text), text)
+        self.assertUnchanged(text)
 
         # Some lines indented, but overall margin is still zero.
         text = "Hello there.\n  This is indented."
-        self.assertEquals(dedent(text), text)
+        self.assertUnchanged(text)
 
         # Again, add a blank line.
         text = "Hello there.\n\n  Boo!\n"
-        self.assertEquals(dedent(text), text)
+        self.assertUnchanged(text)
 
     def test_dedent_even(self):
         # All lines indented by two spaces.
         text = "  Hello there.\n  How are ya?\n  Oh good."
         expect = "Hello there.\nHow are ya?\nOh good."
-        self.assertEquals(dedent(text), expect)
+        self.assertEquals(expect, dedent(text))
 
         # Same, with blank lines.
         text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
-        self.assertEquals(dedent(text), expect)
+        self.assertEquals(expect, dedent(text))
 
         # Now indent one of the blank lines.
         text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
         expect = "Hello there.\n\nHow are ya?\nOh good.\n"
-        self.assertEquals(dedent(text), expect)
+        self.assertEquals(expect, dedent(text))
 
     def test_dedent_uneven(self):
         # Lines indented unevenly.
@@ -505,18 +509,53 @@
     while 1:
         return foo
 '''
-        self.assertEquals(dedent(text), expect)
+        self.assertEquals(expect, dedent(text))
 
         # Uneven indentation with a blank line.
         text = "  Foo\n    Bar\n\n   Baz\n"
         expect = "Foo\n  Bar\n\n Baz\n"
-        self.assertEquals(dedent(text), expect)
+        self.assertEquals(expect, dedent(text))
 
         # Uneven indentation with a whitespace-only line.
         text = "  Foo\n    Bar\n \n   Baz\n"
         expect = "Foo\n  Bar\n\n Baz\n"
-        self.assertEquals(dedent(text), expect)
+        self.assertEquals(expect, dedent(text))
 
+    # dedent() should not mangle internal tabs
+    def test_dedent_preserve_internal_tabs(self):
+        text = "  hello\tthere\n  how are\tyou?"
+        expect = "hello\tthere\nhow are\tyou?"
+        self.assertEquals(expect, dedent(text))
+
+        # make sure that it preserves tabs when it's not making any
+        # changes at all
+        self.assertEquals(expect, dedent(expect))
+
+    # dedent() should not mangle tabs in the margin (i.e.
+    # tabs and spaces both count as margin, but are *not*
+    # considered equivalent)
+    def test_dedent_preserve_margin_tabs(self):
+        text = "  hello there\n\thow are you?"
+        self.assertUnchanged(text)
+
+        # same effect even if we have 8 spaces
+        text = "        hello there\n\thow are you?"
+        self.assertUnchanged(text)
+
+        # dedent() only removes whitespace that can be uniformly removed!
+        text = "\thello there\n\thow are you?"
+        expect = "hello there\nhow are you?"
+        self.assertEquals(expect, dedent(text))
+
+        text = "  \thello there\n  \thow are you?"
+        self.assertEquals(expect, dedent(text))
+
+        text = "  \t  hello there\n  \t  how are you?"
+        self.assertEquals(expect, dedent(text))
+        
+        text = "  \thello there\n  \t  how are you?"
+        expect = "hello there\n  how are you?"
+        self.assertEquals(expect, dedent(text))
 
 
 def test_main():

Modified: python/trunk/Lib/textwrap.py
==============================================================================
--- python/trunk/Lib/textwrap.py	(original)
+++ python/trunk/Lib/textwrap.py	Sun Jun 11 02:40:49 2006
@@ -317,41 +317,58 @@
 
 # -- Loosely related functionality -------------------------------------
 
-def dedent(text):
-    """dedent(text : string) -> string
+_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
+_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)
 
-    Remove any whitespace than can be uniformly removed from the left
-    of every line in `text`.
+def dedent(text):
+    """Remove any common leading whitespace from every line in `text`.
 
-    This can be used e.g. to make triple-quoted strings line up with
-    the left edge of screen/whatever, while still presenting it in the
-    source code in indented form.
-
-    For example:
-
-        def test():
-            # end first line with \ to avoid the empty line!
-            s = '''\
-            hello
-              world
-            '''
-            print repr(s)          # prints '    hello\n      world\n    '
-            print repr(dedent(s))  # prints 'hello\n  world\n'
+    This can be used to make triple-quoted strings line up with the left
+    edge of the display, while still presenting them in the source code
+    in indented form.
+
+    Note that tabs and spaces are both treated as whitespace, but they
+    are not equal: the lines "  hello" and "\thello" are
+    considered to have no common leading whitespace.  (This behaviour is
+    new in Python 2.5; older versions of this module incorrectly
+    expanded tabs before searching for common leading whitespace.)
     """
-    lines = text.expandtabs().split('\n')
+    # Look for the longest leading string of spaces and tabs common to
+    # all lines.
     margin = None
-    for line in lines:
-        content = line.lstrip()
-        if not content:
-            continue
-        indent = len(line) - len(content)
+    text = _whitespace_only_re.sub('', text)
+    indents = _leading_whitespace_re.findall(text)
+    for indent in indents:
         if margin is None:
             margin = indent
-        else:
-            margin = min(margin, indent)
 
-    if margin is not None and margin > 0:
-        for i in range(len(lines)):
-            lines[i] = lines[i][margin:]
+        # Current line more deeply indented than previous winner:
+        # no change (previous winner is still on top).
+        elif indent.startswith(margin): 
+            pass                        
+
+        # Current line consistent with and no deeper than previous winner:
+        # it's the new winner.
+        elif margin.startswith(indent): 
+            margin = indent             
+
+        # Current line and previous winner have no common whitespace:
+        # there is no margin.
+        else:
+            margin = ""
+            break
 
-    return '\n'.join(lines)
+    # sanity check (testing/debugging only)
+    if 0 and margin:
+        for line in text.split("\n"):
+            assert not line or line.startswith(margin), \
+                   "line = %r, margin = %r" % (line, margin)
+
+    if margin:
+        text = re.sub(r'(?m)^' + margin, '', text)
+    return text
+
+if __name__ == "__main__":
+    #print dedent("\tfoo\n\tbar")
+    #print dedent("  \thello there\n  \t  how are you?")
+    print dedent("Hello there.\n  This is indented.")

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Sun Jun 11 02:40:49 2006
@@ -145,6 +145,9 @@
 Library
 -------
 
+- Bug #1361643: fix textwrap.dedent() so it handles tabs appropriately;
+  clarify docs.
+
 - The wsgiref package has been added to the standard library.
 
 - The functions update_wrapper() and wraps() have been added to the functools


More information about the Python-checkins mailing list