[Python-checkins] r63053 - in python/trunk: Doc/library/textwrap.rst Lib/test/test_textwrap.py Lib/textwrap.py Misc/NEWS

georg.brandl python-checkins at python.org
Sun May 11 12:42:28 CEST 2008


Author: georg.brandl
Date: Sun May 11 12:42:28 2008
New Revision: 63053

Log:
#2659: add ``break_on_hyphens`` to TextWrapper.


Modified:
   python/trunk/Doc/library/textwrap.rst
   python/trunk/Lib/test/test_textwrap.py
   python/trunk/Lib/textwrap.py
   python/trunk/Misc/NEWS

Modified: python/trunk/Doc/library/textwrap.rst
==============================================================================
--- python/trunk/Doc/library/textwrap.rst	(original)
+++ python/trunk/Doc/library/textwrap.rst	Sun May 11 12:42:28 2008
@@ -41,6 +41,10 @@
 applications that wrap/fill many text strings, it will be more efficient for you
 to create your own :class:`TextWrapper` object.
 
+Text is preferably wrapped on whitespace and right after the hyphens in
+hyphenated words; only then will long words be broken if necessary, unless
+:attr:`TextWrapper.break_long_words` is set to false.
+
 An additional utility function, :func:`dedent`, is provided to remove
 indentation from strings that have unwanted whitespace to the left of the text.
 
@@ -174,10 +178,22 @@
       than :attr:`width`.  (Long words will be put on a line by themselves, in
       order to minimize the amount by which :attr:`width` is exceeded.)
 
+
+   .. attribute:: break_on_hyphens
+
+      (default: ``True``) If true, wrapping will occur preferably on whitespace
+      and right after hyphens in compound words, as is customary in English.
+      If false, only whitespace will be considered as a potentially good place
+      for line breaks, but you need to set :attr:`break_long_words` to false if
+      you want truly unbreakable words.  Default behaviour in previous versions
+      was to always allow breaking hyphenated words.
+
+      .. versionadded:: 2.6
+
+
    :class:`TextWrapper` also provides two public methods, analogous to the
    module-level convenience functions:
 
-
    .. method:: wrap(text)
 
       Wraps the single paragraph in *text* (a string) so every line is at most

Modified: python/trunk/Lib/test/test_textwrap.py
==============================================================================
--- python/trunk/Lib/test/test_textwrap.py	(original)
+++ python/trunk/Lib/test/test_textwrap.py	Sun May 11 12:42:28 2008
@@ -364,6 +364,14 @@
              ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
               "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"])
 
+    def test_break_on_hyphens(self):
+        # Ensure that the break_on_hyphens attribute works
+        text = "yaba daba-doo"
+        self.check_wrap(text, 10, ["yaba daba-", "doo"],
+                        break_on_hyphens=True)
+        self.check_wrap(text, 10, ["yaba", "daba-doo"],
+                        break_on_hyphens=False)
+
     def test_bad_width(self):
         # Ensure that width <= 0 is caught.
         text = "Whatever, it doesn't matter."

Modified: python/trunk/Lib/textwrap.py
==============================================================================
--- python/trunk/Lib/textwrap.py	(original)
+++ python/trunk/Lib/textwrap.py	Sun May 11 12:42:28 2008
@@ -63,6 +63,10 @@
       break_long_words (default: true)
         Break words longer than 'width'.  If false, those words will not
         be broken, and some lines might be longer than 'width'.
+      break_on_hyphens (default: true)
+        Allow breaking hyphenated words. If true, wrapping will occur
+        preferably on whitespace and right after hyphens that are part
+        of compound words.
       drop_whitespace (default: true)
         Drop leading and trailing whitespace from lines.
     """
@@ -85,6 +89,12 @@
         r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'   # hyphenated words
         r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
 
+    # This less funky little regex just splits on recognized spaces. E.g.
+    #   "Hello there -- you goof-ball, use the -b option!"
+    # splits into
+    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
+    wordsep_simple_re = re.compile(r'(\s+)')
+
     # XXX this is not locale- or charset-aware -- string.lowercase
     # is US-ASCII only (and therefore English-only)
     sentence_end_re = re.compile(r'[%s]'              # lowercase letter
@@ -102,7 +112,8 @@
                  replace_whitespace=True,
                  fix_sentence_endings=False,
                  break_long_words=True,
-                 drop_whitespace=True):
+                 drop_whitespace=True,
+                 break_on_hyphens=True):
         self.width = width
         self.initial_indent = initial_indent
         self.subsequent_indent = subsequent_indent
@@ -111,6 +122,7 @@
         self.fix_sentence_endings = fix_sentence_endings
         self.break_long_words = break_long_words
         self.drop_whitespace = drop_whitespace
+        self.break_on_hyphens = break_on_hyphens
 
 
     # -- Private methods -----------------------------------------------
@@ -143,8 +155,15 @@
         breaks into the following chunks:
           'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
           'use', ' ', 'the', ' ', '-b', ' ', 'option!'
+        if break_on_hyphens is True, or in:
+          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
+          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
+        otherwise.
         """
-        chunks = self.wordsep_re.split(text)
+        if self.break_on_hyphens is True:
+            chunks = self.wordsep_re.split(text)
+        else:
+            chunks = self.wordsep_simple_re.split(text)
         chunks = filter(None, chunks)  # remove empty chunks
         return chunks
 

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Sun May 11 12:42:28 2008
@@ -23,6 +23,8 @@
 Library
 -------
 
+- #2659: Added ``break_on_hyphens`` option to textwrap TextWrapper class.
+
 - The mhlib module has been deprecated for removal in Python 3.0.
 
 - The linuxaudiodev module has been deprecated for removal in Python 3.0.


More information about the Python-checkins mailing list