[Python-checkins] r67747 - in python/branches/py3k: Lib/test/test_textwrap.py Lib/textwrap.py Misc/NEWS
antoine.pitrou
python-checkins at python.org
Sun Dec 14 00:20:55 CET 2008
Author: antoine.pitrou
Date: Sun Dec 14 00:20:54 2008
New Revision: 67747
Log:
Issue #4163: textwrap module: allow word splitting on a hyphen preceded by a non-ASCII letter.
Modified:
python/branches/py3k/ (props changed)
python/branches/py3k/Lib/test/test_textwrap.py
python/branches/py3k/Lib/textwrap.py
python/branches/py3k/Misc/NEWS
Modified: python/branches/py3k/Lib/test/test_textwrap.py
==============================================================================
--- python/branches/py3k/Lib/test/test_textwrap.py (original)
+++ python/branches/py3k/Lib/test/test_textwrap.py Sun Dec 14 00:20:54 2008
@@ -365,6 +365,14 @@
self.assertRaises(ValueError, wrap, text, 0)
self.assertRaises(ValueError, wrap, text, -1)
+ def test_no_split_at_umlaut(self):
+ text = "Die Empf\xe4nger-Auswahl"
+ self.check_wrap(text, 13, ["Die", "Empf\xe4nger-", "Auswahl"])
+
+ def test_umlaut_followed_by_dash(self):
+ text = "aa \xe4\xe4-\xe4\xe4"
+ self.check_wrap(text, 7, ["aa \xe4\xe4-", "\xe4\xe4"])
+
class LongWordTestCase (BaseTestCase):
def setUp(self):
Modified: python/branches/py3k/Lib/textwrap.py
==============================================================================
--- python/branches/py3k/Lib/textwrap.py (original)
+++ python/branches/py3k/Lib/textwrap.py Sun Dec 14 00:20:54 2008
@@ -76,7 +76,7 @@
# (after stripping out empty strings).
wordsep_re = re.compile(
r'(\s+|' # any whitespace
- r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
+ r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
# This less funky little regex just split on recognized spaces. E.g.
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Sun Dec 14 00:20:54 2008
@@ -45,6 +45,9 @@
Library
-------
+- Issue #4163: textwrap module: allow word splitting on a hyphen preceded by
+ a non-ASCII letter.
+
- Issue #4616: TarFile.utime(): Restore directory times on Windows.
- Issue #4021: tokenize.detect_encoding() now raises a SyntaxError when the
More information about the Python-checkins mailing list