[Python-checkins] r86346 - in python/branches/py3k: Doc/library/tokenize.rst Lib/linecache.py Lib/py_compile.py Lib/tabnanny.py Lib/test/test_tokenize.py Lib/tokenize.py Lib/trace.py Misc/NEWS

victor.stinner python-checkins at python.org
Tue Nov 9 02:09:00 CET 2010


Author: victor.stinner
Date: Tue Nov  9 02:08:59 2010
New Revision: 86346

Log:
Issue #10335: Add tokenize.open(), which detects the file encoding using
tokenize.detect_encoding() and opens the file in read-only mode.


Modified:
   python/branches/py3k/Doc/library/tokenize.rst
   python/branches/py3k/Lib/linecache.py
   python/branches/py3k/Lib/py_compile.py
   python/branches/py3k/Lib/tabnanny.py
   python/branches/py3k/Lib/test/test_tokenize.py
   python/branches/py3k/Lib/tokenize.py
   python/branches/py3k/Lib/trace.py
   python/branches/py3k/Misc/NEWS

Modified: python/branches/py3k/Doc/library/tokenize.rst
==============================================================================
--- python/branches/py3k/Doc/library/tokenize.rst	(original)
+++ python/branches/py3k/Doc/library/tokenize.rst	Tue Nov  9 02:08:59 2010
@@ -101,14 +101,16 @@
     If no encoding is specified, then the default of ``'utf-8'`` will be
     returned.
 
-    :func:`detect_encoding` is useful for robustly reading Python source files.
-    A common pattern for this follows::
+    Use :func:`open` to open Python source files: it uses
+    :func:`detect_encoding` to detect the file encoding.
 
-        def read_python_source(file_name):
-            with open(file_name, "rb") as fp:
-                encoding = tokenize.detect_encoding(fp.readline)[0]
-            with open(file_name, "r", encoding=encoding) as fp:
-                return fp.read()
+
+.. function:: open(filename)
+
+   Open a file in read-only mode using the encoding detected by
+   :func:`detect_encoding`.
+
+   .. versionadded:: 3.2
 
 
 Example of a script rewriter that transforms float literals into Decimal
@@ -153,4 +155,3 @@
                 result.append((toknum, tokval))
         return untokenize(result).decode('utf-8')
 
-

Modified: python/branches/py3k/Lib/linecache.py
==============================================================================
--- python/branches/py3k/Lib/linecache.py	(original)
+++ python/branches/py3k/Lib/linecache.py	Tue Nov  9 02:08:59 2010
@@ -123,9 +123,7 @@
         else:
             return []
     try:
-        with open(fullname, 'rb') as fp:
-            coding, line = tokenize.detect_encoding(fp.readline)
-        with open(fullname, 'r', encoding=coding) as fp:
+        with tokenize.open(fullname) as fp:
             lines = fp.readlines()
     except IOError:
         return []

Modified: python/branches/py3k/Lib/py_compile.py
==============================================================================
--- python/branches/py3k/Lib/py_compile.py	(original)
+++ python/branches/py3k/Lib/py_compile.py	Tue Nov  9 02:08:59 2010
@@ -104,9 +104,7 @@
     byte-compile all installed files (or all files in selected
     directories).
     """
-    with open(file, "rb") as f:
-        encoding = tokenize.detect_encoding(f.readline)[0]
-    with open(file, encoding=encoding) as f:
+    with tokenize.open(file) as f:
         try:
             timestamp = int(os.fstat(f.fileno()).st_mtime)
         except AttributeError:

Modified: python/branches/py3k/Lib/tabnanny.py
==============================================================================
--- python/branches/py3k/Lib/tabnanny.py	(original)
+++ python/branches/py3k/Lib/tabnanny.py	Tue Nov  9 02:08:59 2010
@@ -93,11 +93,8 @@
                 check(fullname)
         return
 
-    with open(file, 'rb') as f:
-        encoding, lines = tokenize.detect_encoding(f.readline)
-
     try:
-        f = open(file, encoding=encoding)
+        f = tokenize.open(file)
     except IOError as msg:
         errprint("%r: I/O Error: %s" % (file, msg))
         return

Modified: python/branches/py3k/Lib/test/test_tokenize.py
==============================================================================
--- python/branches/py3k/Lib/test/test_tokenize.py	(original)
+++ python/branches/py3k/Lib/test/test_tokenize.py	Tue Nov  9 02:08:59 2010
@@ -564,7 +564,8 @@
 
 from test import support
 from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
-                     STRING, ENDMARKER, tok_name, detect_encoding)
+                     STRING, ENDMARKER, tok_name, detect_encoding,
+                     open as tokenize_open)
 from io import BytesIO
 from unittest import TestCase
 import os, sys, glob
@@ -857,6 +858,26 @@
         readline = self.get_readline((b'# coding: bad\n',))
         self.assertRaises(SyntaxError, detect_encoding, readline)
 
+    def test_open(self):
+        filename = support.TESTFN + '.py'
+        self.addCleanup(support.unlink, filename)
+
+        # test coding cookie
+        for encoding in ('iso-8859-15', 'utf-8'):
+            with open(filename, 'w', encoding=encoding) as fp:
+                print("# coding: %s" % encoding, file=fp)
+                print("print('euro:\u20ac')", file=fp)
+            with tokenize_open(filename) as fp:
+                assert fp.encoding == encoding
+                assert fp.mode == 'r'
+
+        # test BOM (no coding cookie)
+        with open(filename, 'w', encoding='utf-8-sig') as fp:
+            print("print('euro:\u20ac')", file=fp)
+        with tokenize_open(filename) as fp:
+            assert fp.encoding == 'utf-8-sig'
+            assert fp.mode == 'r'
+
 class TestTokenize(TestCase):
 
     def test_tokenize(self):

Modified: python/branches/py3k/Lib/tokenize.py
==============================================================================
--- python/branches/py3k/Lib/tokenize.py	(original)
+++ python/branches/py3k/Lib/tokenize.py	Tue Nov  9 02:08:59 2010
@@ -29,6 +29,7 @@
 from token import *
 from codecs import lookup, BOM_UTF8
 import collections
+from io import TextIOWrapper
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
@@ -335,6 +336,20 @@
     return default, [first, second]
 
 
+_builtin_open = open
+
+def open(filename):
+    """Open a file in read only mode using the encoding detected by
+    detect_encoding().
+    """
+    buffer = _builtin_open(filename, 'rb')
+    encoding, lines = detect_encoding(buffer.readline)
+    buffer.seek(0)
+    text = TextIOWrapper(buffer, encoding, line_buffering=True)
+    text.mode = 'r'
+    return text
+
+
 def tokenize(readline):
     """
     The tokenize() generator requires one argment, readline, which

Modified: python/branches/py3k/Lib/trace.py
==============================================================================
--- python/branches/py3k/Lib/trace.py	(original)
+++ python/branches/py3k/Lib/trace.py	Tue Nov  9 02:08:59 2010
@@ -432,10 +432,9 @@
 def find_executable_linenos(filename):
     """Return dict where keys are line numbers in the line number table."""
     try:
-        with io.FileIO(filename, 'r') as file:
-            encoding, lines = tokenize.detect_encoding(file.readline)
-        with open(filename, "r", encoding=encoding) as f:
+        with tokenize.open(filename) as f:
             prog = f.read()
+            encoding = f.encoding
     except IOError as err:
         print(("Not printing coverage data for %r: %s"
                               % (filename, err)), file=sys.stderr)

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Tue Nov  9 02:08:59 2010
@@ -60,6 +60,9 @@
 Library
 -------
 
+- Issue #10335: Add tokenize.open(), which detects the file encoding using
+  tokenize.detect_encoding() and opens the file in read-only mode.
+
 - Issue #10321: Added support for binary data to smtplib.SMTP.sendmail,
   and a new method send_message to send an email.message.Message object.
 


More information about the Python-checkins mailing list