[Python-checkins] r70587 - in python/branches/py3k/Lib: linecache.py tokenize.py

benjamin.peterson python-checkins at python.org
Tue Mar 24 23:30:15 CET 2009


Author: benjamin.peterson
Date: Tue Mar 24 23:30:15 2009
New Revision: 70587

Log:
Reuse tokenize.detect_encoding() in linecache instead of a custom solution.

Patch by Victor Stinner (issue #4016).
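
For anyone skimming the diff below: tokenize.detect_encoding() takes a
readline callable over raw bytes and returns the detected source encoding
plus the lines it had to consume to find it. A minimal sketch of that API
(the file name is a placeholder):

    import tokenize

    # detect_encoding() reads at most two lines, looking for a BOM or a
    # PEP 263 coding cookie; it falls back to "utf-8" if neither is found.
    with open("example.py", "rb") as fp:
        encoding, consumed = tokenize.detect_encoding(fp.readline)
    print(encoding)   # e.g. "utf-8" or "latin-1"
    print(consumed)   # the raw byte lines already read from the file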


Modified:
   python/branches/py3k/Lib/linecache.py
   python/branches/py3k/Lib/tokenize.py

Modified: python/branches/py3k/Lib/linecache.py
==============================================================================
--- python/branches/py3k/Lib/linecache.py	(original)
+++ python/branches/py3k/Lib/linecache.py	Tue Mar 24 23:30:15 2009
@@ -7,7 +7,7 @@
 
 import sys
 import os
-import re
+import tokenize
 
 __all__ = ["getline", "clearcache", "checkcache"]
 
@@ -120,27 +120,11 @@
                     pass
         else:
             # No luck
-##          print '*** Cannot stat', filename, ':', msg
             return []
-##  print("Refreshing cache for %s..." % fullname)
-    try:
-        fp = open(fullname, 'rU')
+    with open(fullname, 'rb') as fp:
+        coding, line = tokenize.detect_encoding(fp.readline)
+    with open(fullname, 'r', encoding=coding) as fp:
         lines = fp.readlines()
-        fp.close()
-    except Exception as msg:
-##      print '*** Cannot open', fullname, ':', msg
-        return []
-    coding = "utf-8"
-    for line in lines[:2]:
-        m = re.search(r"coding[:=]\s*([-\w.]+)", line)
-        if m:
-            coding = m.group(1)
-            break
-    try:
-        lines = [line if isinstance(line, str) else str(line, coding)
-                 for line in lines]
-    except:
-        pass  # Hope for the best
     size, mtime = stat.st_size, stat.st_mtime
     cache[filename] = size, mtime, lines, fullname
     return lines
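
The net effect for linecache: the file is opened twice, first in binary
mode so detect_encoding() can inspect the raw bytes, then in text mode
with the detected encoding so readlines() yields properly decoded str
objects. A quick way to exercise it ("latin1_mod.py" is a hypothetical
file declaring a latin-1 coding cookie):

    import linecache

    # getline() now decodes the file with the encoding named in its
    # coding cookie instead of the old regex-and-hope fallback.
    line = linecache.getline("latin1_mod.py", 1)
    print(repr(line))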

Modified: python/branches/py3k/Lib/tokenize.py
==============================================================================
--- python/branches/py3k/Lib/tokenize.py	(original)
+++ python/branches/py3k/Lib/tokenize.py	Tue Mar 24 23:30:15 2009
@@ -27,7 +27,6 @@
 import re, string, sys
 from token import *
 from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
 
 import token
@@ -327,13 +326,15 @@
     which tells you which encoding was used to decode the bytes stream.
     """
     encoding, consumed = detect_encoding(readline)
-    def readline_generator():
+    def readline_generator(consumed):
+        for line in consumed:
+            yield line
         while True:
             try:
                 yield readline()
             except StopIteration:
                 return
-    chained = chain(consumed, readline_generator())
+    chained = readline_generator(consumed)
     return _tokenize(chained.__next__, encoding)
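
With itertools.chain gone, readline_generator() itself replays the lines
detect_encoding() already consumed before deferring to readline(); the
try/except covers readline callables that signal EOF with StopIteration
rather than an empty bytes object. A usage sketch with an in-memory
stream standing in for a real file:

    import io
    import tokenize

    # The first token yielded is ENCODING, reporting what was detected
    # from the coding cookie; the remaining tokens are decoded with it.
    buf = io.BytesIO(b"# -*- coding: latin-1 -*-\nx = 1\n")
    for tok in tokenize.tokenize(buf.readline):
        print(tok)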
 
 

