[Python-checkins] python/dist/src/Tools/scripts findnocoding.py, NONE, pysource.py, NONE, README,, pindent.py,, texcheck.py,,

jhylton@users.sourceforge.net jhylton at users.sourceforge.net
Sun Oct 16 07:24:10 CEST 2005

Update of /cvsroot/python/python/dist/src/Tools/scripts
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27718/Tools/scripts

Modified Files:
      Tag: ast-branch
	README pindent.py texcheck.py 
Added Files:
      Tag: ast-branch
	findnocoding.py pysource.py 
Log Message:
Merge head to branch (for the last time)

--- NEW FILE: findnocoding.py ---
#!/usr/bin/env python

"""List all those Python files that require a coding directive

Usage: findnocoding.py [-cd] dir1 [dir2...]
"""

__author__ = "Oleg Broytmann, Reinhold Birkenfeld"

import sys, os, re, getopt

# our pysource module finds Python source files
try:
    import pysource
except ImportError:
    # emulate the module with a simple os.walk
    class pysource:
        """Minimal stand-in used when the real pysource module cannot be imported.

        Only walk_python_files() does any work; the three predicate
        attributes are None placeholders so that attribute lookups succeed.
        """
        has_python_ext = looks_like_python = can_be_compiled = None

        def walk_python_files(self, paths, *args, **kwargs):
            """Yield each path in, or below, paths whose name ends in ".py".

            paths: a list of files and/or directories to be checked.
            Any further positional or keyword arguments are accepted for
            call compatibility and ignored.
            """
            for path in paths:
                if os.path.isfile(path):
                    # yield the path itself, not the boolean that
                    # endswith() returns
                    if path.endswith(".py"):
                        yield path
                elif os.path.isdir(path):
                    for root, dirs, files in os.walk(path):
                        for filename in files:
                            if filename.endswith(".py"):
                                yield os.path.join(root, filename)
    pysource = pysource()

    # same text as before, written directly so the fallback does not rely
    # on the print statement
    sys.stderr.write("The pysource module is not available; "
                     "no sophisticated Python source file search will be done."
                     "\n")

# Recognizes an encoding declaration such as "coding: utf-8" or
# "coding=latin-1" anywhere in a line and captures the codec name.
decl_re = re.compile(r"coding[=:]\s*([-\w.]+)")

def get_declaration(line):
    """Return the codec name declared in line, or '' if line declares none."""
    found = decl_re.search(line)
    if found is None:
        return ''
    return found.group(1)

def has_correct_encoding(text, codec):
    """Return True if the byte string text decodes cleanly with codec.

    text: the raw file contents (a byte string).
    codec: the name of the encoding to try, e.g. "ascii".

    Returns False when decoding raises UnicodeDecodeError.
    """
    try:
        text.decode(codec)
    except UnicodeDecodeError:
        return False
    else:
        return True

def needs_declaration(fullpath):
    """Tell whether the file at fullpath needs an encoding declaration.

    Returns None if the file cannot be opened, False if one of its first two
    lines already carries a coding declaration or its whole content decodes
    as ASCII, and True otherwise.
    """
    try:
        infile = open(fullpath, 'rU')
    except IOError: # Oops, the file was removed - ignore it
        return None

    try:
        line1 = infile.readline()
        line2 = infile.readline()

        if get_declaration(line1) or get_declaration(line2):
            # the file does have an encoding declaration, so trust it
            return False

        # check the whole file for non-ASCII characters
        rest = infile.read()
    finally:
        # release the handle on every exit path
        infile.close()

    return not has_correct_encoding(line1+line2+rest, "ascii")

usage = """Usage: %s [-cd] paths...
    -c: recognize Python source files trying to compile them
    -d: debug output""" % sys.argv[0]

    opts, args = getopt.getopt(sys.argv[1:], 'cd')
except getopt.error, msg:
    print >>sys.stderr, msg
    print >>sys.stderr, usage

is_python = pysource.looks_like_python
debug = False

for o, a in opts:
    if o == '-c':
        is_python = pysource.can_be_compiled
    elif o == '-d':
        debug = True

if not args:
    print >>sys.stderr, usage

for fullpath in pysource.walk_python_files(args, is_python):
    if debug:
        print "Testing for coding: %s" % fullpath
    result = needs_declaration(fullpath)
    if result:
        print fullpath

--- NEW FILE: pysource.py ---
#!/usr/bin/env python

"""List python source files.

There are three functions to check whether a file is a Python source, listed
here with increasing complexity:

- has_python_ext() checks whether a file name ends in '.py[w]'.
- looks_like_python() checks whether the file is not binary and either has
  the '.py[w]' extension or the first line contains the word 'python'.
- can_be_compiled() checks whether the file can be compiled by compile().

The file also must be of appropriate size - not bigger than a megabyte.

walk_python_files() recursively lists all Python files under the given directories.
"""
__author__ = "Oleg Broytmann, Reinhold Birkenfeld"

__all__ = ["has_python_ext", "looks_like_python", "can_be_compiled", "walk_python_files"]

import sys, os, re

# Matches control characters 0x00-0x08, 0x0E-0x1F and 0x7F, i.e. every ASCII
# control character except 0x09-0x0D (tab, newline, vertical tab, form feed,
# carriage return).  looks_like_python() treats a first line containing any
# of them as binary content.
binary_re = re.compile('[\x00-\x08\x0E-\x1F\x7F]')

debug = False

def print_debug(msg):
    if debug: print msg

def _open(fullpath):
    """Open fullpath for reading, or return None if it should be skipped.

    A file is skipped when it cannot be stat'ed, when it is larger than one
    megabyte, or when opening it fails.  The reason is reported through
    print_debug().  On success the file object, opened in 'rU' mode, is
    returned and the caller is responsible for closing it.
    """
    try:
        size = os.stat(fullpath).st_size
    except OSError, err: # Permission denied - ignore the file
        print_debug("%s: permission denied: %s" % (fullpath, err))
        return None

    if size > 1024*1024: # too big
        print_debug("%s: the file is too big: %d bytes" % (fullpath, size))
        return None

    try:
        return open(fullpath, 'rU')
    except IOError, err: # Access denied, or a special file - ignore it
        print_debug("%s: access denied: %s" % (fullpath, err))
        return None

def has_python_ext(fullpath):
    """Return True if fullpath ends in '.py' or '.pyw', False otherwise."""
    for suffix in (".py", ".pyw"):
        if fullpath.endswith(suffix):
            return True
    return False

def looks_like_python(fullpath):
    """Guess from the name and first line whether fullpath is Python source.

    Returns False if the file cannot be opened by _open() or if its first
    line contains a character matched by binary_re.  Otherwise returns True
    when the name has a '.py[w]' extension or the first line contains the
    word 'python', and False in every other case.
    """
    infile = _open(fullpath)
    if infile is None:
        return False

    # only the first line is needed; close the file as soon as it is read
    try:
        line = infile.readline()
    finally:
        infile.close()

    if binary_re.search(line):
        # file appears to be binary
        print_debug("%s: appears to be binary" % fullpath)
        return False

    if has_python_ext(fullpath):
        return True
    elif "python" in line:
        # disguised Python script (e.g. CGI)
        return True

    return False

def can_be_compiled(fullpath):
    infile = _open(fullpath)
    if infile is None:
        return False

    code = infile.read()

        compile(code, fullpath, "exec")
    except Exception, err:
        print_debug("%s: cannot compile: %s" % (fullpath, err))
        return False

    return True

def walk_python_files(paths, is_python=looks_like_python, exclude_dirs=None):
    """\
    Recursively yield all Python source files below the given paths.

    paths: a list of files and/or directories to be checked.
    is_python: a function that takes a file name and checks whether it is a
               Python source file
    exclude_dirs: a list of directory base names that should be excluded in
                  the search
    """
    if exclude_dirs is None:
        exclude_dirs = []

    for path in paths:
        print_debug("testing: %s" % path)
        if os.path.isfile(path):
            if is_python(path):
                yield path
        elif os.path.isdir(path):
            print_debug("    it is a directory")
            for dirpath, dirnames, filenames in os.walk(path):
                # prune in place so that os.walk() does not descend into
                # the excluded directories
                dirnames[:] = [name for name in dirnames
                               if name not in exclude_dirs]
                for filename in filenames:
                    fullpath = os.path.join(dirpath, filename)
                    print_debug("testing: %s" % fullpath)
                    if is_python(fullpath):
                        yield fullpath
        else:
            print_debug("    unknown type")

if __name__ == "__main__":
    # Two simple examples/tests
    for fullpath in walk_python_files(['.']):
        print fullpath
    print "----------"
    for fullpath in walk_python_files(['.'], is_python=can_be_compiled):
        print fullpath

Index: README
RCS file: /cvsroot/python/python/dist/src/Tools/scripts/README,v
retrieving revision 1.8.28.2
retrieving revision 1.8.28.3
diff -u -d -r1.8.28.2 -r1.8.28.3
--- README	7 Jan 2005 07:05:44 -0000
+++ README	16 Oct 2005 05:24:06 -0000
@@ -20,6 +20,7 @@
 eptags.py		Create Emacs TAGS file for Python modules
 finddiv.py		A grep-like tool that looks for division operators.
 findlinksto.py		Recursively find symbolic links to a given path prefix
+findnocoding.py		Find source files which need an encoding declaration
 fixcid.py		Massive identifier substitution on C source files
 fixdiv.py		Tool to fix division operators.
 fixheader.py		Add some cpp magic to a C include file
@@ -51,6 +52,7 @@
 pindent.py		Indent Python code, giving block-closing comments
 ptags.py		Create vi tags file for Python modules
 pydoc			Python documentation browser.
+pysource.py		Find Python source files
 redemo.py		Basic regular expression demostration facility
 reindent.py		Change .py files to use 4-space indents.
 rgrep.py		Reverse grep through a file (useful for big logfiles)

Index: pindent.py
RCS file: /cvsroot/python/python/dist/src/Tools/scripts/pindent.py,v
retrieving revision 1.10.24.2
retrieving revision 1.10.24.3
diff -u -d -r1.10.24.2 -r1.10.24.3
--- pindent.py	7 Jan 2005 07:05:50 -0000
+++ pindent.py	16 Oct 2005 05:24:06 -0000
@@ -238,6 +238,7 @@
         self.indentsize = 1
         stack = []
         todo = []
+        thisid = ''
         current, firstkw, lastkw, topid = 0, '', '', ''
         while 1:
             line = self.getline()

Index: texcheck.py
RCS file: /cvsroot/python/python/dist/src/Tools/scripts/texcheck.py,v
retrieving revision 1.12.6.1
retrieving revision 1.12.6.2
diff -u -d -r1.12.6.1 -r1.12.6.2
--- texcheck.py	7 Jan 2005 07:05:52 -0000
+++ texcheck.py	16 Oct 2005 05:24:06 -0000
@@ -57,6 +57,7 @@
     \email \kwindex \refexmodindex \filenq \e \menuselection
     \exindex \linev \newsgroup \verbatim \setshortversion
     \author \authoraddress \paragraph \subparagraph \cmemberline
+    \textbar
 def matchclose(c_lineno, c_symbol, openers, pairmap):

