[Python-checkins] cpython (2.7): Fix long-standing bugs with MANIFEST.in parsing on Windows (#6884).
eric.araujo
python-checkins at python.org
Sat Feb 25 16:13:59 CET 2012
http://hg.python.org/cpython/rev/47788c90f80b
changeset: 75256:47788c90f80b
branch: 2.7
parent: 75251:4dd71a933f20
user: Éric Araujo <merwok at netwok.org>
date: Sat Feb 25 16:13:53 2012 +0100
summary:
Fix long-standing bugs with MANIFEST.in parsing on Windows (#6884).
These regex changes fix a number of issues for distutils on Windows:
- #6884: impossible to include a file starting with 'build'
- #9691 and #14004: sdist includes too many files
- #13193: test_filelist failures
This commit replaces the incorrect changes done in 557a973709de,
c566a3447ba1 and 3925081a7ca0 to fix #13193; we were too eager to fix
the test failures and I did not study the code enough before
greenlighting patches. This time we have unit tests from the problems
reported by users to be sure we have the right fix.
Thanks to Nadeem Vawda for his help.
files:
Lib/distutils/filelist.py | 20 +-
Lib/distutils/tests/test_filelist.py | 111 +++++++++-----
Lib/distutils/tests/test_sdist.py | 14 +-
Misc/NEWS | 5 +-
4 files changed, 97 insertions(+), 53 deletions(-)
diff --git a/Lib/distutils/filelist.py b/Lib/distutils/filelist.py
--- a/Lib/distutils/filelist.py
+++ b/Lib/distutils/filelist.py
@@ -210,6 +210,7 @@
Return 1 if files are found.
"""
+ # XXX docstring lying about what the special chars are?
files_found = 0
pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
self.debug_print("include_pattern: applying regex r'%s'" %
@@ -297,11 +298,14 @@
# IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
# and by extension they shouldn't match such "special characters" under
# any OS. So change all non-escaped dots in the RE to match any
- # character except the special characters.
- # XXX currently the "special characters" are just slash -- i.e. this is
- # Unix-only.
- pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re)
-
+ # character except the special characters (currently: just os.sep).
+ sep = os.sep
+ if os.sep == '\\':
+ # we're using a regex to manipulate a regex, so we need
+ # to escape the backslash twice
+ sep = r'\\\\'
+ escaped = r'\1[^%s]' % sep
+ pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
return pattern_re
@@ -328,8 +332,10 @@
# ditch end of pattern character
empty_pattern = glob_to_re('')
prefix_re = glob_to_re(prefix)[:-len(empty_pattern)]
- # paths should always use / in manifest templates
- pattern_re = "^%s/.*%s" % (prefix_re, pattern_re)
+ sep = os.sep
+ if os.sep == '\\':
+ sep = r'\\'
+ pattern_re = "^" + sep.join((prefix_re, ".*" + pattern_re))
else: # no prefix -- respect anchor flag
if anchor:
pattern_re = "^" + pattern_re
diff --git a/Lib/distutils/tests/test_filelist.py b/Lib/distutils/tests/test_filelist.py
--- a/Lib/distutils/tests/test_filelist.py
+++ b/Lib/distutils/tests/test_filelist.py
@@ -1,4 +1,5 @@
"""Tests for distutils.filelist."""
+import os
import re
import unittest
from distutils import debug
@@ -14,6 +15,7 @@
include xo
exclude xo
include foo.tmp
+include buildout.cfg
global-include *.x
global-include *.txt
global-exclude *.tmp
@@ -24,6 +26,11 @@
"""
+def make_local_path(s):
+ """Converts '/' in a string to os.sep"""
+ return s.replace('/', os.sep)
+
+
class FileListTestCase(support.LoggingSilencer,
unittest.TestCase):
@@ -36,41 +43,60 @@
self.clear_logs()
def test_glob_to_re(self):
- # simple cases
- self.assertEqual(glob_to_re('foo*'), 'foo[^/]*\\Z(?ms)')
- self.assertEqual(glob_to_re('foo?'), 'foo[^/]\\Z(?ms)')
- self.assertEqual(glob_to_re('foo??'), 'foo[^/][^/]\\Z(?ms)')
+ sep = os.sep
+ if os.sep == '\\':
+ sep = re.escape(os.sep)
- # special cases
- self.assertEqual(glob_to_re(r'foo\\*'), r'foo\\\\[^/]*\Z(?ms)')
- self.assertEqual(glob_to_re(r'foo\\\*'), r'foo\\\\\\[^/]*\Z(?ms)')
- self.assertEqual(glob_to_re('foo????'), r'foo[^/][^/][^/][^/]\Z(?ms)')
- self.assertEqual(glob_to_re(r'foo\\??'), r'foo\\\\[^/][^/]\Z(?ms)')
+ for glob, regex in (
+ # simple cases
+ ('foo*', r'foo[^%(sep)s]*\Z(?ms)'),
+ ('foo?', r'foo[^%(sep)s]\Z(?ms)'),
+ ('foo??', r'foo[^%(sep)s][^%(sep)s]\Z(?ms)'),
+ # special cases
+ (r'foo\\*', r'foo\\\\[^%(sep)s]*\Z(?ms)'),
+ (r'foo\\\*', r'foo\\\\\\[^%(sep)s]*\Z(?ms)'),
+ ('foo????', r'foo[^%(sep)s][^%(sep)s][^%(sep)s][^%(sep)s]\Z(?ms)'),
+ (r'foo\\??', r'foo\\\\[^%(sep)s][^%(sep)s]\Z(?ms)')):
+ regex = regex % {'sep': sep}
+ self.assertEqual(glob_to_re(glob), regex)
def test_process_template_line(self):
# testing all MANIFEST.in template patterns
file_list = FileList()
+ l = make_local_path
# simulated file list
file_list.allfiles = ['foo.tmp', 'ok', 'xo', 'four.txt',
- 'global/one.txt',
- 'global/two.txt',
- 'global/files.x',
- 'global/here.tmp',
- 'f/o/f.oo',
- 'dir/graft-one',
- 'dir/dir2/graft2',
- 'dir3/ok',
- 'dir3/sub/ok.txt',
- ]
+ 'buildout.cfg',
+ # filelist does not filter out VCS directories,
+ # it's sdist that does
+ l('.hg/last-message.txt'),
+ l('global/one.txt'),
+ l('global/two.txt'),
+ l('global/files.x'),
+ l('global/here.tmp'),
+ l('f/o/f.oo'),
+ l('dir/graft-one'),
+ l('dir/dir2/graft2'),
+ l('dir3/ok'),
+ l('dir3/sub/ok.txt'),
+ ]
for line in MANIFEST_IN.split('\n'):
if line.strip() == '':
continue
file_list.process_template_line(line)
- wanted = ['ok', 'four.txt', 'global/one.txt', 'global/two.txt',
- 'f/o/f.oo', 'dir/graft-one', 'dir/dir2/graft2']
+ wanted = ['ok',
+ 'buildout.cfg',
+ 'four.txt',
+ l('.hg/last-message.txt'),
+ l('global/one.txt'),
+ l('global/two.txt'),
+ l('f/o/f.oo'),
+ l('dir/graft-one'),
+ l('dir/dir2/graft2'),
+ ]
self.assertEqual(file_list.files, wanted)
@@ -158,6 +184,7 @@
self.assertEqual(file_list.allfiles, ['a.py', 'b.txt'])
def test_process_template(self):
+ l = make_local_path
# invalid lines
file_list = FileList()
for action in ('include', 'exclude', 'global-include',
@@ -168,7 +195,7 @@
# include
file_list = FileList()
- file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py'])
+ file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('include *.py')
self.assertEqual(file_list.files, ['a.py'])
@@ -180,31 +207,31 @@
# exclude
file_list = FileList()
- file_list.files = ['a.py', 'b.txt', 'd/c.py']
+ file_list.files = ['a.py', 'b.txt', l('d/c.py')]
file_list.process_template_line('exclude *.py')
- self.assertEqual(file_list.files, ['b.txt', 'd/c.py'])
+ self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
self.assertNoWarnings()
file_list.process_template_line('exclude *.rb')
- self.assertEqual(file_list.files, ['b.txt', 'd/c.py'])
+ self.assertEqual(file_list.files, ['b.txt', l('d/c.py')])
self.assertWarnings()
# global-include
file_list = FileList()
- file_list.set_allfiles(['a.py', 'b.txt', 'd/c.py'])
+ file_list.set_allfiles(['a.py', 'b.txt', l('d/c.py')])
file_list.process_template_line('global-include *.py')
- self.assertEqual(file_list.files, ['a.py', 'd/c.py'])
+ self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
self.assertNoWarnings()
file_list.process_template_line('global-include *.rb')
- self.assertEqual(file_list.files, ['a.py', 'd/c.py'])
+ self.assertEqual(file_list.files, ['a.py', l('d/c.py')])
self.assertWarnings()
# global-exclude
file_list = FileList()
- file_list.files = ['a.py', 'b.txt', 'd/c.py']
+ file_list.files = ['a.py', 'b.txt', l('d/c.py')]
file_list.process_template_line('global-exclude *.py')
self.assertEqual(file_list.files, ['b.txt'])
@@ -216,50 +243,52 @@
# recursive-include
file_list = FileList()
- file_list.set_allfiles(['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py'])
+ file_list.set_allfiles(['a.py', l('d/b.py'), l('d/c.txt'),
+ l('d/d/e.py')])
file_list.process_template_line('recursive-include d *.py')
- self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+ self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertNoWarnings()
file_list.process_template_line('recursive-include e *.py')
- self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+ self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertWarnings()
# recursive-exclude
file_list = FileList()
- file_list.files = ['a.py', 'd/b.py', 'd/c.txt', 'd/d/e.py']
+ file_list.files = ['a.py', l('d/b.py'), l('d/c.txt'), l('d/d/e.py')]
file_list.process_template_line('recursive-exclude d *.py')
- self.assertEqual(file_list.files, ['a.py', 'd/c.txt'])
+ self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
self.assertNoWarnings()
file_list.process_template_line('recursive-exclude e *.py')
- self.assertEqual(file_list.files, ['a.py', 'd/c.txt'])
+ self.assertEqual(file_list.files, ['a.py', l('d/c.txt')])
self.assertWarnings()
# graft
file_list = FileList()
- file_list.set_allfiles(['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py'])
+ file_list.set_allfiles(['a.py', l('d/b.py'), l('d/d/e.py'),
+ l('f/f.py')])
file_list.process_template_line('graft d')
- self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+ self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertNoWarnings()
file_list.process_template_line('graft e')
- self.assertEqual(file_list.files, ['d/b.py', 'd/d/e.py'])
+ self.assertEqual(file_list.files, [l('d/b.py'), l('d/d/e.py')])
self.assertWarnings()
# prune
file_list = FileList()
- file_list.files = ['a.py', 'd/b.py', 'd/d/e.py', 'f/f.py']
+ file_list.files = ['a.py', l('d/b.py'), l('d/d/e.py'), l('f/f.py')]
file_list.process_template_line('prune d')
- self.assertEqual(file_list.files, ['a.py', 'f/f.py'])
+ self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
self.assertNoWarnings()
file_list.process_template_line('prune e')
- self.assertEqual(file_list.files, ['a.py', 'f/f.py'])
+ self.assertEqual(file_list.files, ['a.py', l('f/f.py')])
self.assertWarnings()
diff --git a/Lib/distutils/tests/test_sdist.py b/Lib/distutils/tests/test_sdist.py
--- a/Lib/distutils/tests/test_sdist.py
+++ b/Lib/distutils/tests/test_sdist.py
@@ -42,6 +42,7 @@
MANIFEST = """\
# file GENERATED by distutils, do NOT edit
README
+buildout.cfg
inroot.txt
setup.py
data%(sep)sdata.dt
@@ -150,7 +151,7 @@
dist_folder = join(self.tmp_dir, 'dist')
result = os.listdir(dist_folder)
result.sort()
- self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'] )
+ self.assertEqual(result, ['fake-1.0.tar', 'fake-1.0.tar.gz'])
os.remove(join(dist_folder, 'fake-1.0.tar'))
os.remove(join(dist_folder, 'fake-1.0.tar.gz'))
@@ -209,11 +210,18 @@
self.write_file((data_dir, 'data.dt'), '#')
some_dir = join(self.tmp_dir, 'some')
os.mkdir(some_dir)
+ # make sure VCS directories are pruned (#14004)
+ hg_dir = join(self.tmp_dir, '.hg')
+ os.mkdir(hg_dir)
+ self.write_file((hg_dir, 'last-message.txt'), '#')
+ # a buggy regex used to prevent this from working on windows (#6884)
+ self.write_file((self.tmp_dir, 'buildout.cfg'), '#')
self.write_file((self.tmp_dir, 'inroot.txt'), '#')
self.write_file((some_dir, 'file.txt'), '#')
self.write_file((some_dir, 'other_file.txt'), '#')
dist.data_files = [('data', ['data/data.dt',
+ 'buildout.cfg',
'inroot.txt',
'notexisting']),
'some/file.txt',
@@ -243,15 +251,15 @@
zip_file.close()
# making sure everything was added
- self.assertEqual(len(content), 11)
+ self.assertEqual(len(content), 12)
# checking the MANIFEST
f = open(join(self.tmp_dir, 'MANIFEST'))
try:
manifest = f.read()
- self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
finally:
f.close()
+ self.assertEqual(manifest, MANIFEST % {'sep': os.sep})
@unittest.skipUnless(zlib, "requires zlib")
def test_metadata_check_option(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -98,6 +98,9 @@
Library
-------
+- Issue #6884: Fix long-standing bugs with MANIFEST.in parsing in distutils
+ on Windows.
+
- Issue #8033: sqlite3: Fix 64-bit integer handling in user functions
on 32-bit architectures. Initial patch by Philippe Devalkeneer.
@@ -250,8 +253,6 @@
- Issues #1745761, #755670, #13357, #12629, #1200313: HTMLParser now correctly
handles non-valid attributes, including adjacent and unquoted attributes.
-- Issue #13193: Fix distutils.filelist.FileList under Windows.
-
- Issue #13373: multiprocessing.Queue.get() could sometimes block indefinitely
when called with a timeout. Patch by Arnaud Ysmal.
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list