On Wed, 10 Nov 2004, John P Speno wrote:
Hi, sorry for the delayed response.
> While using subprocess (aka popen5), I came across one potential gotcha. I've had
> exceptions ending like this:
>
> File "test.py", line 5, in test
> cmd = popen5.Popen(args, stdout=PIPE)
> File "popen5.py", line 577, in __init__
> data = os.read(errpipe_read, 1048576) # Exceptions limited to 1 MB
> OSError: [Errno 4] Interrupted system call
>
> (on Solaris 9)
>
> Would it make sense for subprocess to use a more robust read() function
> which can handle these cases, i.e. when the parent's read on the pipe
> to the child's stderr is interrupted by a system call, and returns EINTR?
> I imagine it could catch EINTR and EAGAIN and retry the failed read().
I assume you are using signals in your application? The os.read above is
not the only system call that can fail with EINTR. subprocess.py is full
of other system calls that can fail, and I suspect that many other Python
modules are as well.
I've made a patch (attached) to subprocess.py (and test_subprocess.py)
that should guard against EINTR, but I haven't committed it yet. It's
quite large.
Are Python modules supposed to handle EINTR? Why not let the C code handle
this? Or, perhaps the signal module should provide a sigaction function,
so that users can use SA_RESTART.
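For reference, the failure is easy to reproduce; a minimal sketch (assuming a POSIX platform where Python's signal handlers are installed without SA_RESTART, so a blocked read gets interrupted):

import os, errno, signal

signal.signal(signal.SIGALRM, lambda signum, frame: None)
r, w = os.pipe()
signal.alarm(1)       # SIGALRM arrives while the read below is blocked
try:
    os.read(r, 1)     # nothing is ever written to the pipe
except OSError, e:
    print e.errno == errno.EINTR   # True when the read is interrupted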
Index: subprocess.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/subprocess.py,v
retrieving revision 1.8
diff -u -r1.8 subprocess.py
--- subprocess.py 7 Nov 2004 14:30:34 -0000 1.8
+++ subprocess.py 17 Nov 2004 19:42:30 -0000
@@ -888,6 +888,50 @@
pass
+    def _read_no_intr(self, fd, buffersize):
+        """Like os.read, but retries on EINTR"""
+        while True:
+            try:
+                return os.read(fd, buffersize)
+            except OSError, e:
+                if e.errno == errno.EINTR:
+                    continue
+                else:
+                    raise
+
+
+    def _read_all(self, fd, buffersize):
+        """Like os.read, but retries on EINTR, and reads until EOF"""
+        all = ""
+        while True:
+            data = self._read_no_intr(fd, buffersize)
+            all += data
+            if data == "":
+                return all
+
+
+    def _write_no_intr(self, fd, s):
+        """Like os.write, but retries on EINTR"""
+        while True:
+            try:
+                return os.write(fd, s)
+            except OSError, e:
+                if e.errno == errno.EINTR:
+                    continue
+                else:
+                    raise
+
+    def _waitpid_no_intr(self, pid, options):
+        """Like os.waitpid, but retries on EINTR"""
+        while True:
+            try:
+                return os.waitpid(pid, options)
+            except OSError, e:
+                if e.errno == errno.EINTR:
+                    continue
+                else:
+                    raise
+
def _execute_child(self, args, executable, preexec_fn, close_fds,
cwd, env, universal_newlines,
startupinfo, creationflags, shell,
@@ -963,7 +1007,7 @@
exc_value,
tb)
exc_value.child_traceback = ''.join(exc_lines)
- os.write(errpipe_write, pickle.dumps(exc_value))
+ self._write_no_intr(errpipe_write, pickle.dumps(exc_value))
# This exitcode won't be reported to applications, so it
# really doesn't matter what we return.
@@ -979,7 +1023,7 @@
os.close(errwrite)
# Wait for exec to fail or succeed; possibly raising exception
- data = os.read(errpipe_read, 1048576) # Exceptions limited to 1 MB
+ data = self._read_all(errpipe_read, 1048576) # Exceptions limited to 1 MB
os.close(errpipe_read)
if data != "":
child_exception = pickle.loads(data)
@@ -1003,7 +1047,7 @@
attribute."""
if self.returncode == None:
try:
- pid, sts = os.waitpid(self.pid, os.WNOHANG)
+ pid, sts = self._waitpid_no_intr(self.pid, os.WNOHANG)
if pid == self.pid:
self._handle_exitstatus(sts)
except os.error:
@@ -1015,7 +1059,7 @@
"""Wait for child process to terminate. Returns returncode
attribute."""
if self.returncode == None:
- pid, sts = os.waitpid(self.pid, 0)
+ pid, sts = self._waitpid_no_intr(self.pid, 0)
self._handle_exitstatus(sts)
return self.returncode
@@ -1049,27 +1093,33 @@
stderr = []
while read_set or write_set:
- rlist, wlist, xlist = select.select(read_set, write_set, [])
+ try:
+ rlist, wlist, xlist = select.select(read_set, write_set, [])
+ except select.error, e:
+ if e[0] == errno.EINTR:
+ continue
+ else:
+ raise
if self.stdin in wlist:
# When select has indicated that the file is writable,
# we can write up to PIPE_BUF bytes without risk
# blocking. POSIX defines PIPE_BUF >= 512
- bytes_written = os.write(self.stdin.fileno(), input[:512])
+ bytes_written = self._write_no_intr(self.stdin.fileno(), input[:512])
input = input[bytes_written:]
if not input:
self.stdin.close()
write_set.remove(self.stdin)
if self.stdout in rlist:
- data = os.read(self.stdout.fileno(), 1024)
+ data = self._read_no_intr(self.stdout.fileno(), 1024)
if data == "":
self.stdout.close()
read_set.remove(self.stdout)
stdout.append(data)
if self.stderr in rlist:
- data = os.read(self.stderr.fileno(), 1024)
+ data = self._read_no_intr(self.stderr.fileno(), 1024)
if data == "":
self.stderr.close()
read_set.remove(self.stderr)
Index: test/test_subprocess.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/test/test_subprocess.py,v
retrieving revision 1.14
diff -u -r1.14 test_subprocess.py
--- test/test_subprocess.py 12 Nov 2004 15:51:48 -0000 1.14
+++ test/test_subprocess.py 17 Nov 2004 19:42:30 -0000
@@ -7,6 +7,7 @@
import tempfile
import time
import re
+import errno
mswindows = (sys.platform == "win32")
@@ -35,6 +36,16 @@
fname = tempfile.mktemp()
return os.open(fname, os.O_RDWR|os.O_CREAT), fname
+    def read_no_intr(self, obj):
+        while True:
+            try:
+                return obj.read()
+            except IOError, e:
+                if e.errno == errno.EINTR:
+                    continue
+                else:
+                    raise
+
#
# Generic tests
#
@@ -123,7 +134,7 @@
p = subprocess.Popen([sys.executable, "-c",
'import sys; sys.stdout.write("orange")'],
stdout=subprocess.PIPE)
- self.assertEqual(p.stdout.read(), "orange")
+ self.assertEqual(self.read_no_intr(p.stdout), "orange")
def test_stdout_filedes(self):
# stdout is set to open file descriptor
@@ -151,7 +162,7 @@
p = subprocess.Popen([sys.executable, "-c",
'import sys; sys.stderr.write("strawberry")'],
stderr=subprocess.PIPE)
- self.assertEqual(remove_stderr_debug_decorations(p.stderr.read()),
+ self.assertEqual(remove_stderr_debug_decorations(self.read_no_intr(p.stderr)),
"strawberry")
def test_stderr_filedes(self):
@@ -186,7 +197,7 @@
'sys.stderr.write("orange")'],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
- output = p.stdout.read()
+ output = self.read_no_intr(p.stdout)
stripped = remove_stderr_debug_decorations(output)
self.assertEqual(stripped, "appleorange")
@@ -220,7 +231,7 @@
stdout=subprocess.PIPE,
cwd=tmpdir)
normcase = os.path.normcase
- self.assertEqual(normcase(p.stdout.read()), normcase(tmpdir))
+ self.assertEqual(normcase(self.read_no_intr(p.stdout)), normcase(tmpdir))
def test_env(self):
newenv = os.environ.copy()
@@ -230,7 +241,7 @@
'sys.stdout.write(os.getenv("FRUIT"))'],
stdout=subprocess.PIPE,
env=newenv)
- self.assertEqual(p.stdout.read(), "orange")
+ self.assertEqual(self.read_no_intr(p.stdout), "orange")
def test_communicate(self):
p = subprocess.Popen([sys.executable, "-c",
@@ -305,7 +316,8 @@
'sys.stdout.write("\\nline6");'],
stdout=subprocess.PIPE,
universal_newlines=1)
- stdout = p.stdout.read()
+
+ stdout = self.read_no_intr(p.stdout)
if hasattr(open, 'newlines'):
# Interpreter with universal newline support
self.assertEqual(stdout,
@@ -343,7 +355,7 @@
def test_no_leaking(self):
# Make sure we leak no resources
- max_handles = 1026 # too much for most UNIX systems
+ max_handles = 10 # too much for most UNIX systems
if mswindows:
max_handles = 65 # a full test is too slow on Windows
for i in range(max_handles):
@@ -424,7 +436,7 @@
'sys.stdout.write(os.getenv("FRUIT"))'],
stdout=subprocess.PIPE,
preexec_fn=lambda: os.putenv("FRUIT", "apple"))
- self.assertEqual(p.stdout.read(), "apple")
+ self.assertEqual(self.read_no_intr(p.stdout), "apple")
def test_args_string(self):
# args is a string
@@ -457,7 +469,7 @@
p = subprocess.Popen(["echo $FRUIT"], shell=1,
stdout=subprocess.PIPE,
env=newenv)
- self.assertEqual(p.stdout.read().strip(), "apple")
+ self.assertEqual(self.read_no_intr(p.stdout).strip(), "apple")
def test_shell_string(self):
# Run command through the shell (string)
@@ -466,7 +478,7 @@
p = subprocess.Popen("echo $FRUIT", shell=1,
stdout=subprocess.PIPE,
env=newenv)
- self.assertEqual(p.stdout.read().strip(), "apple")
+ self.assertEqual(self.read_no_intr(p.stdout).strip(), "apple")
def test_call_string(self):
# call() function with string argument on UNIX
@@ -525,7 +537,7 @@
p = subprocess.Popen(["set"], shell=1,
stdout=subprocess.PIPE,
env=newenv)
- self.assertNotEqual(p.stdout.read().find("physalis"), -1)
+ self.assertNotEqual(self.read_no_intr(p.stdout).find("physalis"), -1)
def test_shell_string(self):
# Run command through the shell (string)
@@ -534,7 +546,7 @@
p = subprocess.Popen("set", shell=1,
stdout=subprocess.PIPE,
env=newenv)
- self.assertNotEqual(p.stdout.read().find("physalis"), -1)
+ self.assertNotEqual(self.read_no_intr(p.stdout).find("physalis"), -1)
def test_call_string(self):
# call() function with string argument on Windows
/Peter Åstrand <astrand(a)lysator.liu.se>
Perhaps this is more appropriate for python-list, but it looks like a
bug to me. Example code:
class A:
    def __str__(self):
        return u'\u1234'

'%s' % u'\u1234'  # this works
'%s' % A()        # this doesn't work
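# (the failure is a UnicodeEncodeError: '%s' only produces a unicode
# result when the argument itself is a unicode instance)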
It will work if 'A' subclasses 'unicode', but that should not be
necessary, IMHO. Any reason why this shouldn't be fixed?
Neil
Hi.
[Mark Hammond]
> The point isn't about my suffering as such. The point is more that
> python-dev owns a tiny amount of the code out there, and I don't believe we
> should put Python's users through this.
>
> Sure - I would be happy to "upgrade" all the win32all code, no problem. I
> am also happy to live in the bleeding edge and take some pain that will
> cause.
>
> The issue is simply the user base, and giving Python a reputation of not
> being able to painlessly upgrade even dot revisions.
I agree with all this.
[As I imagined, explicit syntax did not catch on and would require a
lot of discussion.]
[GvR]
> > Another way is to use special rules
> > (similar to those for class defs), e.g. having
> >
> > <frag>
> > y=3
> > def f():
> >     exec "y=2"
> >     def g():
> >         return y
> >     return g()
> >
> > print f()
> > </frag>
> >
> > # print 3.
> >
> > Is that confusing for users? maybe they will more naturally expect 2
> > as outcome (given nested scopes).
>
> This seems the best compromise to me. It will lead to the least
> broken code, because this is the behavior that we had before nested
> scopes! It is also quite easy to implement given the current
> implementation, I believe.
>
> Maybe we could introduce a warning rather than an error for this
> situation though, because even if this behavior is clearly documented,
> it will still be confusing to some, so it is better if we outlaw it in
> some future version.
>
Yes, this would be easy to implement, but more confusing situations can arise:
<frag>
y=3
def f():
    y=9
    exec "y=2"
    def g():
        return y
    return y,g()

print f()
</frag>
What should this print? Unlike class def scopes, this situation does not
lead to a canonical solution.
Or:
<frag>
def f():
    from foo import *
    def g():
        return y
    return g()

print f()
</frag>
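For comparison, here is a sketch of the class-def rule mentioned above:
name resolution already skips an intervening class scope (this is
existing behavior; the names are invented for illustration):
<frag>
y=3
def f():
    y=9
    class C:
        y=2                # class-body binding, not visible to g
        def g(self):
            return y       # skips C's scope, resolves to f's local y
    return C().g()

print f()                  # prints 9 under nested scopes
</frag>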
[Mark Hammond]
> > This probably won't be a very popular suggestion, but how about pulling
> > nested scopes (I assume they are at the root of the problem)
> > until this can be solved cleanly?
>
> Agreed. While I think nested scopes are kinda cool, I have lived without
> them, and really without missing them, for years. At the moment the cure
> appears worse then the symptoms in at least a few cases. If nothing else,
> it compromises the elegant simplicity of Python that drew me here in the
> first place!
>
> Assuming that people really _do_ want this feature, IMO the bar should be
> raised so there are _zero_ backward compatibility issues.
I won't say anything about pulling nested scopes (I don't think my opinion
can change things in this respect), but I must insist that without explicit
syntax, IMO, raising the bar either has too high an implementation cost
(both performance and complexity) or creates confusion.
[Andrew Kuchling]
> >Assuming that people really _do_ want this feature, IMO the bar should be
> >raised so there are _zero_ backward compatibility issues.
>
> Even at the cost of additional implementation complexity? At the cost
> of having to learn "scopes are nested, unless you do these two things
> in which case they're not"?
>
> Let's not waffle. If nested scopes are worth doing, they're worth
> breaking code. Either leave exec and from..import illegal, or back
> out nested scopes, or think of some better solution, but let's not
> introduce complicated backward compatibility hacks.
IMO breaking code would be OK if we issue warnings today and have
nested scopes issue errors tomorrow. But this is simply a statement
of principle and my impression.
IMO, import * in an inner scope should end up being an error;
I'm not sure about 'exec'.
We will need a final BDFL statement.
regards, Samuele Pedroni.
I thought it would be nice to try to improve the mimetypes module by having
it, on Windows, query the Registry to get the mapping of filename extensions
to media types, since the mimetypes code currently just blindly checks
posix-specific paths for httpd-style mapping files. However, it seems that the
way to get mappings from the Windows registry is excessively slow in Python.
I'm told that the reason has to do with the limited subset of APIs that are
exposed in the _winreg module. I think the problem is that EnumKey(key, index)
queries for the entire list of subkeys for the given key every time you call
it. Or something. Whatever the situation is, the code I tried below is way
slower than I think it ought to be.
Does anyone have any suggestions (besides "write it in C")? Could _winreg
possibly be improved to provide an iterator or better interface to get the
subkeys? (or certain ones? There are a lot of keys under HKEY_CLASSES_ROOT,
and I only need the ones that start with a period). Should I file this as a
feature request?
Thanks
-Mike
from _winreg import HKEY_CLASSES_ROOT, OpenKey, EnumKey, QueryValueEx

i = 0
typemap = {}
try:
    while 1:
        subkeyname = EnumKey(HKEY_CLASSES_ROOT, i)
        try:
            subkey = OpenKey(HKEY_CLASSES_ROOT, subkeyname)
            if subkeyname[:1] == '.':
                data = QueryValueEx(subkey, 'Content Type')[0]
                print subkeyname, '=', data
                typemap[subkeyname] = data  # data will be unicode
        except EnvironmentError:
            # WindowsError subclasses EnvironmentError, so this catches
            # both (the original "except EnvironmentError, WindowsError:"
            # bound the exception to the name WindowsError rather than
            # catching two types)
            pass
        i += 1
except WindowsError:
    # EnumKey raises WindowsError once the index runs past the last subkey
    pass
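For what it's worth, a generator wrapper over EnumKey is easy to write
today, though it presumably can't fix the underlying per-call cost (the
helper name is invented):

from _winreg import EnumKey

def subkeys(key):
    """Yield subkey names of an open registry key, in index order."""
    i = 0
    while True:
        try:
            yield EnumKey(key, i)
        except WindowsError:
            # EnumKey raises WindowsError when the index is exhausted
            return
        i += 1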
I didn't see any replies to the last post, so I'll ask again with a
better subject line. As I said last time, as far as I'm aware no one
has produced a fix for the issue Tim identified
( http://www.python.org/sf/1069160 )
So, my question is: Is this important enough to delay a 2.4 final for?
My plan is currently to release it _this_ _Tuesday_, so I really need
an answer soon...
I've attached Tim's original message at the end here. At the moment,
I'm inclined to say "if it's not fixed, it won't kill us". But that's
admittedly my own biases - threading bugs annoy me <wink>
I'm happy to defer to more knowledgeable types, though - is this so
bad that it merits delaying the release? I can't make time to look
at it before then - I'm still writing slides for a couple of talks at OSDC.
Anthony
From: Tim Peters <tim.peters(a)gmail.com>
To: Python Dev <python-dev(a)python.org>
Date: 2004-11-19 13:08
This one is a puzzler. See
http://www.python.org/sf/1069160
for details. The short course is that the PyThreadState_SetAsyncExc()
implementation fell into a common trap, and can cause segfaults under
rare conditions (like every other Python thread segfault bug we've
ever had).
This is easily repaired (although I've got no interest in doing the
coding, or even in contriving a test case -- this was an obvious "by
eyeball" bug).
The puzzle is how to treat this wrt 2.4. Since it's a critical bug, I
suppose it "should" force another release candidate. OTOH, this is a
C-only API (there's no exposure from Python) that's never used in the
core. We could add code to make it segfault every time <wink>, and
nothing in the distribution would notice.
OTOH, if we broke its intended behavior while fixing the bug, we'd
never know that either. "Never used in the core" means never -- the
function isn't tested.
On the third hand, it's a simple function with an obvious segfault
mode that has an obvious fix.
I'll leave it to the release manager <wink>.
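For the record, since there's no exposure from Python, the only way to
poke at the function from Python is something like ctypes (a third-party
package in the 2.4 era); a rough sketch, names invented:

import ctypes

def async_raise(tid, exc_type):
    # PyThreadState_SetAsyncExc takes a thread id and an exception
    # class, and returns the number of thread states it modified;
    # 0 means no thread with that id was found.
    n = ctypes.pythonapi.PyThreadState_SetAsyncExc(
            ctypes.c_long(tid), ctypes.py_object(exc_type))
    if n == 0:
        raise ValueError("no thread with id %r" % (tid,))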
After implementing over 10 new opcodes for my thesis, I figured I should write
down the basic steps in an info PEP, so that this PEP and PEP 306 together
provide enough guidelines to cover the bases on changes to the language itself.
To go along with this I also plan to write some benchmarks for individual
opcodes that could possibly lead to a testing suite for the opcodes themselves
(will probably do this piece-meal and put it up on SF initially since there are
a lot of opcodes).
Anyway, let me know if I seem to be missing anything or have something to add.
After a reasonable time of non-response to this I will request a PEP number
(assuming people don't think this PEP is stupid).
------------------------------------------
PEP: XXX
Title: How to change Python's bytecode
Version: $Revision: 1.4 $
Last-Modified: $Date: 2003/09/22 04:51:50 $
Author: Brett Cannon <brett(a)python.org>
Status: Draft
Type: Informational
Content-Type: text/x-rst
Created: XX-XXX-XXXX
Post-History: XX-XXX-XXXX
Abstract
========
Python source code is compiled down to something called bytecode. This
bytecode (which can be viewed as sequences of opcodes) defines what Python is
capable of. As such, knowing how to properly add, remove, or change the
bytecode is important when changing the abilities of the Python language.
Rationale
=========
While changing Python's bytecode is not a frequent occurrence, it still happens.
Having the required steps documented in a single location should make
experimentation with the bytecode easier since it is not necessarily obvious
what the steps are to change the bytecode.
This PEP, paired with PEP 306 [#PEP-306]_, should provide enough basic
guidelines for handling any changes performed to the Python language itself in
terms of syntactic changes that introduce new semantics.
Checklist
=========
This is a rough checklist of what files need to change and how they are
involved with the bytecode. All paths are given from the viewpoint of
``/cvsroot/python/dist/src`` (as checked out from CVS). This list should
not be considered exhaustive, nor to cover all possible situations.
- ``Include/opcode.h``
This include file lists all known opcodes and associates each opcode
name with a unique number. When adding a new opcode it is important
to take note of the ``HAVE_ARGUMENT`` value. This ``#define`` marks a
boundary: every opcode whose value is greater than or equal to
``HAVE_ARGUMENT`` takes an argument.
- ``Lib/opcode.py``
Lists all of the opcodes and their associated value. Used by the dis
module [#dis]_ to map bytecode values to their names.
- ``Python/ceval.c``
Contains the main interpreter loop. Code to handle the evaluation of an
opcode goes here.
- ``Python/compile.c``
The emitting of all bytecode occurs here, so this file must be altered
for a new opcode to actually be used.
- ``Lib/compiler/pyassem.py``, ``Lib/compiler/pycodegen.py``
The 'compiler' package [#compiler]_ needs to be altered to also reflect
any changes to the bytecode.
- ``Doc/lib/libdis.tex``
The documentation [#dis-docs]_ for the dis module contains a complete
list of all the opcodes.
- ``Python/import.c``
Defines the magic number (named ``MAGIC``) used in .pyc files to detect if
the bytecode used matches the one used by the version of Python running.
This number needs to be changed to make sure that the running
interpreter does not try to execute bytecode that it does not know
about.
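For reference, the dis module [#dis]_ is the usual way to check which
opcodes a given piece of code actually compiles to::

    import dis

    def f(x):
        return x + 1

    # Prints one instruction per line: LOAD_FAST, LOAD_CONST,
    # BINARY_ADD, RETURN_VALUE (names as of the 2.4 opcode set).
    dis.dis(f)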
Suggestions for bytecode development
====================================
A few things can be done to make sure that development goes smoothly when
experimenting with Python's bytecode. One is to delete all .pyc and .pyo
files after each semantic change to ``Python/compile.c``. That way all
modules will be recompiled and pick up any bytecode changes.
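A throwaway sketch of that cleanup (run from the top of the source tree;
``os.walk`` does the recursion)::

    import os

    # Remove stale bytecode so every module is recompiled by the
    # modified compiler on its next import.
    for dirpath, dirnames, filenames in os.walk('.'):
        for name in filenames:
            if name.endswith('.pyc') or name.endswith('.pyo'):
                os.remove(os.path.join(dirpath, name))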
Make sure to run the entire testing suite [#test-suite]_. Since the
``regrtest.py`` driver recompiles all source code before a test is run,
it acts as a good check that no existing semantics are broken.
Running parrotbench [#parrotbench]_ is also a good way to make sure existing
semantics are not broken; this benchmark is practically a compliance test.
References
==========
.. [#PEP-306] PEP 306, How to Change Python's Grammar, Hudson
(http://www.python.org/peps/pep-0306.html)
.. [#dis] XXX
.. [#test-suite] XXX
.. [#parrotbench] XXX
.. [#dis-docs] XXX
Copyright
=========
This document has been placed in the public domain.
..
Local Variables:
mode: indented-text
indent-tabs-mode: nil
sentence-end-double-space: t
fill-column: 70
End:
I'm hoping to add BZIP2 compression to zipfile for 2.5. My primary
motivation is that Project Gutenberg seems to be starting to use BZIP2
compression for some of its zips. What other wish list things do
people around here have for zipfile? I thought I'd collect input here
and make a PEP.
For example, I could open a pseudo-file for STORED members in binary
read mode, to allow reading zip-in-zip files without holding everything
in memory.
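To illustrate what that would save, today a nested archive has to be
slurped into memory first (a sketch; the file names are invented):

import zipfile
from StringIO import StringIO

outer = zipfile.ZipFile('outer.zip', 'r')
# ZipFile.read() pulls the entire inner archive into memory; a STORED
# pseudo-file would instead let ZipFile seek within the outer file.
inner = zipfile.ZipFile(StringIO(outer.read('inner.zip')), 'r')
print inner.namelist()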
-- Scott David Daniels
Scott.Daniels(a)Acm.Org
loewis(a)users.sourceforge.net wrote:
> Update of /cvsroot/python/python/dist/src/Doc/ext
> In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17396
> Index: windows.tex
> ===================================================================
> RCS file: /cvsroot/python/python/dist/src/Doc/ext/windows.tex,v
> retrieving revision 1.9
> retrieving revision 1.10
> diff -u -d -r1.9 -r1.10
> --- windows.tex 23 Jan 2004 09:01:56 -0000 1.9
> +++ windows.tex 30 Dec 2004 10:44:32 -0000 1.10
> @@ -142,62 +142,62 @@
[snip]
> + Now open the \menuselection{Project \sub spam properties} dialog.
> + You only need to change a few settings. Make sure \guilable{All
> + Configurations} is selected from the \guilable{Settings for:}
Macro names are wrong, which results in a compile error.
\guilable should read \guilabel.
George
On 18-dec-04, at 21:48, bcannon(a)users.sourceforge.net wrote:
> Update of /cvsroot/python/python/dist/src
> In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv21493
>
> Modified Files:
> setup.py
> Log Message:
> Switch from getting LDFLAGS and CPPFLAGS from the environment to the
> Makefile.
> This is to avoid a problem that inconsistently comes up where the
> environment
> variable is unset while the Makefile clearly has the values set and
> are used
> during ``make``.
Brett,
I'm building MacPython binary distributions with an LDFLAGS configure
option, as in
$PYTHONSRC/configure -C --enable-framework LDFLAGS=-Wl,-x
This has suddenly started failing with a very mysterious error message.
When the make tries to build the extension modules, I get:
running build
running build_ext
usage: setup.py [options]
setup.py: error: no such option: -W
Could this somehow be caused by your fix?
--
Jack Jansen, <Jack.Jansen(a)cwi.nl>, http://www.cwi.nl/~jack
If I can't dance I don't want to be part of your revolution -- Emma
Goldman
I see that, as expected, the Windows Python 2.4 was built with MSVC 7.1
rather than MSVC 6.0.
It seems that I can build extensions with MSVC 6.0 that work with the
Python 2.4 Windows binary kit.
Is this safe?
I recall a warning a while ago about mixing MSVC 6.0 and MSVC 7.1 runtime
DLLs. Is this an issue with Python 2.4?
I'm also surprised that the Python 2.4 source kit only mentions MSVC 6.0
and not the compiler that Python 2.4 was actually built with.
Barry