[Python-checkins] r64754 - in python/branches/release25-maint: Lib/socket.py Lib/test/test_socket.py Misc/NEWS
gregory.p.smith
python-checkins at python.org
Sun Jul 6 06:04:08 CEST 2008
Author: gregory.p.smith
Date: Sun Jul 6 06:04:07 2008
New Revision: 64754
Log:
Backport r62627 + r62744 from trunk to fix issue 2632.
- Issue #2632: Prevent socket.read(bignumber) from over allocating memory
in the common case when the data is returned from the underlying socket
in increments much smaller than bignumber.
Modified:
python/branches/release25-maint/Lib/socket.py
python/branches/release25-maint/Lib/test/test_socket.py
python/branches/release25-maint/Misc/NEWS
Modified: python/branches/release25-maint/Lib/socket.py
==============================================================================
--- python/branches/release25-maint/Lib/socket.py (original)
+++ python/branches/release25-maint/Lib/socket.py Sun Jul 6 06:04:07 2008
@@ -56,6 +56,11 @@
import os, sys
try:
+ from cStringIO import StringIO
+except ImportError:
+ from StringIO import StringIO
+
+try:
from errno import EBADF
except ImportError:
EBADF = 9
@@ -211,6 +216,9 @@
bufsize = self.default_bufsize
self.bufsize = bufsize
self.softspace = False
+ # _rbufsize is the suggested recv buffer size. It is *strictly*
+ # obeyed within readline() for recv calls. If it is larger than
+ # default_bufsize it will be used for recv calls within read().
if bufsize == 0:
self._rbufsize = 1
elif bufsize == 1:
@@ -218,7 +226,11 @@
else:
self._rbufsize = bufsize
self._wbufsize = bufsize
- self._rbuf = "" # A string
+ # We use StringIO for the read buffer to avoid holding a list
+ # of variously sized string objects which have been known to
+ # fragment the heap due to how they are malloc()ed and often
+ # realloc()ed down much smaller than their original allocation.
+ self._rbuf = StringIO()
self._wbuf = [] # A list of strings
self._close = close
@@ -276,56 +288,85 @@
return buf_len
def read(self, size=-1):
- data = self._rbuf
+ # Use max, disallow tiny reads in a loop as they are very inefficient.
+ # We never leave read() with any leftover data from a new recv() call
+ # in our internal buffer.
+ rbufsize = max(self._rbufsize, self.default_bufsize)
+ # Our use of StringIO rather than lists of string objects returned by
+ # recv() minimizes memory usage and fragmentation that occurs when
+ # rbufsize is large compared to the typical return value of recv().
+ buf = self._rbuf
+ buf.seek(0, 2) # seek end
if size < 0:
# Read until EOF
- buffers = []
- if data:
- buffers.append(data)
- self._rbuf = ""
- if self._rbufsize <= 1:
- recv_size = self.default_bufsize
- else:
- recv_size = self._rbufsize
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
- data = self._sock.recv(recv_size)
+ data = self._sock.recv(rbufsize)
if not data:
break
- buffers.append(data)
- return "".join(buffers)
+ buf.write(data)
+ return buf.getvalue()
else:
# Read until size bytes or EOF seen, whichever comes first
- buf_len = len(data)
+ buf_len = buf.tell()
if buf_len >= size:
- self._rbuf = data[size:]
- return data[:size]
- buffers = []
- if data:
- buffers.append(data)
- self._rbuf = ""
+ # Already have size bytes in our buffer? Extract and return.
+ buf.seek(0)
+ rv = buf.read(size)
+ self._rbuf = StringIO()
+ self._rbuf.write(buf.read())
+ return rv
+
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
left = size - buf_len
- recv_size = min(self._rbufsize, left)
- data = self._sock.recv(recv_size)
+ # recv() will malloc the amount of memory given as its
+ # parameter even though it often returns much less data
+ # than that. The returned data string is short lived
+ # as we copy it into a StringIO and free it. This avoids
+ # fragmentation issues on many platforms.
+ data = self._sock.recv(left)
if not data:
break
- buffers.append(data)
n = len(data)
- if n >= left:
- self._rbuf = data[left:]
- buffers[-1] = data[:left]
+ if n == size and not buf_len:
+ # Shortcut. Avoid buffer data copies when:
+ # - We have no data in our buffer.
+ # AND
+ # - Our call to recv returned exactly the
+ # number of bytes we were asked to read.
+ return data
+ if n == left:
+ buf.write(data)
+ del data # explicit free
break
+ assert n <= left, "recv(%d) returned %d bytes" % (left, n)
+ buf.write(data)
buf_len += n
- return "".join(buffers)
+ del data # explicit free
+ #assert buf_len == buf.tell()
+ return buf.getvalue()
def readline(self, size=-1):
- data = self._rbuf
+ buf = self._rbuf
+ buf.seek(0, 2) # seek end
+ if buf.tell() > 0:
+ # check if we already have it in our buffer
+ buf.seek(0)
+ bline = buf.readline(size)
+ if bline.endswith('\n') or len(bline) == size:
+ self._rbuf = StringIO()
+ self._rbuf.write(buf.read())
+ return bline
+ del bline
if size < 0:
# Read until \n or EOF, whichever comes first
if self._rbufsize <= 1:
# Speed up unbuffered case
- assert data == ""
- buffers = []
+ buf.seek(0)
+ buffers = [buf.read()]
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
+ data = None
recv = self._sock.recv
while data != "\n":
data = recv(1)
@@ -333,61 +374,64 @@
break
buffers.append(data)
return "".join(buffers)
- nl = data.find('\n')
- if nl >= 0:
- nl += 1
- self._rbuf = data[nl:]
- return data[:nl]
- buffers = []
- if data:
- buffers.append(data)
- self._rbuf = ""
+
+ buf.seek(0, 2) # seek end
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
data = self._sock.recv(self._rbufsize)
if not data:
break
- buffers.append(data)
nl = data.find('\n')
if nl >= 0:
nl += 1
- self._rbuf = data[nl:]
- buffers[-1] = data[:nl]
+ buf.write(buffer(data, 0, nl))
+ self._rbuf.write(buffer(data, nl))
+ del data
break
- return "".join(buffers)
+ buf.write(data)
+ return buf.getvalue()
else:
# Read until size bytes or \n or EOF seen, whichever comes first
- nl = data.find('\n', 0, size)
- if nl >= 0:
- nl += 1
- self._rbuf = data[nl:]
- return data[:nl]
- buf_len = len(data)
+ buf.seek(0, 2) # seek end
+ buf_len = buf.tell()
if buf_len >= size:
- self._rbuf = data[size:]
- return data[:size]
- buffers = []
- if data:
- buffers.append(data)
- self._rbuf = ""
+ buf.seek(0)
+ rv = buf.read(size)
+ self._rbuf = StringIO()
+ self._rbuf.write(buf.read())
+ return rv
+ self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
data = self._sock.recv(self._rbufsize)
if not data:
break
- buffers.append(data)
left = size - buf_len
+ # did we just receive a newline?
nl = data.find('\n', 0, left)
if nl >= 0:
nl += 1
- self._rbuf = data[nl:]
- buffers[-1] = data[:nl]
- break
+ # save the excess data to _rbuf
+ self._rbuf.write(buffer(data, nl))
+ if buf_len:
+ buf.write(buffer(data, 0, nl))
+ break
+ else:
+ # Shortcut. Avoid data copy through buf when returning
+ # a substring of our first recv().
+ return data[:nl]
n = len(data)
+ if n == size and not buf_len:
+ # Shortcut. Avoid data copy through buf when
+ # returning exactly all of our first recv().
+ return data
if n >= left:
- self._rbuf = data[left:]
- buffers[-1] = data[:left]
+ buf.write(buffer(data, 0, left))
+ self._rbuf.write(buffer(data, left))
break
+ buf.write(data)
buf_len += n
- return "".join(buffers)
+ #assert buf_len == buf.tell()
+ return buf.getvalue()
def readlines(self, sizehint=0):
total = 0
Modified: python/branches/release25-maint/Lib/test/test_socket.py
==============================================================================
--- python/branches/release25-maint/Lib/test/test_socket.py (original)
+++ python/branches/release25-maint/Lib/test/test_socket.py Sun Jul 6 06:04:07 2008
@@ -762,6 +762,33 @@
self.cli_file.write(MSG)
self.cli_file.flush()
+ def testReadlineAfterRead(self):
+ a_baloo_is = self.serv_file.read(len("A baloo is"))
+ self.assertEqual("A baloo is", a_baloo_is)
+ _a_bear = self.serv_file.read(len(" a bear"))
+ self.assertEqual(" a bear", _a_bear)
+ line = self.serv_file.readline()
+ self.assertEqual("\n", line)
+ line = self.serv_file.readline()
+ self.assertEqual("A BALOO IS A BEAR.\n", line)
+ line = self.serv_file.readline()
+ self.assertEqual(MSG, line)
+
+ def _testReadlineAfterRead(self):
+ self.cli_file.write("A baloo is a bear\n")
+ self.cli_file.write("A BALOO IS A BEAR.\n")
+ self.cli_file.write(MSG)
+ self.cli_file.flush()
+
+ def testReadlineAfterReadNoNewline(self):
+ end_of_ = self.serv_file.read(len("End Of "))
+ self.assertEqual("End Of ", end_of_)
+ line = self.serv_file.readline()
+ self.assertEqual("Line", line)
+
+ def _testReadlineAfterReadNoNewline(self):
+ self.cli_file.write("End Of Line")
+
def testClosedAttr(self):
self.assert_(not self.serv_file.closed)
Modified: python/branches/release25-maint/Misc/NEWS
==============================================================================
--- python/branches/release25-maint/Misc/NEWS (original)
+++ python/branches/release25-maint/Misc/NEWS Sun Jul 6 06:04:07 2008
@@ -102,6 +102,9 @@
stdout and stderr fds rather than leaving them open until the
instance is destroyed.
+- Issue #2632: Prevent socket.read(bignumber) from over allocating memory
+ in the common case when the data is returned from the underlying socket
+ in increments much smaller than bignumber.
Extension Modules
-----------------
More information about the Python-checkins
mailing list