[Python-checkins] python/dist/src/Lib/email Header.py,1.17.6.1,1.17.6.2
bwarsaw@users.sourceforge.net
bwarsaw@users.sourceforge.net
Sat, 01 Mar 2003 21:32:41 -0800
Update of /cvsroot/python/python/dist/src/Lib/email
In directory sc8-pr-cvs1:/tmp/cvs-serv1551
Modified Files:
Tag: folding-reimpl-branch
Header.py
Log Message:
Experimental binary search for a better split point.
Index: Header.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Header.py,v
retrieving revision 1.17.6.1
retrieving revision 1.17.6.2
diff -C2 -d -r1.17.6.1 -r1.17.6.2
*** Header.py 2 Mar 2003 03:37:19 -0000 1.17.6.1
--- Header.py 2 Mar 2003 05:32:38 -0000 1.17.6.2
***************
*** 266,273 ****
# Split up a header safely for use with encode_chunks.
splittable = charset.to_splittable(s)
! encoded = charset.from_splittable(splittable)
elen = charset.encoded_header_len(encoded)
!
! if elen <= self._maxlinelen:
return [(encoded, charset)]
# If we have undetermined raw 8bit characters sitting in a byte
--- 266,279 ----
# Split up a header safely for use with encode_chunks.
splittable = charset.to_splittable(s)
! encoded = charset.from_splittable(splittable, True)
elen = charset.encoded_header_len(encoded)
! # The maxlinelen depends on whether we're on the first line or not, to
! # take account of any header field name.
! if firstline:
! maxlinelen = self._firstlinelen
! else:
! maxlinelen = self._maxlinelen
! # If the line's encoded length fits, just return it
! if elen <= maxlinelen:
return [(encoded, charset)]
# If we have undetermined raw 8bit characters sitting in a byte
***************
*** 277,281 ****
# be to not split the header at all, but that means they could go out
# longer than maxlinelen.
! elif charset == '8bit':
return [(s, charset)]
# BAW: I'm not sure what the right test here is. What we're trying to
--- 283,287 ----
# be to not split the header at all, but that means they could go out
# longer than maxlinelen.
! if charset == '8bit':
return [(s, charset)]
# BAW: I'm not sure what the right test here is. What we're trying to
***************
*** 296,307 ****
# We can split on _maxlinelen boundaries because we know that the
# encoding won't change the size of the string
! splitpnt = self._maxlinelen
first = charset.from_splittable(splittable[:splitpnt], False)
last = charset.from_splittable(splittable[splitpnt:], False)
else:
# Divide and conquer.
! halfway = _floordiv(len(splittable), 2)
! first = charset.from_splittable(splittable[:halfway], False)
! last = charset.from_splittable(splittable[halfway:], False)
# Do the split
return self._split(first, charset, firstline, splitchars) + \
--- 302,315 ----
# We can split on _maxlinelen boundaries because we know that the
# encoding won't change the size of the string
! splitpnt = maxlinelen
first = charset.from_splittable(splittable[:splitpnt], False)
last = charset.from_splittable(splittable[splitpnt:], False)
else:
+ # Binary search for split point
+ first, last = _binsplit(splittable, charset, maxlinelen)
# Divide and conquer.
! ## halfway = _floordiv(len(splittable), 2)
! ## first = charset.from_splittable(splittable[:halfway], False)
! ## last = charset.from_splittable(splittable[halfway:], False)
# Do the split
return self._split(first, charset, firstline, splitchars) + \
***************
*** 433,434 ****
--- 441,464 ----
linejoiner = '\n' + continuation_ws
return linejoiner.join(lines)
+
+
+
def _binsplit(splittable, charset, maxlinelen):
    """Split `splittable` at the longest prefix whose encoding fits.

    Binary-searches for the largest index k such that the header-encoded
    length of splittable[:k] is at most `maxlinelen`.

    splittable -- sliceable sequence, as handed to charset.from_splittable().
    charset    -- object providing from_splittable(chunk, to_output) and
                  encoded_header_len(encoded_chunk).
    maxlinelen -- maximum encoded length allowed for the first piece.

    Returns a (first, last) tuple holding
    charset.from_splittable(splittable[:k], False) and
    charset.from_splittable(splittable[k:], False).  When not even a
    one-element prefix fits, k is 0 and `first` is built from an empty
    slice.

    NOTE(review): the search assumes the encoded length never shrinks as
    the prefix grows -- confirm this holds for every charset in use.
    """
    # Invariants:
    #   * splittable[:lo] is taken to fit (the empty prefix is assumed to).
    #   * no prefix longer than hi fits.
    # The answer therefore always lies in lo..hi.
    lo = 0
    hi = len(splittable)
    while lo < hi:
        # Round up so that lo < mid <= hi; this guarantees progress when
        # lo is raised to mid.  A shift keeps the midpoint an integer no
        # matter which division semantics are in effect.
        mid = (lo + hi + 1) >> 1
        chunk = charset.from_splittable(splittable[:mid], True)
        if charset.encoded_header_len(chunk) <= maxlinelen:
            # An exact fit is still a fit: keep mid as the new lower bound.
            lo = mid
        else:
            hi = mid - 1
    first = charset.from_splittable(splittable[:lo], False)
    last = charset.from_splittable(splittable[lo:], False)
    return first, last