[Python-checkins] python/dist/src/Lib/email Header.py,1.17.6.1,1.17.6.2

Sat, 01 Mar 2003 21:32:41 -0800

Update of /cvsroot/python/python/dist/src/Lib/email
In directory sc8-pr-cvs1:/tmp/cvs-serv1551

Modified Files:
      Tag: folding-reimpl-branch
	Header.py 
Log Message:
Experimental binary search for a better split point.


Index: Header.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Header.py,v
retrieving revision 1.17.6.1
retrieving revision 1.17.6.2
diff -C2 -d -r1.17.6.1 -r1.17.6.2
*** Header.py	2 Mar 2003 03:37:19 -0000	1.17.6.1
--- Header.py	2 Mar 2003 05:32:38 -0000	1.17.6.2
***************
*** 266,273 ****
          # Split up a header safely for use with encode_chunks.
          splittable = charset.to_splittable(s)
!         encoded = charset.from_splittable(splittable)
          elen = charset.encoded_header_len(encoded)
! 
!         if elen <= self._maxlinelen:
              return [(encoded, charset)]
          # If we have undetermined raw 8bit characters sitting in a byte
--- 266,279 ----
          # Split up a header safely for use with encode_chunks.
          splittable = charset.to_splittable(s)
!         encoded = charset.from_splittable(splittable, True)
          elen = charset.encoded_header_len(encoded)
!         # The maxlinelen depends on whether we're on the first line or not, to
!         # take account of any header field name.
!         if firstline:
!             maxlinelen = self._firstlinelen
!         else:
!             maxlinelen = self._maxlinelen
!         # If the line's encoded length fits, just return it
!         if elen <= maxlinelen:
              return [(encoded, charset)]
          # If we have undetermined raw 8bit characters sitting in a byte
***************
*** 277,281 ****
          # be to not split the header at all, but that means they could go out
          # longer than maxlinelen.
!         elif charset == '8bit':
              return [(s, charset)]
          # BAW: I'm not sure what the right test here is.  What we're trying to
--- 283,287 ----
          # be to not split the header at all, but that means they could go out
          # longer than maxlinelen.
!         if charset == '8bit':
              return [(s, charset)]
          # BAW: I'm not sure what the right test here is.  What we're trying to
***************
*** 296,307 ****
              # We can split on _maxlinelen boundaries because we know that the
              # encoding won't change the size of the string
!             splitpnt = self._maxlinelen
              first = charset.from_splittable(splittable[:splitpnt], False)
              last = charset.from_splittable(splittable[splitpnt:], False)
          else:
              # Divide and conquer.
!             halfway = _floordiv(len(splittable), 2)
!             first = charset.from_splittable(splittable[:halfway], False)
!             last = charset.from_splittable(splittable[halfway:], False)
          # Do the split
          return self._split(first, charset, firstline, splitchars) + \
--- 302,315 ----
              # We can split on _maxlinelen boundaries because we know that the
              # encoding won't change the size of the string
!             splitpnt = maxlinelen
              first = charset.from_splittable(splittable[:splitpnt], False)
              last = charset.from_splittable(splittable[splitpnt:], False)
          else:
+             # Binary search for split point
+             first, last = _binsplit(splittable, charset, maxlinelen)
              # Divide and conquer.
! ##            halfway = _floordiv(len(splittable), 2)
! ##            first = charset.from_splittable(splittable[:halfway], False)
! ##            last = charset.from_splittable(splittable[halfway:], False)
          # Do the split
          return self._split(first, charset, firstline, splitchars) + \
***************
*** 433,434 ****
--- 441,464 ----
      linejoiner = '\n' + continuation_ws
      return linejoiner.join(lines)
+ 
+ 
+ 
+ def _binsplit(splittable, charset, maxlinelen):
+     i = lastm = 0
+     j = len(splittable) - 1
+     while True:
+         if j < i:
+             break
+         m = (i + j) / 2
+         chunk = charset.from_splittable(splittable[:m], True)
+         chunklen = charset.encoded_header_len(chunk)
+         if chunklen < maxlinelen:
+             lastm = m
+             i = m + 1
+         elif chunklen > maxlinelen:
+             j = m - 1
+         else:
+             break
+     first = charset.from_splittable(splittable[:lastm], False)
+     last = charset.from_splittable(splittable[lastm:], False)
+     return first, last