[Matplotlib-devel] Fwd: patch for stream prediction in pdf backend
Cornelius Weig
cornelius.weig at gmx.de
Fri Aug 21 14:28:04 CEST 2015
Hi,
I have implemented PNG stream prediction for the pdf backend. It was
marked as a TODO.
Cheers,
Cornelius
-------------------
Details:
At first I thought about linking to libpng, but I couldn't find an API
for raw encoding of single lines as PDF needs it. Therefore, I
implemented the code in plain python (it makes heavy use of numpy to do
the number crunching). It needs testing with python 3 however...
I took care of six different encoding modes (the five filter types specified by RFC 2083, plus a per-row optimum):
10 - noop
11 - Sub encoding
12 - Up encoding
13 - Avg encoding
14 - Paeth encoding
15 - Optimal encoding (*)
(*) For '15', I considered only modes 10-13, because my Paeth implementation
is not very fast (nor very clean).
The prediction is done by a static method in the 'Stream' class, which
is called from writeImages. My heuristics have shown (and
http://www.libpng.org/pub/png/book/chapter09.html also mentions it) that
gray-scale data usually does not benefit from stream prediction.
Therefore, the prediction is only applied to color images.
Finally, I think that the encoding mode is something that should be
adjustable by the user, so an rcParams['pdf.prediction'] (or
'filter' or 'pngcompression') setting would be justified. I didn't
touch any of this, because that's up to you devs.
-------------- next part --------------
Index: lib/matplotlib/backends/backend_pdf.py
===================================================================
--- lib/matplotlib/backends/backend_pdf.py (revision 8989)
+++ lib/matplotlib/backends/backend_pdf.py (working copy)
@@ -351,6 +351,117 @@
compressed = self.compressobj.compress(data)
self.file.write(compressed)
+    @staticmethod
+    def pngPredict(data, height, width, predictor=10):
+        """Apply a PNG predictor (RFC 2083 row filter) to raw image data.
+
+        Every row of the result is prefixed with one byte holding the
+        filter type that was applied to that row (0-4), so the result is
+        ``height`` bytes longer than *data*.
+
+        Parameters
+        ----------
+        data : bytes
+            Raw samples, ``height`` rows of ``width`` pixels; the number of
+            bytes per pixel is derived from ``len(data)``.
+        height, width : int
+            Image size in pixels.
+        predictor : int
+            10: no prediction, 11: Sub, 12: Up, 13: Average, 14: Paeth,
+            15: per-row choice among None/Sub/Up/Average, taking the
+            filter with the minimum sum of absolute differences.
+
+        Returns
+        -------
+        bytes
+            The predicted stream.  *data* is returned unchanged if
+            *predictor* is outside 10..15 or the image has no pixels.
+
+        Raises
+        ------
+        ValueError
+            If ``len(data)`` is not a multiple of ``width * height``.
+        """
+        if predictor < 10 or predictor > 15:
+            return data
+        if width <= 0 or height <= 0:
+            # Nothing to predict; also avoids dividing by zero below.
+            return data
+
+        bytesPerPixel = len(data) // (width * height)
+        bytesPerRow = width * bytesPerPixel
+
+        raw = np.frombuffer(data, np.uint8).reshape(height, bytesPerRow)
+        # n.b. int16: sums and differences of two bytes must not overflow.
+        cur = raw.astype(np.int16)
+
+        # The filters only look at *raw* neighbour bytes, so all
+        # neighbours can be built for the whole image at once.  Bytes
+        # outside the image count as zero.
+        left = np.zeros_like(cur)
+        left[:, bytesPerPixel:] = cur[:, :bytesPerRow - bytesPerPixel]
+        up = np.zeros_like(cur)
+        up[1:] = cur[:-1]
+        upLeft = np.zeros_like(cur)
+        upLeft[1:] = left[:-1]
+
+        def filtered(kind):
+            """Return the uint8 residuals for PNG filter type *kind*."""
+            if kind == 0:
+                diff = cur
+            elif kind == 1:
+                diff = cur - left
+            elif kind == 2:
+                diff = cur - up
+            elif kind == 3:
+                # PNG requires floor((left + up) / 2) without overflow.
+                diff = cur - (left + up) // 2
+            else:
+                estimate = left + up - upLeft
+                distLeft = np.abs(estimate - left)
+                distUp = np.abs(estimate - up)
+                distUpLeft = np.abs(estimate - upLeft)
+                # Tie-break order left, up, upper-left as in RFC 2083.
+                nearest = np.where(
+                    (distLeft <= distUp) & (distLeft <= distUpLeft),
+                    left,
+                    np.where(distUp <= distUpLeft, up, upLeft))
+                diff = cur - nearest
+            # Differences are stored modulo 256.
+            return (diff & 0xFF).astype(np.uint8)
+
+        pred = np.zeros((height, bytesPerRow + 1), np.uint8)
+
+        if predictor != 15:
+            pred[:, 0] = predictor - 10
+            pred[:, 1:] = filtered(predictor - 10)
+            return pred.tobytes()
+
+        # PDF allows a different filter per row.  Use the minimum sum of
+        # absolute differences heuristic by Lee Daniel Crocker: read the
+        # residuals as signed bytes and pick the filter with the smallest
+        # total magnitude.  Paeth is not considered here.
+        candidates = [filtered(kind) for kind in range(4)]
+        costs = np.array([
+            np.abs(cand.view(np.int8).astype(np.int32)).sum(axis=1)
+            for cand in candidates])
+        # argmin returns the first minimum, so ties prefer the lower type.
+        best = np.argmin(costs, axis=0)
+        pred[:, 0] = best
+        for kind, cand in enumerate(candidates):
+            chosen = best == kind
+            pred[chosen, 1:] = cand[chosen]
+        return pred.tobytes()
+
def _flush(self):
"""Flush the compression object."""
@@ -1185,8 +1296,16 @@
{'Type': Name('XObject'), 'Subtype': Name('Image'),
'Width': width, 'Height': height,
'ColorSpace': Name('DeviceRGB'), 'BitsPerComponent': 8,
- 'SMask': smaskObject})
- self.currentstream.write(data) # TODO: predictors (i.e., output png)
+ 'SMask': smaskObject, 'DecodeParms': {'Predictor': predictor,
+ 'BitsPerComponent': 8, 'Columns': width, 'Colors': 3}})
+ # TODO: predictors (i.e., output png)
+ # predictor = rcParams.get('pdf.predictor', 0)
+ predictor = 15 # optimized predictor
+ if predictor:
+ predicted = Stream.pngPredict(data, height, width, predictor)
+ self.currentstream.write(predicted)
+ else:
+ self.currentstream.write(data)
self.endStream()
img.flipud_out()
More information about the Matplotlib-devel
mailing list