32 Implementation of stream filters for PDF.
# Module authorship metadata.
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
37 from .utils
import PdfReadError, ord_, chr_
38 from sys
import version_info
39 if version_info < ( 3, 0 ):
40 from cStringIO
import StringIO
42 from io
import StringIO
49 return zlib.decompress(data)
52 return zlib.compress(data)
58 from System
import IO, Collections, Array
def _string_to_bytearr(buf):
    """Copy ``buf`` into a newly created ``System.Byte`` array.

    Element ``i`` of the result is ``ord(buf[i])``; the array has
    ``len(buf)`` elements.  The array is returned because callers in this
    module read ``.Length`` on the result and write it to a stream.
    """
    retval = Array.CreateInstance(System.Byte, len(buf))
    for i, char in enumerate(buf):
        retval[i] = ord(char)
    return retval
def _bytearr_to_string(bytes):
    """Build a string from a byte array, one ``chr()`` character per element.

    ``bytes`` must expose a ``Length`` attribute and integer indexing; the
    parameter name shadows the builtin but is kept so existing callers are
    unaffected.  An array with ``Length == 0`` yields the empty string.
    """
    # A single join avoids the quadratic cost of repeated string "+=".
    return "".join([chr(bytes[i]) for i in range(bytes.Length)])
72 def _read_bytes(stream):
73 ms = IO.MemoryStream()
74 buf = Array.CreateInstance(System.Byte, 2048)
76 bytes = stream.Read(buf, 0, buf.Length)
80 ms.Write(buf, 0, bytes)
86 bytes = _string_to_bytearr(data)
87 ms = IO.MemoryStream()
88 ms.Write(bytes, 0, bytes.Length)
90 gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
91 bytes = _read_bytes(gz)
92 retval = _bytearr_to_string(bytes)
97 bytes = _string_to_bytearr(data)
98 ms = IO.MemoryStream()
99 gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress,
True)
100 gz.Write(bytes, 0, bytes.Length)
104 retval = _bytearr_to_string(bytes)
115 predictor = decodeParms.get(
"/Predictor", 1)
116 except AttributeError:
121 columns = decodeParms[
"/Columns"]
123 if predictor >= 10
and predictor <= 15:
126 rowlength = columns + 1
127 assert len(data) % rowlength == 0
128 prev_rowdata = (0,) * rowlength
129 for row
in range(len(data) // rowlength):
130 rowdata = [
ord_(x)
for x
in data[(row*rowlength):((row+1)*rowlength)]]
131 filterByte = rowdata[0]
134 elif filterByte == 1:
135 for i
in range(2, rowlength):
136 rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
137 elif filterByte == 2:
138 for i
in range(1, rowlength):
139 rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
142 raise PdfReadError(
"Unsupported PNG filter %r" % filterByte)
143 prev_rowdata = rowdata
144 output.write(
''.join([chr(x)
for x
in rowdata[1:]]))
145 data = output.getvalue()
148 raise PdfReadError(
"Unsupported flatedecode predictor %r" % predictor)
150 decode = staticmethod(decode)
154 encode = staticmethod(encode)
171 retval += chr(int(char, base=16))
176 decode = staticmethod(decode)
181 http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
206 bitsfromhere=8-self.
bitpos
207 if bitsfromhere>fillbits:
208 bitsfromhere=fillbits
209 value |= (((nextbits >> (8-self.
bitpos-bitsfromhere)) &
210 (0xff >> (8-bitsfromhere))) <<
211 (fillbits-bitsfromhere))
212 fillbits -= bitsfromhere
213 self.
bitpos += bitsfromhere
220 """ algorithm derived from:
221 http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
230 raise PdfReadError(
"Missed the stop code in LZWDecode!")
239 baos += self.
dict[cW]
260 if version_info < ( 3, 0 ):
266 data = [y
for y
in data
if not (y
in ' \n\r\t')]
269 if len(retval) == 0
and c ==
"<" and data[x+1] ==
"~":
276 assert len(group) == 0
277 retval +=
'\x00\x00\x00\x00'
280 elif c ==
"~" and data[x+1] ==
">":
283 assert len(group) > 1
285 group += [ 85, 85, 85 ]
291 assert c >= 0
and c < 85
294 b = group[0] * (85**4) + \
295 group[1] * (85**3) + \
296 group[2] * (85**2) + \
299 assert b < (2**32 - 1)
300 c4 = chr((b >> 0) % 256)
301 c3 = chr((b >> 8) % 256)
302 c2 = chr((b >> 16) % 256)
304 retval += (c1 + c2 + c3 + c4)
306 retval = retval[:-4+hitEod]
311 if isinstance(data, str):
312 data = data.encode(
'ascii')
316 if ord(
'!') <= c
and c <= ord(
'u'):
320 out += struct.pack(b
'>L',b)
329 out += struct.pack(b
'>L',b)[:n-1]
332 decode = staticmethod(decode)
336 from .generic
import NameObject
337 filters = stream.get(
"/Filter", ())
338 if len(filters)
and not isinstance(filters[0], NameObject):
344 for filterType
in filters:
345 if filterType ==
"/FlateDecode" or filterType ==
"/Fl":
346 data = FlateDecode.decode(data, stream.get(
"/DecodeParms"))
347 elif filterType ==
"/ASCIIHexDecode" or filterType ==
"/AHx":
348 data = ASCIIHexDecode.decode(data)
349 elif filterType ==
"/LZWDecode" or filterType ==
"/LZW":
350 data = LZWDecode.decode(data, stream.get(
"/DecodeParms"))
351 elif filterType ==
"/ASCII85Decode" or filterType ==
"/A85":
352 data = ASCII85Decode.decode(data)
353 elif filterType ==
"/Crypt":
354 decodeParams = stream.get(
"/DecodeParams", {})
355 if "/Name" not in decodeParams
and "/Type" not in decodeParams:
358 raise NotImplementedError(
"/Crypt filter with /Name or /Type not supported yet")
361 raise NotImplementedError(
"unsupported filter %s" % filterType)