LeenO computo metrico con LibreOffice  3.22.0
Il software libero per la gestione di computi metrici e contabilità lavori.
merger.py
Vai alla documentazione di questo file.
1 # vim: sw=4:expandtab:foldmethod=marker
2 #
3 # Copyright (c) 2006, Mathieu Fenniak
4 # All rights reserved.
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
8 # met:
9 #
10 # * Redistributions of source code must retain the above copyright notice,
11 # this list of conditions and the following disclaimer.
12 # * Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
15 # * The name of the author may not be used to endorse or promote products
16 # derived from this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 # POSSIBILITY OF SUCH DAMAGE.
29 
30 from .generic import *
31 from .utils import isString, str_
32 from .pdf import PdfFileReader, PdfFileWriter
33 from .pagerange import PageRange
34 from sys import version_info
35 if version_info < ( 3, 0 ):
36  from cStringIO import StringIO
37  StreamIO = StringIO
38 else:
39  from io import BytesIO
40  from io import FileIO as file
41  StreamIO = BytesIO
42 
43 
44 class _MergedPage(object):
45  """
46  _MergedPage is used internally by PdfFileMerger to collect necessary
47  information on each page that is being merged.
48  """
49  def __init__(self, pagedata, src, id):
50  self.src = src
51  self.pagedata = pagedata
52  self.out_pagedata = None
53  self.id = id
54 
55 
class PdfFileMerger(object):
    """
    Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
    into a single PDF. It can concatenate, slice, insert, or any combination
    of the above.

    See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
    and :meth:`write()<write>` for usage information.

    :param bool strict: Determines whether user should be warned of all
            problems and also causes some correctable problems to be fatal.
            Defaults to ``True``.
    """

    # Destination fit types paired with the coordinate keys each one takes,
    # listed in the order the coordinates appear in the destination array.
    # '/Fit' and '/FitB' take no coordinates and are therefore absent.
    _FIT_COORDINATE_KEYS = (
        (('/FitH', '/FitBH'), ('/Top',)),
        (('/FitV', '/FitBV'), ('/Left',)),
        (('/XYZ',), ('/Left', '/Top', '/Zoom')),
        (('/FitR',), ('/Left', '/Bottom', '/Right', '/Top')),
    )

    def __init__(self, strict=True):
        # (stream, reader, created_here) triples, one per merged input.
        self.inputs = []
        # _MergedPage records in output order.
        self.pages = []
        # Writer that write(), addMetadata(), setPageLayout() and
        # setPageMode() operate on; it must exist before any of them run.
        self.output = PdfFileWriter()
        # Outline entries; a nested list holds the children of the entry
        # that precedes it.
        self.bookmarks = []
        self.named_dests = []
        # Next identifier to hand out to a merged page.
        self.id_count = 0
        self.strict = strict

    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        Merges the pages from the given file into the output file at the
        specified page number.

        :param int position: The *page number* to insert this file. The
            pages are spliced into the page list at this index.

        :param fileobj: A File Object or an object that supports the standard read
            and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file, or a PdfFileReader.

        :param str bookmark: Optionally, you may specify a bookmark to be applied at
            the beginning of the included file by supplying the text of the bookmark.

        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
            to merge only the specified range of pages from the source
            document into the output document.

        :param bool import_bookmarks: You may prevent the source document's bookmarks
            from being imported by specifying this as ``False``.

        :raises TypeError: if ``pages`` is neither ``None``, a PageRange nor
            a tuple.
        """
        # Recorded in self.inputs: True means the stream handed to the
        # reader was created in this method, so close() must close it.
        my_file = False
        decryption_key = None

        # A string is treated as a path and opened. A file object is copied
        # into an in-memory stream, and so is the stream of a PdfFileReader.
        # Anything else is passed to the reader unmodified.
        if isString(fileobj):
            fileobj = file(fileobj, 'rb')
            my_file = True
        elif isinstance(fileobj, file):
            fileobj.seek(0)
            filecontent = fileobj.read()
            fileobj = StreamIO(filecontent)
            my_file = True
        elif isinstance(fileobj, PdfFileReader):
            source_reader = fileobj
            # Read the key from the reader itself, before ``fileobj`` is
            # rebound to the in-memory copy (which never has this attribute).
            decryption_key = getattr(source_reader, '_decryption_key', None)
            orig_tell = source_reader.stream.tell()
            source_reader.stream.seek(0)
            filecontent = StreamIO(source_reader.stream.read())
            source_reader.stream.seek(orig_tell)  # reset the stream to its original location
            fileobj = filecontent
            my_file = True

        # Create a new PdfFileReader instance using the stream obtained
        # above. The stream is only registered in self.inputs at the very
        # end, so release it here if the reader cannot be constructed.
        try:
            pdfr = PdfFileReader(fileobj, strict=self.strict)
        except Exception:
            if my_file:
                fileobj.close()
            raise
        if decryption_key is not None:
            pdfr._decryption_key = decryption_key

        # Find the range of pages to merge.
        if pages is None:
            pages = (0, pdfr.getNumPages())
        elif isinstance(pages, PageRange):
            pages = pages.indices(pdfr.getNumPages())
        elif not isinstance(pages, tuple):
            raise TypeError('"pages" must be a tuple of (start, stop[, step])')

        srcpages = []
        if bookmark:
            # The new bookmark targets the first page merged by this call,
            # which will receive the current id_count as its identifier.
            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))

        outline = []
        if import_bookmarks:
            outline = pdfr.getOutlines()
            outline = self._trim_outline(pdfr, outline, pages)

        if bookmark:
            # The imported outline becomes the children of the new bookmark.
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline

        dests = pdfr.namedDestinations
        dests = self._trim_dests(pdfr, dests, pages)
        self.named_dests += dests

        # Gather all the pages that are going to be merged
        for i in range(*pages):
            pg = pdfr.getPage(i)

            page_id = self.id_count
            self.id_count += 1

            srcpages.append(_MergedPage(pg, pdfr, page_id))

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)

        # Slice to insert the pages at the specified position
        self.pages[position:position] = srcpages

        # Keep track of our input files so we can close them later
        self.inputs.append((fileobj, pdfr, my_file))

    def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        Identical to the :meth:`merge()<merge>` method, but assumes you want to concatenate
        all pages onto the end of the file instead of specifying a position.

        :param fileobj: A File Object or an object that supports the standard read
            and seek methods similar to a File Object. Could also be a
            string representing a path to a PDF file.

        :param str bookmark: Optionally, you may specify a bookmark to be applied at
            the beginning of the included file by supplying the text of the bookmark.

        :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
            to merge only the specified range of pages from the source
            document into the output document.

        :param bool import_bookmarks: You may prevent the source document's bookmarks
            from being imported by specifying this as ``False``.
        """
        self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)

    def write(self, fileobj):
        """
        Writes all data that has been merged to the given output file.

        :param fileobj: Output file. Can be a filename or any kind of
            file-like object. A file opened here from a filename is always
            closed again, even when writing fails.
        """
        my_file = False
        if isString(fileobj):
            fileobj = file(fileobj, 'wb')
            my_file = True

        try:
            # Add pages to the PdfFileWriter and remember, for each one, the
            # reference of the page just added (the last kid of the writer's
            # page tree) so destinations can point at it.
            for page in self.pages:
                self.output.addPage(page.pagedata)
                page.out_pagedata = self.output.getReference(self.output._pages.getObject()["/Kids"][-1].getObject())

            # Once all pages are added, create bookmarks to point at those pages
            self._write_dests()
            self._write_bookmarks()

            # Write the output to the file
            self.output.write(fileobj)
        finally:
            if my_file:
                fileobj.close()

    def close(self):
        """
        Closes every input stream that was created by :meth:`merge()<merge>`
        and drops the references to pages, inputs and the output writer.
        """
        self.pages = []
        for fo, pdfr, mine in self.inputs:
            if mine:
                fo.close()

        self.inputs = []
        self.output = None

    def addMetadata(self, infos):
        """
        Add custom metadata to the output.

        :param dict infos: a Python dictionary where each key is a field
            and each value is your new metadata.
            Example: ``{u'/Title': u'My title'}``
        """
        self.output.addMetadata(infos)

    def setPageLayout(self, layout):
        """
        Set the page layout

        :param str layout: The page layout to be used

        Valid layouts are:
             /NoLayout        Layout explicitly not specified
             /SinglePage      Show one page at a time
             /OneColumn       Show one column at a time
             /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left
             /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right
             /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left
             /TwoPageRight    Show two pages at a time, odd-numbered pages on the right
        """
        self.output.setPageLayout(layout)

    def setPageMode(self, mode):
        """
        Set the page mode.

        :param str mode: The page mode to use.

        Valid modes are:
            /UseNone         Do not show outlines or thumbnails panels
            /UseOutlines     Show outlines (aka bookmarks) panel
            /UseThumbs       Show page thumbnails panel
            /FullScreen      Fullscreen view
            /UseOC           Show Optional Content Group (OCG) panel
            /UseAttachments  Show attachments panel
        """
        self.output.setPageMode(mode)

    def _trim_dests(self, pdf, dests, pages):
        """
        Removes any named destinations that are not a part of the specified
        page set.

        :param pdf: reader the destinations were taken from.
        :param dests: mapping of destination name to destination object.
        :param tuple pages: ``range()`` arguments selecting the merged pages.
        :return: list of the destinations whose page is in the range; their
            ``/Page`` entry is replaced by the resolved page object.
        """
        new_dests = []
        for k, o in dests.items():
            for j in range(*pages):
                if pdf.getPage(j).getObject() == o['/Page'].getObject():
                    o[NameObject('/Page')] = o['/Page'].getObject()
                    # Internal consistency check: the mapping key is
                    # expected to equal the destination's own title.
                    assert str_(k) == str_(o['/Title'])
                    new_dests.append(o)
                    break
        return new_dests

    def _trim_outline(self, pdf, outline, pages):
        """
        Removes any outline/bookmark entries that are not a part of the
        specified page set.

        :param pdf: reader the outline was taken from.
        :param list outline: outline entries; a nested list holds the
            children of the entry that precedes it.
        :param tuple pages: ``range()`` arguments selecting the merged pages.
        :return: the trimmed outline, with the same nesting convention.
        """
        new_outline = []
        # Tracks whether the most recent non-list entry made it into the
        # result, so that a dropped header can be restored when at least one
        # of its children survives.
        prev_header_added = True
        for i, o in enumerate(outline):
            if isinstance(o, list):
                sub = self._trim_outline(pdf, o, pages)
                if sub:
                    if not prev_header_added:
                        new_outline.append(outline[i-1])
                    new_outline.append(sub)
            else:
                prev_header_added = False
                for j in range(*pages):
                    if pdf.getPage(j).getObject() == o['/Page'].getObject():
                        o[NameObject('/Page')] = o['/Page'].getObject()
                        new_outline.append(o)
                        prev_header_added = True
                        break
        return new_outline

    def _write_dests(self):
        """
        Adds every named destination whose page is part of the output to the
        writer, pointing it at that page's reference in the output document.
        """
        for v in self.named_dests:
            found = False
            if '/Page' in v:
                for p in self.pages:
                    if p.id == v['/Page']:
                        v[NameObject('/Page')] = p.out_pagedata
                        found = True
                        break
            if found:
                self.output.addNamedDestinationObject(v)

    def _fit_coordinates(self, b):
        """
        Builds the coordinate arguments for bookmark ``b`` according to its
        ``/Type`` and removes the coordinate keys from ``b``.

        A coordinate that is absent or a NullObject is written as 0. Keys
        are deleted only when present, so an absent coordinate no longer
        raises KeyError.

        :return: list of FloatObject, in destination-array order.
        """
        fit_type = b['/Type']
        coords = []
        for fit_types, keys in self._FIT_COORDINATE_KEYS:
            if fit_type in fit_types:
                for key in keys:
                    if key in b and not isinstance(b[key], NullObject):
                        coords.append(FloatObject(b[key]))
                    else:
                        coords.append(FloatObject(0))
                for key in keys:
                    if key in b:
                        del b[key]
                break
        return coords

    def _write_bookmarks(self, bookmarks=None, parent=None):
        """
        Writes the bookmark tree to the output writer.

        :param list bookmarks: entries to write; defaults to the merger's
            complete bookmark list.
        :param parent: value returned by the writer for the bookmark under
            which the entries are added (``None`` for the top level).
        """
        if bookmarks is None:
            bookmarks = self.bookmarks

        last_added = None
        for b in bookmarks:
            if isinstance(b, list):
                # A nested list holds the children of the bookmark written
                # immediately before it.
                self._write_bookmarks(b, last_added)
                continue

            found = False
            if '/Page' in b:
                for p in self.pages:
                    if p.id == b['/Page']:
                        # Destination array: page identifier, fit type, then
                        # the coordinates that fit type requires.
                        args = [NumberObject(p.id), NameObject(b['/Type'])]
                        args.extend(self._fit_coordinates(b))

                        b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)})

                        found = True
                        break
            # Bookmarks whose page is not part of the output are skipped.
            if found:
                del b['/Page'], b['/Type']
                last_added = self.output.addBookmarkDict(b, parent)

    def _associate_dests_to_pages(self, pages):
        """
        Replaces the ``/Page`` entry of every not yet associated named
        destination with the identifier of the matching page in ``pages``.

        :param list pages: _MergedPage records to match against.
        :raises ValueError: if a destination matches none of the pages.
        """
        for nd in self.named_dests:
            pageno = None
            np = nd['/Page']

            # A NumberObject means the destination already carries a page id.
            if isinstance(np, NumberObject):
                continue

            for p in pages:
                if np.getObject() == p.pagedata.getObject():
                    pageno = p.id

            if pageno is not None:
                nd[NameObject('/Page')] = NumberObject(pageno)
            else:
                raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))

    def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
        """
        Replaces the ``/Page`` entry of every not yet associated bookmark
        with the identifier of the matching page in ``pages``.

        :param list pages: _MergedPage records to match against.
        :param list bookmarks: entries to process; defaults to the merger's
            complete bookmark list.
        :raises ValueError: if a bookmark matches none of the pages.
        """
        if bookmarks is None:
            bookmarks = self.bookmarks

        for b in bookmarks:
            if isinstance(b, list):
                self._associate_bookmarks_to_pages(pages, b)
                continue

            pageno = None
            bp = b['/Page']

            # A NumberObject means the bookmark already carries a page id.
            if isinstance(bp, NumberObject):
                continue

            for p in pages:
                if bp.getObject() == p.pagedata.getObject():
                    pageno = p.id

            if pageno is not None:
                b[NameObject('/Page')] = NumberObject(pageno)
            else:
                raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],))

    def findBookmark(self, bookmark, root=None):
        """
        Locates a bookmark in the (nested) bookmark list.

        :param bookmark: the bookmark object itself or its title.
        :param list root: list to search; defaults to the merger's complete
            bookmark list.
        :return: list of indices leading from ``root`` to the bookmark, or
            ``None`` when it is not found.
        """
        if root is None:
            root = self.bookmarks

        for i, b in enumerate(root):
            if isinstance(b, list):
                res = self.findBookmark(bookmark, b)
                if res:
                    return [i] + res
            elif b == bookmark or b['/Title'] == bookmark:
                return [i]

        return None

    def addBookmark(self, title, pagenum, parent=None):
        """
        Add a bookmark to this PDF file.

        :param str title: Title to use for this bookmark.
        :param int pagenum: Page number this bookmark will point to.
        :param parent: A reference to a parent bookmark to create nested
            bookmarks. May be the bookmark object, its title, or the index
            path returned by :meth:`findBookmark()<findBookmark>`.
        :return: the newly created bookmark.
        :raises ValueError: if ``parent`` is given but cannot be found.
        """
        dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))

        if parent is None:
            self.bookmarks.append(dest)
            return dest

        if isinstance(parent, list):
            iloc = parent
        else:
            iloc = self.findBookmark(parent)
            if iloc is None:
                raise ValueError("Parent bookmark not found: %r" % (parent,))

        # Walk down to the list that contains the parent bookmark.
        bmparent = self.bookmarks
        for i in iloc[:-1]:
            bmparent = bmparent[i]

        # Children live in the list directly after their parent; create that
        # list if the parent has no children yet.
        npos = iloc[-1]+1
        if npos < len(bmparent) and isinstance(bmparent[npos], list):
            bmparent[npos].append(dest)
        else:
            bmparent.insert(npos, [dest])
        return dest

    def addNamedDestination(self, title, pagenum):
        """
        Add a destination to the output.

        :param str title: Title to use
        :param int pagenum: Page number this destination points at.
        """
        dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826))
        self.named_dests.append(dest)
517 
518 
class OutlinesObject(list):
    """
    List that is kept alongside an outline tree object of a PDF.

    ``remove`` and ``removeAll`` forward removals to ``tree``; ``add``
    creates a new bookmark in ``pdf`` and attaches it to ``tree``.
    """

    def __init__(self, pdf, tree, parent=None):
        super(OutlinesObject, self).__init__()
        self.pdf = pdf
        self.tree = tree
        self.parent = parent

    def remove(self, index):
        """Delete the entry at ``index`` and remove it from the tree too."""
        entry = self[index]
        del self[index]
        self.tree.removeChild(entry)

    def add(self, title, pagenum):
        """
        Create a bookmark titled ``title`` that jumps to page ``pagenum``
        and attach it to the tree.

        Note that the bookmark is attached to the tree only; it is not
        appended to this list.
        """
        kids = self.pdf.getObject(self.pdf._pages)['/Kids']
        target_page = kids[pagenum]

        # GoTo action whose destination fits the page width at top 826.
        goto_action = DictionaryObject()
        goto_action.update({
            NameObject('/D'): ArrayObject([target_page, NameObject('/FitH'), NumberObject(826)]),
            NameObject('/S'): NameObject('/GoTo'),
        })
        action_ref = self.pdf._addObject(goto_action)

        node = TreeObject()
        node.update({
            NameObject('/A'): action_ref,
            NameObject('/Title'): createStringObject(title),
        })
        self.pdf._addObject(node)

        self.tree.addChild(node)

    def removeAll(self):
        """
        Remove every child from the tree, popping one entry off this list
        for each child removed.
        """
        # Snapshot the children first: the tree is modified while looping.
        for node in list(self.tree.children()):
            self.tree.removeChild(node)
            self.pop()
generic.TreeObject
Definition: generic.py:644
merger.PdfFileMerger.output
output
Definition: merger.py:73
merger.PdfFileMerger._trim_outline
def _trim_outline(self, pdf, outline, pages)
Definition: merger.py:307
merger.OutlinesObject
Definition: merger.py:519
generic.DictionaryObject
Definition: generic.py:497
merger.PdfFileMerger.write
def write(self, fileobj)
Definition: merger.py:205
merger.PdfFileMerger.addNamedDestination
def addNamedDestination(self, title, pagenum)
Definition: merger.py:507
merger.PdfFileMerger.close
def close(self)
Definition: merger.py:235
merger.OutlinesObject.tree
tree
Definition: merger.py:522
merger.PdfFileMerger.setPageLayout
def setPageLayout(self, layout)
Definition: merger.py:258
utils.isString
def isString(s)
Definition: utils.py:52
merger.OutlinesObject.add
def add(self, title, pagenum)
Definition: merger.py:531
merger._MergedPage
Definition: merger.py:44
pdf.PdfFileWriter
Definition: pdf.py:79
merger.PdfFileMerger.merge
def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True)
Definition: merger.py:79
merger._MergedPage.src
src
Definition: merger.py:50
merger.PdfFileMerger.inputs
inputs
Definition: merger.py:71
merger.OutlinesObject.removeAll
def removeAll(self)
Definition: merger.py:550
merger.PdfFileMerger.__init__
def __init__(self, strict=True)
Definition: merger.py:70
merger.PdfFileMerger.addBookmark
def addBookmark(self, title, pagenum, parent=None)
Definition: merger.py:476
merger._MergedPage.__init__
def __init__(self, pagedata, src, id)
Definition: merger.py:49
merger._MergedPage.pagedata
pagedata
Definition: merger.py:51
generic.FloatObject
Definition: generic.py:226
merger.PdfFileMerger._associate_bookmarks_to_pages
def _associate_bookmarks_to_pages(self, pages, bookmarks=None)
Definition: merger.py:438
generic.Bookmark
Definition: generic.py:1144
merger.PdfFileMerger.findBookmark
def findBookmark(self, bookmark, root=None)
Definition: merger.py:462
merger.PdfFileMerger.id_count
id_count
Definition: merger.py:76
merger._MergedPage.id
id
Definition: merger.py:53
merger.PdfFileMerger.strict
strict
Definition: merger.py:77
merger.PdfFileMerger.pages
pages
Definition: merger.py:72
merger.PdfFileMerger._trim_dests
def _trim_dests(self, pdf, dests, pages)
Definition: merger.py:291
merger.OutlinesObject.parent
parent
Definition: merger.py:524
merger._MergedPage.out_pagedata
out_pagedata
Definition: merger.py:52
merger.OutlinesObject.__init__
def __init__(self, pdf, tree, parent=None)
Definition: merger.py:520
merger.PdfFileMerger.named_dests
named_dests
Definition: merger.py:75
merger.PdfFileMerger._write_dests
def _write_dests(self)
Definition: merger.py:331
merger.PdfFileMerger.append
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True)
Definition: merger.py:183
merger.PdfFileMerger._write_bookmarks
def _write_bookmarks(self, bookmarks=None, parent=None)
Definition: merger.py:347
generic.NameObject
Definition: generic.py:467
generic.NumberObject
Definition: generic.py:251
merger.PdfFileMerger.setPageMode
def setPageMode(self, mode)
Definition: merger.py:275
generic.createStringObject
def createStringObject(string)
Given a string (either a "str" or "unicode"), create a ByteStringObject or a TextStringObject to repr...
Definition: generic.py:280
generic.Destination
Definition: generic.py:1024
generic.ArrayObject
Definition: generic.py:141
pdf.PdfFileReader
Definition: pdf.py:1044
merger.PdfFileMerger.addMetadata
def addMetadata(self, infos)
Definition: merger.py:248
generic.TextStringObject
Represents a string object that has been decoded into a real unicode string.
Definition: generic.py:421
merger.OutlinesObject.remove
def remove(self, index)
Definition: merger.py:526
merger.StreamIO
StreamIO
Definition: merger.py:37
utils.str_
def str_(b)
Definition: utils.py:251
merger.PdfFileMerger._associate_dests_to_pages
def _associate_dests_to_pages(self, pages)
Definition: merger.py:421
merger.OutlinesObject.pdf
pdf
Definition: merger.py:523
merger.PdfFileMerger.bookmarks
bookmarks
Definition: merger.py:74
merger.PdfFileMerger
Definition: merger.py:56