30 from .generic
import *
31 from .utils
import isString, str_
32 from .pdf
import PdfFileReader, PdfFileWriter
33 from .pagerange
import PageRange
34 from sys
import version_info
35 if version_info < ( 3, 0 ):
36 from cStringIO
import StringIO
39 from io
import BytesIO
40 from io
import FileIO
as file
46 _MergedPage is used internally by PdfFileMerger to collect necessary
47 information on each page that is being merged.
58 Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs
59 into a single PDF. It can concatenate, slice, insert, or perform any combination of these operations.
62 See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
63 and :meth:`write()<write>` for usage information.
65 :param bool strict: Determines whether user should be warned of all
66 problems and also causes some correctable problems to be fatal.
79 def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
81 Merges the pages from the given file into the output file at the
82 specified page number.
84 :param int position: The *page number* at which to insert this file. The file
85 will be inserted after the given page number.
87 :param fileobj: A File Object or an object that supports the standard read
88 and seek methods similar to a File Object. Could also be a
89 string representing a path to a PDF file.
91 :param str bookmark: Optionally, you may specify a bookmark to be applied at
92 the beginning of the included file by supplying the text of the bookmark.
94 :param pages: can be a :ref:`Page Range <page-range>` or a ``(start, stop[, step])`` tuple
95 to merge only the specified range of pages from the source
96 document into the output document.
98 :param bool import_bookmarks: You may prevent the source document's bookmarks
99 from being imported by specifying this as ``False``.
112 decryption_key =
None
114 fileobj = file(fileobj,
'rb')
116 elif isinstance(fileobj, file):
118 filecontent = fileobj.read()
121 elif isinstance(fileobj, PdfFileReader):
122 orig_tell = fileobj.stream.tell()
123 fileobj.stream.seek(0)
124 filecontent =
StreamIO(fileobj.stream.read())
125 fileobj.stream.seek(orig_tell)
126 fileobj = filecontent
127 if hasattr(fileobj,
'_decryption_key'):
128 decryption_key = fileobj._decryption_key
134 if decryption_key
is not None:
135 pdfr._decryption_key = decryption_key
139 pages = (0, pdfr.getNumPages())
140 elif isinstance(pages, PageRange):
141 pages = pages.indices(pdfr.getNumPages())
142 elif not isinstance(pages, tuple):
143 raise TypeError(
'"pages" must be a tuple of (start, stop[, step])')
151 outline = pdfr.getOutlines()
159 dests = pdfr.namedDestinations
164 for i
in range(*pages):
178 self.
pages[position:position] = srcpages
def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True):
    """
    Identical to the :meth:`merge()<merge>` method, but assumes you want to
    concatenate all pages onto the end of the file instead of specifying a
    position.

    :param fileobj: A File Object or an object that supports the standard
        read and seek methods similar to a File Object. Could also be a
        string representing a path to a PDF file.

    :param str bookmark: Optionally, you may specify a bookmark to be
        applied at the beginning of the included file by supplying the text
        of the bookmark.

    :param pages: can be a :ref:`Page Range <page-range>` or a
        ``(start, stop[, step])`` tuple to merge only the specified range of
        pages from the source document into the output document.

    :param bool import_bookmarks: You may prevent the source document's
        bookmarks from being imported by specifying this as ``False``.
    """
    # Appending is merging at the position just past the last page that has
    # been collected so far; every other argument is forwarded unchanged.
    self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
207 Writes all data that has been merged to the given output file.
209 :param fileobj: Output file. Can be a filename or any kind of
214 fileobj = file(fileobj,
'wb')
219 for page
in self.
pages:
220 self.
output.addPage(page.pagedata)
221 page.out_pagedata = self.
output.getReference(self.
output._pages.getObject()[
"/Kids"][-1].getObject())
237 Shuts all file descriptors (input and output) and clears all memory
241 for fo, pdfr, mine
in self.
inputs:
250 Add custom metadata to the output.
252 :param dict infos: a Python dictionary where each key is a field
253 and each value is your new metadata.
254 Example: ``{u'/Title': u'My title'}``
262 :param str layout: The page layout to be used
265 /NoLayout Layout explicitly not specified
266 /SinglePage Show one page at a time
267 /OneColumn Show one column at a time
268 /TwoColumnLeft Show pages in two columns, odd-numbered pages on the left
269 /TwoColumnRight Show pages in two columns, odd-numbered pages on the right
270 /TwoPageLeft Show two pages at a time, odd-numbered pages on the left
271 /TwoPageRight Show two pages at a time, odd-numbered pages on the right
279 :param str mode: The page mode to use.
282 /UseNone Do not show outlines or thumbnails panels
283 /UseOutlines Show outlines (aka bookmarks) panel
284 /UseThumbs Show page thumbnails panel
285 /FullScreen Fullscreen view
286 /UseOC Show Optional Content Group (OCG) panel
287 /UseAttachments Show attachments panel
291 def _trim_dests(self, pdf, dests, pages):
293 Removes any named destinations that are not a part of the specified
297 prev_header_added =
True
298 for k, o
in list(dests.items()):
299 for j
in range(*pages):
300 if pdf.getPage(j).getObject() == o[
'/Page'].getObject():
301 o[
NameObject(
'/Page')] = o[
'/Page'].getObject()
307 def _trim_outline(self, pdf, outline, pages):
309 Removes any outline/bookmark entries that are not a part of the
313 prev_header_added =
True
314 for i, o
in enumerate(outline):
315 if isinstance(o, list):
318 if not prev_header_added:
319 new_outline.append(outline[i-1])
320 new_outline.append(sub)
322 prev_header_added =
False
323 for j
in range(*pages):
324 if pdf.getPage(j).getObject() == o[
'/Page'].getObject():
325 o[
NameObject(
'/Page')] = o[
'/Page'].getObject()
326 new_outline.append(o)
327 prev_header_added =
True
331 def _write_dests(self):
338 for i, p
in enumerate(self.
pages):
339 if p.id == v[
'/Page']:
345 self.
output.addNamedDestinationObject(v)
347 def _write_bookmarks(self, bookmarks=None, parent=None):
349 if bookmarks ==
None:
354 if isinstance(b, list):
361 for i, p
in enumerate(self.
pages):
362 if p.id == b[
'/Page']:
367 if b[
'/Type'] ==
'/FitH' or b[
'/Type'] ==
'/FitBH':
368 if '/Top' in b
and not isinstance(b[
'/Top'], NullObject):
373 elif b[
'/Type'] ==
'/FitV' or b[
'/Type'] ==
'/FitBV':
374 if '/Left' in b
and not isinstance(b[
'/Left'], NullObject):
379 elif b[
'/Type'] ==
'/XYZ':
380 if '/Left' in b
and not isinstance(b[
'/Left'], NullObject):
384 if '/Top' in b
and not isinstance(b[
'/Top'], NullObject):
388 if '/Zoom' in b
and not isinstance(b[
'/Zoom'], NullObject):
392 del b[
'/Top'], b[
'/Zoom'], b[
'/Left']
393 elif b[
'/Type'] ==
'/FitR':
394 if '/Left' in b
and not isinstance(b[
'/Left'], NullObject):
398 if '/Bottom' in b
and not isinstance(b[
'/Bottom'], NullObject):
402 if '/Right' in b
and not isinstance(b[
'/Right'], NullObject):
406 if '/Top' in b
and not isinstance(b[
'/Top'], NullObject):
410 del b[
'/Left'], b[
'/Right'], b[
'/Bottom'], b[
'/Top']
418 del b[
'/Page'], b[
'/Type']
419 last_added = self.
output.addBookmarkDict(b, parent)
421 def _associate_dests_to_pages(self, pages):
426 if isinstance(np, NumberObject):
430 if np.getObject() == p.pagedata.getObject():
436 raise ValueError(
"Unresolved named destination '%s'" % (nd[
'/Title'],))
438 def _associate_bookmarks_to_pages(self, pages, bookmarks=None):
439 if bookmarks ==
None:
443 if isinstance(b, list):
450 if isinstance(bp, NumberObject):
454 if bp.getObject() == p.pagedata.getObject():
460 raise ValueError(
"Unresolved bookmark '%s'" % (b[
'/Title'],))
466 for i, b
in enumerate(root):
467 if isinstance(b, list):
471 elif b == bookmark
or b[
'/Title'] == bookmark:
478 Add a bookmark to this PDF file.
480 :param str title: Title to use for this bookmark.
481 :param int pagenum: Page number this bookmark will point to.
482 :param parent: A reference to a parent bookmark to create nested
487 elif isinstance(parent, list):
499 bmparent = bmparent[i]
501 if npos < len(bmparent)
and isinstance(bmparent[npos], list):
502 bmparent[npos].
append(dest)
504 bmparent.insert(npos, [dest])
509 Add a destination to the output.
511 :param str title: Title to use
512 :param int pagenum: Page number this destination points at.
529 self.
tree.removeChild(obj)
531 def add(self, title, pagenum):
532 pageRef = self.
pdf.getObject(self.
pdf._pages)[
'/Kids'][pagenum]
538 actionRef = self.
pdf._addObject(action)
546 self.
pdf._addObject(bookmark)
548 self.
tree.addChild(bookmark)
551 for child
in [x
for x
in self.
tree.children()]:
552 self.
tree.removeChild(child)