35 A pure-Python PDF library with an increasing number of capabilities. 
   36 See README for links to FAQ, documentation, homepage, etc. 
   39 __author__ = 
"Mathieu Fenniak" 
   40 __author_email__ = 
"biziqe@mathieu.fenniak.net" 
   42 __maintainer__ = 
"Phaseit, Inc." 
   43 __maintainer_email = 
"PyPDF2@phaseit.net" 
   50 from sys 
import version_info
 
   51 if version_info < ( 3, 0 ):
 
   52     from cStringIO 
import StringIO
 
   54     from io 
import StringIO
 
   56 if version_info < ( 3, 0 ):
 
   59     from io 
import BytesIO
 
   65 from .generic 
import *
 
   66 from .utils 
import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList
 
   67 from .utils 
import isString, b_, u_, ord_, chr_, str_, formatWarning
 
   69 if version_info < ( 2, 4 ):
 
   70    from sets 
import ImmutableSet 
as frozenset
 
   72 if version_info < ( 2, 5 ):
 
   75     from hashlib 
import md5
 
   81     This class supports writing PDF files out, given pages produced by another 
   82     class (typically :class:`PdfFileReader<PdfFileReader>`). 
  113     def _addObject(self, obj):
 
  119             raise ValueError(
"pdf must be self")
 
  122     def _addPage(self, page, action):
 
  123         assert page[
"/Type"] == 
"/Page" 
  127         action(pages[
"/Kids"], page)
 
  132         Adds a page to this PDF file.  The page is usually acquired from a 
  133         :class:`PdfFileReader<PdfFileReader>` instance. 
  135         :param PageObject page: The page to add to the document. Should be 
  136             an instance of :class:`PageObject<PyPDF2.pdf.PageObject>` 
  142         Insert a page in this PDF file. The page is usually acquired from a 
  143         :class:`PdfFileReader<PdfFileReader>` instance. 
  145         :param PageObject page: The page to add to the document.  This 
  146             argument should be an instance of :class:`PageObject<pdf.PageObject>`. 
  147         :param int index: Position at which the page will be inserted. 
  149         self.
_addPage(page, 
lambda l, p: l.insert(index, p))
 
  153         Retrieves a page by number from this PDF file. 
  155         :param int pageNumber: The page number to retrieve 
  156             (pages begin at zero) 
  157         :return: the page at the index given by *pageNumber* 
  158         :rtype: :class:`PageObject<pdf.PageObject>` 
  162         return pages[
"/Kids"][pageNumber].
getObject()
 
  166         :return: the number of pages. 
  174         Appends a blank page to this PDF file and returns it. If no page size 
  175         is specified, use the size of the last page. 
  177         :param float width: The width of the new page expressed in default user 
  179         :param float height: The height of the new page expressed in default 
  181         :return: the newly appended page 
  182         :rtype: :class:`PageObject<PyPDF2.pdf.PageObject>` 
  183         :raises PageSizeNotDefinedError: if width and height are not defined 
  184             and previous page does not exist. 
  186         page = PageObject.createBlankPage(self, width, height)
 
  192         Inserts a blank page into this PDF file and returns it. If no page size 
  193         is specified, use the size of the last page. 
  195         :param float width: The width of the new page expressed in default user 
  197         :param float height: The height of the new page expressed in default 
  199         :param int index: Position to add the page. 
  200         :return: the newly inserted page 
  201         :rtype: :class:`PageObject<PyPDF2.pdf.PageObject>` 
  202         :raises PageSizeNotDefinedError: if width and height are not defined 
  203             and previous page does not exist. 
  205         if width 
is None or height 
is None and \
 
  208             width = oldpage.mediaBox.getWidth()
 
  209             height = oldpage.mediaBox.getHeight()
 
  210         page = PageObject.createBlankPage(self, width, height)
 
  216         Add Javascript which will launch upon opening this PDF. 
  218         :param str javascript: Your Javascript. 
  220         >>> output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});") 
  221         # Example: This will launch the print window when the PDF is opened. 
  232         js_string_name = str(uuid.uuid4())
 
  235         js_name_tree.update({
 
  243                 NameObject(
"/OpenAction"): js_indirect_object,
 
  249         Embed a file inside the PDF. 
  251         :param str fname: The filename to display. 
  252         :param str fdata: The data in the file. 
  255         https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 
  278         file_entry.setData(fdata)
 
  293         efEntry.update({ 
NameObject(
"/F"):file_entry })
 
  309          /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >> 
  315         embeddedFilesNamesDictionary.update({
 
  320         embeddedFilesDictionary.update({
 
  321                 NameObject(
"/EmbeddedFiles"): embeddedFilesNamesDictionary
 
  330         Copy pages from reader to writer. Includes an optional callback parameter 
  331         which is invoked after pages are appended to the writer. 
  333         :param reader: a PdfFileReader object from which to copy page 
  334             annotations to this writer object.  The writer's annots 
  336         :callback after_page_append (function): Callback function that is invoked after 
  337             each page is appended to the writer. Callback signature: 
  339             :param writer_pageref (PDF page reference): Reference to the page 
  340                 appended to the writer. 
  343         reader_num_pages = reader.getNumPages()
 
  347         for rpagenum 
in range(0, reader_num_pages):
 
  348             reader_page = reader.getPage(rpagenum)
 
  350             writer_page = self.
getPage(writer_num_pages+rpagenum)
 
  352             if callable(after_page_append): after_page_append(writer_page)
 
  356         Update the form field values for a given page from a fields dictionary. 
  357         Copy field texts and values from fields to page. 
  359         :param page: Page reference from PDF writer where the annotations 
  360             and field data will be updated. 
  361         :param fields: a Python dictionary of field names (/T) and text 
  365         for j 
in range(0, len(page[
'/Annots'])):
 
  366             writer_annot = page[
'/Annots'][j].
getObject()
 
  368                 if writer_annot.get(
'/T') == field:
 
  369                     writer_annot.update({
 
  375         Copy the reader document root to the writer. 
  377         :param reader:  PdfFileReader from which the document root should be copied. 
  378         :callback after_page_append 
  384         Create a copy (clone) of a document from a PDF file reader 
  386         :param reader: PDF file reader instance from which the clone 
  388         :callback after_page_append (function): Callback function that is invoked after 
  389             each page is appended to the writer. Signature includes a reference to the 
  390             appended page (delegates to appendPagesFromReader). Callback signature: 
  392             :param writer_pageref (PDF page reference): Reference to the page just 
  393                 appended to the document. 
  398     def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True):
 
  400         Encrypt this PDF file with the PDF Standard encryption handler. 
  402         :param str user_pwd: The "user password", which allows for opening 
  403             and reading the PDF file with the restrictions provided. 
  404         :param str owner_pwd: The "owner password", which allows for 
  405             opening the PDF files without any restrictions.  By default, 
  406             the owner password is the same as the user password. 
  407         :param bool use_128bit: flag as to whether to use 128bit 
  408             encryption.  When false, 40bit encryption will be used.  By default, 
  412         if owner_pwd == 
None:
 
  417             keylen = int(128 / 8)
 
  429             U, key = _alg34(user_pwd, O, P, ID_1)
 
  432             U, key = _alg35(user_pwd, rev, keylen, O, P, ID_1, 
False)
 
  447         Writes the collection of pages added to this object out as a PDF file. 
  449         :param stream: An object to write the file to.  The object must support 
  450             the write method and the tell method, similar to a file object. 
  452         if hasattr(stream, 
'mode') 
and 'b' not in stream.mode:
 
  453             warnings.warn(
"File <%s> to write to is not in binary mode. It may not be written to correctly." % stream.name)
 
  460         externalReferenceMap = {}
 
  470         for objIndex 
in range(len(self.
_objects)):
 
  472             if isinstance(obj, PageObject) 
and obj.indirectRef != 
None:
 
  473                 data = obj.indirectRef
 
  474                 if data.pdf 
not in externalReferenceMap:
 
  475                     externalReferenceMap[data.pdf] = {}
 
  476                 if data.generation 
not in externalReferenceMap[data.pdf]:
 
  477                     externalReferenceMap[data.pdf][data.generation] = {}
 
  478                 externalReferenceMap[data.pdf][data.generation][data.idnum] = 
IndirectObject(objIndex + 1, 0, self)
 
  481         if debug: print((
"ERM:", externalReferenceMap, 
"root:", self.
_root))
 
  486         object_positions = []
 
  491             object_positions.append(stream.tell())
 
  492             stream.write(
b_(str(idnum) + 
" 0 obj\n"))
 
  494             if hasattr(self, 
"_encrypt") 
and idnum != self.
_encrypt.idnum:
 
  495                 pack1 = struct.pack(
"<i", i + 1)[:3]
 
  496                 pack2 = struct.pack(
"<i", 0)[:2]
 
  499                 md5_hash = md5(key).digest()
 
  501             obj.writeToStream(stream, key)
 
  502             stream.write(
b_(
"\nendobj\n"))
 
  505         xref_location = stream.tell()
 
  506         stream.write(
b_(
"xref\n"))
 
  507         stream.write(
b_(
"0 %s\n" % (len(self.
_objects) + 1)))
 
  508         stream.write(
b_(
"%010d %05d f \n" % (0, 65535)))
 
  509         for offset 
in object_positions:
 
  510             stream.write(
b_(
"%010d %05d n \n" % (offset, 0)))
 
  513         stream.write(
b_(
"trailer\n"))
 
  520         if hasattr(self, 
"_ID"):
 
  522         if hasattr(self, 
"_encrypt"):
 
  524         trailer.writeToStream(stream, 
None)
 
  527         stream.write(
b_(
"\nstartxref\n%s\n%%%%EOF\n" % (xref_location)))
 
  531         Add custom metadata to the output. 
  533         :param dict infos: a Python dictionary where each key is a field 
  534             and each value is your new metadata. 
  537         for key, value 
in list(infos.items()):
 
  541     def _sweepIndirectReferences(self, externMap, data):
 
  543         if debug: print((data, 
"TYPE", data.__class__.__name__))
 
  544         if isinstance(data, DictionaryObject):
 
  545             for key, value 
in list(data.items()):
 
  548                 if isinstance(value, StreamObject):
 
  554         elif isinstance(data, ArrayObject):
 
  555             for i 
in range(len(data)):
 
  557                 if isinstance(value, StreamObject):
 
  563         elif isinstance(data, IndirectObject):
 
  566                 if data.idnum 
in self.
stack:
 
  569                     self.
stack.append(data.idnum)
 
  574                 newobj = externMap.get(data.pdf, {}).get(data.generation, {}).get(data.idnum, 
None)
 
  577                         newobj = data.pdf.getObject(data)
 
  581                         if data.pdf 
not in externMap:
 
  582                             externMap[data.pdf] = {}
 
  583                         if data.generation 
not in externMap[data.pdf]:
 
  584                             externMap[data.pdf][data.generation] = {}
 
  585                         externMap[data.pdf][data.generation][data.idnum] = newobj_ido
 
  597         idnum = self.
_objects.index(obj) + 1
 
  599         assert ref.getObject() == obj
 
  605             idnum = self.
_objects.index(outline) + 1
 
  607             assert outlineRef.getObject() == outline
 
  619             idnum = self.
_objects.index(names) + 1
 
  621             assert namesRef.getObject() == names
 
  622             if '/Dests' in names 
and isinstance(names[
'/Dests'], DictionaryObject):
 
  623                 dests = names[
'/Dests']
 
  624                 idnum = self.
_objects.index(dests) + 1
 
  626                 assert destsRef.getObject() == dests
 
  627                 if '/Names' in dests:
 
  659         parent = parent.getObject()
 
  661         parent.addChild(destRef, self)
 
  667         for k, v 
in list(bookmark.items()):
 
  669         bookmarkObj.update(bookmark)
 
  673             for k, v 
in list(bookmark[
'/A'].items()):
 
  685         parent = parent.getObject()
 
  686         parent.addChild(bookmarkRef, self)
 
  690     def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args):
 
  692         Add a bookmark to this PDF file. 
  694         :param str title: Title to use for this bookmark. 
  695         :param int pagenum: Page number this bookmark will point to. 
  696         :param parent: A reference to a parent bookmark to create nested 
  698         :param tuple color: Color of the bookmark as a red, green, blue tuple 
  700         :param bool bold: Bookmark is bold 
  701         :param bool italic: Bookmark is italic 
  702         :param str fit: The fit of the destination page. See 
  703             :meth:`addLink()<addLink>` for details. 
  714         destArray = dest.getDestArray()
 
  733         if color 
is not None:
 
  746         parent = parent.getObject()
 
  747         parent.addChild(bookmarkRef, self)
 
  755         nd.extend([dest[
'/Title'], destRef])
 
  770         nd.extend([title, destRef])
 
  776         Removes links and annotations from this output. 
  781             if "/Annots" in pageRef:
 
  782                 del pageRef[
'/Annots']
 
  786         Removes images from this output. 
  788         :param bool ignoreByteStringObject: optional parameter 
  789             to ignore ByteString Objects. 
  792         for j 
in range(len(pages)):
 
  795             content = pageRef[
'/Contents'].
getObject()
 
  796             if not isinstance(content, ContentStream):
 
  801             for operands, operator 
in content.operations:
 
  802                 if operator == 
b_(
'Tj'):
 
  804                     if ignoreByteStringObject:
 
  805                         if not isinstance(text, TextStringObject):
 
  807                 elif operator == 
b_(
"'"):
 
  809                     if ignoreByteStringObject:
 
  810                         if not isinstance(text, TextStringObject):
 
  812                 elif operator == 
b_(
'"'):
 
  814                     if ignoreByteStringObject:
 
  815                         if not isinstance(text, TextStringObject):
 
  817                 elif operator == 
b_(
"TJ"):
 
  818                     for i 
in range(len(operands[0])):
 
  819                         if ignoreByteStringObject:
 
  820                             if not isinstance(operands[0][i], TextStringObject):
 
  823                 if operator == 
b_(
'q'):
 
  825                 if operator == 
b_(
'Q'):
 
  828                     if operator 
in [
b_(
'cm'), 
b_(
'w'), 
b_(
'J'), 
b_(
'j'), 
b_(
'M'), 
b_(
'd'), 
b_(
'ri'), 
b_(
'i'),
 
  829                             b_(
'gs'), 
b_(
'W'), 
b_(
'b'), 
b_(
's'), 
b_(
'S'), 
b_(
'f'), 
b_(
'F'), 
b_(
'n'), 
b_(
'm'), 
b_(
'l'),
 
  832                 if operator == 
b_(
're'):
 
  834                 _operations.append((operands, operator))
 
  836             content.operations = _operations
 
  837             pageRef.__setitem__(
NameObject(
'/Contents'), content)
 
  841         Removes text from this output. 
  843         :param bool ignoreByteStringObject: optional parameter 
  844             to ignore ByteString Objects. 
  847         for j 
in range(len(pages)):
 
  850             content = pageRef[
'/Contents'].
getObject()
 
  851             if not isinstance(content, ContentStream):
 
  853             for operands,operator 
in content.operations:
 
  854                 if operator == 
b_(
'Tj'):
 
  856                     if not ignoreByteStringObject:
 
  857                         if isinstance(text, TextStringObject):
 
  860                         if isinstance(text, TextStringObject) 
or \
 
  861                                 isinstance(text, ByteStringObject):
 
  863                 elif operator == 
b_(
"'"):
 
  865                     if not ignoreByteStringObject:
 
  866                         if isinstance(text, TextStringObject):
 
  869                         if isinstance(text, TextStringObject) 
or \
 
  870                                 isinstance(text, ByteStringObject):
 
  872                 elif operator == 
b_(
'"'):
 
  874                     if not ignoreByteStringObject:
 
  875                         if isinstance(text, TextStringObject):
 
  878                         if isinstance(text, TextStringObject) 
or \
 
  879                                 isinstance(text, ByteStringObject):
 
  881                 elif operator == 
b_(
"TJ"):
 
  882                     for i 
in range(len(operands[0])):
 
  883                         if not ignoreByteStringObject:
 
  884                             if isinstance(operands[0][i], TextStringObject):
 
  887                             if isinstance(operands[0][i], TextStringObject) 
or \
 
  888                                     isinstance(operands[0][i], ByteStringObject):
 
  891             pageRef.__setitem__(
NameObject(
'/Contents'), content)
 
  893     def addLink(self, pagenum, pagedest, rect, border=None, fit='/Fit', *args):
 
  895         Add an internal link from a rectangular area to the specified page. 
  897         :param int pagenum: index of the page on which to place the link. 
  898         :param int pagedest: index of the page to which the link should go. 
  899         :param rect: :class:`RectangleObject<PyPDF2.generic.RectangleObject>` or array of four 
  900             integers specifying the clickable rectangular area 
  901             ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``. 
  902         :param border: if provided, an array describing border-drawing 
  903             properties. See the PDF spec for details. No border will be 
  904             drawn if this argument is omitted. 
  905         :param str fit: Page fit or 'zoom' option (see below). Additional arguments may need 
  906             to be supplied. Passing ``None`` will be read as a null value for that coordinate. 
  908         Valid zoom arguments (see Table 8.2 of the PDF 1.7 reference for details): 
  909              /Fit       No additional arguments 
  910              /XYZ       [left] [top] [zoomFactor] 
  913              /FitR      [left] [bottom] [right] [top] 
  914              /FitB      No additional arguments 
  923         if border 
is not None:
 
  924             borderArr = [
NameObject(n) 
for n 
in border[:3]]
 
  927                 borderArr.append(dashPattern)
 
  933         elif isinstance(rect, RectangleObject):
 
  945         destArray = dest.getDestArray()
 
  958         if "/Annots" in pageRef:
 
  959             pageRef[
'/Annots'].append(lnkRef)
 
  963     _valid_layouts = [
'/NoLayout', 
'/SinglePage', 
'/OneColumn', 
'/TwoColumnLeft', 
'/TwoColumnRight', 
'/TwoPageLeft', 
'/TwoPageRight']
 
  968         See :meth:`setPageLayout()<PdfFileWriter.setPageLayout>` for a description of valid layouts. 
  970         :return: Page layout currently being used. 
  971         :rtype: str, None if not specified 
  982         :param str layout: The page layout to be used 
  985              /NoLayout        Layout explicitly not specified 
  986              /SinglePage      Show one page at a time 
  987              /OneColumn       Show one column at a time 
  988              /TwoColumnLeft   Show pages in two columns, odd-numbered pages on the left 
  989              /TwoColumnRight  Show pages in two columns, odd-numbered pages on the right 
  990              /TwoPageLeft     Show two pages at a time, odd-numbered pages on the left 
  991              /TwoPageRight    Show two pages at a time, odd-numbered pages on the right 
  993         if not isinstance(layout, NameObject):
 
  995                 warnings.warn(
"Layout should be one of: {}".format(
', '.join(self.
_valid_layouts)))
 
  999     pageLayout = property(getPageLayout, setPageLayout)
 
 1000     """Read and write property accessing the :meth:`getPageLayout()<PdfFileWriter.getPageLayout>` 
 1001     and :meth:`setPageLayout()<PdfFileWriter.setPageLayout>` methods.""" 
 1003     _valid_modes = [
'/UseNone', 
'/UseOutlines', 
'/UseThumbs', 
'/FullScreen', 
'/UseOC', 
'/UseAttachments']
 
 1008         See :meth:`setPageMode()<PdfFileWriter.setPageMode>` for a description 
 1011         :return: Page mode currently being used. 
 1012         :rtype: str, None if not specified 
 1023         :param str mode: The page mode to use. 
 1026             /UseNone         Do not show outlines or thumbnails panels 
 1027             /UseOutlines     Show outlines (aka bookmarks) panel 
 1028             /UseThumbs       Show page thumbnails panel 
 1029             /FullScreen      Fullscreen view 
 1030             /UseOC           Show Optional Content Group (OCG) panel 
 1031             /UseAttachments  Show attachments panel 
 1033         if not isinstance(mode, NameObject):
 
 1035                 warnings.warn(
"Mode should be one of: {}".format(
', '.join(self.
_valid_modes)))
 
 1039     pageMode = property(getPageMode, setPageMode)
 
 1040     """Read and write property accessing the :meth:`getPageMode()<PdfFileWriter.getPageMode>` 
 1041     and :meth:`setPageMode()<PdfFileWriter.setPageMode>` methods.""" 
 1046     Initializes a PdfFileReader object.  This operation can take some time, as 
 1047     the PDF stream's cross-reference tables are read into memory. 
 1049     :param stream: A File object or an object that supports the standard read 
 1050         and seek methods similar to a File object. Could also be a 
 1051         string representing a path to a PDF file. 
 1052     :param bool strict: Determines whether user should be warned of all 
 1053         problems and also causes some correctable problems to be fatal. 
 1054         Defaults to ``True``. 
 1055     :param warndest: Destination for logging warnings (defaults to 
 1057     :param bool overwriteWarnings: Determines whether to override Python's 
 1058         ``warnings.py`` module with a custom implementation (defaults to 
 1061     def __init__(self, stream, strict=True, warndest = None, overwriteWarnings = True):
 
 1062         if overwriteWarnings:
 
 1065             def _showwarning(message, category, filename, lineno, file=warndest, line=None):
 
 1069                     file.write(
formatWarning(message, category, filename, lineno, line))
 
 1072             warnings.showwarning = _showwarning
 
 1078         if hasattr(stream, 
'mode') 
and 'b' not in stream.mode:
 
 1079             warnings.warn(
"PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", 
utils.PdfReadWarning)
 
 1081             fileobj = open(stream, 
'rb')
 
 1091         Retrieves the PDF file's document information dictionary, if it exists. 
 1092         Note that some PDF files use metadata streams instead of docinfo 
 1093         dictionaries, and these metadata streams will not be accessed by this 
 1096         :return: the document information of this PDF file 
 1097         :rtype: :class:`DocumentInformation<pdf.DocumentInformation>` or ``None`` if none exists. 
 1099         if "/Info" not in self.
trailer:
 
 1107     """Read-only property that accesses the :meth:`getDocumentInfo()<PdfFileReader.getDocumentInfo>` function.""" 
 1111         Retrieves XMP (Extensible Metadata Platform) data from the PDF document 
 1114         :return: a :class:`XmpInformation<xmp.XmpInformation>` 
 1115             instance that can be used to access XMP metadata from the document. 
 1116         :rtype: :class:`XmpInformation<xmp.XmpInformation>` or 
 1117             ``None`` if no metadata was found on the document root. 
 1127     Read-only property that accesses the 
 1128     :meth:`getXmpMetadata()<PdfFileReader.getXmpMetadata>` function. 
 1133         Calculates the number of pages in this PDF file. 
 1135         :return: number of pages 
 1137         :raises PdfReadError: if file is encrypted and restrictions prevent 
 1148                 return self.
trailer[
"/Root"][
"/Pages"][
"/Count"]
 
 1160     Read-only property that accesses the 
 1161     :meth:`getNumPages()<PdfFileReader.getNumPages>` function. 
 1166         Retrieves a page by number from this PDF file. 
 1168         :param int pageNumber: The page number to retrieve 
 1169             (pages begin at zero) 
 1170         :return: a :class:`PageObject<pdf.PageObject>` instance. 
 1171         :rtype: :class:`PageObject<pdf.PageObject>` 
 1179     namedDestinations = property(
lambda self:
 
 1182     Read-only property that accesses the 
 1183     :meth:`getNamedDestinations()<PdfFileReader.getNamedDestinations>` function. 
 1189     def getFields(self, tree = None, retval = None, fileobj = None):
 
 1191         Extracts field data if this PDF contains interactive form fields. 
 1192         The *tree* and *retval* parameters are for recursive use. 
 1194         :param fileobj: A file object (usually a text file) to which 
 1195             a report on all interactive form fields found is written. 
 1196         :return: A dictionary where each key is a field name, and each 
 1197             value is a :class:`Field<PyPDF2.generic.Field>` object. By 
 1198             default, the mapping name is used for keys. 
 1199         :rtype: dict, or ``None`` if form data could not be located. 
 1201         fieldAttributes = {
"/FT" : 
"Field Type", 
"/Parent" : 
"Parent",
 
 1202                        "/T" : 
"Field Name", 
"/TU" : 
"Alternate Field Name",
 
 1203                        "/TM" : 
"Mapping Name", 
"/Ff" : 
"Field Flags",
 
 1204                        "/V" : 
"Value", 
"/DV" : 
"Default Value"}
 
 1207             catalog = self.
trailer[
"/Root"]
 
 1209             if "/AcroForm" in catalog:
 
 1210                 tree = catalog[
"/AcroForm"]
 
 1217         for attr 
in fieldAttributes:
 
 1220                 self.
_buildField(tree, retval, fileobj, fieldAttributes)
 
 1223         if "/Fields" in tree:
 
 1224             fields = tree[
"/Fields"]
 
 1226                 field = f.getObject()
 
 1227                 self.
_buildField(field, retval, fileobj, fieldAttributes)
 
 1231     def _buildField(self, field, retval, fileobj, fieldAttributes):
 
 1244         retval[key] = 
Field(field)
 
 1246     def _checkKids(self, tree, retval, fileobj):
 
 1249             for kid 
in tree[
"/Kids"]:
 
 1250                 self.
getFields(kid.getObject(), retval, fileobj)
 
 1252     def _writeField(self, fileobj, field, fieldAttributes):
 
 1253         order = [
"/TM", 
"/T", 
"/FT", 
"/Parent", 
"/TU", 
"/Ff", 
"/V", 
"/DV"]
 
 1255             attrName = fieldAttributes[attr]
 
 1259                     types = {
"/Btn":
"Button", 
"/Tx":
"Text", 
"/Ch": 
"Choice",
 
 1261                     if field[attr] 
in types:
 
 1262                         fileobj.write(attrName + 
": " + types[field[attr]] + 
"\n")
 
 1263                 elif attr == 
"/Parent":
 
 1266                         name = field[
"/Parent"][
"/TM"]
 
 1268                         name = field[
"/Parent"][
"/T"]
 
 1269                     fileobj.write(attrName + 
": " + name + 
"\n")
 
 1271                     fileobj.write(attrName + 
": " + str(field[attr]) + 
"\n")
 
 1277         ''' Retrieves form fields from the document with textual data (inputs, dropdowns) 
 1282             (formfields[field][
'/T'], formfields[field].get(
'/V')) 
for field 
in formfields \
 
 1283                 if formfields[field].get(
'/FT') == 
'/Tx' 
 1288         Retrieves the named destinations present in the document. 
 1290         :return: a dictionary which maps names to 
 1291             :class:`Destinations<PyPDF2.generic.Destination>`. 
 1296             catalog = self.
trailer[
"/Root"]
 
 1299             if "/Dests" in catalog:
 
 1300                 tree = catalog[
"/Dests"]
 
 1301             elif "/Names" in catalog:
 
 1302                 names = catalog[
'/Names']
 
 1303                 if "/Dests" in names:
 
 1304                     tree = names[
'/Dests']
 
 1311             for kid 
in tree[
"/Kids"]:
 
 1314         if "/Names" in tree:
 
 1315             names = tree[
"/Names"]
 
 1316             for i 
in range(0, len(names), 2):
 
 1319                 if isinstance(val, DictionaryObject) 
and '/D' in val:
 
 1329     Read-only property that accesses the 
 1330         :meth:`getOutlines()<PdfFileReader.getOutlines>` function. 
 1335         Retrieves the document outline present in the document. 
 1337         :return: a nested list of :class:`Destinations<PyPDF2.generic.Destination>`. 
 1339         if outlines == 
None:
 
 1341             catalog = self.
trailer[
"/Root"]
 
 1344             if "/Outlines" in catalog:
 
 1346                     lines = catalog[
"/Outlines"]
 
 1353                 if "/First" in lines:
 
 1354                     node = lines[
"/First"]
 
 1364                 outlines.append(outline)
 
 1367             if "/First" in node:
 
 1371                     outlines.append(subOutlines)
 
 1373             if "/Next" not in node:
 
 1375             node = node[
"/Next"]
 
 1379     def _getPageNumberByIndirect(self, indirectRef):
 
 1380         """Generate _pageId2Num""" 
 1383             for i, x 
in enumerate(self.
pages):
 
 1384                 id2num[x.indirectRef.idnum] = i
 
 1387         if isinstance(indirectRef, int):
 
 1390             idnum = indirectRef.idnum
 
 1397         Retrieve page number of a given PageObject 
 1399         :param PageObject page: The page whose number is wanted. Should be 
 1400             an instance of :class:`PageObject<PyPDF2.pdf.PageObject>` 
 1401         :return: the page number or -1 if page not found 
 1404         indirectRef = page.indirectRef
 
 1410         Retrieve page number of a given Destination object 
 1412         :param Destination destination: The destination whose page number is wanted. 
 1413              Should be an instance of 
 1414              :class:`Destination<PyPDF2.pdf.Destination>` 
 1415         :return: the page number or -1 if page not found 
 1418         indirectRef = destination.page
 
 1422     def _buildDestination(self, title, array):
 
 1423         page, typ = array[0:2]
 
 1427     def _buildOutline(self, node):
 
 1428         dest, title, outline = 
None, 
None, 
None 
 1430         if "/A" in node 
and "/Title" in node:
 
 1432             title  = node[
"/Title"]
 
 1434             if action[
"/S"] == 
"/GoTo":
 
 1436         elif "/Dest" in node 
and "/Title" in node:
 
 1438             title = node[
"/Title"]
 
 1439             dest  = node[
"/Dest"]
 
 1443             if isinstance(dest, ArrayObject):
 
 1455     Read-only property that emulates a list based upon the 
 1456     :meth:`getNumPages()<PdfFileReader.getNumPages>` and 
 1457     :meth:`getPage()<PdfFileReader.getPage>` methods. 
 1462         Get the page layout. 
 1463         See :meth:`setPageLayout()<PdfFileWriter.setPageLayout>` 
 1464         for a description of valid layouts. 
 1466         :return: Page layout currently being used. 
 1467         :rtype: ``str``, ``None`` if not specified 
 1470             return self.
trailer[
'/Root'][
'/PageLayout']
 
 1474     pageLayout = property(getPageLayout)
 
 1475     """Read-only property accessing the 
 1476     :meth:`getPageLayout()<PdfFileReader.getPageLayout>` method.""" 
 1481         See :meth:`setPageMode()<PdfFileWriter.setPageMode>` 
 1482         for a description of valid modes. 
 1484         :return: Page mode currently being used. 
 1485         :rtype: ``str``, ``None`` if not specified 
 1488             return self.
trailer[
'/Root'][
'/PageMode']
 
 1492     pageMode = property(getPageMode)
 
 1493     """Read-only property accessing the 
 1494     :meth:`getPageMode()<PdfFileReader.getPageMode>` method.""" 
 1496     def _flatten(self, pages=None, inherit=None, indirectRef=None):
 
 1497         inheritablePageAttributes = (
 
 1509         if "/Type" in pages:
 
 1513             for attr 
in inheritablePageAttributes:
 
 1515                     inherit[attr] = pages[attr]
 
 1516             for page 
in pages[
"/Kids"]:
 
 1518                 if isinstance(page, IndirectObject):
 
 1519                     addt[
"indirectRef"] = page
 
 1520                 self.
_flatten(page.getObject(), inherit, **addt)
 
 1522             for attr, value 
in list(inherit.items()):
 
 1525                 if attr 
not in pages:
 
 1528             pageObj.update(pages)
 
 1531     def _getObjectFromStream(self, indirectReference):
 
 1535         stmnum, idx = self.
xref_objStm[indirectReference.idnum]
 
 1536         if debug: print((
"Here1: %s %s"%(stmnum, idx)))
 
 1538         if debug: print((
"Here2: objStm=%s.. stmnum=%s data=%s"%(objStm, stmnum, objStm.getData())))
 
 1540         assert objStm[
'/Type'] == 
'/ObjStm' 
 1542         assert idx < objStm[
'/N']
 
 1543         streamData = 
BytesIO(
b_(objStm.getData()))
 
 1544         for i 
in range(objStm[
'/N']):
 
 1546             streamData.seek(-1, 1)
 
 1547             objnum = NumberObject.readFromStream(streamData)
 
 1549             streamData.seek(-1, 1)
 
 1550             offset = NumberObject.readFromStream(streamData)
 
 1552             streamData.seek(-1, 1)
 
 1553             if objnum != indirectReference.idnum:
 
 1556             if self.
strict and idx != i:
 
 1558             streamData.seek(objStm[
'/First']+offset, 0)
 
 1560                 pos = streamData.tell()
 
 1561                 streamData.seek(0, 0)
 
 1562                 lines = streamData.readlines()
 
 1563                 for i 
in range(0, len(lines)):
 
 1565                 streamData.seek(pos, 0)
 
 1571                 e = sys.exc_info()[1]
 
 1572                 warnings.warn(
"Invalid stream (index %d) within object %d %d: %s" % \
 
 1586         if debug: print((
"looking at:", indirectReference.idnum, indirectReference.generation))
 
 1588                                                 indirectReference.idnum)
 
 1591         if indirectReference.generation == 0 
and \
 
 1594         elif indirectReference.generation 
in self.
xref and \
 
 1595                 indirectReference.idnum 
in self.
xref[indirectReference.generation]:
 
 1596             start = self.
xref[indirectReference.generation][indirectReference.idnum]
 
 1597             if debug: print((
"  Uncompressed Object", indirectReference.idnum, indirectReference.generation, 
":", start))
 
 1598             self.
stream.seek(start, 0)
 
 1600             if idnum != indirectReference.idnum 
and self.
xrefIndex:
 
 1603                     raise utils.PdfReadError(
"Expected object ID (%d %d) does not match actual (%d %d); xref table not zero-indexed." \
 
 1604                                      % (indirectReference.idnum, indirectReference.generation, idnum, generation))
 
 1606             elif idnum != indirectReference.idnum:
 
 1608                 raise utils.PdfReadError(
"Expected object ID (%d %d) does not match actual (%d %d)." \
 
 1609                                          % (indirectReference.idnum, indirectReference.generation, idnum, generation))
 
 1610             assert generation == indirectReference.generation
 
 1616                 if not hasattr(self, 
'_decryption_key'):
 
 1620                 pack1 = struct.pack(
"<i", indirectReference.idnum)[:3]
 
 1621                 pack2 = struct.pack(
"<i", indirectReference.generation)[:2]
 
 1624                 md5_hash = md5(key).digest()
 
 1628             warnings.warn(
"Object %d %d not defined."%(indirectReference.idnum,
 
 1633                     indirectReference.idnum, retval)
 
 1636     def _decryptObject(self, obj, key):
 
 1637         if isinstance(obj, ByteStringObject) 
or isinstance(obj, TextStringObject):
 
 1639         elif isinstance(obj, StreamObject):
 
 1641         elif isinstance(obj, DictionaryObject):
 
 1642             for dictkey, value 
in list(obj.items()):
 
 1644         elif isinstance(obj, ArrayObject):
 
 1645             for i 
in range(len(obj)):
 
 1660         obj = stream.read(3)
 
 1663         if (extra 
and self.
strict):
 
 1665             warnings.warn(
"Superfluous whitespace found in object header %s %s" % \
 
 1667         return int(idnum), int(generation)
 
 1672         if debug 
and out: print((
"cache hit: %d %d"%(idnum, generation)))
 
 1673         elif debug: print((
"cache miss: %d %d"%(idnum, generation)))
 
 1679             msg = 
"Overwriting cache for %s %s"%(generation, idnum)
 
 1681             else:           warnings.warn(msg)
 
 1687         if debug: print(
">>read", stream)
 
 1690         if not stream.tell():
 
 1692         last1K = stream.tell() - 1024 + 1 
 
 1694         while line[:5] != 
b_(
"%%EOF"):
 
 1695             if stream.tell() < last1K:
 
 1698             if debug: print(
"  line:",line)
 
 1703             startxref = int(line)
 
 1706             if not line.startswith(
b_(
"startxref")):
 
 1708             startxref = int(line[9:].strip())
 
 1709             warnings.warn(
"startxref on same line as offset")
 
 1712             if line[:9] != 
b_(
"startxref"):
 
 1721             stream.seek(startxref, 0)
 
 1725                 ref = stream.read(4)
 
 1726                 if ref[:3] != 
b_(
"ref"):
 
 1733                     if firsttime 
and num != 0:
 
 1736                             warnings.warn(
"Xref table not zero-indexed. ID numbers for objects will be corrected.", 
utils.PdfReadWarning)
 
 1747                         line = stream.read(20)
 
 1755                         while line[0] 
in b_(
"\x0D\x0A"):
 
 1756                             stream.seek(-20 + 1, 1)
 
 1757                             line = stream.read(20)
 
 1765                         if line[-1] 
in b_(
"0123456789t"):
 
 1768                         offset, generation = line[:16].split(
b_(
" "))
 
 1769                         offset, generation = int(offset), int(generation)
 
 1770                         if generation 
not in self.
xref:
 
 1771                             self.
xref[generation] = {}
 
 1772                         if num 
in self.
xref[generation]:
 
 1779                             self.
xref[generation][num] = offset
 
 1784                     trailertag = stream.read(7)
 
 1785                     if trailertag != 
b_(
"trailer"):
 
 1793                 for key, value 
in list(newTrailer.items()):
 
 1796                 if "/Prev" in newTrailer:
 
 1797                     startxref = newTrailer[
"/Prev"]
 
 1805                 assert xrefstream[
"/Type"] == 
"/XRef" 
 1807                 streamData = 
BytesIO(
b_(xrefstream.getData()))
 
 1810                 idx_pairs = xrefstream.get(
"/Index", [0, xrefstream.get(
"/Size")])
 
 1811                 if debug: print((
"read idx_pairs=%s"%list(self.
_pairs(idx_pairs))))
 
 1812                 entrySizes = xrefstream.get(
"/W")
 
 1813                 assert len(entrySizes) >= 3
 
 1814                 if self.
strict and len(entrySizes) > 3:
 
 1820                     if entrySizes[i] > 0:
 
 1821                         d = streamData.read(entrySizes[i])
 
 1829                 def used_before(num, generation):
 
 1831                     return num 
in self.
xref.get(generation, []) 
or \
 
 1836                 for start, size 
in self.
_pairs(idx_pairs):
 
 1838                     assert start >= last_end
 
 1839                     last_end = start + size
 
 1840                     for num 
in range(start, start+size):
 
 1842                         xref_type = getEntry(0)
 
 1846                             next_free_object = getEntry(1)
 
 1847                             next_generation = getEntry(2)
 
 1848                         elif xref_type == 1:
 
 1850                             byte_offset = getEntry(1)
 
 1851                             generation = getEntry(2)
 
 1852                             if generation 
not in self.
xref:
 
 1853                                 self.
xref[generation] = {}
 
 1854                             if not used_before(num, generation):
 
 1855                                 self.
xref[generation][num] = byte_offset
 
 1856                                 if debug: print((
"XREF Uncompressed: %s %s"%(
 
 1858                         elif xref_type == 2:
 
 1860                             objstr_num = getEntry(1)
 
 1861                             obstr_idx = getEntry(2)
 
 1863                             if not used_before(num, generation):
 
 1864                                 if debug: print((
"XREF Compressed: %s %s %s"%(
 
 1865                                         num, objstr_num, obstr_idx)))
 
 1871                 trailerKeys = 
"/Root", 
"/Encrypt", 
"/Info", 
"/ID" 
 1872                 for key 
in trailerKeys:
 
 1873                     if key 
in xrefstream 
and key 
not in self.
trailer:
 
 1875                 if "/Prev" in xrefstream:
 
 1876                     startxref = xrefstream[
"/Prev"]
 
 1884                 tmp = stream.read(20)
 
 1885                 xref_loc = tmp.find(
b_(
"xref"))
 
 1887                     startxref -= (10 - xref_loc)
 
 1890                 stream.seek(startxref, 0)
 
 1892                 for look 
in range(5):
 
 1893                     if stream.read(1).isdigit():
 
 1905             for gen 
in self.
xref:
 
 1906                 if gen == 65535: 
continue 
 1907                 for id 
in self.
xref[gen]:
 
 1908                     stream.seek(self.
xref[gen][id], 0)
 
 def _zeroXref(self, generation):
     """
     Re-key one generation's cross-reference table so that every key is
     shifted down by ``self.xrefIndex``.

     ``self.xref[generation]`` is replaced by a new dict holding the same
     values under ``key - self.xrefIndex``; tables stored under other
     generations are left untouched.

     :param generation: key into ``self.xref`` selecting the table to rebase.
     """
     # Read the shift once; it is constant for the whole rebuild.
     shift = self.xrefIndex
     # The new dict is fully built before it replaces the old one, so the
     # source mapping can be iterated directly without a defensive copy.
     self.xref[generation] = dict(
         (objnum - shift, offset)
         for objnum, offset in self.xref[generation].items()
     )
 
 1922     def _pairs(self, array):
 
 1925             yield array[i], array[i+1]
 
 1927             if (i+1) >= len(array):
 
 1932         if debug: print(
">>readNextEndLine")
 
 1936             if stream.tell() == 0:
 
 1939             if debug: print((
"  x:", x, 
"%x"%ord(x)))
 
 1940             if stream.tell() < 2:
 
 1943             if x == 
b_(
'\n') 
or x == 
b_(
'\r'): 
 
 1945                 while x == 
b_(
'\n') 
or x == 
b_(
'\r'):
 
 1947                         if ord(x) == 0x0D: print(
"  x is CR 0D")
 
 1948                         elif ord(x) == 0x0A: print(
"  x is LF 0A")
 
 1950                     if x == 
b_(
'\n') 
or x == 
b_(
'\r'): 
 
 1953                     if stream.tell() < 2:
 
 1956                 stream.seek(2 
if crlf 
else 1, 1) 
 
 1959                 if debug: print(
"  x is neither")
 
 1961                 if debug: print((
"  RNEL line:", line))
 
 1962         if debug: print(
"leaving RNEL")
 
 1967         When using an encrypted / secured PDF file with the PDF Standard 
 1968         encryption handler, this function will allow the file to be decrypted. 
 1969         It checks the given password against the document's user password and 
 1970         owner password, and then stores the resulting decryption key if either 
 1971         password is correct. 
 1973         It does not matter which password was matched.  Both passwords provide 
 1974         the correct decryption key that will allow the document to be used with 
 1977         :param str password: The password to match. 
 1978         :return: ``0`` if the password failed, ``1`` if the password matched the user 
 1979             password, and ``2`` if the password matched the owner password. 
 1981         :raises NotImplementedError: if document uses an unsupported encryption 
 1991     def _decrypt(self, password):
 
 1993         if encrypt[
'/Filter'] != 
'/Standard':
 
 1994             raise NotImplementedError(
"only Standard PDF encryption handler is available")
 
 1995         if not (encrypt[
'/V'] 
in (1, 2)):
 
 1996             raise NotImplementedError(
"only algorithm code 1 and 2 are supported")
 
 2006                 keylen = encrypt[
'/Length'].
getObject() // 8
 
 2007             key = _alg33_1(password, rev, keylen)
 
 2013                 for i 
in range(19, -1, -1):
 
 2015                     for l 
in range(len(key)):
 
 2025     def _authenticateUserPassword(self, password):
 
 2032         real_U = encrypt[
'/U'].
getObject().original_bytes
 
 2034             U, key = _alg34(password, owner_entry, p_entry, id1_entry)
 
 2036             U, key = _alg35(password, rev,
 
 2037                     encrypt[
"/Length"].
getObject() // 8, owner_entry,
 
 2040             U, real_U = U[:16], real_U[:16]
 
 2041         return U == real_U, key
 
 2044         return "/Encrypt" in self.
trailer 
 2048     Read-only boolean property showing whether this PDF file is encrypted. 
 2049     Note that this property, if true, will remain true even after the 
 2050     :meth:`decrypt()<PdfFileReader.decrypt>` method is called. 
 2055     retval = self.get(name)
 
 2056     if isinstance(retval, RectangleObject):
 
 2060             retval = self.get(d)
 
 2063     if isinstance(retval, IndirectObject):
 
 2064         retval = self.pdf.getObject(retval)
 
 2071     if not isinstance(name, NameObject):
 
 2091     This class represents a single page within a PDF file.  Typically this 
 2092     object will be created by accessing the 
 2093     :meth:`getPage()<PyPDF2.PdfFileReader.getPage>` method of the 
 2094     :class:`PdfFileReader<PyPDF2.PdfFileReader>` class, but it is 
 2095     also possible to create an empty page with the 
 2096     :meth:`createBlankPage()<PageObject.createBlankPage>` static method. 
 2098     :param pdf: PDF file the page belongs to. 
 2099     :param indirectRef: Stores the original indirect reference to 
 2100         this object in its source PDF 
 2103         DictionaryObject.__init__(self)
 
 2109         Returns a new blank page. 
 2110         If ``width`` or ``height`` is ``None``, try to get the page size 
 2111         from the last page of *pdf*. 
 2113         :param pdf: PDF file the page belongs to 
 2114         :param float width: The width of the new page expressed in default user 
 2116         :param float height: The height of the new page expressed in default user 
 2118         :return: the new blank page: 
 2119         :rtype: :class:`PageObject<PageObject>` 
 2120         :raises PageSizeNotDefinedError: if ``pdf`` is ``None`` or contains 
 2129         if width 
is None or height 
is None:
 
 2130             if pdf 
is not None and pdf.getNumPages() > 0:
 
 2131                 lastpage = pdf.getPage(pdf.getNumPages() - 1)
 
 2132                 width = lastpage.mediaBox.getWidth()
 
 2133                 height = lastpage.mediaBox.getHeight()
 
 2140     createBlankPage = staticmethod(createBlankPage)
 
 2144         Rotates a page clockwise by increments of 90 degrees. 
 2146         :param int angle: Angle to rotate the page.  Must be an increment 
 2149         assert angle % 90 == 0
 
 2155         Rotates a page counter-clockwise by increments of 90 degrees. 
 2157         :param int angle: Angle to rotate the page.  Must be an increment 
 2160         assert angle % 90 == 0
 
 2164     def _rotate(self, angle):
 
 2165         currentAngle = self.get(
"/Rotate", 0)
 
 2168     def _mergeResources(res1, res2, resource):
 
 2173         for key 
in list(page2Res.keys()):
 
 2174             if key 
in newRes 
and newRes.raw_get(key) != page2Res.raw_get(key):
 
 2175                 newname = 
NameObject(key + str(uuid.uuid4()))
 
 2176                 renameRes[key] = newname
 
 2177                 newRes[newname] = page2Res[key]
 
 2178             elif key 
not in newRes:
 
 2179                 newRes[key] = page2Res.raw_get(key)
 
 2180         return newRes, renameRes
 
 2181     _mergeResources = staticmethod(_mergeResources)
 
 2183     def _contentStreamRename(stream, rename, pdf):
 
 2187         for operands, operator 
in stream.operations:
 
 2188             for i 
in range(len(operands)):
 
 2190                 if isinstance(op, NameObject):
 
 2191                     operands[i] = rename.get(op,op)
 
 2193     _contentStreamRename = staticmethod(_contentStreamRename)
 
 2195     def _pushPopGS(contents, pdf):
 
 2200         stream.operations.insert(0, [[], 
"q"])
 
 2201         stream.operations.append([[], 
"Q"])
 
 2203     _pushPopGS = staticmethod(_pushPopGS)
 
 2205     def _addTransformationMatrix(contents, pdf, ctm):
 
 2208         a, b, c, d, e, f = ctm
 
 2214     _addTransformationMatrix = staticmethod(_addTransformationMatrix)
 
 2218         Accesses the page contents. 
 2220         :return: the ``/Contents`` object, or ``None`` if it doesn't exist. 
 2221             ``/Contents`` is optional, as described in PDF Reference  7.7.3.3 
 2223         if "/Contents" in self:
 
 2230         Merges the content streams of two pages into one.  Resource references 
 2231         (i.e. fonts) are maintained from both pages.  The mediabox/cropbox/etc 
 2232         of this page are not altered.  The parameter page's content stream will 
 2233         be added to the end of this page's content stream, meaning that it will 
 2234         be drawn after, or "on top" of this page. 
 2236         :param PageObject page2: The page to be merged into this one. Should be 
 2237             an instance of :class:`PageObject<PageObject>`. 
 2241     def _mergePage(self, page2, page2transformation=None, ctm=None, expand=False):
 
 2248         originalResources = self[
"/Resources"].
getObject()
 
 2249         page2Resources = page2[
"/Resources"].
getObject()
 
 2252         for page 
in (self, page2):
 
 2253             if "/Annots" in page:
 
 2254                 annots = page[
"/Annots"]
 
 2255                 if isinstance(annots, ArrayObject):
 
 2257                         newAnnots.append(ref)
 
 2259         for res 
in "/ExtGState", 
"/Font", 
"/XObject", 
"/ColorSpace", 
"/Pattern", 
"/Shading", 
"/Properties":
 
 2260             new, newrename = PageObject._mergeResources(originalResources, page2Resources, res)
 
 2263                 rename.update(newrename)
 
 2275         if originalContent 
is not None:
 
 2276             newContentArray.append(PageObject._pushPopGS(
 
 2277                   originalContent, self.
pdf))
 
 2279         page2Content = page2.getContents()
 
 2280         if page2Content 
is not None:
 
 2281             if page2transformation 
is not None:
 
 2282                 page2Content = page2transformation(page2Content)
 
 2283             page2Content = PageObject._contentStreamRename(
 
 2284                 page2Content, rename, self.
pdf)
 
 2285             page2Content = PageObject._pushPopGS(page2Content, self.
pdf)
 
 2286             newContentArray.append(page2Content)
 
 2290             corners1 = [self.
mediaBox.getLowerLeft_x().as_numeric(), self.
mediaBox.getLowerLeft_y().as_numeric(),
 
 2291                         self.
mediaBox.getUpperRight_x().as_numeric(), self.
mediaBox.getUpperRight_y().as_numeric()]
 
 2292             corners2 = [page2.mediaBox.getLowerLeft_x().as_numeric(), page2.mediaBox.getLowerLeft_y().as_numeric(),
 
 2293                         page2.mediaBox.getUpperLeft_x().as_numeric(), page2.mediaBox.getUpperLeft_y().as_numeric(),
 
 2294                         page2.mediaBox.getUpperRight_x().as_numeric(), page2.mediaBox.getUpperRight_y().as_numeric(),
 
 2295                         page2.mediaBox.getLowerRight_x().as_numeric(), page2.mediaBox.getLowerRight_y().as_numeric()]
 
 2297                 ctm = [float(x) 
for x 
in ctm]
 
 2298                 new_x = [ctm[0]*corners2[i] + ctm[2]*corners2[i+1] + ctm[4] 
for i 
in range(0, 8, 2)]
 
 2299                 new_y = [ctm[1]*corners2[i] + ctm[3]*corners2[i+1] + ctm[5] 
for i 
in range(0, 8, 2)]
 
 2301                 new_x = corners2[0:8:2]
 
 2302                 new_y = corners2[1:8:2]
 
 2303             lowerleft = [min(new_x), min(new_y)]
 
 2304             upperright = [max(new_x), max(new_y)]
 
 2305             lowerleft = [min(corners1[0], lowerleft[0]), min(corners1[1], lowerleft[1])]
 
 2306             upperright = [max(corners1[2], upperright[0]), max(corners1[3], upperright[1])]
 
 2308             self.
mediaBox.setLowerLeft(lowerleft)
 
 2309             self.
mediaBox.setUpperRight(upperright)
 
 2312         self[
NameObject(
'/Resources')] = newResources
 
 2317         This is similar to mergePage, but a transformation matrix is 
 2318         applied to the merged stream. 
 2320         :param PageObject page2: The page to be merged into this one. Should be 
 2321             an instance of :class:`PageObject<PageObject>`. 
 2322         :param tuple ctm: a 6-element tuple containing the operands of the 
 2323             transformation matrix 
 2324         :param bool expand: Whether the page should be expanded to fit the dimensions 
 2325             of the page to be merged. 
 2328             PageObject._addTransformationMatrix(page2Content, page2.pdf, ctm), ctm, expand)
 
 2332         This is similar to mergePage, but the stream to be merged is scaled 
 2333         by applying a transformation matrix. 
 2335         :param PageObject page2: The page to be merged into this one. Should be 
 2336             an instance of :class:`PageObject<PageObject>`. 
 2337         :param float scale: The scaling factor 
 2338         :param bool expand: Whether the page should be expanded to fit the 
 2339             dimensions of the page to be merged. 
 2348         This is similar to mergePage, but the stream to be merged is rotated 
 2349         by applying a transformation matrix. 
 2351         :param PageObject page2: the page to be merged into this one. Should be 
 2352             an instance of :class:`PageObject<PageObject>`. 
 2353         :param float rotation: The angle of the rotation, in degrees 
 2354         :param bool expand: Whether the page should be expanded to fit the 
 2355             dimensions of the page to be merged. 
 2357         rotation = math.radians(rotation)
 
 2359             [math.cos(rotation),  math.sin(rotation),
 
 2360              -math.sin(rotation), math.cos(rotation),
 
 2365         This is similar to mergePage, but the stream to be merged is translated 
 2366         by applying a transformation matrix. 
 2368         :param PageObject page2: the page to be merged into this one. Should be 
 2369             an instance of :class:`PageObject<PageObject>`. 
 2370         :param float tx: The translation on X axis 
 2371         :param float ty: The translation on Y axis 
 2372         :param bool expand: Whether the page should be expanded to fit the 
 2373             dimensions of the page to be merged. 
 2381         This is similar to mergePage, but the stream to be merged is rotated 
 2382         and translated by applying a transformation matrix. 
 2384         :param PageObject page2: the page to be merged into this one. Should be 
 2385             an instance of :class:`PageObject<PageObject>`. 
 2386         :param float tx: The translation on X axis 
 2387         :param float ty: The translation on Y axis 
 2388         :param float rotation: The angle of the rotation, in degrees 
 2389         :param bool expand: Whether the page should be expanded to fit the 
 2390             dimensions of the page to be merged. 
 2393         translation = [[1, 0, 0],
 
 2396         rotation = math.radians(rotation)
 
 2397         rotating = [[math.cos(rotation), math.sin(rotation), 0],
 
 2398                     [-math.sin(rotation), math.cos(rotation), 0],
 
 2400         rtranslation = [[1, 0, 0],
 
 2407                                                  ctm[1][0], ctm[1][1],
 
 2408                                                  ctm[2][0], ctm[2][1]], expand)
 
 2412         This is similar to mergePage, but the stream to be merged is rotated 
 2413         and scaled by applying a transformation matrix. 
 2415         :param PageObject page2: the page to be merged into this one. Should be 
 2416             an instance of :class:`PageObject<PageObject>`. 
 2417         :param float rotation: The angle of the rotation, in degrees 
 2418         :param float scale: The scaling factor 
 2419         :param bool expand: Whether the page should be expanded to fit the 
 2420             dimensions of the page to be merged. 
 2422         rotation = math.radians(rotation)
 
 2423         rotating = [[math.cos(rotation), math.sin(rotation), 0],
 
 2424                     [-math.sin(rotation), math.cos(rotation), 0],
 
 2426         scaling = [[scale, 0,    0],
 
 2432                                          [ctm[0][0], ctm[0][1],
 
 2433                                           ctm[1][0], ctm[1][1],
 
 2434                                           ctm[2][0], ctm[2][1]], expand)
 
 2438         This is similar to mergePage, but the stream to be merged is translated 
 2439         and scaled by applying a transformation matrix. 
 2441         :param PageObject page2: the page to be merged into this one. Should be 
 2442             an instance of :class:`PageObject<PageObject>`. 
 2443         :param float scale: The scaling factor 
 2444         :param float tx: The translation on X axis 
 2445         :param float ty: The translation on Y axis 
 2446         :param bool expand: Whether the page should be expanded to fit the 
 2447             dimensions of the page to be merged. 
 2450         translation = [[1, 0, 0],
 
 2453         scaling = [[scale, 0,    0],
 
 2459                                                  ctm[1][0], ctm[1][1],
 
 2460                                                  ctm[2][0], ctm[2][1]], expand)
 
 2464         This is similar to mergePage, but the stream to be merged is translated, 
 2465         rotated and scaled by applying a transformation matrix. 
 2467         :param PageObject page2: the page to be merged into this one. Should be 
 2468             an instance of :class:`PageObject<PageObject>`. 
 2469         :param float tx: The translation on X axis 
 2470         :param float ty: The translation on Y axis 
 2471         :param float rotation: The angle of the rotation, in degrees 
 2472         :param float scale: The scaling factor 
 2473         :param bool expand: Whether the page should be expanded to fit the 
 2474             dimensions of the page to be merged. 
 2476         translation = [[1, 0, 0],
 
 2479         rotation = math.radians(rotation)
 
 2480         rotating = [[math.cos(rotation), math.sin(rotation), 0],
 
 2481                     [-math.sin(rotation), math.cos(rotation), 0],
 
 2483         scaling = [[scale, 0,    0],
 
 2490                                                  ctm[1][0], ctm[1][1],
 
 2491                                                  ctm[2][0], ctm[2][1]], expand)
 
 2500         Applies a transformation matrix to the page. 
 2502         :param tuple ctm: A 6-element tuple containing the operands of the 
 2503             transformation matrix. 
 2506         if originalContent 
is not None:
 
 2507             newContent = PageObject._addTransformationMatrix(
 
 2508                 originalContent, self.
pdf, ctm)
 
 2509             newContent = PageObject._pushPopGS(newContent, self.
pdf)
 
 2514         Scales a page by the given factors by applying a transformation 
 2515         matrix to its content and updating the page size. 
 2517         :param float sx: The scaling factor on horizontal axis. 
 2518         :param float sy: The scaling factor on vertical axis. 
 2524             float(self.
mediaBox.getLowerLeft_x()) * sx,
 
 2525             float(self.
mediaBox.getLowerLeft_y()) * sy,
 
 2526             float(self.
mediaBox.getUpperRight_x()) * sx,
 
 2527             float(self.
mediaBox.getUpperRight_y()) * sy])
 
 2529             viewport = self[
"/VP"]
 
 2530             if isinstance(viewport, ArrayObject):
 
 2531                 bbox = viewport[0][
"/BBox"]
 
 2533                 bbox = viewport[
"/BBox"]
 
 2535                 float(bbox[0]) * sx,
 
 2536                 float(bbox[1]) * sy,
 
 2537                 float(bbox[2]) * sx,
 
 2538                 float(bbox[3]) * sy])
 
 2539             if isinstance(viewport, ArrayObject):
 
 2546         Scales a page by the given factor by applying a transformation 
 2547         matrix to its content and updating the page size. 
 2549         :param float factor: The scaling factor (for both X and Y axis). 
 2551         self.
scale(factor, factor)
 
 2555         Scales a page to the specified dimensions by applying a 
 2556         transformation matrix to its content and updating the page size. 
 2558         :param float width: The new width. 
 2559         :param float height: The new height. 
 2561         sx = width / float(self.
mediaBox.getUpperRight_x() -
 
 2563         sy = height / float(self.
mediaBox.getUpperRight_y() -
 
 2569         Compresses the size of this page by joining all content streams and 
 2570         applying a FlateDecode filter. 
 2572         However, it is possible that this function will perform no action if 
 2573         content stream compression becomes "automatic" for some reason. 
 2576         if content 
is not None:
 
 2577             if not isinstance(content, ContentStream):
 
 2579             self[
NameObject(
"/Contents")] = content.flateEncode()
 
 2583         Locate all text drawing commands, in the order they are provided in the 
 2584         content stream, and extract the text.  This works well for some PDF 
 2585         files, but poorly for others, depending on the generator used.  This will 
 2586         be refined in the future.  Do not rely on the order of text coming out of 
 2587         this function, as it will change if this function is made more 
 2590         :return: a unicode string object. 
 2594         if not isinstance(content, ContentStream):
 
 2599         for operands, operator 
in content.operations:
 
 2600             if operator == 
b_(
"Tj"):
 
 2602                 if isinstance(_text, TextStringObject):
 
 2604             elif operator == 
b_(
"T*"):
 
 2606             elif operator == 
b_(
"'"):
 
 2609                 if isinstance(_text, TextStringObject):
 
 2611             elif operator == 
b_(
'"'):
 
 2613                 if isinstance(_text, TextStringObject):
 
 2616             elif operator == 
b_(
"TJ"):
 
 2617                 for i 
in operands[0]:
 
 2618                     if isinstance(i, TextStringObject):
 
 2625     A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units, 
 2626     defining the boundaries of the physical medium on which the page is 
 2627     intended to be displayed or printed. 
 2632     A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units, 
 2633     defining the visible region of default user space.  When the page is 
 2634     displayed or printed, its contents are to be clipped (cropped) to this 
 2635     rectangle and then imposed on the output medium in some 
 2636     implementation-defined manner.  Default value: same as :attr:`mediaBox<mediaBox>`. 
 2641     A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units, 
 2642     defining the region to which the contents of the page should be clipped 
 2643     when output in a production environment. 
 2648     A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units, 
 2649     defining the intended dimensions of the finished page after trimming. 
 2654     A :class:`RectangleObject<PyPDF2.generic.RectangleObject>`, expressed in default user space units, 
 2655     defining the extent of the page's meaningful content as intended by the 
 2666         stream = stream.getObject()
 
 2667         if isinstance(stream, ArrayObject):
 
 2670                 data += s.getObject().
getData()
 
 2676     def __parseContentStream(self, stream):
 
 2682             if peek == 
b_(
'') 
or ord_(peek) == 0:
 
 2685             if peek.isalpha() 
or peek == 
b_(
"'") 
or peek == 
b_(
'"'):
 
 2687                         NameObject.delimiterPattern, 
True)
 
 2688                 if operator == 
b_(
"BI"):
 
 2691                     assert operands == []
 
 2697             elif peek == 
b_(
'%'):
 
 2703                 while peek 
not in (
b_(
'\r'), 
b_(
'\n')):
 
 2704                     peek = stream.read(1)
 
 2708     def _readInlineImage(self, stream):
 
 2722             settings[key] = value
 
 2724         tmp = stream.read(3)
 
 2725         assert tmp[:2] == 
b_(
"ID")
 
 2729             tok = stream.read(1)
 
 2732                 tok2 = stream.read(1)
 
 2735                     tok3 = stream.read(1)
 
 2738                     has_q_whitespace = 
False 
 2739                     while tok3 
in utils.WHITESPACES:
 
 2740                         has_q_whitespace = 
True 
 2742                         tok3 = stream.read(1)
 
 2743                     if tok3 == 
b_(
"Q") 
and has_q_whitespace:
 
 2754         return {
"settings": settings, 
"data": data}
 
 2759             if operator == 
b_(
"INLINE IMAGE"):
 
 2760                 newdata.write(
b_(
"BI"))
 
 2763                 newdata.write(dicttext.getvalue()[2:-2])
 
 2764                 newdata.write(
b_(
"ID "))
 
 2765                 newdata.write(operands[
"data"])
 
 2766                 newdata.write(
b_(
"EI"))
 
 2769                     op.writeToStream(newdata, 
None)
 
 2770                     newdata.write(
b_(
" "))
 
 2771                 newdata.write(
b_(operator))
 
 2772             newdata.write(
b_(
"\n"))
 
 2773         return newdata.getvalue()
 
 2775     def _setData(self, value):
 
 2778     _data = property(_getData, _setData)
 
 2783     A class representing the basic document metadata provided in a PDF File. 
 2784     This class is accessible through 
 2785     :meth:`getDocumentInfo()<PyPDF2.PdfFileReader.getDocumentInfo()>` 
 2787     All text properties of the document metadata have 
 2788     *two* properties, e.g. author and author_raw. The non-raw property will 
 2789     always return a ``TextStringObject``, making it ideal for a case where 
 2790     the metadata is being displayed. The raw property can sometimes return 
 2791     a ``ByteStringObject``, if PyPDF2 was unable to decode the string's 
 2792     text encoding; this requires additional safety in the caller and 
 2793     therefore is not as commonly accessed. 
 2797         DictionaryObject.__init__(self)
 
 2800         retval = self.get(key, 
None)
 
 2801         if isinstance(retval, TextStringObject):
 
 2805     title = property(
lambda self: self.
getText(
"/Title"))
 
 2806     """Read-only property accessing the document's **title**. 
 2807     Returns a unicode string (``TextStringObject``) or ``None`` 
 2808     if the title is not specified.""" 
 2809     title_raw = property(
lambda self: self.get(
"/Title"))
 
 2810     """The "raw" version of title; can return a ``ByteStringObject``.""" 
 2812     author = property(
lambda self: self.
getText(
"/Author"))
 
 2813     """Read-only property accessing the document's **author**. 
 2814     Returns a unicode string (``TextStringObject``) or ``None`` 
 2815     if the author is not specified.""" 
 2816     author_raw = property(
lambda self: self.get(
"/Author"))
 
 2817     """The "raw" version of author; can return a ``ByteStringObject``.""" 
 2819     subject = property(
lambda self: self.
getText(
"/Subject"))
 
 2820     """Read-only property accessing the document's **subject**. 
 2821     Returns a unicode string (``TextStringObject``) or ``None`` 
 2822     if the subject is not specified.""" 
 2823     subject_raw = property(
lambda self: self.get(
"/Subject"))
 
 2824     """The "raw" version of subject; can return a ``ByteStringObject``.""" 
 2826     creator = property(
lambda self: self.
getText(
"/Creator"))
 
 2827     """Read-only property accessing the document's **creator**. If the 
 2828     document was converted to PDF from another format, this is the name of the 
 2829     application (e.g. OpenOffice) that created the original document from 
 2830     which it was converted. Returns a unicode string (``TextStringObject``) 
 2831     or ``None`` if the creator is not specified.""" 
 2832     creator_raw = property(
lambda self: self.get(
"/Creator"))
 
 2833     """The "raw" version of creator; can return a ``ByteStringObject``.""" 
 2835     producer = property(
lambda self: self.
getText(
"/Producer"))
 
 2836     """Read-only property accessing the document's **producer**. 
 2837     If the document was converted to PDF from another format, this is 
 2838     the name of the application (for example, OSX Quartz) that converted 
 2839     it to PDF. Returns a unicode string (``TextStringObject``) 
 2840     or ``None`` if the producer is not specified.""" 
 2841     producer_raw = property(
lambda self: self.get(
"/Producer"))
 
 2842     """The "raw" version of producer; can return a ``ByteStringObject``.""" 
 2848     d = 
b_(
"\x00\x00\x00\x00\x00\x00\x00\x00") + 
b_(d)
 
 2850     return struct.unpack(
">q", d)[0]
 
 2853 _encryption_padding = 
b_(
'\x28\xbf\x4e\x5e\x4e\x75\x8a\x41\x64\x00\x4e\x56') + \
 
 2854         b_(
'\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \
 
 2855         b_(
'\xa9\xfe\x64\x53\x69\x7a')
 
 2860 def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True):
 
 2866     password = 
b_((
str_(password) + 
str_(_encryption_padding))[:32])
 
 2873     m.update(owner_entry.original_bytes)
 
 2876     p_entry = struct.pack(
'<i', p_entry)
 
 2880     m.update(id1_entry.original_bytes)
 
 2883     if rev >= 3 
and not metadata_encrypt:
 
 2884         m.update(
b_(
"\xff\xff\xff\xff"))
 
 2886     md5_hash = m.digest()
 
 2894             md5_hash = md5(md5_hash[:keylen]).digest()
 
 2899     return md5_hash[:keylen]
 
 2904 def _alg33(owner_pwd, user_pwd, rev, keylen):
 
 2906     key = _alg33_1(owner_pwd, rev, keylen)
 
 2909     user_pwd = 
b_((user_pwd + 
str_(_encryption_padding))[:32])
 
 2920         for i 
in range(1, 20):
 
 2922             for l 
in range(len(key)):
 
 2923                 new_key += chr(
ord_(key[l]) ^ i)
 
 2931 def _alg33_1(password, rev, keylen):
 
 2935     password = 
b_((password + 
str_(_encryption_padding))[:32])
 
 2941     md5_hash = m.digest()
 
 2944             md5_hash = md5(md5_hash).digest()
 
 2949     key = md5_hash[:keylen]
 
 2955 def _alg34(password, owner_entry, p_entry, id1_entry):
 
 2958     key = _alg32(password, 2, 5, owner_entry, p_entry, id1_entry)
 
 2970 def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt):
 
 2973     key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)
 
 2977     m.update(_encryption_padding)
 
 2982     m.update(id1_entry.original_bytes)
 
 2983     md5_hash = m.digest()
 
 2993     for i 
in range(1, 20):
 
 2995         for l 
in range(len(key)):
 
 2996             new_key += 
b_(chr(
ord_(key[l]) ^ i))
 
 3004     return val + (
b_(
'\x00') * 16), key