github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/model/reader.go (about) 1 /* 2 * This file is subject to the terms and conditions defined in 3 * file 'LICENSE.md', which is part of this source code package. 4 */ 5 6 package model 7 8 import ( 9 "errors" 10 "fmt" 11 "io" 12 "strings" 13 14 "github.com/unidoc/unidoc/common" 15 . "github.com/unidoc/unidoc/pdf/core" 16 ) 17 18 // PdfReader represents a PDF file reader. It is a frontend to the lower level parsing mechanism and provides 19 // a higher level access to work with PDF structure and information, such as the page structure etc. 20 type PdfReader struct { 21 parser *PdfParser 22 root PdfObject 23 pages *PdfObjectDictionary 24 pageList []*PdfIndirectObject 25 PageList []*PdfPage 26 pageCount int 27 catalog *PdfObjectDictionary 28 outlineTree *PdfOutlineTreeNode 29 AcroForm *PdfAcroForm 30 31 modelManager *ModelManager 32 33 // For tracking traversal (cache). 34 traversed map[PdfObject]bool 35 } 36 37 // NewPdfReader returns a new PdfReader for an input io.ReadSeeker interface. Can be used to read PDF from 38 // memory or file. Immediately loads and traverses the PDF structure including pages and page contents (if 39 // not encrypted). 40 func NewPdfReader(rs io.ReadSeeker) (*PdfReader, error) { 41 pdfReader := &PdfReader{} 42 pdfReader.traversed = map[PdfObject]bool{} 43 44 pdfReader.modelManager = NewModelManager() 45 46 // Create the parser, loads the cross reference table and trailer. 47 parser, err := NewParser(rs) 48 if err != nil { 49 return nil, err 50 } 51 pdfReader.parser = parser 52 53 isEncrypted, err := pdfReader.IsEncrypted() 54 if err != nil { 55 return nil, err 56 } 57 58 // Load pdf doc structure if not encrypted. 59 if !isEncrypted { 60 err = pdfReader.loadStructure() 61 if err != nil { 62 return nil, err 63 } 64 } 65 66 return pdfReader, nil 67 } 68 69 // IsEncrypted returns true if the PDF file is encrypted. 70 func (this *PdfReader) IsEncrypted() (bool, error) { 71 return this.parser.IsEncrypted() 72 } 73 74 // GetEncryptionMethod returns a string containing some information about the encryption method used. 75 // XXX/TODO: May be better to return a standardized struct with information. 76 func (this *PdfReader) GetEncryptionMethod() string { 77 crypter := this.parser.GetCrypter() 78 str := crypter.Filter + " - " 79 80 if crypter.V == 0 { 81 str += "Undocumented algorithm" 82 } else if crypter.V == 1 { 83 // RC4 or AES (bits: 40) 84 str += "RC4: 40 bits" 85 } else if crypter.V == 2 { 86 str += fmt.Sprintf("RC4: %d bits", crypter.Length) 87 } else if crypter.V == 3 { 88 str += "Unpublished algorithm" 89 } else if crypter.V >= 4 { 90 // Look at CF, StmF, StrF 91 str += fmt.Sprintf("Stream filter: %s - String filter: %s", crypter.StreamFilter, crypter.StringFilter) 92 str += "; Crypt filters:" 93 for name, cf := range crypter.CryptFilters { 94 str += fmt.Sprintf(" - %s: %s (%d)", name, cf.Cfm, cf.Length) 95 } 96 } 97 perms := crypter.GetAccessPermissions() 98 str += fmt.Sprintf(" - %#v", perms) 99 100 return str 101 } 102 103 // Decrypt decrypts the PDF file with a specified password. Also tries to 104 // decrypt with an empty password. Returns true if successful, 105 // false otherwise. 106 func (this *PdfReader) Decrypt(password []byte) (bool, error) { 107 success, err := this.parser.Decrypt(password) 108 if err != nil { 109 return false, err 110 } 111 if !success { 112 return false, nil 113 } 114 115 err = this.loadStructure() 116 if err != nil { 117 common.Log.Debug("ERROR: Fail to load structure (%s)", err) 118 return false, err 119 } 120 121 return true, nil 122 } 123 124 // CheckAccessRights checks access rights and permissions for a specified password. If either user/owner 125 // password is specified, full rights are granted, otherwise the access rights are specified by the 126 // Permissions flag. 127 // 128 // The bool flag indicates that the user can access and view the file. 129 // The AccessPermissions shows what access the user has for editing etc. 130 // An error is returned if there was a problem performing the authentication. 131 func (this *PdfReader) CheckAccessRights(password []byte) (bool, AccessPermissions, error) { 132 return this.parser.CheckAccessRights(password) 133 } 134 135 // Loads the structure of the pdf file: pages, outlines, etc. 136 func (this *PdfReader) loadStructure() error { 137 if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() { 138 return fmt.Errorf("File need to be decrypted first") 139 } 140 141 trailerDict := this.parser.GetTrailer() 142 if trailerDict == nil { 143 return fmt.Errorf("Missing trailer") 144 } 145 146 // Catalog. 147 root, ok := trailerDict.Get("Root").(*PdfObjectReference) 148 if !ok { 149 return fmt.Errorf("Invalid Root (trailer: %s)", *trailerDict) 150 } 151 oc, err := this.parser.LookupByReference(*root) 152 if err != nil { 153 common.Log.Debug("ERROR: Failed to read root element catalog: %s", err) 154 return err 155 } 156 pcatalog, ok := oc.(*PdfIndirectObject) 157 if !ok { 158 common.Log.Debug("ERROR: Missing catalog: (root %q) (trailer %s)", oc, *trailerDict) 159 return errors.New("Missing catalog") 160 } 161 catalog, ok := (*pcatalog).PdfObject.(*PdfObjectDictionary) 162 if !ok { 163 common.Log.Debug("ERROR: Invalid catalog (%s)", pcatalog.PdfObject) 164 return errors.New("Invalid catalog") 165 } 166 common.Log.Trace("Catalog: %s", catalog) 167 168 // Pages. 169 pagesRef, ok := catalog.Get("Pages").(*PdfObjectReference) 170 if !ok { 171 return errors.New("Pages in catalog should be a reference") 172 } 173 op, err := this.parser.LookupByReference(*pagesRef) 174 if err != nil { 175 common.Log.Debug("ERROR: Failed to read pages") 176 return err 177 } 178 ppages, ok := op.(*PdfIndirectObject) 179 if !ok { 180 common.Log.Debug("ERROR: Pages object invalid") 181 common.Log.Debug("op: %p", ppages) 182 return errors.New("Pages object invalid") 183 } 184 pages, ok := ppages.PdfObject.(*PdfObjectDictionary) 185 if !ok { 186 common.Log.Debug("ERROR: Pages object invalid (%s)", ppages) 187 return errors.New("Pages object invalid") 188 } 189 pageCount, ok := pages.Get("Count").(*PdfObjectInteger) 190 if !ok { 191 common.Log.Debug("ERROR: Pages count object invalid") 192 return errors.New("Pages count invalid") 193 } 194 195 this.root = root 196 this.catalog = catalog 197 this.pages = pages 198 this.pageCount = int(*pageCount) 199 this.pageList = []*PdfIndirectObject{} 200 201 traversedPageNodes := map[PdfObject]bool{} 202 err = this.buildPageList(ppages, nil, traversedPageNodes) 203 if err != nil { 204 return err 205 } 206 common.Log.Trace("---") 207 common.Log.Trace("TOC") 208 common.Log.Trace("Pages") 209 common.Log.Trace("%d: %s", len(this.pageList), this.pageList) 210 211 // Outlines. 212 this.outlineTree, err = this.loadOutlines() 213 if err != nil { 214 common.Log.Debug("ERROR: Failed to build outline tree (%s)", err) 215 return err 216 } 217 218 // Load interactive forms and fields. 219 this.AcroForm, err = this.loadForms() 220 if err != nil { 221 return err 222 } 223 224 return nil 225 } 226 227 // Trace to object. Keeps a list of already visited references to avoid circular references. 228 // 229 // Example circular reference. 230 // 1 0 obj << /Next 2 0 R >> 231 // 2 0 obj << /Next 1 0 R >> 232 // 233 func (this *PdfReader) traceToObjectWrapper(obj PdfObject, refList map[*PdfObjectReference]bool) (PdfObject, error) { 234 // Keep a list of references to avoid circular references. 235 236 ref, isRef := obj.(*PdfObjectReference) 237 if isRef { 238 // Make sure not already visited (circular ref). 239 if _, alreadyTraversed := refList[ref]; alreadyTraversed { 240 return nil, errors.New("Circular reference") 241 } 242 refList[ref] = true 243 obj, err := this.parser.LookupByReference(*ref) 244 if err != nil { 245 return nil, err 246 } 247 return this.traceToObjectWrapper(obj, refList) 248 } 249 250 // Not a reference, an object. Can be indirect or any direct pdf object (other than reference). 251 return obj, nil 252 } 253 254 func (this *PdfReader) traceToObject(obj PdfObject) (PdfObject, error) { 255 refList := map[*PdfObjectReference]bool{} 256 return this.traceToObjectWrapper(obj, refList) 257 } 258 259 func (this *PdfReader) loadOutlines() (*PdfOutlineTreeNode, error) { 260 if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() { 261 return nil, fmt.Errorf("File need to be decrypted first") 262 } 263 264 // Has outlines? Otherwise return an empty outlines structure. 265 catalog := this.catalog 266 outlinesObj := catalog.Get("Outlines") 267 if outlinesObj == nil { 268 return nil, nil 269 } 270 271 common.Log.Trace("-Has outlines") 272 // Trace references to the object. 273 outlineRootObj, err := this.traceToObject(outlinesObj) 274 if err != nil { 275 common.Log.Debug("ERROR: Failed to read outlines") 276 return nil, err 277 } 278 common.Log.Trace("Outline root: %v", outlineRootObj) 279 280 if _, isNull := outlineRootObj.(*PdfObjectNull); isNull { 281 common.Log.Trace("Outline root is null - no outlines") 282 return nil, nil 283 } 284 285 outlineRoot, ok := outlineRootObj.(*PdfIndirectObject) 286 if !ok { 287 return nil, errors.New("Outline root should be an indirect object") 288 } 289 290 dict, ok := outlineRoot.PdfObject.(*PdfObjectDictionary) 291 if !ok { 292 return nil, errors.New("Outline indirect object should contain a dictionary") 293 } 294 295 common.Log.Trace("Outline root dict: %v", dict) 296 297 outlineTree, _, err := this.buildOutlineTree(outlineRoot, nil, nil) 298 if err != nil { 299 return nil, err 300 } 301 common.Log.Trace("Resulting outline tree: %v", outlineTree) 302 303 return outlineTree, nil 304 } 305 306 // Recursive build outline tree. 307 // prev PdfObject, 308 // Input: The indirect object containing an Outlines or Outline item dictionary. 309 // Parent, Prev are the parent or previous node in the hierarchy. 310 // The function returns the corresponding tree node and the last node which is used 311 // for setting the Last pointer of the tree node structures. 312 func (this *PdfReader) buildOutlineTree(obj PdfObject, parent *PdfOutlineTreeNode, prev *PdfOutlineTreeNode) (*PdfOutlineTreeNode, *PdfOutlineTreeNode, error) { 313 container, isInd := obj.(*PdfIndirectObject) 314 if !isInd { 315 return nil, nil, fmt.Errorf("Outline container not an indirect object %T", obj) 316 } 317 dict, ok := container.PdfObject.(*PdfObjectDictionary) 318 if !ok { 319 return nil, nil, errors.New("Not a dictionary object") 320 } 321 common.Log.Trace("build outline tree: dict: %v (%v) p: %p", dict, container, container) 322 323 if obj := dict.Get("Title"); obj != nil { 324 // Outline item has a title. (required) 325 outlineItem, err := this.newPdfOutlineItemFromIndirectObject(container) 326 if err != nil { 327 return nil, nil, err 328 } 329 outlineItem.Parent = parent 330 outlineItem.Prev = prev 331 332 if firstObj := dict.Get("First"); firstObj != nil { 333 firstObj, err = this.traceToObject(firstObj) 334 if err != nil { 335 return nil, nil, err 336 } 337 if _, isNull := firstObj.(*PdfObjectNull); !isNull { 338 first, last, err := this.buildOutlineTree(firstObj, &outlineItem.PdfOutlineTreeNode, nil) 339 if err != nil { 340 return nil, nil, err 341 } 342 outlineItem.First = first 343 outlineItem.Last = last 344 } 345 } 346 347 // Resolve the reference to next 348 if nextObj := dict.Get("Next"); nextObj != nil { 349 nextObj, err = this.traceToObject(nextObj) 350 if err != nil { 351 return nil, nil, err 352 } 353 if _, isNull := nextObj.(*PdfObjectNull); !isNull { 354 next, last, err := this.buildOutlineTree(nextObj, parent, &outlineItem.PdfOutlineTreeNode) 355 if err != nil { 356 return nil, nil, err 357 } 358 outlineItem.Next = next 359 return &outlineItem.PdfOutlineTreeNode, last, nil 360 } 361 } 362 363 return &outlineItem.PdfOutlineTreeNode, &outlineItem.PdfOutlineTreeNode, nil 364 } else { 365 // Outline dictionary (structure element). 366 367 outline, err := newPdfOutlineFromIndirectObject(container) 368 if err != nil { 369 return nil, nil, err 370 } 371 outline.Parent = parent 372 //outline.Prev = parent 373 374 if firstObj := dict.Get("First"); firstObj != nil { 375 // Has children... 376 firstObj, err = this.traceToObject(firstObj) 377 if err != nil { 378 return nil, nil, err 379 } 380 if _, isNull := firstObj.(*PdfObjectNull); !isNull { 381 first, last, err := this.buildOutlineTree(firstObj, &outline.PdfOutlineTreeNode, nil) 382 if err != nil { 383 return nil, nil, err 384 } 385 outline.First = first 386 outline.Last = last 387 } 388 } 389 390 /* 391 if nextObj, hasNext := (*dict)["Next"]; hasNext { 392 nextObj, err = this.traceToObject(nextObj) 393 if err != nil { 394 return nil, nil, err 395 } 396 if _, isNull := nextObj.(*PdfObjectNull); !isNull { 397 next, last, err := this.buildOutlineTree(nextObj, parent, &outline.PdfOutlineTreeNode) 398 if err != nil { 399 return nil, nil, err 400 } 401 outline.Next = next 402 return &outline.PdfOutlineTreeNode, last, nil 403 } 404 }*/ 405 406 return &outline.PdfOutlineTreeNode, &outline.PdfOutlineTreeNode, nil 407 } 408 } 409 410 // GetOutlineTree returns the outline tree. 411 func (this *PdfReader) GetOutlineTree() *PdfOutlineTreeNode { 412 return this.outlineTree 413 } 414 415 // GetOutlinesFlattened returns a flattened list of tree nodes and titles. 416 func (this *PdfReader) GetOutlinesFlattened() ([]*PdfOutlineTreeNode, []string, error) { 417 outlineNodeList := []*PdfOutlineTreeNode{} 418 flattenedTitleList := []string{} 419 420 // Recursive flattening function. 421 var flattenFunc func(*PdfOutlineTreeNode, *[]*PdfOutlineTreeNode, *[]string, int) 422 flattenFunc = func(node *PdfOutlineTreeNode, outlineList *[]*PdfOutlineTreeNode, titleList *[]string, depth int) { 423 if node == nil { 424 return 425 } 426 if node.context == nil { 427 common.Log.Debug("ERROR: Missing node.context") // Should not happen ever. 428 return 429 } 430 431 if item, isItem := node.context.(*PdfOutlineItem); isItem { 432 *outlineList = append(*outlineList, &item.PdfOutlineTreeNode) 433 title := strings.Repeat(" ", depth*2) + string(*item.Title) 434 *titleList = append(*titleList, title) 435 if item.Next != nil { 436 flattenFunc(item.Next, outlineList, titleList, depth) 437 } 438 } 439 440 if node.First != nil { 441 title := strings.Repeat(" ", depth*2) + "+" 442 *titleList = append(*titleList, title) 443 flattenFunc(node.First, outlineList, titleList, depth+1) 444 } 445 } 446 flattenFunc(this.outlineTree, &outlineNodeList, &flattenedTitleList, 0) 447 return outlineNodeList, flattenedTitleList, nil 448 } 449 450 // loadForms loads the AcroForm. 451 func (this *PdfReader) loadForms() (*PdfAcroForm, error) { 452 if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() { 453 return nil, fmt.Errorf("File need to be decrypted first") 454 } 455 456 // Has forms? 457 catalog := this.catalog 458 obj := catalog.Get("AcroForm") 459 if obj == nil { 460 // Nothing to load. 461 return nil, nil 462 } 463 var err error 464 obj, err = this.traceToObject(obj) 465 if err != nil { 466 return nil, err 467 } 468 obj = TraceToDirectObject(obj) 469 if _, isNull := obj.(*PdfObjectNull); isNull { 470 common.Log.Trace("Acroform is a null object (empty)\n") 471 return nil, nil 472 } 473 474 formsDict, ok := obj.(*PdfObjectDictionary) 475 if !ok { 476 common.Log.Debug("Invalid AcroForm entry %T", obj) 477 common.Log.Debug("Does not have forms") 478 return nil, fmt.Errorf("Invalid acroform entry %T", obj) 479 } 480 common.Log.Trace("Has Acro forms") 481 // Load it. 482 483 // Ensure we have access to everything. 484 common.Log.Trace("Traverse the Acroforms structure") 485 err = this.traverseObjectData(formsDict) 486 if err != nil { 487 common.Log.Debug("ERROR: Unable to traverse AcroForms (%s)", err) 488 return nil, err 489 } 490 491 // Create the acro forms object. 492 acroForm, err := this.newPdfAcroFormFromDict(formsDict) 493 if err != nil { 494 return nil, err 495 } 496 497 return acroForm, nil 498 } 499 500 func (this *PdfReader) lookupPageByObject(obj PdfObject) (*PdfPage, error) { 501 // can be indirect, direct, or reference 502 // look up the corresponding page 503 return nil, errors.New("Page not found") 504 } 505 506 // Build the table of contents. 507 // tree, ex: Pages -> Pages -> Pages -> Page 508 // Traverse through the whole thing recursively. 509 func (this *PdfReader) buildPageList(node *PdfIndirectObject, parent *PdfIndirectObject, traversedPageNodes map[PdfObject]bool) error { 510 if node == nil { 511 return nil 512 } 513 514 if _, alreadyTraversed := traversedPageNodes[node]; alreadyTraversed { 515 common.Log.Debug("Cyclic recursion, skipping") 516 return nil 517 } 518 traversedPageNodes[node] = true 519 520 nodeDict, ok := node.PdfObject.(*PdfObjectDictionary) 521 if !ok { 522 return errors.New("Node not a dictionary") 523 } 524 525 objType, ok := (*nodeDict).Get("Type").(*PdfObjectName) 526 if !ok { 527 return errors.New("Node missing Type (Required)") 528 } 529 common.Log.Trace("buildPageList node type: %s", *objType) 530 if *objType == "Page" { 531 p, err := this.newPdfPageFromDict(nodeDict) 532 if err != nil { 533 return err 534 } 535 p.setContainer(node) 536 537 if parent != nil { 538 // Set the parent (in case missing or incorrect). 539 nodeDict.Set("Parent", parent) 540 } 541 this.pageList = append(this.pageList, node) 542 this.PageList = append(this.PageList, p) 543 544 return nil 545 } 546 if *objType != "Pages" { 547 common.Log.Debug("ERROR: Table of content containing non Page/Pages object! (%s)", objType) 548 return errors.New("Table of content containing non Page/Pages object!") 549 } 550 551 // A Pages object. Update the parent. 552 if parent != nil { 553 nodeDict.Set("Parent", parent) 554 } 555 556 // Resolve the object recursively. 557 err := this.traverseObjectData(node) 558 if err != nil { 559 return err 560 } 561 562 kidsObj, err := this.parser.Trace(nodeDict.Get("Kids")) 563 if err != nil { 564 common.Log.Debug("ERROR: Failed loading Kids object") 565 return err 566 } 567 568 var kids *PdfObjectArray 569 kids, ok = kidsObj.(*PdfObjectArray) 570 if !ok { 571 kidsIndirect, isIndirect := kidsObj.(*PdfIndirectObject) 572 if !isIndirect { 573 return errors.New("Invalid Kids object") 574 } 575 kids, ok = kidsIndirect.PdfObject.(*PdfObjectArray) 576 if !ok { 577 return errors.New("Invalid Kids indirect object") 578 } 579 } 580 common.Log.Trace("Kids: %s", kids) 581 for idx, child := range *kids { 582 child, ok := child.(*PdfIndirectObject) 583 if !ok { 584 common.Log.Debug("ERROR: Page not indirect object - (%s)", child) 585 return errors.New("Page not indirect object") 586 } 587 (*kids)[idx] = child 588 err = this.buildPageList(child, node, traversedPageNodes) 589 if err != nil { 590 return err 591 } 592 } 593 594 return nil 595 } 596 597 // GetNumPages returns the number of pages in the document. 598 func (this *PdfReader) GetNumPages() (int, error) { 599 if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() { 600 return 0, fmt.Errorf("File need to be decrypted first") 601 } 602 return len(this.pageList), nil 603 } 604 605 // Resolves a reference, returning the object and indicates whether or not 606 // it was cached. 607 func (this *PdfReader) resolveReference(ref *PdfObjectReference) (PdfObject, bool, error) { 608 cachedObj, isCached := this.parser.ObjCache[int(ref.ObjectNumber)] 609 if !isCached { 610 common.Log.Trace("Reader Lookup ref: %s", ref) 611 obj, err := this.parser.LookupByReference(*ref) 612 if err != nil { 613 return nil, false, err 614 } 615 this.parser.ObjCache[int(ref.ObjectNumber)] = obj 616 return obj, false, nil 617 } 618 return cachedObj, true, nil 619 } 620 621 /* 622 * Recursively traverse through the page object data and look up 623 * references to indirect objects. 624 * 625 * GH: Are we fully protected against circular references? (Add tests). 626 */ 627 func (this *PdfReader) traverseObjectData(o PdfObject) error { 628 common.Log.Trace("Traverse object data") 629 if _, isTraversed := this.traversed[o]; isTraversed { 630 common.Log.Trace("-Already traversed...") 631 return nil 632 } 633 this.traversed[o] = true 634 635 if io, isIndirectObj := o.(*PdfIndirectObject); isIndirectObj { 636 common.Log.Trace("io: %s", io) 637 common.Log.Trace("- %s", io.PdfObject) 638 err := this.traverseObjectData(io.PdfObject) 639 return err 640 } 641 642 if so, isStreamObj := o.(*PdfObjectStream); isStreamObj { 643 err := this.traverseObjectData(so.PdfObjectDictionary) 644 return err 645 } 646 647 if dict, isDict := o.(*PdfObjectDictionary); isDict { 648 common.Log.Trace("- dict: %s", dict) 649 for _, name := range dict.Keys() { 650 v := dict.Get(name) 651 if ref, isRef := v.(*PdfObjectReference); isRef { 652 resolvedObj, _, err := this.resolveReference(ref) 653 if err != nil { 654 return err 655 } 656 dict.Set(name, resolvedObj) 657 err = this.traverseObjectData(resolvedObj) 658 if err != nil { 659 return err 660 } 661 } else { 662 err := this.traverseObjectData(v) 663 if err != nil { 664 return err 665 } 666 } 667 } 668 return nil 669 } 670 671 if arr, isArray := o.(*PdfObjectArray); isArray { 672 common.Log.Trace("- array: %s", arr) 673 for idx, v := range *arr { 674 if ref, isRef := v.(*PdfObjectReference); isRef { 675 resolvedObj, _, err := this.resolveReference(ref) 676 if err != nil { 677 return err 678 } 679 (*arr)[idx] = resolvedObj 680 681 err = this.traverseObjectData(resolvedObj) 682 if err != nil { 683 return err 684 } 685 } else { 686 err := this.traverseObjectData(v) 687 if err != nil { 688 return err 689 } 690 } 691 } 692 return nil 693 } 694 695 if _, isRef := o.(*PdfObjectReference); isRef { 696 common.Log.Debug("ERROR: Reader tracing a reference!") 697 return errors.New("Reader tracing a reference!") 698 } 699 700 return nil 701 } 702 703 // GetPageAsIndirectObject returns an indirect object containing the page dictionary for a specified page number. 704 func (this *PdfReader) GetPageAsIndirectObject(pageNumber int) (PdfObject, error) { 705 if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() { 706 return nil, fmt.Errorf("File needs to be decrypted first") 707 } 708 if len(this.pageList) < pageNumber { 709 return nil, errors.New("Invalid page number (page count too short)") 710 } 711 page := this.pageList[pageNumber-1] 712 713 // Look up all references related to page and load everything. 714 err := this.traverseObjectData(page) 715 if err != nil { 716 return nil, err 717 } 718 common.Log.Trace("Page: %T %s", page, page) 719 common.Log.Trace("- %T %s", page.PdfObject, page.PdfObject) 720 721 return page, nil 722 } 723 724 // GetPage returns the PdfPage model for the specified page number. 725 func (this *PdfReader) GetPage(pageNumber int) (*PdfPage, error) { 726 if this.parser.GetCrypter() != nil && !this.parser.IsAuthenticated() { 727 return nil, fmt.Errorf("File needs to be decrypted first") 728 } 729 if len(this.pageList) < pageNumber { 730 return nil, errors.New("Invalid page number (page count too short)") 731 } 732 idx := pageNumber - 1 733 if idx < 0 { 734 return nil, fmt.Errorf("Page numbering must start at 1") 735 } 736 page := this.PageList[idx] 737 738 return page, nil 739 } 740 741 // GetOCProperties returns the optional content properties PdfObject. 742 func (this *PdfReader) GetOCProperties() (PdfObject, error) { 743 dict := this.catalog 744 obj := dict.Get("OCProperties") 745 var err error 746 obj, err = this.traceToObject(obj) 747 if err != nil { 748 return nil, err 749 } 750 751 // Resolve all references... 752 // Should be pretty safe. Should not be referencing to pages or 753 // any large structures. Local structures and references 754 // to OC Groups. 755 err = this.traverseObjectData(obj) 756 if err != nil { 757 return nil, err 758 } 759 760 return obj, nil 761 } 762 763 // Inspect inspects the object types, subtypes and content in the PDF file returning a map of 764 // object type to number of instances of each. 765 func (this *PdfReader) Inspect() (map[string]int, error) { 766 return this.parser.Inspect() 767 } 768 769 // GetObjectNums returns the object numbers of the PDF objects in the file 770 // Numbered objects are either indirect objects or stream objects. 771 // e.g. objNums := pdfReader.GetObjectNums() 772 // The underlying objects can then be accessed with 773 // pdfReader.GetIndirectObjectByNumber(objNums[0]) for the first available object. 774 func (r *PdfReader) GetObjectNums() []int { 775 return r.parser.GetObjectNums() 776 } 777 778 // GetIndirectObjectByNumber retrieves and returns a specific PdfObject by object number. 779 func (this *PdfReader) GetIndirectObjectByNumber(number int) (PdfObject, error) { 780 obj, err := this.parser.LookupByNumber(number) 781 return obj, err 782 } 783 784 // GetTrailer returns the PDF's trailer dictionary. 785 func (this *PdfReader) GetTrailer() (*PdfObjectDictionary, error) { 786 trailerDict := this.parser.GetTrailer() 787 if trailerDict == nil { 788 return nil, errors.New("Trailer missing") 789 } 790 791 return trailerDict, nil 792 }