github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/core/utils.go (about) 1 /* 2 * This file is subject to the terms and conditions defined in 3 * file 'LICENSE.md', which is part of this source code package. 4 */ 5 6 package core 7 8 import ( 9 "errors" 10 "fmt" 11 "sort" 12 13 "github.com/unidoc/unidoc/common" 14 ) 15 16 // Check slice range to make sure within bounds for accessing: 17 // slice[a:b] where sliceLen=len(slice). 18 func checkBounds(sliceLen, a, b int) error { 19 if a < 0 || a > sliceLen { 20 return errors.New("Slice index a out of bounds") 21 } 22 if b < a { 23 return errors.New("Invalid slice index b < a") 24 } 25 if b > sliceLen { 26 return errors.New("Slice index b out of bounds") 27 } 28 29 return nil 30 } 31 32 // Inspect analyzes the document object structure. 33 func (parser *PdfParser) Inspect() (map[string]int, error) { 34 return parser.inspect() 35 } 36 37 // GetObjectNums returns a sorted list of object numbers of the PDF objects in the file. 38 func (parser *PdfParser) GetObjectNums() []int { 39 objNums := []int{} 40 for _, x := range parser.xrefs { 41 objNums = append(objNums, x.objectNumber) 42 } 43 44 // Sort the object numbers to give consistent ordering of PDF objects in output. 45 // Needed since parser.xrefs is a map. 46 sort.Ints(objNums) 47 48 return objNums 49 } 50 51 func getUniDocVersion() string { 52 return common.Version 53 } 54 55 /* 56 * Inspect object types. 57 * Go through all objects in the cross ref table and detect the types. 58 * Mostly for debugging purposes and inspecting odd PDF files. 59 */ 60 func (parser *PdfParser) inspect() (map[string]int, error) { 61 common.Log.Trace("--------INSPECT ----------") 62 common.Log.Trace("Xref table:") 63 64 objTypes := map[string]int{} 65 objCount := 0 66 failedCount := 0 67 68 keys := []int{} 69 for k := range parser.xrefs { 70 keys = append(keys, k) 71 } 72 sort.Ints(keys) 73 74 i := 0 75 for _, k := range keys { 76 xref := parser.xrefs[k] 77 if xref.objectNumber == 0 { 78 continue 79 } 80 objCount++ 81 common.Log.Trace("==========") 82 common.Log.Trace("Looking up object number: %d", xref.objectNumber) 83 o, err := parser.LookupByNumber(xref.objectNumber) 84 if err != nil { 85 common.Log.Trace("ERROR: Fail to lookup obj %d (%s)", xref.objectNumber, err) 86 failedCount++ 87 continue 88 } 89 90 common.Log.Trace("obj: %s", o) 91 92 iobj, isIndirect := o.(*PdfIndirectObject) 93 if isIndirect { 94 common.Log.Trace("IND OOBJ %d: %s", xref.objectNumber, iobj) 95 dict, isDict := iobj.PdfObject.(*PdfObjectDictionary) 96 if isDict { 97 // Check if has Type parameter. 98 if ot, has := dict.Get("Type").(*PdfObjectName); has { 99 otype := string(*ot) 100 common.Log.Trace("---> Obj type: %s", otype) 101 _, isDefined := objTypes[otype] 102 if isDefined { 103 objTypes[otype]++ 104 } else { 105 objTypes[otype] = 1 106 } 107 } else if ot, has := dict.Get("Subtype").(*PdfObjectName); has { 108 // Check if subtype 109 otype := string(*ot) 110 common.Log.Trace("---> Obj subtype: %s", otype) 111 _, isDefined := objTypes[otype] 112 if isDefined { 113 objTypes[otype]++ 114 } else { 115 objTypes[otype] = 1 116 } 117 } 118 if val, has := dict.Get("S").(*PdfObjectName); has && *val == "JavaScript" { 119 // Check if Javascript. 120 _, isDefined := objTypes["JavaScript"] 121 if isDefined { 122 objTypes["JavaScript"]++ 123 } else { 124 objTypes["JavaScript"] = 1 125 } 126 } 127 128 } 129 } else if sobj, isStream := o.(*PdfObjectStream); isStream { 130 if otype, ok := sobj.PdfObjectDictionary.Get("Type").(*PdfObjectName); ok { 131 common.Log.Trace("--> Stream object type: %s", *otype) 132 k := string(*otype) 133 if _, isDefined := objTypes[k]; isDefined { 134 objTypes[k]++ 135 } else { 136 objTypes[k] = 1 137 } 138 } 139 } else { // Direct. 140 dict, isDict := o.(*PdfObjectDictionary) 141 if isDict { 142 ot, isName := dict.Get("Type").(*PdfObjectName) 143 if isName { 144 otype := string(*ot) 145 common.Log.Trace("--- obj type %s", otype) 146 objTypes[otype]++ 147 } 148 } 149 common.Log.Trace("DIRECT OBJ %d: %s", xref.objectNumber, o) 150 } 151 152 i++ 153 } 154 common.Log.Trace("--------EOF INSPECT ----------") 155 common.Log.Trace("=======") 156 common.Log.Trace("Object count: %d", objCount) 157 common.Log.Trace("Failed lookup: %d", failedCount) 158 for t, c := range objTypes { 159 common.Log.Trace("%s: %d", t, c) 160 } 161 common.Log.Trace("=======") 162 163 if len(parser.xrefs) < 1 { 164 common.Log.Debug("ERROR: This document is invalid (xref table missing!)") 165 return nil, fmt.Errorf("Invalid document (xref table missing)") 166 } 167 168 fontObjs, ok := objTypes["Font"] 169 if !ok || fontObjs < 2 { 170 common.Log.Trace("This document is probably scanned!") 171 } else { 172 common.Log.Trace("This document is valid for extraction!") 173 } 174 175 return objTypes, nil 176 } 177 178 func absInt(x int) int { 179 if x < 0 { 180 return -x 181 } else { 182 return x 183 } 184 }