github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/core/primitives.go (about) 1 /* 2 * This file is subject to the terms and conditions defined in 3 * file 'LICENSE.md', which is part of this source code package. 4 */ 5 6 package core 7 8 import ( 9 "bytes" 10 "fmt" 11 12 "github.com/unidoc/unidoc/common" 13 ) 14 15 // PdfObject is an interface which all primitive PDF objects must implement. 16 type PdfObject interface { 17 // Output a string representation of the primitive (for debugging). 18 String() string 19 20 // Output the PDF primitive as written to file as expected by the standard. 21 DefaultWriteString() string 22 } 23 24 // PdfObjectBool represents the primitive PDF boolean object. 25 type PdfObjectBool bool 26 27 // PdfObjectInteger represents the primitive PDF integer numerical object. 28 type PdfObjectInteger int64 29 30 // PdfObjectFloat represents the primitive PDF floating point numerical object. 31 type PdfObjectFloat float64 32 33 // PdfObjectString represents the primitive PDF string object. 34 // TODO (v3): Change to a struct and add a flag for hex/plaintext. 35 type PdfObjectString string 36 37 // PdfObjectName represents the primitive PDF name object. 38 type PdfObjectName string 39 40 // PdfObjectArray represents the primitive PDF array object. 41 type PdfObjectArray []PdfObject 42 43 // PdfObjectDictionary represents the primitive PDF dictionary/map object. 44 type PdfObjectDictionary struct { 45 dict map[PdfObjectName]PdfObject 46 keys []PdfObjectName 47 } 48 49 // PdfObjectNull represents the primitive PDF null object. 50 type PdfObjectNull struct{} 51 52 // PdfObjectReference represents the primitive PDF reference object. 53 type PdfObjectReference struct { 54 ObjectNumber int64 55 GenerationNumber int64 56 } 57 58 // PdfIndirectObject represents the primitive PDF indirect object. 59 type PdfIndirectObject struct { 60 PdfObjectReference 61 PdfObject 62 } 63 64 // PdfObjectStream represents the primitive PDF Object stream. 65 type PdfObjectStream struct { 66 PdfObjectReference 67 *PdfObjectDictionary 68 Stream []byte 69 } 70 71 // MakeDict creates and returns an empty PdfObjectDictionary. 72 func MakeDict() *PdfObjectDictionary { 73 d := &PdfObjectDictionary{} 74 d.dict = map[PdfObjectName]PdfObject{} 75 d.keys = []PdfObjectName{} 76 return d 77 } 78 79 // MakeName creates a PdfObjectName from a string. 80 func MakeName(s string) *PdfObjectName { 81 name := PdfObjectName(s) 82 return &name 83 } 84 85 // MakeInteger creates a PdfObjectInteger from an int64. 86 func MakeInteger(val int64) *PdfObjectInteger { 87 num := PdfObjectInteger(val) 88 return &num 89 } 90 91 // MakeArray creates an PdfObjectArray from a list of PdfObjects. 92 func MakeArray(objects ...PdfObject) *PdfObjectArray { 93 array := PdfObjectArray{} 94 for _, obj := range objects { 95 array = append(array, obj) 96 } 97 return &array 98 } 99 100 // MakeArrayFromIntegers creates an PdfObjectArray from a slice of ints, where each array element is 101 // an PdfObjectInteger. 102 func MakeArrayFromIntegers(vals []int) *PdfObjectArray { 103 array := PdfObjectArray{} 104 for _, val := range vals { 105 array = append(array, MakeInteger(int64(val))) 106 } 107 return &array 108 } 109 110 // MakeArrayFromIntegers64 creates an PdfObjectArray from a slice of int64s, where each array element 111 // is an PdfObjectInteger. 112 func MakeArrayFromIntegers64(vals []int64) *PdfObjectArray { 113 array := PdfObjectArray{} 114 for _, val := range vals { 115 array = append(array, MakeInteger(val)) 116 } 117 return &array 118 } 119 120 // MakeArrayFromFloats creates an PdfObjectArray from a slice of float64s, where each array element is an 121 // PdfObjectFloat. 122 func MakeArrayFromFloats(vals []float64) *PdfObjectArray { 123 array := PdfObjectArray{} 124 for _, val := range vals { 125 array = append(array, MakeFloat(val)) 126 } 127 return &array 128 } 129 130 // MakeBool creates an PdfObjectBool from a bool. 131 func MakeBool(val bool) *PdfObjectBool { 132 v := PdfObjectBool(val) 133 return &v 134 } 135 136 // MakeFloat creates an PdfObjectFloat from a float64. 137 func MakeFloat(val float64) *PdfObjectFloat { 138 num := PdfObjectFloat(val) 139 return &num 140 } 141 142 // MakeString creates an PdfObjectString from a string. 143 func MakeString(s string) *PdfObjectString { 144 str := PdfObjectString(s) 145 return &str 146 } 147 148 // MakeNull creates an PdfObjectNull. 149 func MakeNull() *PdfObjectNull { 150 null := PdfObjectNull{} 151 return &null 152 } 153 154 // MakeIndirectObject creates an PdfIndirectObject with a specified direct object PdfObject. 155 func MakeIndirectObject(obj PdfObject) *PdfIndirectObject { 156 ind := &PdfIndirectObject{} 157 ind.PdfObject = obj 158 return ind 159 } 160 161 // MakeStream creates an PdfObjectStream with specified contents and encoding. If encoding is nil, then raw encoding 162 // will be used (i.e. no encoding applied). 163 func MakeStream(contents []byte, encoder StreamEncoder) (*PdfObjectStream, error) { 164 stream := &PdfObjectStream{} 165 166 if encoder == nil { 167 encoder = NewRawEncoder() 168 } 169 170 stream.PdfObjectDictionary = encoder.MakeStreamDict() 171 172 encoded, err := encoder.EncodeBytes(contents) 173 if err != nil { 174 return nil, err 175 } 176 stream.PdfObjectDictionary.Set("Length", MakeInteger(int64(len(encoded)))) 177 178 stream.Stream = encoded 179 return stream, nil 180 } 181 182 func (bool *PdfObjectBool) String() string { 183 if *bool { 184 return "true" 185 } else { 186 return "false" 187 } 188 } 189 190 // DefaultWriteString outputs the object as it is to be written to file. 191 func (bool *PdfObjectBool) DefaultWriteString() string { 192 if *bool { 193 return "true" 194 } else { 195 return "false" 196 } 197 } 198 199 func (int *PdfObjectInteger) String() string { 200 return fmt.Sprintf("%d", *int) 201 } 202 203 // DefaultWriteString outputs the object as it is to be written to file. 204 func (int *PdfObjectInteger) DefaultWriteString() string { 205 return fmt.Sprintf("%d", *int) 206 } 207 208 func (float *PdfObjectFloat) String() string { 209 return fmt.Sprintf("%f", *float) 210 } 211 212 // DefaultWriteString outputs the object as it is to be written to file. 213 func (float *PdfObjectFloat) DefaultWriteString() string { 214 return fmt.Sprintf("%f", *float) 215 } 216 217 func (str *PdfObjectString) String() string { 218 return string(*str) 219 } 220 221 // DefaultWriteString outputs the object as it is to be written to file. 222 func (str *PdfObjectString) DefaultWriteString() string { 223 var output bytes.Buffer 224 225 escapeSequences := map[byte]string{ 226 '\n': "\\n", 227 '\r': "\\r", 228 '\t': "\\t", 229 '\b': "\\b", 230 '\f': "\\f", 231 '(': "\\(", 232 ')': "\\)", 233 '\\': "\\\\", 234 } 235 236 output.WriteString("(") 237 for i := 0; i < len(*str); i++ { 238 char := (*str)[i] 239 if escStr, useEsc := escapeSequences[char]; useEsc { 240 output.WriteString(escStr) 241 } else { 242 output.WriteByte(char) 243 } 244 } 245 output.WriteString(")") 246 247 return output.String() 248 } 249 250 func (name *PdfObjectName) String() string { 251 return fmt.Sprintf("%s", string(*name)) 252 } 253 254 // DefaultWriteString outputs the object as it is to be written to file. 255 func (name *PdfObjectName) DefaultWriteString() string { 256 var output bytes.Buffer 257 258 if len(*name) > 127 { 259 common.Log.Debug("ERROR: Name too long (%s)", *name) 260 } 261 262 output.WriteString("/") 263 for i := 0; i < len(*name); i++ { 264 char := (*name)[i] 265 if !IsPrintable(char) || char == '#' || IsDelimiter(char) { 266 output.WriteString(fmt.Sprintf("#%.2x", char)) 267 } else { 268 output.WriteByte(char) 269 } 270 } 271 272 return output.String() 273 } 274 275 // ToFloat64Array returns a slice of all elements in the array as a float64 slice. An error is returned if the array 276 // contains non-numeric objects (each element can be either PdfObjectInteger or PdfObjectFloat). 277 func (array *PdfObjectArray) ToFloat64Array() ([]float64, error) { 278 vals := []float64{} 279 280 for _, obj := range *array { 281 if number, is := obj.(*PdfObjectInteger); is { 282 vals = append(vals, float64(*number)) 283 } else if number, is := obj.(*PdfObjectFloat); is { 284 vals = append(vals, float64(*number)) 285 } else { 286 return nil, fmt.Errorf("Type error") 287 } 288 } 289 290 return vals, nil 291 } 292 293 // ToIntegerArray returns a slice of all array elements as an int slice. An error is returned if the array contains 294 // non-integer objects. Each element can only be PdfObjectInteger. 295 func (array *PdfObjectArray) ToIntegerArray() ([]int, error) { 296 vals := []int{} 297 298 for _, obj := range *array { 299 if number, is := obj.(*PdfObjectInteger); is { 300 vals = append(vals, int(*number)) 301 } else { 302 return nil, fmt.Errorf("Type error") 303 } 304 } 305 306 return vals, nil 307 } 308 309 func (array *PdfObjectArray) String() string { 310 outStr := "[" 311 for ind, o := range *array { 312 outStr += o.String() 313 if ind < (len(*array) - 1) { 314 outStr += ", " 315 } 316 } 317 outStr += "]" 318 return outStr 319 } 320 321 // DefaultWriteString outputs the object as it is to be written to file. 322 func (array *PdfObjectArray) DefaultWriteString() string { 323 outStr := "[" 324 for ind, o := range *array { 325 outStr += o.DefaultWriteString() 326 if ind < (len(*array) - 1) { 327 outStr += " " 328 } 329 } 330 outStr += "]" 331 return outStr 332 } 333 334 // Append adds an PdfObject to the array. 335 func (array *PdfObjectArray) Append(obj PdfObject) { 336 *array = append(*array, obj) 337 } 338 339 func getNumberAsFloat(obj PdfObject) (float64, error) { 340 if fObj, ok := obj.(*PdfObjectFloat); ok { 341 return float64(*fObj), nil 342 } 343 344 if iObj, ok := obj.(*PdfObjectInteger); ok { 345 return float64(*iObj), nil 346 } 347 348 return 0, fmt.Errorf("Not a number") 349 } 350 351 // GetAsFloat64Slice returns the array as []float64 slice. 352 // Returns an error if not entirely numeric (only PdfObjectIntegers, PdfObjectFloats). 353 func (array *PdfObjectArray) GetAsFloat64Slice() ([]float64, error) { 354 slice := []float64{} 355 356 for _, obj := range *array { 357 obj := TraceToDirectObject(obj) 358 number, err := getNumberAsFloat(obj) 359 if err != nil { 360 return nil, fmt.Errorf("Array element not a number") 361 } 362 slice = append(slice, number) 363 } 364 365 return slice, nil 366 } 367 368 // Merge merges in key/values from another dictionary. Overwriting if has same keys. 369 func (d *PdfObjectDictionary) Merge(another *PdfObjectDictionary) { 370 if another != nil { 371 for _, key := range another.Keys() { 372 val := another.Get(key) 373 d.Set(key, val) 374 } 375 } 376 } 377 378 func (d *PdfObjectDictionary) String() string { 379 outStr := "Dict(" 380 for _, k := range d.keys { 381 v := d.dict[k] 382 outStr += fmt.Sprintf("\"%s\": %s, ", k, v.String()) 383 } 384 outStr += ")" 385 return outStr 386 } 387 388 // DefaultWriteString outputs the object as it is to be written to file. 389 func (d *PdfObjectDictionary) DefaultWriteString() string { 390 outStr := "<<" 391 for _, k := range d.keys { 392 v := d.dict[k] 393 common.Log.Trace("Writing k: %s %T %v %v", k, v, k, v) 394 outStr += k.DefaultWriteString() 395 outStr += " " 396 outStr += v.DefaultWriteString() 397 } 398 outStr += ">>" 399 return outStr 400 } 401 402 // Set sets the dictionary's key -> val mapping entry. Overwrites if key already set. 403 func (d *PdfObjectDictionary) Set(key PdfObjectName, val PdfObject) { 404 found := false 405 for _, k := range d.keys { 406 if k == key { 407 found = true 408 break 409 } 410 } 411 412 if !found { 413 d.keys = append(d.keys, key) 414 } 415 416 d.dict[key] = val 417 } 418 419 // Get returns the PdfObject corresponding to the specified key. 420 // Returns a nil value if the key is not set. 421 // 422 // The design is such that we only return 1 value. 423 // The reason is that, it will be easy to do type casts such as 424 // name, ok := dict.Get("mykey").(*PdfObjectName) 425 // if !ok .... 426 func (d *PdfObjectDictionary) Get(key PdfObjectName) PdfObject { 427 val, has := d.dict[key] 428 if !has { 429 return nil 430 } 431 return val 432 } 433 434 // Keys returns the list of keys in the dictionary. 435 func (d *PdfObjectDictionary) Keys() []PdfObjectName { 436 return d.keys 437 } 438 439 // Remove removes an element specified by key. 440 func (d *PdfObjectDictionary) Remove(key PdfObjectName) { 441 idx := -1 442 for i, k := range d.keys { 443 if k == key { 444 idx = i 445 break 446 } 447 } 448 449 if idx >= 0 { 450 // Found. Remove from key list and map. 451 d.keys = append(d.keys[:idx], d.keys[idx+1:]...) 452 delete(d.dict, key) 453 } 454 } 455 456 // SetIfNotNil sets the dictionary's key -> val mapping entry -IF- val is not nil. 457 // Note that we take care to perform a type switch. Otherwise if we would supply a nil value 458 // of another type, e.g. (PdfObjectArray*)(nil), then it would not be a PdfObject(nil) and thus 459 // would get set. 460 // 461 func (d *PdfObjectDictionary) SetIfNotNil(key PdfObjectName, val PdfObject) { 462 if val != nil { 463 switch t := val.(type) { 464 case *PdfObjectName: 465 if t != nil { 466 d.Set(key, val) 467 } 468 case *PdfObjectDictionary: 469 if t != nil { 470 d.Set(key, val) 471 } 472 case *PdfObjectStream: 473 if t != nil { 474 d.Set(key, val) 475 } 476 case *PdfObjectString: 477 if t != nil { 478 d.Set(key, val) 479 } 480 case *PdfObjectNull: 481 if t != nil { 482 d.Set(key, val) 483 } 484 case *PdfObjectInteger: 485 if t != nil { 486 d.Set(key, val) 487 } 488 case *PdfObjectArray: 489 if t != nil { 490 d.Set(key, val) 491 } 492 case *PdfObjectBool: 493 if t != nil { 494 d.Set(key, val) 495 } 496 case *PdfObjectFloat: 497 if t != nil { 498 d.Set(key, val) 499 } 500 case *PdfObjectReference: 501 if t != nil { 502 d.Set(key, val) 503 } 504 case *PdfIndirectObject: 505 if t != nil { 506 d.Set(key, val) 507 } 508 default: 509 common.Log.Error("ERROR: Unknown type: %T - should never happen!", val) 510 } 511 } 512 } 513 514 func (ref *PdfObjectReference) String() string { 515 return fmt.Sprintf("Ref(%d %d)", ref.ObjectNumber, ref.GenerationNumber) 516 } 517 518 // DefaultWriteString outputs the object as it is to be written to file. 519 func (ref *PdfObjectReference) DefaultWriteString() string { 520 return fmt.Sprintf("%d %d R", ref.ObjectNumber, ref.GenerationNumber) 521 } 522 523 func (ind *PdfIndirectObject) String() string { 524 // Avoid printing out the object, can cause problems with circular 525 // references. 526 return fmt.Sprintf("IObject:%d", (*ind).ObjectNumber) 527 } 528 529 // DefaultWriteString outputs the object as it is to be written to file. 530 func (ind *PdfIndirectObject) DefaultWriteString() string { 531 outStr := fmt.Sprintf("%d 0 R", (*ind).ObjectNumber) 532 return outStr 533 } 534 535 func (stream *PdfObjectStream) String() string { 536 return fmt.Sprintf("Object stream %d: %s", stream.ObjectNumber, stream.PdfObjectDictionary) 537 } 538 539 // DefaultWriteString outputs the object as it is to be written to file. 540 func (stream *PdfObjectStream) DefaultWriteString() string { 541 outStr := fmt.Sprintf("%d 0 R", (*stream).ObjectNumber) 542 return outStr 543 } 544 545 func (null *PdfObjectNull) String() string { 546 return "null" 547 } 548 549 // DefaultWriteString outputs the object as it is to be written to file. 550 func (null *PdfObjectNull) DefaultWriteString() string { 551 return "null" 552 } 553 554 // Handy functions to work with primitive objects. 555 556 // TraceMaxDepth specifies the maximum recursion depth allowed. 557 const TraceMaxDepth = 20 558 559 // TraceToDirectObject traces a PdfObject to a direct object. For example direct objects contained 560 // in indirect objects (can be double referenced even). 561 // 562 // Note: This function does not trace/resolve references. That needs to be done beforehand. 563 func TraceToDirectObject(obj PdfObject) PdfObject { 564 iobj, isIndirectObj := obj.(*PdfIndirectObject) 565 depth := 0 566 for isIndirectObj == true { 567 obj = iobj.PdfObject 568 iobj, isIndirectObj = obj.(*PdfIndirectObject) 569 depth++ 570 if depth > TraceMaxDepth { 571 common.Log.Error("ERROR: Trace depth level beyond %d - not going deeper!", TraceMaxDepth) 572 return nil 573 } 574 } 575 return obj 576 }