// Source: github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/core/encoding.go

/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */

package core

// Implement encoders for PDF. Currently supported:
// - Raw (Identity)
// - FlateDecode
// - LZW
// - DCT Decode (JPEG)
// - RunLength
// - ASCII Hex
// - ASCII85
// - CCITT Fax (dummy)
// - JBIG2 (dummy)
// - JPX (dummy)

import (
	"bytes"
	"compress/zlib"
	"encoding/hex"
	"errors"
	"fmt"
	goimage "image"
	gocolor "image/color"
	"image/jpeg"
	"io"

	// Need two slightly different implementations of LZW (EarlyChange parameter).
	lzw0 "compress/lzw"

	lzw1 "golang.org/x/image/tiff/lzw"

	"github.com/unidoc/unidoc/common"
)

// Filter names returned by the encoders' GetFilterName methods and written as
// the Filter entry of the stream dictionaries they build.
const (
	StreamEncodingFilterNameFlate     = "FlateDecode"
	StreamEncodingFilterNameLZW       = "LZWDecode"
	StreamEncodingFilterNameDCT       = "DCTDecode"
	StreamEncodingFilterNameRunLength = "RunLengthDecode"
	StreamEncodingFilterNameASCIIHex  = "ASCIIHexDecode"
	StreamEncodingFilterNameASCII85   = "ASCII85Decode"
	StreamEncodingFilterNameCCITTFax  = "CCITTFaxDecode"
	StreamEncodingFilterNameJBIG2     = "JBIG2Decode"
	StreamEncodingFilterNameJPX       = "JPXDecode"
	StreamEncodingFilterNameRaw       = "Raw"
)

const (
	// DefaultJPEGQuality is the Quality value assigned to newly created DCT
	// encoders.
	DefaultJPEGQuality = 75
)

// StreamEncoder is the common interface of the stream encoders/decoders
// defined in this file.
type StreamEncoder interface {
	// GetFilterName returns the filter name of the encoding.
	GetFilterName() string
	// MakeDecodeParams returns the decode parameters object for the encoder,
	// or nil when there is nothing to describe.
	MakeDecodeParams() PdfObject
	// MakeStreamDict returns a new stream dictionary describing the encoding.
	MakeStreamDict() *PdfObjectDictionary

	// EncodeBytes encodes raw data with the encoder's parameters.
	EncodeBytes(data []byte) ([]byte, error)
	// DecodeBytes decodes encoded data with the encoder's parameters.
	DecodeBytes(encoded []byte) ([]byte, error)
	// DecodeStream decodes the contents of a stream object.
	DecodeStream(streamObj *PdfObjectStream) ([]byte, error)
}

// Flate encoding.
type FlateEncoder struct {
	// Predictor selects the prediction scheme: 1 is no prediction, 2 is the
	// TIFF predictor and 10-15 are the PNG predictors.
	Predictor int
	// BitsPerComponent is the sample depth; only 8 is supported when decoding.
	BitsPerComponent int
	// For predictors
	// Columns is the number of samples per row.
	Columns int
	// Colors is the number of interleaved color components per sample.
	Colors int
}

// Make a new flate encoder with default parameters, predictor 1 and bits per component 8.
76 func NewFlateEncoder() *FlateEncoder { 77 encoder := &FlateEncoder{} 78 79 // Default (No prediction) 80 encoder.Predictor = 1 81 82 // Currently only supporting 8. 83 encoder.BitsPerComponent = 8 84 85 encoder.Colors = 1 86 encoder.Columns = 1 87 88 return encoder 89 } 90 91 // Set the predictor function. Specify the number of columns per row. 92 // The columns indicates the number of samples per row. 93 // Used for grouping data together for compression. 94 func (this *FlateEncoder) SetPredictor(columns int) { 95 // Only supporting PNG sub predictor for encoding. 96 this.Predictor = 11 97 this.Columns = columns 98 } 99 100 func (this *FlateEncoder) GetFilterName() string { 101 return StreamEncodingFilterNameFlate 102 } 103 104 func (this *FlateEncoder) MakeDecodeParams() PdfObject { 105 if this.Predictor > 1 { 106 decodeParams := MakeDict() 107 decodeParams.Set("Predictor", MakeInteger(int64(this.Predictor))) 108 109 // Only add if not default option. 110 if this.BitsPerComponent != 8 { 111 decodeParams.Set("BitsPerComponent", MakeInteger(int64(this.BitsPerComponent))) 112 } 113 if this.Columns != 1 { 114 decodeParams.Set("Columns", MakeInteger(int64(this.Columns))) 115 } 116 if this.Colors != 1 { 117 decodeParams.Set("Colors", MakeInteger(int64(this.Colors))) 118 } 119 return decodeParams 120 } 121 122 return nil 123 } 124 125 // Make a new instance of an encoding dictionary for a stream object. 126 // Has the Filter set and the DecodeParms. 127 func (this *FlateEncoder) MakeStreamDict() *PdfObjectDictionary { 128 dict := MakeDict() 129 dict.Set("Filter", MakeName(this.GetFilterName())) 130 131 decodeParams := this.MakeDecodeParams() 132 if decodeParams != nil { 133 dict.Set("DecodeParms", decodeParams) 134 } 135 136 return dict 137 } 138 139 // Create a new flate decoder from a stream object, getting all the encoding parameters 140 // from the DecodeParms stream object dictionary entry. 
141 func newFlateEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObjectDictionary) (*FlateEncoder, error) { 142 encoder := NewFlateEncoder() 143 144 encDict := streamObj.PdfObjectDictionary 145 if encDict == nil { 146 // No encoding dictionary. 147 return encoder, nil 148 } 149 150 // If decodeParams not provided, see if we can get from the stream. 151 if decodeParams == nil { 152 obj := TraceToDirectObject(encDict.Get("DecodeParms")) 153 if obj != nil { 154 if arr, isArr := obj.(*PdfObjectArray); isArr { 155 if len(*arr) != 1 { 156 common.Log.Debug("Error: DecodeParms array length != 1 (%d)", len(*arr)) 157 return nil, errors.New("Range check error") 158 } 159 obj = TraceToDirectObject((*arr)[0]) 160 } 161 162 dp, isDict := obj.(*PdfObjectDictionary) 163 if !isDict { 164 common.Log.Debug("Error: DecodeParms not a dictionary (%T)", obj) 165 return nil, fmt.Errorf("Invalid DecodeParms") 166 } 167 decodeParams = dp 168 } 169 } 170 if decodeParams == nil { 171 // Can safely return here if no decode params, as the following depend on the decode params. 172 return encoder, nil 173 } 174 175 common.Log.Trace("decode params: %s", decodeParams.String()) 176 obj := decodeParams.Get("Predictor") 177 if obj == nil { 178 common.Log.Debug("Error: Predictor missing from DecodeParms - Continue with default (1)") 179 } else { 180 predictor, ok := obj.(*PdfObjectInteger) 181 if !ok { 182 common.Log.Debug("Error: Predictor specified but not numeric (%T)", obj) 183 return nil, fmt.Errorf("Invalid Predictor") 184 } 185 encoder.Predictor = int(*predictor) 186 } 187 188 // Bits per component. Use default if not specified (8). 189 obj = decodeParams.Get("BitsPerComponent") 190 if obj != nil { 191 bpc, ok := obj.(*PdfObjectInteger) 192 if !ok { 193 common.Log.Debug("ERROR: Invalid BitsPerComponent") 194 return nil, fmt.Errorf("Invalid BitsPerComponent") 195 } 196 encoder.BitsPerComponent = int(*bpc) 197 } 198 199 if encoder.Predictor > 1 { 200 // Columns. 
201 encoder.Columns = 1 202 obj = decodeParams.Get("Columns") 203 if obj != nil { 204 columns, ok := obj.(*PdfObjectInteger) 205 if !ok { 206 return nil, fmt.Errorf("Predictor column invalid") 207 } 208 209 encoder.Columns = int(*columns) 210 } 211 212 // Colors. 213 // Number of interleaved color components per sample (Default 1 if not specified) 214 encoder.Colors = 1 215 obj = decodeParams.Get("Colors") 216 if obj != nil { 217 colors, ok := obj.(*PdfObjectInteger) 218 if !ok { 219 return nil, fmt.Errorf("Predictor colors not an integer") 220 } 221 encoder.Colors = int(*colors) 222 } 223 } 224 225 return encoder, nil 226 } 227 228 func (this *FlateEncoder) DecodeBytes(encoded []byte) ([]byte, error) { 229 common.Log.Trace("FlateDecode bytes") 230 231 bufReader := bytes.NewReader(encoded) 232 r, err := zlib.NewReader(bufReader) 233 if err != nil { 234 common.Log.Debug("Decoding error %v\n", err) 235 common.Log.Debug("Stream (%d) % x", len(encoded), encoded) 236 return nil, err 237 } 238 defer r.Close() 239 240 var outBuf bytes.Buffer 241 outBuf.ReadFrom(r) 242 243 common.Log.Trace("En: % x\n", encoded) 244 common.Log.Trace("De: % x\n", outBuf.Bytes()) 245 246 return outBuf.Bytes(), nil 247 } 248 249 // Decode a FlateEncoded stream object and give back decoded bytes. 250 func (this *FlateEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 251 // TODO: Handle more filter bytes and support more values of BitsPerComponent. 252 253 common.Log.Trace("FlateDecode stream") 254 common.Log.Trace("Predictor: %d", this.Predictor) 255 if this.BitsPerComponent != 8 { 256 return nil, fmt.Errorf("Invalid BitsPerComponent=%d (only 8 supported)", this.BitsPerComponent) 257 } 258 259 outData, err := this.DecodeBytes(streamObj.Stream) 260 if err != nil { 261 return nil, err 262 } 263 common.Log.Trace("En: % x\n", streamObj.Stream) 264 common.Log.Trace("De: % x\n", outData) 265 266 if this.Predictor > 1 { 267 if this.Predictor == 2 { // TIFF encoding: Needs some tests. 
268 common.Log.Trace("Tiff encoding") 269 common.Log.Trace("Colors: %d", this.Colors) 270 271 rowLength := int(this.Columns) * this.Colors 272 if rowLength < 1 { 273 // No data. Return empty set. 274 return []byte{}, nil 275 } 276 rows := len(outData) / rowLength 277 if len(outData)%rowLength != 0 { 278 common.Log.Debug("ERROR: TIFF encoding: Invalid row length...") 279 return nil, fmt.Errorf("Invalid row length (%d/%d)", len(outData), rowLength) 280 } 281 if rowLength%this.Colors != 0 { 282 return nil, fmt.Errorf("Invalid row length (%d) for colors %d", rowLength, this.Colors) 283 } 284 if rowLength > len(outData) { 285 common.Log.Debug("Row length cannot be longer than data length (%d/%d)", rowLength, len(outData)) 286 return nil, errors.New("Range check error") 287 } 288 common.Log.Trace("inp outData (%d): % x", len(outData), outData) 289 290 pOutBuffer := bytes.NewBuffer(nil) 291 292 // 0-255 -255 255 ; 0-255=-255; 293 for i := 0; i < rows; i++ { 294 rowData := outData[rowLength*i : rowLength*(i+1)] 295 // Predicts the same as the sample to the left. 296 // Interleaved by colors. 297 for j := this.Colors; j < rowLength; j++ { 298 rowData[j] = byte(int(rowData[j]+rowData[j-this.Colors]) % 256) 299 } 300 pOutBuffer.Write(rowData) 301 } 302 pOutData := pOutBuffer.Bytes() 303 common.Log.Trace("POutData (%d): % x", len(pOutData), pOutData) 304 return pOutData, nil 305 } else if this.Predictor >= 10 && this.Predictor <= 15 { 306 common.Log.Trace("PNG Encoding") 307 // Columns represents the number of samples per row; Each sample can contain multiple color 308 // components. 309 rowLength := int(this.Columns*this.Colors + 1) // 1 byte to specify predictor algorithms per row. 
310 rows := len(outData) / rowLength 311 if len(outData)%rowLength != 0 { 312 return nil, fmt.Errorf("Invalid row length (%d/%d)", len(outData), rowLength) 313 } 314 if rowLength > len(outData) { 315 common.Log.Debug("Row length cannot be longer than data length (%d/%d)", rowLength, len(outData)) 316 return nil, errors.New("Range check error") 317 } 318 319 pOutBuffer := bytes.NewBuffer(nil) 320 321 common.Log.Trace("Predictor columns: %d", this.Columns) 322 common.Log.Trace("Length: %d / %d = %d rows", len(outData), rowLength, rows) 323 prevRowData := make([]byte, rowLength) 324 for i := 0; i < rowLength; i++ { 325 prevRowData[i] = 0 326 } 327 328 for i := 0; i < rows; i++ { 329 rowData := outData[rowLength*i : rowLength*(i+1)] 330 331 fb := rowData[0] 332 switch fb { 333 case 0: 334 // No prediction. (No operation). 335 case 1: 336 // Sub: Predicts the same as the sample to the left. 337 for j := 2; j < rowLength; j++ { 338 rowData[j] = byte(int(rowData[j]+rowData[j-1]) % 256) 339 } 340 case 2: 341 // Up: Predicts the same as the sample above 342 for j := 1; j < rowLength; j++ { 343 rowData[j] = byte(int(rowData[j]+prevRowData[j]) % 256) 344 } 345 case 3: 346 // Avg: Predicts the same as the average of the sample to the left and above. 347 for j := 1; j < rowLength; j++ { 348 if j == 1 { 349 rowData[j] = byte(int(rowData[j]+prevRowData[j]) % 256) 350 } else { 351 avg := (rowData[j-1] + prevRowData[j]) / 2 352 rowData[j] = byte(int(rowData[j]+avg) % 256) 353 } 354 } 355 case 4: 356 // Paeth: a nonlinear function of the sample above, the sample to the left and the sample 357 // to the upper left. 358 for j := 2; j < rowLength; j++ { 359 a := rowData[j-1] // left 360 b := prevRowData[j] // above 361 c := prevRowData[j-1] // upper left 362 363 p := int(a + b - c) 364 pa := absInt(p - int(a)) 365 pb := absInt(p - int(b)) 366 pc := absInt(p - int(c)) 367 368 if pa <= pb && pa <= pc { 369 // Use a (left). 
370 rowData[j] = byte(int(rowData[j]+a) % 256) 371 } else if pb <= pc { 372 // Use b (upper). 373 rowData[j] = byte(int(rowData[j]+b) % 256) 374 } else { 375 // Use c (upper left). 376 rowData[j] = byte(int(rowData[j]+c) % 256) 377 } 378 } 379 380 default: 381 common.Log.Debug("ERROR: Invalid filter byte (%d) @row %d", fb, i) 382 return nil, fmt.Errorf("Invalid filter byte (%d)", fb) 383 } 384 385 for i := 0; i < rowLength; i++ { 386 prevRowData[i] = rowData[i] 387 } 388 pOutBuffer.Write(rowData[1:]) 389 } 390 pOutData := pOutBuffer.Bytes() 391 return pOutData, nil 392 } else { 393 common.Log.Debug("ERROR: Unsupported predictor (%d)", this.Predictor) 394 return nil, fmt.Errorf("Unsupported predictor (%d)", this.Predictor) 395 } 396 } 397 398 return outData, nil 399 } 400 401 // Encode a bytes array and return the encoded value based on the encoder parameters. 402 func (this *FlateEncoder) EncodeBytes(data []byte) ([]byte, error) { 403 if this.Predictor != 1 && this.Predictor != 11 { 404 common.Log.Debug("Encoding error: FlateEncoder Predictor = 1, 11 only supported") 405 return nil, ErrUnsupportedEncodingParameters 406 } 407 408 if this.Predictor == 11 { 409 // The length of each output row in number of samples. 410 // N.B. Each output row has one extra sample as compared to the input to indicate the 411 // predictor type. 412 rowLength := int(this.Columns) 413 rows := len(data) / rowLength 414 if len(data)%rowLength != 0 { 415 common.Log.Error("Invalid column length") 416 return nil, errors.New("Invalid row length") 417 } 418 419 pOutBuffer := bytes.NewBuffer(nil) 420 421 tmpData := make([]byte, rowLength) 422 423 for i := 0; i < rows; i++ { 424 rowData := data[rowLength*i : rowLength*(i+1)] 425 426 // PNG SUB method. 427 // Sub: Predicts the same as the sample to the left. 
428 tmpData[0] = rowData[0] 429 for j := 1; j < rowLength; j++ { 430 tmpData[j] = byte(int(rowData[j]-rowData[j-1]) % 256) 431 } 432 433 pOutBuffer.WriteByte(1) // sub method 434 pOutBuffer.Write(tmpData) 435 } 436 437 data = pOutBuffer.Bytes() 438 } 439 440 var b bytes.Buffer 441 w := zlib.NewWriter(&b) 442 w.Write(data) 443 w.Close() 444 445 return b.Bytes(), nil 446 } 447 448 // LZW encoding/decoding functionality. 449 type LZWEncoder struct { 450 Predictor int 451 BitsPerComponent int 452 // For predictors 453 Columns int 454 Colors int 455 // LZW algorithm setting. 456 EarlyChange int 457 } 458 459 // Make a new LZW encoder with default parameters. 460 func NewLZWEncoder() *LZWEncoder { 461 encoder := &LZWEncoder{} 462 463 // Default (No prediction) 464 encoder.Predictor = 1 465 466 // Currently only supporting 8. 467 encoder.BitsPerComponent = 8 468 469 encoder.Colors = 1 470 encoder.Columns = 1 471 encoder.EarlyChange = 1 472 473 return encoder 474 } 475 476 func (this *LZWEncoder) GetFilterName() string { 477 return StreamEncodingFilterNameLZW 478 } 479 480 func (this *LZWEncoder) MakeDecodeParams() PdfObject { 481 if this.Predictor > 1 { 482 decodeParams := MakeDict() 483 decodeParams.Set("Predictor", MakeInteger(int64(this.Predictor))) 484 485 // Only add if not default option. 486 if this.BitsPerComponent != 8 { 487 decodeParams.Set("BitsPerComponent", MakeInteger(int64(this.BitsPerComponent))) 488 } 489 if this.Columns != 1 { 490 decodeParams.Set("Columns", MakeInteger(int64(this.Columns))) 491 } 492 if this.Colors != 1 { 493 decodeParams.Set("Colors", MakeInteger(int64(this.Colors))) 494 } 495 return decodeParams 496 } 497 return nil 498 } 499 500 // Make a new instance of an encoding dictionary for a stream object. 501 // Has the Filter set and the DecodeParms. 
502 func (this *LZWEncoder) MakeStreamDict() *PdfObjectDictionary { 503 dict := MakeDict() 504 505 dict.Set("Filter", MakeName(this.GetFilterName())) 506 507 decodeParams := this.MakeDecodeParams() 508 if decodeParams != nil { 509 dict.Set("DecodeParms", decodeParams) 510 } 511 512 dict.Set("EarlyChange", MakeInteger(int64(this.EarlyChange))) 513 514 return dict 515 } 516 517 // Create a new LZW encoder/decoder from a stream object, getting all the encoding parameters 518 // from the DecodeParms stream object dictionary entry. 519 func newLZWEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObjectDictionary) (*LZWEncoder, error) { 520 // Start with default settings. 521 encoder := NewLZWEncoder() 522 523 encDict := streamObj.PdfObjectDictionary 524 if encDict == nil { 525 // No encoding dictionary. 526 return encoder, nil 527 } 528 529 // If decodeParams not provided, see if we can get from the stream. 530 if decodeParams == nil { 531 obj := encDict.Get("DecodeParms") 532 if obj != nil { 533 if dp, isDict := obj.(*PdfObjectDictionary); isDict { 534 decodeParams = dp 535 } else if a, isArr := obj.(*PdfObjectArray); isArr { 536 if len(*a) == 1 { 537 if dp, isDict := (*a)[0].(*PdfObjectDictionary); isDict { 538 decodeParams = dp 539 } 540 } 541 } 542 if decodeParams == nil { 543 common.Log.Error("DecodeParms not a dictionary %#v", obj) 544 return nil, fmt.Errorf("Invalid DecodeParms") 545 } 546 } 547 } 548 549 // The EarlyChange indicates when to increase code length, as different 550 // implementations use a different mechanisms. Essentially this chooses 551 // which LZW implementation to use. 
552 // The default is 1 (one code early) 553 obj := encDict.Get("EarlyChange") 554 if obj != nil { 555 earlyChange, ok := obj.(*PdfObjectInteger) 556 if !ok { 557 common.Log.Debug("Error: EarlyChange specified but not numeric (%T)", obj) 558 return nil, fmt.Errorf("Invalid EarlyChange") 559 } 560 if *earlyChange != 0 && *earlyChange != 1 { 561 return nil, fmt.Errorf("Invalid EarlyChange value (not 0 or 1)") 562 } 563 564 encoder.EarlyChange = int(*earlyChange) 565 } else { 566 encoder.EarlyChange = 1 // default 567 } 568 569 if decodeParams == nil { 570 // No decode parameters. Can safely return here if not set as the following options 571 // are related to the decode Params. 572 return encoder, nil 573 } 574 575 obj = decodeParams.Get("Predictor") 576 if obj != nil { 577 predictor, ok := obj.(*PdfObjectInteger) 578 if !ok { 579 common.Log.Debug("Error: Predictor specified but not numeric (%T)", obj) 580 return nil, fmt.Errorf("Invalid Predictor") 581 } 582 encoder.Predictor = int(*predictor) 583 } 584 585 // Bits per component. Use default if not specified (8). 586 obj = decodeParams.Get("BitsPerComponent") 587 if obj != nil { 588 bpc, ok := obj.(*PdfObjectInteger) 589 if !ok { 590 common.Log.Debug("ERROR: Invalid BitsPerComponent") 591 return nil, fmt.Errorf("Invalid BitsPerComponent") 592 } 593 encoder.BitsPerComponent = int(*bpc) 594 } 595 596 if encoder.Predictor > 1 { 597 // Columns. 598 encoder.Columns = 1 599 obj = decodeParams.Get("Columns") 600 if obj != nil { 601 columns, ok := obj.(*PdfObjectInteger) 602 if !ok { 603 return nil, fmt.Errorf("Predictor column invalid") 604 } 605 606 encoder.Columns = int(*columns) 607 } 608 609 // Colors. 
610 // Number of interleaved color components per sample (Default 1 if not specified) 611 encoder.Colors = 1 612 obj = decodeParams.Get("Colors") 613 if obj != nil { 614 colors, ok := obj.(*PdfObjectInteger) 615 if !ok { 616 return nil, fmt.Errorf("Predictor colors not an integer") 617 } 618 encoder.Colors = int(*colors) 619 } 620 } 621 622 common.Log.Trace("decode params: %s", decodeParams.String()) 623 return encoder, nil 624 } 625 626 func (this *LZWEncoder) DecodeBytes(encoded []byte) ([]byte, error) { 627 var outBuf bytes.Buffer 628 bufReader := bytes.NewReader(encoded) 629 630 var r io.ReadCloser 631 if this.EarlyChange == 1 { 632 // LZW implementation with code length increases one code early (1). 633 r = lzw1.NewReader(bufReader, lzw1.MSB, 8) 634 } else { 635 // 0: LZW implementation with postponed code length increases (0). 636 r = lzw0.NewReader(bufReader, lzw0.MSB, 8) 637 } 638 defer r.Close() 639 640 _, err := outBuf.ReadFrom(r) 641 if err != nil { 642 return nil, err 643 } 644 645 return outBuf.Bytes(), nil 646 } 647 648 func (this *LZWEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 649 // Revamp this support to handle TIFF predictor (2). 650 // Also handle more filter bytes and check 651 // BitsPerComponent. Default value is 8, currently we are only 652 // supporting that one. 653 654 common.Log.Trace("LZW Decoding") 655 common.Log.Trace("Predictor: %d", this.Predictor) 656 657 outData, err := this.DecodeBytes(streamObj.Stream) 658 if err != nil { 659 return nil, err 660 } 661 662 common.Log.Trace(" IN: (%d) % x", len(streamObj.Stream), streamObj.Stream) 663 common.Log.Trace("OUT: (%d) % x", len(outData), outData) 664 665 if this.Predictor > 1 { 666 if this.Predictor == 2 { // TIFF encoding: Needs some tests. 667 common.Log.Trace("Tiff encoding") 668 669 rowLength := int(this.Columns) * this.Colors 670 if rowLength < 1 { 671 // No data. Return empty set. 
672 return []byte{}, nil 673 } 674 675 rows := len(outData) / rowLength 676 if len(outData)%rowLength != 0 { 677 common.Log.Debug("ERROR: TIFF encoding: Invalid row length...") 678 return nil, fmt.Errorf("Invalid row length (%d/%d)", len(outData), rowLength) 679 } 680 681 if rowLength%this.Colors != 0 { 682 return nil, fmt.Errorf("Invalid row length (%d) for colors %d", rowLength, this.Colors) 683 } 684 685 if rowLength > len(outData) { 686 common.Log.Debug("Row length cannot be longer than data length (%d/%d)", rowLength, len(outData)) 687 return nil, errors.New("Range check error") 688 } 689 common.Log.Trace("inp outData (%d): % x", len(outData), outData) 690 691 pOutBuffer := bytes.NewBuffer(nil) 692 693 // 0-255 -255 255 ; 0-255=-255; 694 for i := 0; i < rows; i++ { 695 rowData := outData[rowLength*i : rowLength*(i+1)] 696 // Predicts the same as the sample to the left. 697 // Interleaved by colors. 698 for j := this.Colors; j < rowLength; j++ { 699 rowData[j] = byte(int(rowData[j]+rowData[j-this.Colors]) % 256) 700 } 701 // GH: Appears that this is not working as expected... 702 703 pOutBuffer.Write(rowData) 704 } 705 pOutData := pOutBuffer.Bytes() 706 common.Log.Trace("POutData (%d): % x", len(pOutData), pOutData) 707 return pOutData, nil 708 } else if this.Predictor >= 10 && this.Predictor <= 15 { 709 common.Log.Trace("PNG Encoding") 710 // Columns represents the number of samples per row; Each sample can contain multiple color 711 // components. 712 rowLength := int(this.Columns*this.Colors + 1) // 1 byte to specify predictor algorithms per row. 713 if rowLength < 1 { 714 // No data. Return empty set. 
715 return []byte{}, nil 716 } 717 rows := len(outData) / rowLength 718 if len(outData)%rowLength != 0 { 719 return nil, fmt.Errorf("Invalid row length (%d/%d)", len(outData), rowLength) 720 } 721 if rowLength > len(outData) { 722 common.Log.Debug("Row length cannot be longer than data length (%d/%d)", rowLength, len(outData)) 723 return nil, errors.New("Range check error") 724 } 725 726 pOutBuffer := bytes.NewBuffer(nil) 727 728 common.Log.Trace("Predictor columns: %d", this.Columns) 729 common.Log.Trace("Length: %d / %d = %d rows", len(outData), rowLength, rows) 730 prevRowData := make([]byte, rowLength) 731 for i := 0; i < rowLength; i++ { 732 prevRowData[i] = 0 733 } 734 735 for i := 0; i < rows; i++ { 736 rowData := outData[rowLength*i : rowLength*(i+1)] 737 738 fb := rowData[0] 739 switch fb { 740 case 0: 741 // No prediction. (No operation). 742 case 1: 743 // Sub: Predicts the same as the sample to the left. 744 for j := 2; j < rowLength; j++ { 745 rowData[j] = byte(int(rowData[j]+rowData[j-1]) % 256) 746 } 747 case 2: 748 // Up: Predicts the same as the sample above 749 for j := 1; j < rowLength; j++ { 750 rowData[j] = byte(int(rowData[j]+prevRowData[j]) % 256) 751 } 752 default: 753 common.Log.Debug("ERROR: Invalid filter byte (%d)", fb) 754 return nil, fmt.Errorf("Invalid filter byte (%d)", fb) 755 } 756 757 for i := 0; i < rowLength; i++ { 758 prevRowData[i] = rowData[i] 759 } 760 pOutBuffer.Write(rowData[1:]) 761 } 762 pOutData := pOutBuffer.Bytes() 763 return pOutData, nil 764 } else { 765 common.Log.Debug("ERROR: Unsupported predictor (%d)", this.Predictor) 766 return nil, fmt.Errorf("Unsupported predictor (%d)", this.Predictor) 767 } 768 } 769 770 return outData, nil 771 } 772 773 // Support for encoding LZW. Currently not supporting predictors (raw compressed data only). 774 // Only supports the Early change = 1 algorithm (compress/lzw) as the other implementation 775 // does not have a write method. 
776 // TODO: Consider refactoring compress/lzw to allow both. 777 func (this *LZWEncoder) EncodeBytes(data []byte) ([]byte, error) { 778 if this.Predictor != 1 { 779 return nil, fmt.Errorf("LZW Predictor = 1 only supported yet") 780 } 781 782 if this.EarlyChange == 1 { 783 return nil, fmt.Errorf("LZW Early Change = 0 only supported yet") 784 } 785 786 var b bytes.Buffer 787 w := lzw0.NewWriter(&b, lzw0.MSB, 8) 788 w.Write(data) 789 w.Close() 790 791 return b.Bytes(), nil 792 } 793 794 // 795 // DCT (JPG) encoding/decoding functionality for images. 796 type DCTEncoder struct { 797 ColorComponents int // 1 (gray), 3 (rgb), 4 (cmyk) 798 BitsPerComponent int // 8 or 16 bit 799 Width int 800 Height int 801 Quality int 802 } 803 804 // Make a new DCT encoder with default parameters. 805 func NewDCTEncoder() *DCTEncoder { 806 encoder := &DCTEncoder{} 807 808 encoder.ColorComponents = 3 809 encoder.BitsPerComponent = 8 810 811 encoder.Quality = DefaultJPEGQuality 812 813 return encoder 814 } 815 816 func (this *DCTEncoder) GetFilterName() string { 817 return StreamEncodingFilterNameDCT 818 } 819 820 func (this *DCTEncoder) MakeDecodeParams() PdfObject { 821 // Does not have decode params. 822 return nil 823 } 824 825 // Make a new instance of an encoding dictionary for a stream object. 826 // Has the Filter set. Some other parameters are generated elsewhere. 827 func (this *DCTEncoder) MakeStreamDict() *PdfObjectDictionary { 828 dict := MakeDict() 829 830 dict.Set("Filter", MakeName(this.GetFilterName())) 831 832 return dict 833 } 834 835 // Create a new DCT encoder/decoder from a stream object, getting all the encoding parameters 836 // from the stream object dictionary entry and the image data itself. 837 // TODO: Support if used with other filters [ASCII85Decode FlateDecode DCTDecode]... 838 // need to apply the other filters prior to this one... 
839 func newDCTEncoderFromStream(streamObj *PdfObjectStream, multiEnc *MultiEncoder) (*DCTEncoder, error) { 840 // Start with default settings. 841 encoder := NewDCTEncoder() 842 843 encDict := streamObj.PdfObjectDictionary 844 if encDict == nil { 845 // No encoding dictionary. 846 return encoder, nil 847 } 848 849 // If using DCTDecode in combination with other filters, make sure to decode that first... 850 encoded := streamObj.Stream 851 if multiEnc != nil { 852 e, err := multiEnc.DecodeBytes(encoded) 853 if err != nil { 854 return nil, err 855 } 856 encoded = e 857 858 } 859 860 bufReader := bytes.NewReader(encoded) 861 862 cfg, err := jpeg.DecodeConfig(bufReader) 863 //img, _, err := goimage.Decode(bufReader) 864 if err != nil { 865 common.Log.Debug("Error decoding file: %s", err) 866 return nil, err 867 } 868 869 switch cfg.ColorModel { 870 case gocolor.RGBAModel: 871 encoder.BitsPerComponent = 8 872 encoder.ColorComponents = 3 // alpha is not included in pdf. 873 case gocolor.RGBA64Model: 874 encoder.BitsPerComponent = 16 875 encoder.ColorComponents = 3 876 case gocolor.GrayModel: 877 encoder.BitsPerComponent = 8 878 encoder.ColorComponents = 1 879 case gocolor.Gray16Model: 880 encoder.BitsPerComponent = 16 881 encoder.ColorComponents = 1 882 case gocolor.CMYKModel: 883 encoder.BitsPerComponent = 8 884 encoder.ColorComponents = 4 885 case gocolor.YCbCrModel: 886 // YCbCr is not supported by PDF, but it could be a different colorspace 887 // with 3 components. Would be specified by the ColorSpace entry. 
888 encoder.BitsPerComponent = 8 889 encoder.ColorComponents = 3 890 default: 891 return nil, errors.New("Unsupported color model") 892 } 893 encoder.Width = cfg.Width 894 encoder.Height = cfg.Height 895 common.Log.Trace("DCT Encoder: %+v", encoder) 896 encoder.Quality = DefaultJPEGQuality 897 898 return encoder, nil 899 } 900 901 func (this *DCTEncoder) DecodeBytes(encoded []byte) ([]byte, error) { 902 bufReader := bytes.NewReader(encoded) 903 //img, _, err := goimage.Decode(bufReader) 904 img, err := jpeg.Decode(bufReader) 905 if err != nil { 906 common.Log.Debug("Error decoding image: %s", err) 907 return nil, err 908 } 909 bounds := img.Bounds() 910 911 var decoded = make([]byte, bounds.Dx()*bounds.Dy()*this.ColorComponents*this.BitsPerComponent/8) 912 index := 0 913 914 for j := bounds.Min.Y; j < bounds.Max.Y; j++ { 915 for i := bounds.Min.X; i < bounds.Max.X; i++ { 916 color := img.At(i, j) 917 918 // Gray scale. 919 if this.ColorComponents == 1 { 920 if this.BitsPerComponent == 16 { 921 // Gray - 16 bit. 922 val, ok := color.(gocolor.Gray16) 923 if !ok { 924 return nil, errors.New("Color type error") 925 } 926 decoded[index] = byte((val.Y >> 8) & 0xff) 927 index++ 928 decoded[index] = byte(val.Y & 0xff) 929 index++ 930 } else { 931 // Gray - 8 bit. 932 val, ok := color.(gocolor.Gray) 933 if !ok { 934 return nil, errors.New("Color type error") 935 } 936 decoded[index] = byte(val.Y & 0xff) 937 index++ 938 } 939 } else if this.ColorComponents == 3 { 940 if this.BitsPerComponent == 16 { 941 val, ok := color.(gocolor.RGBA64) 942 if !ok { 943 return nil, errors.New("Color type error") 944 } 945 decoded[index] = byte((val.R >> 8) & 0xff) 946 index++ 947 decoded[index] = byte(val.R & 0xff) 948 index++ 949 decoded[index] = byte((val.G >> 8) & 0xff) 950 index++ 951 decoded[index] = byte(val.G & 0xff) 952 index++ 953 decoded[index] = byte((val.B >> 8) & 0xff) 954 index++ 955 decoded[index] = byte(val.B & 0xff) 956 index++ 957 } else { 958 // RGB - 8 bit. 
959 val, isRGB := color.(gocolor.RGBA) 960 if isRGB { 961 decoded[index] = val.R & 0xff 962 index++ 963 decoded[index] = val.G & 0xff 964 index++ 965 decoded[index] = val.B & 0xff 966 index++ 967 } else { 968 // Hack around YCbCr from go jpeg package. 969 val, ok := color.(gocolor.YCbCr) 970 if !ok { 971 return nil, errors.New("Color type error") 972 } 973 r, g, b, _ := val.RGBA() 974 // The fact that we cannot use the Y, Cb, Cr values directly, 975 // indicates that either the jpeg package is converting the raw 976 // data into YCbCr with some kind of mapping, or that the original 977 // data is not in R,G,B... 978 // XXX: This is not good as it means we end up with R, G, B... even 979 // if the original colormap was different. Unless calling the RGBA() 980 // call exactly reverses the previous conversion to YCbCr (even if 981 // real data is not rgb)... ? 982 // TODO: Test more. Consider whether we need to implement our own jpeg filter. 983 decoded[index] = byte(r >> 8) //byte(val.Y & 0xff) 984 index++ 985 decoded[index] = byte(g >> 8) //val.Cb & 0xff) 986 index++ 987 decoded[index] = byte(b >> 8) //val.Cr & 0xff) 988 index++ 989 } 990 } 991 } else if this.ColorComponents == 4 { 992 // CMYK - 8 bit. 993 val, ok := color.(gocolor.CMYK) 994 if !ok { 995 return nil, errors.New("Color type error") 996 } 997 // TODO: Is the inversion not handled right in the JPEG package for APP14? 998 // Should not need to invert here... 
999 decoded[index] = 255 - val.C&0xff 1000 index++ 1001 decoded[index] = 255 - val.M&0xff 1002 index++ 1003 decoded[index] = 255 - val.Y&0xff 1004 index++ 1005 decoded[index] = 255 - val.K&0xff 1006 index++ 1007 } 1008 } 1009 } 1010 1011 return decoded, nil 1012 } 1013 1014 func (this *DCTEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 1015 return this.DecodeBytes(streamObj.Stream) 1016 } 1017 1018 type DrawableImage interface { 1019 ColorModel() gocolor.Model 1020 Bounds() goimage.Rectangle 1021 At(x, y int) gocolor.Color 1022 Set(x, y int, c gocolor.Color) 1023 } 1024 1025 func (this *DCTEncoder) EncodeBytes(data []byte) ([]byte, error) { 1026 bounds := goimage.Rect(0, 0, this.Width, this.Height) 1027 var img DrawableImage 1028 if this.ColorComponents == 1 { 1029 if this.BitsPerComponent == 16 { 1030 img = goimage.NewGray16(bounds) 1031 } else { 1032 img = goimage.NewGray(bounds) 1033 } 1034 } else if this.ColorComponents == 3 { 1035 if this.BitsPerComponent == 16 { 1036 img = goimage.NewRGBA64(bounds) 1037 } else { 1038 img = goimage.NewRGBA(bounds) 1039 } 1040 } else if this.ColorComponents == 4 { 1041 img = goimage.NewCMYK(bounds) 1042 } else { 1043 return nil, errors.New("Unsupported") 1044 } 1045 1046 // Draw the data on the image.. 
1047 x := 0 1048 y := 0 1049 bytesPerColor := this.ColorComponents * this.BitsPerComponent / 8 1050 for i := 0; i+bytesPerColor-1 < len(data); i += bytesPerColor { 1051 var c gocolor.Color 1052 if this.ColorComponents == 1 { 1053 if this.BitsPerComponent == 16 { 1054 val := uint16(data[i])<<8 | uint16(data[i+1]) 1055 c = gocolor.Gray16{val} 1056 } else { 1057 val := uint8(data[i] & 0xff) 1058 c = gocolor.Gray{val} 1059 } 1060 } else if this.ColorComponents == 3 { 1061 if this.BitsPerComponent == 16 { 1062 r := uint16(data[i])<<8 | uint16(data[i+1]) 1063 g := uint16(data[i+2])<<8 | uint16(data[i+3]) 1064 b := uint16(data[i+4])<<8 | uint16(data[i+5]) 1065 c = gocolor.RGBA64{R: r, G: g, B: b, A: 0} 1066 } else { 1067 r := uint8(data[i] & 0xff) 1068 g := uint8(data[i+1] & 0xff) 1069 b := uint8(data[i+2] & 0xff) 1070 c = gocolor.RGBA{R: r, G: g, B: b, A: 0} 1071 } 1072 } else if this.ColorComponents == 4 { 1073 c1 := uint8(data[i] & 0xff) 1074 m1 := uint8(data[i+1] & 0xff) 1075 y1 := uint8(data[i+2] & 0xff) 1076 k1 := uint8(data[i+3] & 0xff) 1077 c = gocolor.CMYK{C: c1, M: m1, Y: y1, K: k1} 1078 } 1079 1080 img.Set(x, y, c) 1081 x++ 1082 if x == this.Width { 1083 x = 0 1084 y++ 1085 } 1086 } 1087 1088 // The quality is specified from 0-100 (with 100 being the best quality) in the DCT structure. 1089 // N.B. even 100 is lossy, as still is transformed, but as good as it gets for DCT. 1090 // This is not related to the DPI, but rather inherent transformation losses. 1091 1092 opt := jpeg.Options{} 1093 opt.Quality = this.Quality 1094 1095 var buf bytes.Buffer 1096 err := jpeg.Encode(&buf, img, &opt) 1097 if err != nil { 1098 return nil, err 1099 } 1100 1101 return buf.Bytes(), nil 1102 } 1103 1104 // Run length encoding. 
1105 type RunLengthEncoder struct { 1106 } 1107 1108 // Make a new run length encoder 1109 func NewRunLengthEncoder() *RunLengthEncoder { 1110 return &RunLengthEncoder{} 1111 } 1112 1113 func (this *RunLengthEncoder) GetFilterName() string { 1114 return StreamEncodingFilterNameRunLength 1115 } 1116 1117 // Create a new run length decoder from a stream object. 1118 func newRunLengthEncoderFromStream(streamObj *PdfObjectStream, decodeParams *PdfObjectDictionary) (*RunLengthEncoder, error) { 1119 return NewRunLengthEncoder(), nil 1120 } 1121 1122 /* 1123 7.4.5 RunLengthDecode Filter 1124 The RunLengthDecode filter decodes data that has been encoded in a simple byte-oriented format based on run length. 1125 The encoded data shall be a sequence of runs, where each run shall consist of a length byte followed by 1 to 128 1126 bytes of data. If the length byte is in the range 0 to 127, the following length + 1 (1 to 128) bytes shall be 1127 copied literally during decompression. If length is in the range 129 to 255, the following single byte shall be 1128 copied 257 - length (2 to 128) times during decompression. A length value of 128 shall denote EOD. 1129 */ 1130 func (this *RunLengthEncoder) DecodeBytes(encoded []byte) ([]byte, error) { 1131 bufReader := bytes.NewReader(encoded) 1132 inb := []byte{} 1133 for { 1134 b, err := bufReader.ReadByte() 1135 if err != nil { 1136 return nil, err 1137 } 1138 if b > 128 { 1139 v, err := bufReader.ReadByte() 1140 if err != nil { 1141 return nil, err 1142 } 1143 for i := 0; i < 257-int(b); i++ { 1144 inb = append(inb, v) 1145 } 1146 } else if b < 128 { 1147 for i := 0; i < int(b)+1; i++ { 1148 v, err := bufReader.ReadByte() 1149 if err != nil { 1150 return nil, err 1151 } 1152 inb = append(inb, v) 1153 } 1154 } else { 1155 break 1156 } 1157 } 1158 1159 return inb, nil 1160 } 1161 1162 // Decode RunLengthEncoded stream object and give back decoded bytes. 
1163 func (this *RunLengthEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 1164 return this.DecodeBytes(streamObj.Stream) 1165 } 1166 1167 // Encode a bytes array and return the encoded value based on the encoder parameters. 1168 func (this *RunLengthEncoder) EncodeBytes(data []byte) ([]byte, error) { 1169 bufReader := bytes.NewReader(data) 1170 inb := []byte{} 1171 literal := []byte{} 1172 1173 b0, err := bufReader.ReadByte() 1174 if err == io.EOF { 1175 return []byte{}, nil 1176 } else if err != nil { 1177 return nil, err 1178 } 1179 runLen := 1 1180 1181 for { 1182 b, err := bufReader.ReadByte() 1183 if err == io.EOF { 1184 break 1185 } else if err != nil { 1186 return nil, err 1187 } 1188 1189 if b == b0 { 1190 if len(literal) > 0 { 1191 literal = literal[:len(literal)-1] 1192 if len(literal) > 0 { 1193 inb = append(inb, byte(len(literal)-1)) 1194 inb = append(inb, literal...) 1195 } 1196 runLen = 1 1197 literal = []byte{} 1198 } 1199 runLen++ 1200 if runLen >= 127 { 1201 inb = append(inb, byte(257-runLen), b0) 1202 runLen = 0 1203 } 1204 1205 } else { 1206 if runLen > 0 { 1207 if runLen == 1 { 1208 literal = []byte{b0} 1209 } else { 1210 inb = append(inb, byte(257-runLen), b0) 1211 } 1212 1213 runLen = 0 1214 } 1215 literal = append(literal, b) 1216 if len(literal) >= 127 { 1217 inb = append(inb, byte(len(literal)-1)) 1218 inb = append(inb, literal...) 1219 literal = []byte{} 1220 } 1221 } 1222 b0 = b 1223 } 1224 1225 if len(literal) > 0 { 1226 inb = append(inb, byte(len(literal)-1)) 1227 inb = append(inb, literal...) 1228 } else if runLen > 0 { 1229 inb = append(inb, byte(257-runLen), b0) 1230 } 1231 inb = append(inb, 128) 1232 return inb, nil 1233 } 1234 1235 func (this *RunLengthEncoder) MakeDecodeParams() PdfObject { 1236 return nil 1237 } 1238 1239 // Make a new instance of an encoding dictionary for a stream object. 
1240 func (this *RunLengthEncoder) MakeStreamDict() *PdfObjectDictionary { 1241 dict := MakeDict() 1242 dict.Set("Filter", MakeName(this.GetFilterName())) 1243 return dict 1244 } 1245 1246 ///// 1247 // ASCII hex encoder/decoder. 1248 type ASCIIHexEncoder struct { 1249 } 1250 1251 // Make a new ASCII hex encoder. 1252 func NewASCIIHexEncoder() *ASCIIHexEncoder { 1253 encoder := &ASCIIHexEncoder{} 1254 return encoder 1255 } 1256 1257 func (this *ASCIIHexEncoder) GetFilterName() string { 1258 return StreamEncodingFilterNameASCIIHex 1259 } 1260 1261 func (this *ASCIIHexEncoder) MakeDecodeParams() PdfObject { 1262 return nil 1263 } 1264 1265 // Make a new instance of an encoding dictionary for a stream object. 1266 func (this *ASCIIHexEncoder) MakeStreamDict() *PdfObjectDictionary { 1267 dict := MakeDict() 1268 dict.Set("Filter", MakeName(this.GetFilterName())) 1269 return dict 1270 } 1271 1272 func (this *ASCIIHexEncoder) DecodeBytes(encoded []byte) ([]byte, error) { 1273 bufReader := bytes.NewReader(encoded) 1274 inb := []byte{} 1275 for { 1276 b, err := bufReader.ReadByte() 1277 if err != nil { 1278 return nil, err 1279 } 1280 if b == '>' { 1281 break 1282 } 1283 if IsWhiteSpace(b) { 1284 continue 1285 } 1286 if (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F') || (b >= '0' && b <= '9') { 1287 inb = append(inb, b) 1288 } else { 1289 common.Log.Debug("ERROR: Invalid ascii hex character (%c)", b) 1290 return nil, fmt.Errorf("Invalid ascii hex character (%c)", b) 1291 } 1292 } 1293 if len(inb)%2 == 1 { 1294 inb = append(inb, '0') 1295 } 1296 common.Log.Trace("Inbound %s", inb) 1297 outb := make([]byte, hex.DecodedLen(len(inb))) 1298 _, err := hex.Decode(outb, inb) 1299 if err != nil { 1300 return nil, err 1301 } 1302 return outb, nil 1303 } 1304 1305 // ASCII hex decoding. 
1306 func (this *ASCIIHexEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 1307 return this.DecodeBytes(streamObj.Stream) 1308 } 1309 1310 func (this *ASCIIHexEncoder) EncodeBytes(data []byte) ([]byte, error) { 1311 var encoded bytes.Buffer 1312 1313 for _, b := range data { 1314 encoded.WriteString(fmt.Sprintf("%.2X ", b)) 1315 } 1316 encoded.WriteByte('>') 1317 1318 return encoded.Bytes(), nil 1319 } 1320 1321 // 1322 // ASCII85 encoder/decoder. 1323 // 1324 type ASCII85Encoder struct { 1325 } 1326 1327 // Make a new ASCII85 encoder. 1328 func NewASCII85Encoder() *ASCII85Encoder { 1329 encoder := &ASCII85Encoder{} 1330 return encoder 1331 } 1332 1333 func (this *ASCII85Encoder) GetFilterName() string { 1334 return StreamEncodingFilterNameASCII85 1335 } 1336 1337 func (this *ASCII85Encoder) MakeDecodeParams() PdfObject { 1338 return nil 1339 } 1340 1341 // Make a new instance of an encoding dictionary for a stream object. 1342 func (this *ASCII85Encoder) MakeStreamDict() *PdfObjectDictionary { 1343 dict := MakeDict() 1344 dict.Set("Filter", MakeName(this.GetFilterName())) 1345 return dict 1346 } 1347 1348 // 5 ASCII characters -> 4 raw binary bytes 1349 func (this *ASCII85Encoder) DecodeBytes(encoded []byte) ([]byte, error) { 1350 decoded := []byte{} 1351 1352 common.Log.Trace("ASCII85 Decode") 1353 1354 i := 0 1355 eod := false 1356 1357 for i < len(encoded) && !eod { 1358 codes := [5]byte{0, 0, 0, 0, 0} 1359 spaces := 0 // offset due to whitespace. 1360 j := 0 1361 toWrite := 4 1362 for j < 5+spaces { 1363 if i+j == len(encoded) { 1364 break 1365 } 1366 code := encoded[i+j] 1367 if IsWhiteSpace(code) { 1368 // Skip whitespace. 1369 spaces++ 1370 j++ 1371 continue 1372 } else if code == '~' && i+j+1 < len(encoded) && encoded[i+j+1] == '>' { 1373 toWrite = (j - spaces) - 1 1374 if toWrite < 0 { 1375 toWrite = 0 1376 } 1377 // EOD marker. Marks end of data. 1378 eod = true 1379 break 1380 } else if code >= '!' && code <= 'u' { 1381 // Valid code. 
1382 code -= '!' 1383 } else if code == 'z' && j-spaces == 0 { 1384 // 'z' in beginning of the byte sequence means that all 5 codes are 0. 1385 // Already all 0 initialized, so can break here. 1386 toWrite = 4 1387 j++ 1388 break 1389 } else { 1390 common.Log.Error("Failed decoding, invalid code") 1391 return nil, errors.New("Invalid code encountered") 1392 } 1393 1394 codes[j-spaces] = code 1395 j++ 1396 } 1397 i += j 1398 1399 // Pad with 'u' 84 (unused ones) 1400 // Takes care of issues at ends for input data that is not a multiple of 4-bytes. 1401 for m := toWrite + 1; m < 5; m++ { 1402 codes[m] = 84 1403 } 1404 1405 // Convert to a uint32 value. 1406 value := uint32(codes[0])*85*85*85*85 + uint32(codes[1])*85*85*85 + uint32(codes[2])*85*85 + uint32(codes[3])*85 + uint32(codes[4]) 1407 1408 // Convert to 4 bytes. 1409 decodedBytes := []byte{ 1410 byte((value >> 24) & 0xff), 1411 byte((value >> 16) & 0xff), 1412 byte((value >> 8) & 0xff), 1413 byte(value & 0xff)} 1414 1415 // This accounts for the end of data, where the original data length is not a multiple of 4. 1416 // In that case, 0 bytes are assumed but only 1417 decoded = append(decoded, decodedBytes[:toWrite]...) 1418 } 1419 1420 common.Log.Trace("ASCII85, encoded: % X", encoded) 1421 common.Log.Trace("ASCII85, decoded: % X", decoded) 1422 1423 return decoded, nil 1424 } 1425 1426 // ASCII85 stream decoding. 1427 func (this *ASCII85Encoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 1428 return this.DecodeBytes(streamObj.Stream) 1429 } 1430 1431 // Convert a base 256 number to a series of base 85 values (5 codes). 1432 // 85^5 = 4437053125 > 256^4 = 4294967296 1433 // So 5 base-85 numbers will always be enough to cover 4 base-256 numbers. 1434 // The base 256 value is already converted to an uint32 value. 
1435 func (this *ASCII85Encoder) base256Tobase85(base256val uint32) [5]byte { 1436 base85 := [5]byte{0, 0, 0, 0, 0} 1437 remainder := base256val 1438 for i := 0; i < 5; i++ { 1439 divider := uint32(1) 1440 for j := 0; j < 4-i; j++ { 1441 divider *= 85 1442 } 1443 val := remainder / divider 1444 remainder = remainder % divider 1445 base85[i] = byte(val) 1446 } 1447 return base85 1448 } 1449 1450 // Encode data into ASCII85 encoded format. 1451 func (this *ASCII85Encoder) EncodeBytes(data []byte) ([]byte, error) { 1452 var encoded bytes.Buffer 1453 1454 for i := 0; i < len(data); i += 4 { 1455 b1 := data[i] 1456 n := 1 1457 1458 b2 := byte(0) 1459 if i+1 < len(data) { 1460 b2 = data[i+1] 1461 n++ 1462 } 1463 1464 b3 := byte(0) 1465 if i+2 < len(data) { 1466 b3 = data[i+2] 1467 n++ 1468 } 1469 1470 b4 := byte(0) 1471 if i+3 < len(data) { 1472 b4 = data[i+3] 1473 n++ 1474 } 1475 1476 // Convert to a uint32 number. 1477 base256 := (uint32(b1) << 24) | (uint32(b2) << 16) | (uint32(b3) << 8) | uint32(b4) 1478 if base256 == 0 { 1479 encoded.WriteByte('z') 1480 } else { 1481 base85vals := this.base256Tobase85(base256) 1482 for _, val := range base85vals[:n+1] { 1483 encoded.WriteByte(val + '!') 1484 } 1485 } 1486 } 1487 1488 // EOD. 1489 encoded.WriteString("~>") 1490 return encoded.Bytes(), nil 1491 } 1492 1493 // 1494 // Raw encoder/decoder (no encoding, pass through) 1495 // 1496 type RawEncoder struct{} 1497 1498 func NewRawEncoder() *RawEncoder { 1499 return &RawEncoder{} 1500 } 1501 1502 func (this *RawEncoder) GetFilterName() string { 1503 return StreamEncodingFilterNameRaw 1504 } 1505 1506 func (this *RawEncoder) MakeDecodeParams() PdfObject { 1507 return nil 1508 } 1509 1510 // Make a new instance of an encoding dictionary for a stream object. 
1511 func (this *RawEncoder) MakeStreamDict() *PdfObjectDictionary { 1512 return MakeDict() 1513 } 1514 1515 func (this *RawEncoder) DecodeBytes(encoded []byte) ([]byte, error) { 1516 return encoded, nil 1517 } 1518 1519 func (this *RawEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 1520 return streamObj.Stream, nil 1521 } 1522 1523 func (this *RawEncoder) EncodeBytes(data []byte) ([]byte, error) { 1524 return data, nil 1525 } 1526 1527 // 1528 // CCITTFax encoder/decoder (dummy, for now) 1529 // 1530 type CCITTFaxEncoder struct{} 1531 1532 func NewCCITTFaxEncoder() *CCITTFaxEncoder { 1533 return &CCITTFaxEncoder{} 1534 } 1535 1536 func (this *CCITTFaxEncoder) GetFilterName() string { 1537 return StreamEncodingFilterNameCCITTFax 1538 } 1539 1540 func (this *CCITTFaxEncoder) MakeDecodeParams() PdfObject { 1541 return nil 1542 } 1543 1544 // Make a new instance of an encoding dictionary for a stream object. 1545 func (this *CCITTFaxEncoder) MakeStreamDict() *PdfObjectDictionary { 1546 return MakeDict() 1547 } 1548 1549 func (this *CCITTFaxEncoder) DecodeBytes(encoded []byte) ([]byte, error) { 1550 common.Log.Debug("Error: Attempting to use unsupported encoding %s", this.GetFilterName()) 1551 return encoded, ErrNoCCITTFaxDecode 1552 } 1553 1554 func (this *CCITTFaxEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 1555 common.Log.Debug("Error: Attempting to use unsupported encoding %s", this.GetFilterName()) 1556 return streamObj.Stream, ErrNoCCITTFaxDecode 1557 } 1558 1559 func (this *CCITTFaxEncoder) EncodeBytes(data []byte) ([]byte, error) { 1560 common.Log.Debug("Error: Attempting to use unsupported encoding %s", this.GetFilterName()) 1561 return data, ErrNoCCITTFaxDecode 1562 } 1563 1564 // 1565 // JBIG2 encoder/decoder (dummy, for now) 1566 // 1567 type JBIG2Encoder struct{} 1568 1569 func NewJBIG2Encoder() *JBIG2Encoder { 1570 return &JBIG2Encoder{} 1571 } 1572 1573 func (this *JBIG2Encoder) GetFilterName() string { 1574 
return StreamEncodingFilterNameJBIG2 1575 } 1576 1577 func (this *JBIG2Encoder) MakeDecodeParams() PdfObject { 1578 return nil 1579 } 1580 1581 // Make a new instance of an encoding dictionary for a stream object. 1582 func (this *JBIG2Encoder) MakeStreamDict() *PdfObjectDictionary { 1583 return MakeDict() 1584 } 1585 1586 func (this *JBIG2Encoder) DecodeBytes(encoded []byte) ([]byte, error) { 1587 common.Log.Debug("Error: Attempting to use unsupported encoding %s", this.GetFilterName()) 1588 return encoded, ErrNoJBIG2Decode 1589 } 1590 1591 func (this *JBIG2Encoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 1592 common.Log.Debug("Error: Attempting to use unsupported encoding %s", this.GetFilterName()) 1593 return streamObj.Stream, ErrNoJBIG2Decode 1594 } 1595 1596 func (this *JBIG2Encoder) EncodeBytes(data []byte) ([]byte, error) { 1597 common.Log.Debug("Error: Attempting to use unsupported encoding %s", this.GetFilterName()) 1598 return data, ErrNoJBIG2Decode 1599 } 1600 1601 // 1602 // JPX encoder/decoder (dummy, for now) 1603 // 1604 type JPXEncoder struct{} 1605 1606 func NewJPXEncoder() *JPXEncoder { 1607 return &JPXEncoder{} 1608 } 1609 1610 func (this *JPXEncoder) GetFilterName() string { 1611 return StreamEncodingFilterNameJPX 1612 } 1613 1614 func (this *JPXEncoder) MakeDecodeParams() PdfObject { 1615 return nil 1616 } 1617 1618 // Make a new instance of an encoding dictionary for a stream object. 
1619 func (this *JPXEncoder) MakeStreamDict() *PdfObjectDictionary { 1620 return MakeDict() 1621 } 1622 1623 func (this *JPXEncoder) DecodeBytes(encoded []byte) ([]byte, error) { 1624 common.Log.Debug("Error: Attempting to use unsupported encoding %s", this.GetFilterName()) 1625 return encoded, ErrNoJPXDecode 1626 } 1627 1628 func (this *JPXEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 1629 common.Log.Debug("Error: Attempting to use unsupported encoding %s", this.GetFilterName()) 1630 return streamObj.Stream, ErrNoJPXDecode 1631 } 1632 1633 func (this *JPXEncoder) EncodeBytes(data []byte) ([]byte, error) { 1634 common.Log.Debug("Error: Attempting to use unsupported encoding %s", this.GetFilterName()) 1635 return data, ErrNoJPXDecode 1636 } 1637 1638 // 1639 // Multi encoder: support serial encoding. 1640 // 1641 type MultiEncoder struct { 1642 // Encoders in the order that they are to be applied. 1643 encoders []StreamEncoder 1644 } 1645 1646 func NewMultiEncoder() *MultiEncoder { 1647 encoder := MultiEncoder{} 1648 encoder.encoders = []StreamEncoder{} 1649 1650 return &encoder 1651 } 1652 1653 func newMultiEncoderFromStream(streamObj *PdfObjectStream) (*MultiEncoder, error) { 1654 mencoder := NewMultiEncoder() 1655 1656 encDict := streamObj.PdfObjectDictionary 1657 if encDict == nil { 1658 // No encoding dictionary. 1659 return mencoder, nil 1660 } 1661 1662 // Prepare the decode params array (one for each filter type) 1663 // Optional, not always present. 
1664 var decodeParamsDict *PdfObjectDictionary 1665 decodeParamsArray := []PdfObject{} 1666 obj := encDict.Get("DecodeParms") 1667 if obj != nil { 1668 // If it is a dictionary, assume it applies to all 1669 dict, isDict := obj.(*PdfObjectDictionary) 1670 if isDict { 1671 decodeParamsDict = dict 1672 } 1673 1674 // If it is an array, assume there is one for each 1675 arr, isArray := obj.(*PdfObjectArray) 1676 if isArray { 1677 for _, dictObj := range *arr { 1678 dictObj = TraceToDirectObject(dictObj) 1679 if dict, is := dictObj.(*PdfObjectDictionary); is { 1680 decodeParamsArray = append(decodeParamsArray, dict) 1681 } else { 1682 decodeParamsArray = append(decodeParamsArray, MakeDict()) 1683 } 1684 } 1685 } 1686 } 1687 1688 obj = encDict.Get("Filter") 1689 if obj == nil { 1690 return nil, fmt.Errorf("Filter missing") 1691 } 1692 1693 array, ok := obj.(*PdfObjectArray) 1694 if !ok { 1695 return nil, fmt.Errorf("Multi filter can only be made from array") 1696 } 1697 1698 for idx, obj := range *array { 1699 name, ok := obj.(*PdfObjectName) 1700 if !ok { 1701 return nil, fmt.Errorf("Multi filter array element not a name") 1702 } 1703 1704 var dp PdfObject 1705 1706 // If decode params dict is set, use it. Otherwise take from array.. 1707 if decodeParamsDict != nil { 1708 dp = decodeParamsDict 1709 } else { 1710 // Only get the dp if provided. Oftentimes there is no decode params dict 1711 // provided. 1712 if len(decodeParamsArray) > 0 { 1713 if idx >= len(decodeParamsArray) { 1714 return nil, fmt.Errorf("Missing elements in decode params array") 1715 } 1716 dp = decodeParamsArray[idx] 1717 } 1718 } 1719 1720 var dParams *PdfObjectDictionary 1721 if dict, is := dp.(*PdfObjectDictionary); is { 1722 dParams = dict 1723 } 1724 1725 common.Log.Trace("Next name: %s, dp: %v, dParams: %v", *name, dp, dParams) 1726 if *name == StreamEncodingFilterNameFlate { 1727 // XXX: need to separate out the DecodeParms.. 
1728 encoder, err := newFlateEncoderFromStream(streamObj, dParams) 1729 if err != nil { 1730 return nil, err 1731 } 1732 mencoder.AddEncoder(encoder) 1733 } else if *name == StreamEncodingFilterNameLZW { 1734 encoder, err := newLZWEncoderFromStream(streamObj, dParams) 1735 if err != nil { 1736 return nil, err 1737 } 1738 mencoder.AddEncoder(encoder) 1739 } else if *name == StreamEncodingFilterNameASCIIHex { 1740 encoder := NewASCIIHexEncoder() 1741 mencoder.AddEncoder(encoder) 1742 } else if *name == StreamEncodingFilterNameASCII85 { 1743 encoder := NewASCII85Encoder() 1744 mencoder.AddEncoder(encoder) 1745 } else if *name == StreamEncodingFilterNameDCT { 1746 encoder, err := newDCTEncoderFromStream(streamObj, mencoder) 1747 if err != nil { 1748 return nil, err 1749 } 1750 mencoder.AddEncoder(encoder) 1751 common.Log.Trace("Added DCT encoder...") 1752 common.Log.Trace("Multi encoder: %#v", mencoder) 1753 } else { 1754 common.Log.Error("Unsupported filter %s", *name) 1755 return nil, fmt.Errorf("Invalid filter in multi filter array") 1756 } 1757 } 1758 1759 return mencoder, nil 1760 } 1761 1762 func (this *MultiEncoder) GetFilterName() string { 1763 name := "" 1764 for idx, encoder := range this.encoders { 1765 name += encoder.GetFilterName() 1766 if idx < len(this.encoders)-1 { 1767 name += " " 1768 } 1769 } 1770 return name 1771 } 1772 1773 func (this *MultiEncoder) MakeDecodeParams() PdfObject { 1774 if len(this.encoders) == 0 { 1775 return nil 1776 } 1777 1778 if len(this.encoders) == 1 { 1779 return this.encoders[0].MakeDecodeParams() 1780 } 1781 1782 array := PdfObjectArray{} 1783 for _, encoder := range this.encoders { 1784 decodeParams := encoder.MakeDecodeParams() 1785 if decodeParams == nil { 1786 array = append(array, MakeNull()) 1787 } else { 1788 array = append(array, decodeParams) 1789 } 1790 } 1791 1792 return &array 1793 } 1794 1795 func (this *MultiEncoder) AddEncoder(encoder StreamEncoder) { 1796 this.encoders = append(this.encoders, encoder) 1797 
} 1798 1799 func (this *MultiEncoder) MakeStreamDict() *PdfObjectDictionary { 1800 dict := MakeDict() 1801 dict.Set("Filter", MakeName(this.GetFilterName())) 1802 1803 // Pass all values from children, except Filter and DecodeParms. 1804 for _, encoder := range this.encoders { 1805 encDict := encoder.MakeStreamDict() 1806 for _, key := range encDict.Keys() { 1807 val := encDict.Get(key) 1808 if key != "Filter" && key != "DecodeParms" { 1809 dict.Set(key, val) 1810 } 1811 } 1812 } 1813 1814 // Make the decode params array or dict. 1815 decodeParams := this.MakeDecodeParams() 1816 if decodeParams != nil { 1817 dict.Set("DecodeParms", decodeParams) 1818 } 1819 1820 return dict 1821 } 1822 1823 func (this *MultiEncoder) DecodeBytes(encoded []byte) ([]byte, error) { 1824 decoded := encoded 1825 var err error 1826 // Apply in forward order. 1827 for _, encoder := range this.encoders { 1828 common.Log.Trace("Multi Encoder Decode: Applying Filter: %v %T", encoder, encoder) 1829 1830 decoded, err = encoder.DecodeBytes(decoded) 1831 if err != nil { 1832 return nil, err 1833 } 1834 } 1835 1836 return decoded, nil 1837 } 1838 1839 func (this *MultiEncoder) DecodeStream(streamObj *PdfObjectStream) ([]byte, error) { 1840 return this.DecodeBytes(streamObj.Stream) 1841 } 1842 1843 func (this *MultiEncoder) EncodeBytes(data []byte) ([]byte, error) { 1844 encoded := data 1845 var err error 1846 1847 // Apply in inverse order. 1848 for i := len(this.encoders) - 1; i >= 0; i-- { 1849 encoder := this.encoders[i] 1850 encoded, err = encoder.EncodeBytes(encoded) 1851 if err != nil { 1852 return nil, err 1853 } 1854 } 1855 1856 return encoded, nil 1857 }