github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/contentstream/encoding.go (about) 1 /* 2 * This file is subject to the terms and conditions defined in 3 * file 'LICENSE.md', which is part of this source code package. 4 */ 5 6 package contentstream 7 8 import ( 9 "bytes" 10 "errors" 11 "fmt" 12 gocolor "image/color" 13 "image/jpeg" 14 15 "github.com/unidoc/unidoc/common" 16 "github.com/unidoc/unidoc/pdf/core" 17 ) 18 19 // Creates the encoder for the inline image's Filter and DecodeParms. 20 func newEncoderFromInlineImage(inlineImage *ContentStreamInlineImage) (core.StreamEncoder, error) { 21 if inlineImage.Filter == nil { 22 // No filter, return raw data back. 23 return core.NewRawEncoder(), nil 24 } 25 26 // The filter should be a name or an array with a list of filter names. 27 filterName, ok := inlineImage.Filter.(*core.PdfObjectName) 28 if !ok { 29 array, ok := inlineImage.Filter.(*core.PdfObjectArray) 30 if !ok { 31 return nil, fmt.Errorf("Filter not a Name or Array object") 32 } 33 if len(*array) == 0 { 34 // Empty array -> indicates raw filter (no filter). 35 return core.NewRawEncoder(), nil 36 } 37 38 if len(*array) != 1 { 39 menc, err := newMultiEncoderFromInlineImage(inlineImage) 40 if err != nil { 41 common.Log.Error("Failed creating multi encoder: %v", err) 42 return nil, err 43 } 44 45 common.Log.Trace("Multi enc: %s\n", menc) 46 return menc, nil 47 } 48 49 // Single element. 50 filterObj := (*array)[0] 51 filterName, ok = filterObj.(*core.PdfObjectName) 52 if !ok { 53 return nil, fmt.Errorf("Filter array member not a Name object") 54 } 55 } 56 57 // From Table 94 p. 224 (PDF32000_2008): 58 // Additional Abbreviations in an Inline Image Object: 59 60 switch *filterName { 61 case "AHx", "ASCIIHexDecode": 62 return core.NewASCIIHexEncoder(), nil 63 case "A85", "ASCII85Decode": 64 return core.NewASCII85Encoder(), nil 65 case "DCT", "DCTDecode": 66 return newDCTEncoderFromInlineImage(inlineImage) 67 case "Fl", "FlateDecode": 68 return newFlateEncoderFromInlineImage(inlineImage, nil) 69 case "LZW", "LZWDecode": 70 return newLZWEncoderFromInlineImage(inlineImage, nil) 71 case "CCF", "CCITTFaxDecode": 72 return core.NewCCITTFaxEncoder(), nil 73 case "RL", "RunLengthDecode": 74 return core.NewRunLengthEncoder(), nil 75 default: 76 common.Log.Debug("Unsupported inline image encoding filter name : %s", *filterName) 77 return nil, errors.New("Unsupported inline encoding method") 78 } 79 } 80 81 // Create a new flate decoder from an inline image object, getting all the encoding parameters 82 // from the DecodeParms stream object dictionary entry that can be provided optionally, usually 83 // only when a multi filter is used. 84 func newFlateEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decodeParams *core.PdfObjectDictionary) (*core.FlateEncoder, error) { 85 encoder := core.NewFlateEncoder() 86 87 // If decodeParams not provided, see if we can get from the stream. 88 if decodeParams == nil { 89 obj := inlineImage.DecodeParms 90 if obj != nil { 91 dp, isDict := obj.(*core.PdfObjectDictionary) 92 if !isDict { 93 common.Log.Debug("Error: DecodeParms not a dictionary (%T)", obj) 94 return nil, fmt.Errorf("Invalid DecodeParms") 95 } 96 decodeParams = dp 97 } 98 } 99 if decodeParams == nil { 100 // Can safely return here if no decode params, as the following depend on the decode params. 101 return encoder, nil 102 } 103 104 common.Log.Trace("decode params: %s", decodeParams.String()) 105 obj := decodeParams.Get("Predictor") 106 if obj == nil { 107 common.Log.Debug("Error: Predictor missing from DecodeParms - Continue with default (1)") 108 } else { 109 predictor, ok := obj.(*core.PdfObjectInteger) 110 if !ok { 111 common.Log.Debug("Error: Predictor specified but not numeric (%T)", obj) 112 return nil, fmt.Errorf("Invalid Predictor") 113 } 114 encoder.Predictor = int(*predictor) 115 } 116 117 // Bits per component. Use default if not specified (8). 118 obj = decodeParams.Get("BitsPerComponent") 119 if obj != nil { 120 bpc, ok := obj.(*core.PdfObjectInteger) 121 if !ok { 122 common.Log.Debug("ERROR: Invalid BitsPerComponent") 123 return nil, fmt.Errorf("Invalid BitsPerComponent") 124 } 125 encoder.BitsPerComponent = int(*bpc) 126 } 127 128 if encoder.Predictor > 1 { 129 // Columns. 130 encoder.Columns = 1 131 obj = decodeParams.Get("Columns") 132 if obj != nil { 133 columns, ok := obj.(*core.PdfObjectInteger) 134 if !ok { 135 return nil, fmt.Errorf("Predictor column invalid") 136 } 137 138 encoder.Columns = int(*columns) 139 } 140 141 // Colors. 142 // Number of interleaved color components per sample (Default 1 if not specified) 143 encoder.Colors = 1 144 obj := decodeParams.Get("Colors") 145 if obj != nil { 146 colors, ok := obj.(*core.PdfObjectInteger) 147 if !ok { 148 return nil, fmt.Errorf("Predictor colors not an integer") 149 } 150 encoder.Colors = int(*colors) 151 } 152 } 153 154 return encoder, nil 155 } 156 157 // Create a new LZW encoder/decoder based on an inline image object, getting all the encoding parameters 158 // from the DecodeParms stream object dictionary entry. 159 func newLZWEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decodeParams *core.PdfObjectDictionary) (*core.LZWEncoder, error) { 160 // Start with default settings. 161 encoder := core.NewLZWEncoder() 162 163 // If decodeParams not provided, see if we can get from the inline image directly. 164 if decodeParams == nil { 165 if inlineImage.DecodeParms != nil { 166 dp, isDict := inlineImage.DecodeParms.(*core.PdfObjectDictionary) 167 if !isDict { 168 common.Log.Debug("Error: DecodeParms not a dictionary (%T)", inlineImage.DecodeParms) 169 return nil, fmt.Errorf("Invalid DecodeParms") 170 } 171 decodeParams = dp 172 } 173 } 174 175 if decodeParams == nil { 176 // No decode parameters. Can safely return here if not set as the following options 177 // are related to the decode Params. 178 return encoder, nil 179 } 180 181 // The EarlyChange indicates when to increase code length, as different 182 // implementations use a different mechanisms. Essentially this chooses 183 // which LZW implementation to use. 184 // The default is 1 (one code early) 185 // 186 // The EarlyChange parameter is specified in the object stream dictionary for regular streams, 187 // but it is not specified explicitly where to check for it in the case of inline images. 188 // We will check in the decodeParms for now, we can adjust later if we come across cases of this. 189 obj := decodeParams.Get("EarlyChange") 190 if obj != nil { 191 earlyChange, ok := obj.(*core.PdfObjectInteger) 192 if !ok { 193 common.Log.Debug("Error: EarlyChange specified but not numeric (%T)", obj) 194 return nil, fmt.Errorf("Invalid EarlyChange") 195 } 196 if *earlyChange != 0 && *earlyChange != 1 { 197 return nil, fmt.Errorf("Invalid EarlyChange value (not 0 or 1)") 198 } 199 200 encoder.EarlyChange = int(*earlyChange) 201 } else { 202 encoder.EarlyChange = 1 // default 203 } 204 205 obj = decodeParams.Get("Predictor") 206 if obj != nil { 207 predictor, ok := obj.(*core.PdfObjectInteger) 208 if !ok { 209 common.Log.Debug("Error: Predictor specified but not numeric (%T)", obj) 210 return nil, fmt.Errorf("Invalid Predictor") 211 } 212 encoder.Predictor = int(*predictor) 213 } 214 215 // Bits per component. Use default if not specified (8). 216 obj = decodeParams.Get("BitsPerComponent") 217 if obj != nil { 218 bpc, ok := obj.(*core.PdfObjectInteger) 219 if !ok { 220 common.Log.Debug("ERROR: Invalid BitsPerComponent") 221 return nil, fmt.Errorf("Invalid BitsPerComponent") 222 } 223 encoder.BitsPerComponent = int(*bpc) 224 } 225 226 if encoder.Predictor > 1 { 227 // Columns. 228 encoder.Columns = 1 229 obj = decodeParams.Get("Columns") 230 if obj != nil { 231 columns, ok := obj.(*core.PdfObjectInteger) 232 if !ok { 233 return nil, fmt.Errorf("Predictor column invalid") 234 } 235 236 encoder.Columns = int(*columns) 237 } 238 239 // Colors. 240 // Number of interleaved color components per sample (Default 1 if not specified) 241 encoder.Colors = 1 242 obj = decodeParams.Get("Colors") 243 if obj != nil { 244 colors, ok := obj.(*core.PdfObjectInteger) 245 if !ok { 246 return nil, fmt.Errorf("Predictor colors not an integer") 247 } 248 encoder.Colors = int(*colors) 249 } 250 } 251 252 common.Log.Trace("decode params: %s", decodeParams.String()) 253 return encoder, nil 254 } 255 256 // Create a new DCT encoder/decoder based on an inline image, getting all the encoding parameters 257 // from the stream object dictionary entry and the image data itself. 258 func newDCTEncoderFromInlineImage(inlineImage *ContentStreamInlineImage) (*core.DCTEncoder, error) { 259 // Start with default settings. 260 encoder := core.NewDCTEncoder() 261 262 bufReader := bytes.NewReader(inlineImage.stream) 263 264 cfg, err := jpeg.DecodeConfig(bufReader) 265 //img, _, err := goimage.Decode(bufReader) 266 if err != nil { 267 common.Log.Debug("Error decoding file: %s", err) 268 return nil, err 269 } 270 271 switch cfg.ColorModel { 272 case gocolor.RGBAModel: 273 encoder.BitsPerComponent = 8 274 encoder.ColorComponents = 3 // alpha is not included in pdf. 275 case gocolor.RGBA64Model: 276 encoder.BitsPerComponent = 16 277 encoder.ColorComponents = 3 278 case gocolor.GrayModel: 279 encoder.BitsPerComponent = 8 280 encoder.ColorComponents = 1 281 case gocolor.Gray16Model: 282 encoder.BitsPerComponent = 16 283 encoder.ColorComponents = 1 284 case gocolor.CMYKModel: 285 encoder.BitsPerComponent = 8 286 encoder.ColorComponents = 4 287 case gocolor.YCbCrModel: 288 // YCbCr is not supported by PDF, but it could be a different colorspace 289 // with 3 components. Would be specified by the ColorSpace entry. 290 encoder.BitsPerComponent = 8 291 encoder.ColorComponents = 3 292 default: 293 return nil, errors.New("Unsupported color model") 294 } 295 encoder.Width = cfg.Width 296 encoder.Height = cfg.Height 297 common.Log.Trace("DCT Encoder: %+v", encoder) 298 299 return encoder, nil 300 } 301 302 // Create a new multi-filter encoder/decoder based on an inline image, getting all the encoding parameters 303 // from the filter specification and the DecodeParms (DP) dictionaries. 304 func newMultiEncoderFromInlineImage(inlineImage *ContentStreamInlineImage) (*core.MultiEncoder, error) { 305 mencoder := core.NewMultiEncoder() 306 307 // Prepare the decode params array (one for each filter type) 308 // Optional, not always present. 309 var decodeParamsDict *core.PdfObjectDictionary 310 decodeParamsArray := []core.PdfObject{} 311 if obj := inlineImage.DecodeParms; obj != nil { 312 // If it is a dictionary, assume it applies to all 313 dict, isDict := obj.(*core.PdfObjectDictionary) 314 if isDict { 315 decodeParamsDict = dict 316 } 317 318 // If it is an array, assume there is one for each 319 arr, isArray := obj.(*core.PdfObjectArray) 320 if isArray { 321 for _, dictObj := range *arr { 322 if dict, is := dictObj.(*core.PdfObjectDictionary); is { 323 decodeParamsArray = append(decodeParamsArray, dict) 324 } else { 325 decodeParamsArray = append(decodeParamsArray, nil) 326 } 327 } 328 } 329 } 330 331 obj := inlineImage.Filter 332 if obj == nil { 333 return nil, fmt.Errorf("Filter missing") 334 } 335 336 array, ok := obj.(*core.PdfObjectArray) 337 if !ok { 338 return nil, fmt.Errorf("Multi filter can only be made from array") 339 } 340 341 for idx, obj := range *array { 342 name, ok := obj.(*core.PdfObjectName) 343 if !ok { 344 return nil, fmt.Errorf("Multi filter array element not a name") 345 } 346 347 var dp core.PdfObject 348 349 // If decode params dict is set, use it. Otherwise take from array.. 350 if decodeParamsDict != nil { 351 dp = decodeParamsDict 352 } else { 353 // Only get the dp if provided. Oftentimes there is no decode params dict 354 // provided. 355 if len(decodeParamsArray) > 0 { 356 if idx >= len(decodeParamsArray) { 357 return nil, fmt.Errorf("Missing elements in decode params array") 358 } 359 dp = decodeParamsArray[idx] 360 } 361 } 362 363 var dParams *core.PdfObjectDictionary 364 if dict, is := dp.(*core.PdfObjectDictionary); is { 365 dParams = dict 366 } 367 368 if *name == core.StreamEncodingFilterNameFlate || *name == "Fl" { 369 // XXX: need to separate out the DecodeParms.. 370 encoder, err := newFlateEncoderFromInlineImage(inlineImage, dParams) 371 if err != nil { 372 return nil, err 373 } 374 mencoder.AddEncoder(encoder) 375 } else if *name == core.StreamEncodingFilterNameLZW { 376 encoder, err := newLZWEncoderFromInlineImage(inlineImage, dParams) 377 if err != nil { 378 return nil, err 379 } 380 mencoder.AddEncoder(encoder) 381 } else if *name == core.StreamEncodingFilterNameASCIIHex { 382 encoder := core.NewASCIIHexEncoder() 383 mencoder.AddEncoder(encoder) 384 } else if *name == core.StreamEncodingFilterNameASCII85 || *name == "A85" { 385 encoder := core.NewASCII85Encoder() 386 mencoder.AddEncoder(encoder) 387 } else { 388 common.Log.Error("Unsupported filter %s", *name) 389 return nil, fmt.Errorf("Invalid filter in multi filter array") 390 } 391 } 392 393 return mencoder, nil 394 }