github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/contentstream/inline-image.go (about) 1 /* 2 * This file is subject to the terms and conditions defined in 3 * file 'LICENSE.md', which is part of this source code package. 4 */ 5 6 package contentstream 7 8 import ( 9 "bytes" 10 "errors" 11 "fmt" 12 13 "github.com/unidoc/unidoc/common" 14 "github.com/unidoc/unidoc/pdf/core" 15 "github.com/unidoc/unidoc/pdf/model" 16 ) 17 18 // A representation of an inline image in a Content stream. Everything between the BI and EI operands. 19 // ContentStreamInlineImage implements the core.PdfObject interface although strictly it is not a PDF object. 20 type ContentStreamInlineImage struct { 21 BitsPerComponent core.PdfObject 22 ColorSpace core.PdfObject 23 Decode core.PdfObject 24 DecodeParms core.PdfObject 25 Filter core.PdfObject 26 Height core.PdfObject 27 ImageMask core.PdfObject 28 Intent core.PdfObject 29 Interpolate core.PdfObject 30 Width core.PdfObject 31 stream []byte 32 } 33 34 // Make a new content stream inline image object from an image. 35 func NewInlineImageFromImage(img model.Image, encoder core.StreamEncoder) (*ContentStreamInlineImage, error) { 36 if encoder == nil { 37 encoder = core.NewRawEncoder() 38 } 39 40 inlineImage := ContentStreamInlineImage{} 41 if img.ColorComponents == 1 { 42 inlineImage.ColorSpace = core.MakeName("G") // G short for DeviceGray 43 } else if img.ColorComponents == 3 { 44 inlineImage.ColorSpace = core.MakeName("RGB") // RGB short for DeviceRGB 45 } else if img.ColorComponents == 4 { 46 inlineImage.ColorSpace = core.MakeName("CMYK") // CMYK short for DeviceCMYK 47 } else { 48 common.Log.Debug("Invalid number of color components for inline image: %d", img.ColorComponents) 49 return nil, errors.New("Invalid number of color components") 50 } 51 inlineImage.BitsPerComponent = core.MakeInteger(img.BitsPerComponent) 52 inlineImage.Width = core.MakeInteger(img.Width) 53 inlineImage.Height = core.MakeInteger(img.Height) 54 55 encoded, err := encoder.EncodeBytes(img.Data) 56 if err != nil { 57 return nil, err 58 } 59 60 inlineImage.stream = encoded 61 62 filterName := encoder.GetFilterName() 63 if filterName != core.StreamEncodingFilterNameRaw { 64 inlineImage.Filter = core.MakeName(filterName) 65 } 66 // XXX/FIXME: Add decode params? 67 68 return &inlineImage, nil 69 } 70 71 func (this *ContentStreamInlineImage) String() string { 72 s := fmt.Sprintf("InlineImage(len=%d)\n", len(this.stream)) 73 if this.BitsPerComponent != nil { 74 s += "- BPC " + this.BitsPerComponent.DefaultWriteString() + "\n" 75 } 76 if this.ColorSpace != nil { 77 s += "- CS " + this.ColorSpace.DefaultWriteString() + "\n" 78 } 79 if this.Decode != nil { 80 s += "- D " + this.Decode.DefaultWriteString() + "\n" 81 } 82 if this.DecodeParms != nil { 83 s += "- DP " + this.DecodeParms.DefaultWriteString() + "\n" 84 } 85 if this.Filter != nil { 86 s += "- F " + this.Filter.DefaultWriteString() + "\n" 87 } 88 if this.Height != nil { 89 s += "- H " + this.Height.DefaultWriteString() + "\n" 90 } 91 if this.ImageMask != nil { 92 s += "- IM " + this.ImageMask.DefaultWriteString() + "\n" 93 } 94 if this.Intent != nil { 95 s += "- Intent " + this.Intent.DefaultWriteString() + "\n" 96 } 97 if this.Interpolate != nil { 98 s += "- I " + this.Interpolate.DefaultWriteString() + "\n" 99 } 100 if this.Width != nil { 101 s += "- W " + this.Width.DefaultWriteString() + "\n" 102 } 103 return s 104 } 105 106 func (this *ContentStreamInlineImage) DefaultWriteString() string { 107 var output bytes.Buffer 108 109 // We do not start with "BI" as that is the operand and is written out separately. 110 // Write out the parameters 111 s := "" 112 113 if this.BitsPerComponent != nil { 114 s += "/BPC " + this.BitsPerComponent.DefaultWriteString() + "\n" 115 } 116 if this.ColorSpace != nil { 117 s += "/CS " + this.ColorSpace.DefaultWriteString() + "\n" 118 } 119 if this.Decode != nil { 120 s += "/D " + this.Decode.DefaultWriteString() + "\n" 121 } 122 if this.DecodeParms != nil { 123 s += "/DP " + this.DecodeParms.DefaultWriteString() + "\n" 124 } 125 if this.Filter != nil { 126 s += "/F " + this.Filter.DefaultWriteString() + "\n" 127 } 128 if this.Height != nil { 129 s += "/H " + this.Height.DefaultWriteString() + "\n" 130 } 131 if this.ImageMask != nil { 132 s += "/IM " + this.ImageMask.DefaultWriteString() + "\n" 133 } 134 if this.Intent != nil { 135 s += "/Intent " + this.Intent.DefaultWriteString() + "\n" 136 } 137 if this.Interpolate != nil { 138 s += "/I " + this.Interpolate.DefaultWriteString() + "\n" 139 } 140 if this.Width != nil { 141 s += "/W " + this.Width.DefaultWriteString() + "\n" 142 } 143 output.WriteString(s) 144 145 output.WriteString("ID ") 146 output.Write(this.stream) 147 output.WriteString("\nEI\n") 148 149 return output.String() 150 } 151 152 func (this *ContentStreamInlineImage) GetColorSpace(resources *model.PdfPageResources) (model.PdfColorspace, error) { 153 if this.ColorSpace == nil { 154 // Default. 155 common.Log.Debug("Inline image not having specified colorspace, assuming Gray") 156 return model.NewPdfColorspaceDeviceGray(), nil 157 } 158 159 // If is an array, then could be an indexed colorspace. 160 if arr, isArr := this.ColorSpace.(*core.PdfObjectArray); isArr { 161 return newIndexedColorspaceFromPdfObject(arr) 162 } 163 164 name, ok := this.ColorSpace.(*core.PdfObjectName) 165 if !ok { 166 common.Log.Debug("Error: Invalid object type (%T;%+v)", this.ColorSpace, this.ColorSpace) 167 return nil, errors.New("Type check error") 168 } 169 170 if *name == "G" || *name == "DeviceGray" { 171 return model.NewPdfColorspaceDeviceGray(), nil 172 } else if *name == "RGB" || *name == "DeviceRGB" { 173 return model.NewPdfColorspaceDeviceRGB(), nil 174 } else if *name == "CMYK" || *name == "DeviceCMYK" { 175 return model.NewPdfColorspaceDeviceCMYK(), nil 176 } else if *name == "I" || *name == "Indexed" { 177 return nil, errors.New("Unsupported Index colorspace") 178 } else { 179 if resources.ColorSpace == nil { 180 // Can also refer to a name in the PDF page resources... 181 common.Log.Debug("Error, unsupported inline image colorspace: %s", *name) 182 return nil, errors.New("Unknown colorspace") 183 } 184 185 cs, has := resources.ColorSpace.Colorspaces[string(*name)] 186 if !has { 187 // Can also refer to a name in the PDF page resources... 188 common.Log.Debug("Error, unsupported inline image colorspace: %s", *name) 189 return nil, errors.New("Unknown colorspace") 190 } 191 192 return cs, nil 193 } 194 195 } 196 197 func (this *ContentStreamInlineImage) GetEncoder() (core.StreamEncoder, error) { 198 return newEncoderFromInlineImage(this) 199 } 200 201 // Is a mask ? 202 // The image mask entry in the image dictionary specifies that the image data shall be used as a stencil 203 // mask for painting in the current color. The mask data is 1bpc, grayscale. 204 func (this *ContentStreamInlineImage) IsMask() (bool, error) { 205 if this.ImageMask != nil { 206 imMask, ok := this.ImageMask.(*core.PdfObjectBool) 207 if !ok { 208 common.Log.Debug("Image mask not a boolean") 209 return false, errors.New("Invalid object type") 210 } 211 212 return bool(*imMask), nil 213 } else { 214 return false, nil 215 } 216 217 } 218 219 // Export the inline image to Image which can be transformed or exported easily. 220 // Page resources are needed to look up colorspace information. 221 func (this *ContentStreamInlineImage) ToImage(resources *model.PdfPageResources) (*model.Image, error) { 222 // Decode the imaging data if encoded. 223 encoder, err := newEncoderFromInlineImage(this) 224 if err != nil { 225 return nil, err 226 } 227 common.Log.Trace("encoder: %+v %T", encoder, encoder) 228 common.Log.Trace("inline image: %+v", this) 229 230 decoded, err := encoder.DecodeBytes(this.stream) 231 if err != nil { 232 return nil, err 233 } 234 235 image := &model.Image{} 236 237 // Height. 238 if this.Height == nil { 239 return nil, errors.New("Height attribute missing") 240 } 241 height, ok := this.Height.(*core.PdfObjectInteger) 242 if !ok { 243 return nil, errors.New("Invalid height") 244 } 245 image.Height = int64(*height) 246 247 // Width. 248 if this.Width == nil { 249 return nil, errors.New("Width attribute missing") 250 } 251 width, ok := this.Width.(*core.PdfObjectInteger) 252 if !ok { 253 return nil, errors.New("Invalid width") 254 } 255 image.Width = int64(*width) 256 257 // Image mask? 258 isMask, err := this.IsMask() 259 if err != nil { 260 return nil, err 261 } 262 263 if isMask { 264 // Masks are grayscale 1bpc. 265 image.BitsPerComponent = 1 266 image.ColorComponents = 1 267 } else { 268 // BPC. 269 if this.BitsPerComponent == nil { 270 common.Log.Debug("Inline Bits per component missing - assuming 8") 271 image.BitsPerComponent = 8 272 } else { 273 bpc, ok := this.BitsPerComponent.(*core.PdfObjectInteger) 274 if !ok { 275 common.Log.Debug("Error invalid bits per component value, type %T", this.BitsPerComponent) 276 return nil, errors.New("BPC Type error") 277 } 278 image.BitsPerComponent = int64(*bpc) 279 } 280 281 // Color components. 282 if this.ColorSpace != nil { 283 cs, err := this.GetColorSpace(resources) 284 if err != nil { 285 return nil, err 286 } 287 image.ColorComponents = cs.GetNumComponents() 288 } else { 289 // Default gray if not specified. 290 common.Log.Debug("Inline Image colorspace not specified - assuming 1 color component") 291 image.ColorComponents = 1 292 } 293 } 294 295 image.Data = decoded 296 297 return image, nil 298 } 299 300 // Parse an inline image from a content stream, both read its properties and binary data. 301 // When called, "BI" has already been read from the stream. This function 302 // finishes reading through "EI" and then returns the ContentStreamInlineImage. 303 func (this *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage, error) { 304 // Reading parameters. 305 im := ContentStreamInlineImage{} 306 307 for { 308 this.skipSpaces() 309 obj, err, isOperand := this.parseObject() 310 if err != nil { 311 return nil, err 312 } 313 314 if !isOperand { 315 // Not an operand.. Read key value properties.. 316 param, ok := obj.(*core.PdfObjectName) 317 if !ok { 318 common.Log.Debug("Invalid inline image property (expecting name) - %T", obj) 319 return nil, fmt.Errorf("Invalid inline image property (expecting name) - %T", obj) 320 } 321 322 valueObj, err, isOperand := this.parseObject() 323 if err != nil { 324 return nil, err 325 } 326 if isOperand { 327 return nil, fmt.Errorf("Not expecting an operand") 328 } 329 330 // From 8.9.7 "Inline Images" p. 223 (PDF32000_2008): 331 // The key-value pairs appearing between the BI and ID operators are analogous to those in the dictionary 332 // portion of an image XObject (though the syntax is different). 333 // Table 93 shows the entries that are valid for an inline image, all of which shall have the same meanings 334 // as in a stream dictionary (see Table 5) or an image dictionary (see Table 89). 335 // Entries other than those listed shall be ignored; in particular, the Type, Subtype, and Length 336 // entries normally found in a stream or image dictionary are unnecessary. 337 // For convenience, the abbreviations shown in the table may be used in place of the fully spelled-out keys. 338 // Table 94 shows additional abbreviations that can be used for the names of colour spaces and filters. 339 340 switch *param { 341 case "BPC", "BitsPerComponent": 342 im.BitsPerComponent = valueObj 343 case "CS", "ColorSpace": 344 im.ColorSpace = valueObj 345 case "D", "Decode": 346 im.Decode = valueObj 347 case "DP", "DecodeParms": 348 im.DecodeParms = valueObj 349 case "F", "Filter": 350 im.Filter = valueObj 351 case "H", "Height": 352 im.Height = valueObj 353 case "IM", "ImageMask": 354 im.ImageMask = valueObj 355 case "Intent": 356 im.Intent = valueObj 357 case "I", "Interpolate": 358 im.Interpolate = valueObj 359 case "W", "Width": 360 im.Width = valueObj 361 default: 362 return nil, fmt.Errorf("Unknown inline image parameter %s", *param) 363 } 364 } 365 366 if isOperand { 367 operand, ok := obj.(*core.PdfObjectString) 368 if !ok { 369 return nil, fmt.Errorf("Failed to read inline image - invalid operand") 370 } 371 372 if *operand == "EI" { 373 // Image fully defined 374 common.Log.Trace("Inline image finished...") 375 return &im, nil 376 } else if *operand == "ID" { 377 // Inline image data. 378 // Should get a single space (0x20) followed by the data and then EI. 379 common.Log.Trace("ID start") 380 381 // Skip the space if its there. 382 b, err := this.reader.Peek(1) 383 if err != nil { 384 return nil, err 385 } 386 if core.IsWhiteSpace(b[0]) { 387 this.reader.Discard(1) 388 } 389 390 // Unfortunately there is no good way to know how many bytes to read since it 391 // depends on the Filter and encoding etc. 392 // Therefore we will simply read until we find "<ws>EI<ws>" where <ws> is whitespace 393 // although of course that could be a part of the data (even if unlikely). 394 im.stream = []byte{} 395 state := 0 396 var skipBytes []byte 397 for { 398 c, err := this.reader.ReadByte() 399 if err != nil { 400 common.Log.Debug("Unable to find end of image EI in inline image data") 401 return nil, err 402 } 403 404 if state == 0 { 405 if core.IsWhiteSpace(c) { 406 skipBytes = []byte{} 407 skipBytes = append(skipBytes, c) 408 state = 1 409 } else { 410 im.stream = append(im.stream, c) 411 } 412 } else if state == 1 { 413 skipBytes = append(skipBytes, c) 414 if c == 'E' { 415 state = 2 416 } else { 417 im.stream = append(im.stream, skipBytes...) 418 skipBytes = []byte{} // Clear. 419 // Need an extra check to decide if we fall back to state 0 or 1. 420 if core.IsWhiteSpace(c) { 421 state = 1 422 } else { 423 state = 0 424 } 425 } 426 } else if state == 2 { 427 skipBytes = append(skipBytes, c) 428 if c == 'I' { 429 state = 3 430 } else { 431 im.stream = append(im.stream, skipBytes...) 432 skipBytes = []byte{} // Clear. 433 state = 0 434 } 435 } else if state == 3 { 436 skipBytes = append(skipBytes, c) 437 if core.IsWhiteSpace(c) { 438 // image data finished. 439 if len(im.stream) > 100 { 440 common.Log.Trace("Image stream (%d): % x ...", len(im.stream), im.stream[:100]) 441 } else { 442 common.Log.Trace("Image stream (%d): % x", len(im.stream), im.stream) 443 } 444 // Exit point. 445 return &im, nil 446 } else { 447 // Seems like "<ws>EI" was part of the data. 448 im.stream = append(im.stream, skipBytes...) 449 skipBytes = []byte{} // Clear. 450 state = 0 451 } 452 } 453 } 454 // Never reached (exit point is at end of EI). 455 } 456 } 457 } 458 }