github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/contentstream/inline-image.go (about)

     1  /*
     2   * This file is subject to the terms and conditions defined in
     3   * file 'LICENSE.md', which is part of this source code package.
     4   */
     5  
     6  package contentstream
     7  
     8  import (
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  
    13  	"github.com/unidoc/unidoc/common"
    14  	"github.com/unidoc/unidoc/pdf/core"
    15  	"github.com/unidoc/unidoc/pdf/model"
    16  )
    17  
// ContentStreamInlineImage is a representation of an inline image in a content stream: everything
// between the BI and EI operators.
// ContentStreamInlineImage implements the core.PdfObject interface although strictly it is not a PDF object.
//
// Each exported field holds the value parsed for the corresponding inline image key (abbreviated or
// fully spelled out); a nil field means the key was not present.
type ContentStreamInlineImage struct {
	BitsPerComponent core.PdfObject // Key BPC / BitsPerComponent.
	ColorSpace       core.PdfObject // Key CS / ColorSpace; a name or an array (see GetColorSpace).
	Decode           core.PdfObject // Key D / Decode.
	DecodeParms      core.PdfObject // Key DP / DecodeParms.
	Filter           core.PdfObject // Key F / Filter.
	Height           core.PdfObject // Key H / Height.
	ImageMask        core.PdfObject // Key IM / ImageMask (see IsMask).
	Intent           core.PdfObject // Key Intent (no abbreviation is recognized).
	Interpolate      core.PdfObject // Key I / Interpolate.
	Width            core.PdfObject // Key W / Width.
	stream           []byte         // Image data as it appears between ID and EI, i.e. still encoded when a filter is used.
}
    33  
    34  // Make a new content stream inline image object from an image.
    35  func NewInlineImageFromImage(img model.Image, encoder core.StreamEncoder) (*ContentStreamInlineImage, error) {
    36  	if encoder == nil {
    37  		encoder = core.NewRawEncoder()
    38  	}
    39  
    40  	inlineImage := ContentStreamInlineImage{}
    41  	if img.ColorComponents == 1 {
    42  		inlineImage.ColorSpace = core.MakeName("G") // G short for DeviceGray
    43  	} else if img.ColorComponents == 3 {
    44  		inlineImage.ColorSpace = core.MakeName("RGB") // RGB short for DeviceRGB
    45  	} else if img.ColorComponents == 4 {
    46  		inlineImage.ColorSpace = core.MakeName("CMYK") // CMYK short for DeviceCMYK
    47  	} else {
    48  		common.Log.Debug("Invalid number of color components for inline image: %d", img.ColorComponents)
    49  		return nil, errors.New("Invalid number of color components")
    50  	}
    51  	inlineImage.BitsPerComponent = core.MakeInteger(img.BitsPerComponent)
    52  	inlineImage.Width = core.MakeInteger(img.Width)
    53  	inlineImage.Height = core.MakeInteger(img.Height)
    54  
    55  	encoded, err := encoder.EncodeBytes(img.Data)
    56  	if err != nil {
    57  		return nil, err
    58  	}
    59  
    60  	inlineImage.stream = encoded
    61  
    62  	filterName := encoder.GetFilterName()
    63  	if filterName != core.StreamEncodingFilterNameRaw {
    64  		inlineImage.Filter = core.MakeName(filterName)
    65  	}
    66  	// XXX/FIXME: Add decode params?
    67  
    68  	return &inlineImage, nil
    69  }
    70  
    71  func (this *ContentStreamInlineImage) String() string {
    72  	s := fmt.Sprintf("InlineImage(len=%d)\n", len(this.stream))
    73  	if this.BitsPerComponent != nil {
    74  		s += "- BPC " + this.BitsPerComponent.DefaultWriteString() + "\n"
    75  	}
    76  	if this.ColorSpace != nil {
    77  		s += "- CS " + this.ColorSpace.DefaultWriteString() + "\n"
    78  	}
    79  	if this.Decode != nil {
    80  		s += "- D " + this.Decode.DefaultWriteString() + "\n"
    81  	}
    82  	if this.DecodeParms != nil {
    83  		s += "- DP " + this.DecodeParms.DefaultWriteString() + "\n"
    84  	}
    85  	if this.Filter != nil {
    86  		s += "- F " + this.Filter.DefaultWriteString() + "\n"
    87  	}
    88  	if this.Height != nil {
    89  		s += "- H " + this.Height.DefaultWriteString() + "\n"
    90  	}
    91  	if this.ImageMask != nil {
    92  		s += "- IM " + this.ImageMask.DefaultWriteString() + "\n"
    93  	}
    94  	if this.Intent != nil {
    95  		s += "- Intent " + this.Intent.DefaultWriteString() + "\n"
    96  	}
    97  	if this.Interpolate != nil {
    98  		s += "- I " + this.Interpolate.DefaultWriteString() + "\n"
    99  	}
   100  	if this.Width != nil {
   101  		s += "- W " + this.Width.DefaultWriteString() + "\n"
   102  	}
   103  	return s
   104  }
   105  
   106  func (this *ContentStreamInlineImage) DefaultWriteString() string {
   107  	var output bytes.Buffer
   108  
   109  	// We do not start with "BI" as that is the operand and is written out separately.
   110  	// Write out the parameters
   111  	s := ""
   112  
   113  	if this.BitsPerComponent != nil {
   114  		s += "/BPC " + this.BitsPerComponent.DefaultWriteString() + "\n"
   115  	}
   116  	if this.ColorSpace != nil {
   117  		s += "/CS " + this.ColorSpace.DefaultWriteString() + "\n"
   118  	}
   119  	if this.Decode != nil {
   120  		s += "/D " + this.Decode.DefaultWriteString() + "\n"
   121  	}
   122  	if this.DecodeParms != nil {
   123  		s += "/DP " + this.DecodeParms.DefaultWriteString() + "\n"
   124  	}
   125  	if this.Filter != nil {
   126  		s += "/F " + this.Filter.DefaultWriteString() + "\n"
   127  	}
   128  	if this.Height != nil {
   129  		s += "/H " + this.Height.DefaultWriteString() + "\n"
   130  	}
   131  	if this.ImageMask != nil {
   132  		s += "/IM " + this.ImageMask.DefaultWriteString() + "\n"
   133  	}
   134  	if this.Intent != nil {
   135  		s += "/Intent " + this.Intent.DefaultWriteString() + "\n"
   136  	}
   137  	if this.Interpolate != nil {
   138  		s += "/I " + this.Interpolate.DefaultWriteString() + "\n"
   139  	}
   140  	if this.Width != nil {
   141  		s += "/W " + this.Width.DefaultWriteString() + "\n"
   142  	}
   143  	output.WriteString(s)
   144  
   145  	output.WriteString("ID ")
   146  	output.Write(this.stream)
   147  	output.WriteString("\nEI\n")
   148  
   149  	return output.String()
   150  }
   151  
   152  func (this *ContentStreamInlineImage) GetColorSpace(resources *model.PdfPageResources) (model.PdfColorspace, error) {
   153  	if this.ColorSpace == nil {
   154  		// Default.
   155  		common.Log.Debug("Inline image not having specified colorspace, assuming Gray")
   156  		return model.NewPdfColorspaceDeviceGray(), nil
   157  	}
   158  
   159  	// If is an array, then could be an indexed colorspace.
   160  	if arr, isArr := this.ColorSpace.(*core.PdfObjectArray); isArr {
   161  		return newIndexedColorspaceFromPdfObject(arr)
   162  	}
   163  
   164  	name, ok := this.ColorSpace.(*core.PdfObjectName)
   165  	if !ok {
   166  		common.Log.Debug("Error: Invalid object type (%T;%+v)", this.ColorSpace, this.ColorSpace)
   167  		return nil, errors.New("Type check error")
   168  	}
   169  
   170  	if *name == "G" || *name == "DeviceGray" {
   171  		return model.NewPdfColorspaceDeviceGray(), nil
   172  	} else if *name == "RGB" || *name == "DeviceRGB" {
   173  		return model.NewPdfColorspaceDeviceRGB(), nil
   174  	} else if *name == "CMYK" || *name == "DeviceCMYK" {
   175  		return model.NewPdfColorspaceDeviceCMYK(), nil
   176  	} else if *name == "I" || *name == "Indexed" {
   177  		return nil, errors.New("Unsupported Index colorspace")
   178  	} else {
   179  		if resources.ColorSpace == nil {
   180  			// Can also refer to a name in the PDF page resources...
   181  			common.Log.Debug("Error, unsupported inline image colorspace: %s", *name)
   182  			return nil, errors.New("Unknown colorspace")
   183  		}
   184  
   185  		cs, has := resources.ColorSpace.Colorspaces[string(*name)]
   186  		if !has {
   187  			// Can also refer to a name in the PDF page resources...
   188  			common.Log.Debug("Error, unsupported inline image colorspace: %s", *name)
   189  			return nil, errors.New("Unknown colorspace")
   190  		}
   191  
   192  		return cs, nil
   193  	}
   194  
   195  }
   196  
// GetEncoder returns the stream encoder that newEncoderFromInlineImage creates for this inline
// image, together with any error from that call. ToImage obtains its encoder the same way to
// decode the image data.
func (this *ContentStreamInlineImage) GetEncoder() (core.StreamEncoder, error) {
	return newEncoderFromInlineImage(this)
}
   200  
   201  // Is a mask ?
   202  // The image mask entry in the image dictionary specifies that the image data shall be used as a stencil
   203  // mask for painting in the current color. The mask data is 1bpc, grayscale.
   204  func (this *ContentStreamInlineImage) IsMask() (bool, error) {
   205  	if this.ImageMask != nil {
   206  		imMask, ok := this.ImageMask.(*core.PdfObjectBool)
   207  		if !ok {
   208  			common.Log.Debug("Image mask not a boolean")
   209  			return false, errors.New("Invalid object type")
   210  		}
   211  
   212  		return bool(*imMask), nil
   213  	} else {
   214  		return false, nil
   215  	}
   216  
   217  }
   218  
   219  // Export the inline image to Image which can be transformed or exported easily.
   220  // Page resources are needed to look up colorspace information.
   221  func (this *ContentStreamInlineImage) ToImage(resources *model.PdfPageResources) (*model.Image, error) {
   222  	// Decode the imaging data if encoded.
   223  	encoder, err := newEncoderFromInlineImage(this)
   224  	if err != nil {
   225  		return nil, err
   226  	}
   227  	common.Log.Trace("encoder: %+v %T", encoder, encoder)
   228  	common.Log.Trace("inline image: %+v", this)
   229  
   230  	decoded, err := encoder.DecodeBytes(this.stream)
   231  	if err != nil {
   232  		return nil, err
   233  	}
   234  
   235  	image := &model.Image{}
   236  
   237  	// Height.
   238  	if this.Height == nil {
   239  		return nil, errors.New("Height attribute missing")
   240  	}
   241  	height, ok := this.Height.(*core.PdfObjectInteger)
   242  	if !ok {
   243  		return nil, errors.New("Invalid height")
   244  	}
   245  	image.Height = int64(*height)
   246  
   247  	// Width.
   248  	if this.Width == nil {
   249  		return nil, errors.New("Width attribute missing")
   250  	}
   251  	width, ok := this.Width.(*core.PdfObjectInteger)
   252  	if !ok {
   253  		return nil, errors.New("Invalid width")
   254  	}
   255  	image.Width = int64(*width)
   256  
   257  	// Image mask?
   258  	isMask, err := this.IsMask()
   259  	if err != nil {
   260  		return nil, err
   261  	}
   262  
   263  	if isMask {
   264  		// Masks are grayscale 1bpc.
   265  		image.BitsPerComponent = 1
   266  		image.ColorComponents = 1
   267  	} else {
   268  		// BPC.
   269  		if this.BitsPerComponent == nil {
   270  			common.Log.Debug("Inline Bits per component missing - assuming 8")
   271  			image.BitsPerComponent = 8
   272  		} else {
   273  			bpc, ok := this.BitsPerComponent.(*core.PdfObjectInteger)
   274  			if !ok {
   275  				common.Log.Debug("Error invalid bits per component value, type %T", this.BitsPerComponent)
   276  				return nil, errors.New("BPC Type error")
   277  			}
   278  			image.BitsPerComponent = int64(*bpc)
   279  		}
   280  
   281  		// Color components.
   282  		if this.ColorSpace != nil {
   283  			cs, err := this.GetColorSpace(resources)
   284  			if err != nil {
   285  				return nil, err
   286  			}
   287  			image.ColorComponents = cs.GetNumComponents()
   288  		} else {
   289  			// Default gray if not specified.
   290  			common.Log.Debug("Inline Image colorspace not specified - assuming 1 color component")
   291  			image.ColorComponents = 1
   292  		}
   293  	}
   294  
   295  	image.Data = decoded
   296  
   297  	return image, nil
   298  }
   299  
   300  // Parse an inline image from a content stream, both read its properties and binary data.
   301  // When called, "BI" has already been read from the stream.  This function
   302  // finishes reading through "EI" and then returns the ContentStreamInlineImage.
   303  func (this *ContentStreamParser) ParseInlineImage() (*ContentStreamInlineImage, error) {
   304  	// Reading parameters.
   305  	im := ContentStreamInlineImage{}
   306  
   307  	for {
   308  		this.skipSpaces()
   309  		obj, err, isOperand := this.parseObject()
   310  		if err != nil {
   311  			return nil, err
   312  		}
   313  
   314  		if !isOperand {
   315  			// Not an operand.. Read key value properties..
   316  			param, ok := obj.(*core.PdfObjectName)
   317  			if !ok {
   318  				common.Log.Debug("Invalid inline image property (expecting name) - %T", obj)
   319  				return nil, fmt.Errorf("Invalid inline image property (expecting name) - %T", obj)
   320  			}
   321  
   322  			valueObj, err, isOperand := this.parseObject()
   323  			if err != nil {
   324  				return nil, err
   325  			}
   326  			if isOperand {
   327  				return nil, fmt.Errorf("Not expecting an operand")
   328  			}
   329  
   330  			// From 8.9.7 "Inline Images" p. 223 (PDF32000_2008):
   331  			// The key-value pairs appearing between the BI and ID operators are analogous to those in the dictionary
   332  			// portion of an image XObject (though the syntax is different).
   333  			// Table 93 shows the entries that are valid for an inline image, all of which shall have the same meanings
   334  			// as in a stream dictionary (see Table 5) or an image dictionary (see Table 89).
   335  			// Entries other than those listed shall be ignored; in particular, the Type, Subtype, and Length
   336  			// entries normally found in a stream or image dictionary are unnecessary.
   337  			// For convenience, the abbreviations shown in the table may be used in place of the fully spelled-out keys.
   338  			// Table 94 shows additional abbreviations that can be used for the names of colour spaces and filters.
   339  
   340  			switch *param {
   341  			case "BPC", "BitsPerComponent":
   342  				im.BitsPerComponent = valueObj
   343  			case "CS", "ColorSpace":
   344  				im.ColorSpace = valueObj
   345  			case "D", "Decode":
   346  				im.Decode = valueObj
   347  			case "DP", "DecodeParms":
   348  				im.DecodeParms = valueObj
   349  			case "F", "Filter":
   350  				im.Filter = valueObj
   351  			case "H", "Height":
   352  				im.Height = valueObj
   353  			case "IM", "ImageMask":
   354  				im.ImageMask = valueObj
   355  			case "Intent":
   356  				im.Intent = valueObj
   357  			case "I", "Interpolate":
   358  				im.Interpolate = valueObj
   359  			case "W", "Width":
   360  				im.Width = valueObj
   361  			default:
   362  				return nil, fmt.Errorf("Unknown inline image parameter %s", *param)
   363  			}
   364  		}
   365  
   366  		if isOperand {
   367  			operand, ok := obj.(*core.PdfObjectString)
   368  			if !ok {
   369  				return nil, fmt.Errorf("Failed to read inline image - invalid operand")
   370  			}
   371  
   372  			if *operand == "EI" {
   373  				// Image fully defined
   374  				common.Log.Trace("Inline image finished...")
   375  				return &im, nil
   376  			} else if *operand == "ID" {
   377  				// Inline image data.
   378  				// Should get a single space (0x20) followed by the data and then EI.
   379  				common.Log.Trace("ID start")
   380  
   381  				// Skip the space if its there.
   382  				b, err := this.reader.Peek(1)
   383  				if err != nil {
   384  					return nil, err
   385  				}
   386  				if core.IsWhiteSpace(b[0]) {
   387  					this.reader.Discard(1)
   388  				}
   389  
   390  				// Unfortunately there is no good way to know how many bytes to read since it
   391  				// depends on the Filter and encoding etc.
   392  				// Therefore we will simply read until we find "<ws>EI<ws>" where <ws> is whitespace
   393  				// although of course that could be a part of the data (even if unlikely).
   394  				im.stream = []byte{}
   395  				state := 0
   396  				var skipBytes []byte
   397  				for {
   398  					c, err := this.reader.ReadByte()
   399  					if err != nil {
   400  						common.Log.Debug("Unable to find end of image EI in inline image data")
   401  						return nil, err
   402  					}
   403  
   404  					if state == 0 {
   405  						if core.IsWhiteSpace(c) {
   406  							skipBytes = []byte{}
   407  							skipBytes = append(skipBytes, c)
   408  							state = 1
   409  						} else {
   410  							im.stream = append(im.stream, c)
   411  						}
   412  					} else if state == 1 {
   413  						skipBytes = append(skipBytes, c)
   414  						if c == 'E' {
   415  							state = 2
   416  						} else {
   417  							im.stream = append(im.stream, skipBytes...)
   418  							skipBytes = []byte{} // Clear.
   419  							// Need an extra check to decide if we fall back to state 0 or 1.
   420  							if core.IsWhiteSpace(c) {
   421  								state = 1
   422  							} else {
   423  								state = 0
   424  							}
   425  						}
   426  					} else if state == 2 {
   427  						skipBytes = append(skipBytes, c)
   428  						if c == 'I' {
   429  							state = 3
   430  						} else {
   431  							im.stream = append(im.stream, skipBytes...)
   432  							skipBytes = []byte{} // Clear.
   433  							state = 0
   434  						}
   435  					} else if state == 3 {
   436  						skipBytes = append(skipBytes, c)
   437  						if core.IsWhiteSpace(c) {
   438  							// image data finished.
   439  							if len(im.stream) > 100 {
   440  								common.Log.Trace("Image stream (%d): % x ...", len(im.stream), im.stream[:100])
   441  							} else {
   442  								common.Log.Trace("Image stream (%d): % x", len(im.stream), im.stream)
   443  							}
   444  							// Exit point.
   445  							return &im, nil
   446  						} else {
   447  							// Seems like "<ws>EI" was part of the data.
   448  							im.stream = append(im.stream, skipBytes...)
   449  							skipBytes = []byte{} // Clear.
   450  							state = 0
   451  						}
   452  					}
   453  				}
   454  				// Never reached (exit point is at end of EI).
   455  			}
   456  		}
   457  	}
   458  }