github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/contentstream/processor.go (about)

     1  /*
     2   * This file is subject to the terms and conditions defined in
     3   * file 'LICENSE.md', which is part of this source code package.
     4   */
     5  
     6  package contentstream
     7  
     8  import (
     9  	"errors"
    10  
    11  	"github.com/unidoc/unidoc/common"
    12  	. "github.com/unidoc/unidoc/pdf/core"
    13  	. "github.com/unidoc/unidoc/pdf/model"
    14  )
    15  
    16  // Basic graphics state implementation.
    17  // Initially only implementing and tracking a portion of the information specified.  Easy to add more.
    18  type GraphicsState struct {
    19  	ColorspaceStroking    PdfColorspace
    20  	ColorspaceNonStroking PdfColorspace
    21  	ColorStroking         PdfColor
    22  	ColorNonStroking      PdfColor
    23  }
    24  
    25  type GraphicStateStack []GraphicsState
    26  
    27  func (gsStack *GraphicStateStack) Push(gs GraphicsState) {
    28  	*gsStack = append(*gsStack, gs)
    29  }
    30  
    31  func (gsStack *GraphicStateStack) Pop() GraphicsState {
    32  	gs := (*gsStack)[len(*gsStack)-1]
    33  	*gsStack = (*gsStack)[:len(*gsStack)-1]
    34  	return gs
    35  }
    36  
// ContentStreamProcessor defines a data structure and methods for processing a content stream, keeping track of the
// current graphics state, and allowing external handlers to define their own functions as a part of the processing,
// for example rendering or extracting certain information.
type ContentStreamProcessor struct {
	// graphicsStack holds the graphics states saved by the "q" operator until restored by "Q".
	graphicsStack GraphicStateStack
	// operations is the list of content stream operations that Process iterates over.
	operations []*ContentStreamOperation
	// graphicsState is the current graphics state, updated by the color operators during processing.
	graphicsState GraphicsState

	// handlers are the external handlers registered through AddHandler, invoked in registration order.
	handlers []HandlerEntry
	// currentIndex is initialized to 0 by the constructor.
	// NOTE(review): not read anywhere in the visible code - confirm whether it is still needed.
	currentIndex int
}
    48  
// HandlerFunc is the signature of an external handler. It receives the operation being processed, a copy of the
// current graphics state (after the processor's internal handling of the operation) and the resources that were
// passed to Process. A non-nil error returned by the handler aborts processing.
type HandlerFunc func(op *ContentStreamOperation, gs GraphicsState, resources *PdfPageResources) error
    50  
// HandlerEntry is a registered external handler together with the condition under which it is invoked.
type HandlerEntry struct {
	// Condition selects whether the handler runs for every operation or only for a specific operand.
	Condition HandlerConditionEnum
	// Operand is the operand to match; it is only compared when Condition is HandlerConditionEnumOperand.
	Operand string
	// Handler is the function to invoke.
	Handler HandlerFunc
}
    56  
    57  type HandlerConditionEnum int
    58  
    59  func (this HandlerConditionEnum) All() bool {
    60  	return this == HandlerConditionEnumAllOperands
    61  }
    62  
    63  func (this HandlerConditionEnum) Operand() bool {
    64  	return this == HandlerConditionEnumOperand
    65  }
    66  
    67  const (
    68  	HandlerConditionEnumOperand     HandlerConditionEnum = iota
    69  	HandlerConditionEnumAllOperands HandlerConditionEnum = iota
    70  )
    71  
    72  func NewContentStreamProcessor(ops []*ContentStreamOperation) *ContentStreamProcessor {
    73  	csp := ContentStreamProcessor{}
    74  	csp.graphicsStack = GraphicStateStack{}
    75  
    76  	// Set defaults..
    77  	gs := GraphicsState{}
    78  
    79  	csp.graphicsState = gs
    80  
    81  	csp.handlers = []HandlerEntry{}
    82  	csp.currentIndex = 0
    83  	csp.operations = ops
    84  
    85  	return &csp
    86  }
    87  
    88  func (csp *ContentStreamProcessor) AddHandler(condition HandlerConditionEnum, operand string, handler HandlerFunc) {
    89  	entry := HandlerEntry{}
    90  	entry.Condition = condition
    91  	entry.Operand = operand
    92  	entry.Handler = handler
    93  	csp.handlers = append(csp.handlers, entry)
    94  }
    95  
    96  func (csp *ContentStreamProcessor) getColorspace(name string, resources *PdfPageResources) (PdfColorspace, error) {
    97  	switch name {
    98  	case "DeviceGray":
    99  		return NewPdfColorspaceDeviceGray(), nil
   100  	case "DeviceRGB":
   101  		return NewPdfColorspaceDeviceRGB(), nil
   102  	case "DeviceCMYK":
   103  		return NewPdfColorspaceDeviceCMYK(), nil
   104  	case "Pattern":
   105  		return NewPdfColorspaceSpecialPattern(), nil
   106  	}
   107  
   108  	// Next check the colorspace dictionary.
   109  	cs, has := resources.ColorSpace.Colorspaces[name]
   110  	if has {
   111  		return cs, nil
   112  	}
   113  
   114  	// Lastly check other potential colormaps.
   115  	switch name {
   116  	case "CalGray":
   117  		return NewPdfColorspaceCalGray(), nil
   118  	case "CalRGB":
   119  		return NewPdfColorspaceCalRGB(), nil
   120  	case "Lab":
   121  		return NewPdfColorspaceLab(), nil
   122  	}
   123  
   124  	// Otherwise unsupported.
   125  	common.Log.Debug("Unknown colorspace requested: %s", name)
   126  	return nil, errors.New("Unsupported colorspace")
   127  }
   128  
   129  // Get initial color for a given colorspace.
   130  func (csp *ContentStreamProcessor) getInitialColor(cs PdfColorspace) (PdfColor, error) {
   131  	switch cs := cs.(type) {
   132  	case *PdfColorspaceDeviceGray:
   133  		return NewPdfColorDeviceGray(0.0), nil
   134  	case *PdfColorspaceDeviceRGB:
   135  		return NewPdfColorDeviceRGB(0.0, 0.0, 0.0), nil
   136  	case *PdfColorspaceDeviceCMYK:
   137  		return NewPdfColorDeviceCMYK(0.0, 0.0, 0.0, 1.0), nil
   138  	case *PdfColorspaceCalGray:
   139  		return NewPdfColorCalGray(0.0), nil
   140  	case *PdfColorspaceCalRGB:
   141  		return NewPdfColorCalRGB(0.0, 0.0, 0.0), nil
   142  	case *PdfColorspaceLab:
   143  		l := 0.0
   144  		a := 0.0
   145  		b := 0.0
   146  		if cs.Range[0] > 0 {
   147  			l = cs.Range[0]
   148  		}
   149  		if cs.Range[2] > 0 {
   150  			a = cs.Range[2]
   151  		}
   152  		return NewPdfColorLab(l, a, b), nil
   153  	case *PdfColorspaceICCBased:
   154  		if cs.Alternate == nil {
   155  			// Alternate not defined.
   156  			// Try to fall back to DeviceGray, DeviceRGB or DeviceCMYK.
   157  			common.Log.Trace("ICC Based not defined - attempting fall back (N = %d)", cs.N)
   158  			if cs.N == 1 {
   159  				common.Log.Trace("Falling back to DeviceGray")
   160  				return csp.getInitialColor(NewPdfColorspaceDeviceGray())
   161  			} else if cs.N == 3 {
   162  				common.Log.Trace("Falling back to DeviceRGB")
   163  				return csp.getInitialColor(NewPdfColorspaceDeviceRGB())
   164  			} else if cs.N == 4 {
   165  				common.Log.Trace("Falling back to DeviceCMYK")
   166  				return csp.getInitialColor(NewPdfColorspaceDeviceCMYK())
   167  			} else {
   168  				return nil, errors.New("Alternate space not defined for ICC")
   169  			}
   170  		}
   171  		return csp.getInitialColor(cs.Alternate)
   172  	case *PdfColorspaceSpecialIndexed:
   173  		if cs.Base == nil {
   174  			return nil, errors.New("Indexed base not specified")
   175  		}
   176  		return csp.getInitialColor(cs.Base)
   177  	case *PdfColorspaceSpecialSeparation:
   178  		if cs.AlternateSpace == nil {
   179  			return nil, errors.New("Alternate space not specified")
   180  		}
   181  		return csp.getInitialColor(cs.AlternateSpace)
   182  	case *PdfColorspaceDeviceN:
   183  		if cs.AlternateSpace == nil {
   184  			return nil, errors.New("Alternate space not specified")
   185  		}
   186  		return csp.getInitialColor(cs.AlternateSpace)
   187  	case *PdfColorspaceSpecialPattern:
   188  		// FIXME/check: A pattern does not have an initial color...
   189  		return nil, nil
   190  	}
   191  
   192  	common.Log.Debug("Unable to determine initial color for unknown colorspace: %T", cs)
   193  	return nil, errors.New("Unsupported colorspace")
   194  }
   195  
   196  // Process the entire operations.
   197  func (this *ContentStreamProcessor) Process(resources *PdfPageResources) error {
   198  	// Initialize graphics state
   199  	this.graphicsState.ColorspaceStroking = NewPdfColorspaceDeviceGray()
   200  	this.graphicsState.ColorspaceNonStroking = NewPdfColorspaceDeviceGray()
   201  	this.graphicsState.ColorStroking = NewPdfColorDeviceGray(0)
   202  	this.graphicsState.ColorNonStroking = NewPdfColorDeviceGray(0)
   203  
   204  	for _, op := range this.operations {
   205  		var err error
   206  
   207  		// Internal handling.
   208  		switch op.Operand {
   209  		case "q":
   210  			this.graphicsStack.Push(this.graphicsState)
   211  		case "Q":
   212  			this.graphicsState = this.graphicsStack.Pop()
   213  
   214  		// Color operations (Table 74 p. 179)
   215  		case "CS":
   216  			err = this.handleCommand_CS(op, resources)
   217  		case "cs":
   218  			err = this.handleCommand_cs(op, resources)
   219  		case "SC":
   220  			err = this.handleCommand_SC(op, resources)
   221  		case "SCN":
   222  			err = this.handleCommand_SCN(op, resources)
   223  		case "sc":
   224  			err = this.handleCommand_sc(op, resources)
   225  		case "scn":
   226  			err = this.handleCommand_scn(op, resources)
   227  		case "G":
   228  			err = this.handleCommand_G(op, resources)
   229  		case "g":
   230  			err = this.handleCommand_g(op, resources)
   231  		case "RG":
   232  			err = this.handleCommand_RG(op, resources)
   233  		case "rg":
   234  			err = this.handleCommand_rg(op, resources)
   235  		case "K":
   236  			err = this.handleCommand_K(op, resources)
   237  		case "k":
   238  			err = this.handleCommand_k(op, resources)
   239  		}
   240  		if err != nil {
   241  			common.Log.Debug("Processor handling error (%s): %v", op.Operand, err)
   242  			common.Log.Debug("Operand: %#v", op.Operand)
   243  			return err
   244  		}
   245  
   246  		// Check if have external handler also, and process if so.
   247  		for _, entry := range this.handlers {
   248  			var err error
   249  			if entry.Condition.All() {
   250  				err = entry.Handler(op, this.graphicsState, resources)
   251  			} else if entry.Condition.Operand() && op.Operand == entry.Operand {
   252  				err = entry.Handler(op, this.graphicsState, resources)
   253  			}
   254  			if err != nil {
   255  				common.Log.Debug("Processor handler error: %v", err)
   256  				return err
   257  			}
   258  		}
   259  	}
   260  
   261  	return nil
   262  }
   263  
   264  // CS: Set the current color space for stroking operations.
   265  func (csp *ContentStreamProcessor) handleCommand_CS(op *ContentStreamOperation, resources *PdfPageResources) error {
   266  	if len(op.Params) < 1 {
   267  		common.Log.Debug("Invalid cs command, skipping over")
   268  		return errors.New("Too few parameters")
   269  	}
   270  	if len(op.Params) > 1 {
   271  		common.Log.Debug("cs command with too many parameters - continuing")
   272  		return errors.New("Too many parameters")
   273  	}
   274  	name, ok := op.Params[0].(*PdfObjectName)
   275  	if !ok {
   276  		common.Log.Debug("ERROR: cs command with invalid parameter, skipping over")
   277  		return errors.New("Type check error")
   278  	}
   279  	// Set the current color space to use for stroking operations.
   280  	// Either device based or referring to resource dict.
   281  	cs, err := csp.getColorspace(string(*name), resources)
   282  	if err != nil {
   283  		return err
   284  	}
   285  	csp.graphicsState.ColorspaceStroking = cs
   286  
   287  	// Set initial color.
   288  	color, err := csp.getInitialColor(cs)
   289  	if err != nil {
   290  		return err
   291  	}
   292  	csp.graphicsState.ColorStroking = color
   293  
   294  	return nil
   295  }
   296  
   297  // cs: Set the current color space for non-stroking operations.
   298  func (csp *ContentStreamProcessor) handleCommand_cs(op *ContentStreamOperation, resources *PdfPageResources) error {
   299  	if len(op.Params) < 1 {
   300  		common.Log.Debug("Invalid CS command, skipping over")
   301  		return errors.New("Too few parameters")
   302  	}
   303  	if len(op.Params) > 1 {
   304  		common.Log.Debug("CS command with too many parameters - continuing")
   305  		return errors.New("Too many parameters")
   306  	}
   307  	name, ok := op.Params[0].(*PdfObjectName)
   308  	if !ok {
   309  		common.Log.Debug("ERROR: CS command with invalid parameter, skipping over")
   310  		return errors.New("Type check error")
   311  	}
   312  	// Set the current color space to use for non-stroking operations.
   313  	// Either device based or referring to resource dict.
   314  	cs, err := csp.getColorspace(string(*name), resources)
   315  	if err != nil {
   316  		return err
   317  	}
   318  	csp.graphicsState.ColorspaceNonStroking = cs
   319  
   320  	// Set initial color.
   321  	color, err := csp.getInitialColor(cs)
   322  	if err != nil {
   323  		return err
   324  	}
   325  	csp.graphicsState.ColorNonStroking = color
   326  
   327  	return nil
   328  }
   329  
   330  // SC: Set the color to use for stroking operations in a device, CIE-based or Indexed colorspace. (not ICC based)
   331  func (this *ContentStreamProcessor) handleCommand_SC(op *ContentStreamOperation, resources *PdfPageResources) error {
   332  	// For DeviceGray, CalGray, Indexed: one operand is required
   333  	// For DeviceRGB, CalRGB, Lab: 3 operands required
   334  
   335  	cs := this.graphicsState.ColorspaceStroking
   336  	if len(op.Params) != cs.GetNumComponents() {
   337  		common.Log.Debug("Invalid number of parameters for SC")
   338  		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   339  		return errors.New("Invalid number of parameters")
   340  	}
   341  
   342  	color, err := cs.ColorFromPdfObjects(op.Params)
   343  	if err != nil {
   344  		return err
   345  	}
   346  
   347  	this.graphicsState.ColorStroking = color
   348  	return nil
   349  }
   350  
   351  func isPatternCS(cs PdfColorspace) bool {
   352  	_, isPattern := cs.(*PdfColorspaceSpecialPattern)
   353  	return isPattern
   354  }
   355  
   356  // SCN: Same as SC but also supports Pattern, Separation, DeviceN and ICCBased color spaces.
   357  func (this *ContentStreamProcessor) handleCommand_SCN(op *ContentStreamOperation, resources *PdfPageResources) error {
   358  	cs := this.graphicsState.ColorspaceStroking
   359  
   360  	if !isPatternCS(cs) {
   361  		if len(op.Params) != cs.GetNumComponents() {
   362  			common.Log.Debug("Invalid number of parameters for SC")
   363  			common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   364  			return errors.New("Invalid number of parameters")
   365  		}
   366  	}
   367  
   368  	color, err := cs.ColorFromPdfObjects(op.Params)
   369  	if err != nil {
   370  		return err
   371  	}
   372  
   373  	this.graphicsState.ColorStroking = color
   374  
   375  	return nil
   376  }
   377  
   378  // sc: Same as SC except used for non-stroking operations.
   379  func (this *ContentStreamProcessor) handleCommand_sc(op *ContentStreamOperation, resources *PdfPageResources) error {
   380  	cs := this.graphicsState.ColorspaceNonStroking
   381  
   382  	if !isPatternCS(cs) {
   383  		if len(op.Params) != cs.GetNumComponents() {
   384  			common.Log.Debug("Invalid number of parameters for SC")
   385  			common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   386  			return errors.New("Invalid number of parameters")
   387  		}
   388  	}
   389  
   390  	color, err := cs.ColorFromPdfObjects(op.Params)
   391  	if err != nil {
   392  		return err
   393  	}
   394  
   395  	this.graphicsState.ColorNonStroking = color
   396  
   397  	return nil
   398  }
   399  
   400  // scn: Same as SCN except used for non-stroking operations.
   401  func (this *ContentStreamProcessor) handleCommand_scn(op *ContentStreamOperation, resources *PdfPageResources) error {
   402  	cs := this.graphicsState.ColorspaceNonStroking
   403  
   404  	if !isPatternCS(cs) {
   405  		if len(op.Params) != cs.GetNumComponents() {
   406  			common.Log.Debug("Invalid number of parameters for SC")
   407  			common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   408  			return errors.New("Invalid number of parameters")
   409  		}
   410  	}
   411  
   412  	color, err := cs.ColorFromPdfObjects(op.Params)
   413  	if err != nil {
   414  		common.Log.Debug("ERROR: Fail to get color from params: %+v (CS is %+v)", op.Params, cs)
   415  		return err
   416  	}
   417  
   418  	this.graphicsState.ColorNonStroking = color
   419  
   420  	return nil
   421  }
   422  
   423  // G: Set the stroking colorspace to DeviceGray, and the color to the specified graylevel (range [0-1]).
   424  // gray G
   425  func (this *ContentStreamProcessor) handleCommand_G(op *ContentStreamOperation, resources *PdfPageResources) error {
   426  	cs := NewPdfColorspaceDeviceGray()
   427  	if len(op.Params) != cs.GetNumComponents() {
   428  		common.Log.Debug("Invalid number of parameters for SC")
   429  		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   430  		return errors.New("Invalid number of parameters")
   431  	}
   432  
   433  	color, err := cs.ColorFromPdfObjects(op.Params)
   434  	if err != nil {
   435  		return err
   436  	}
   437  
   438  	this.graphicsState.ColorspaceStroking = cs
   439  	this.graphicsState.ColorStroking = color
   440  
   441  	return nil
   442  }
   443  
   444  // g: Same as G, but for non-stroking colorspace and color (range [0-1]).
   445  // gray g
   446  func (this *ContentStreamProcessor) handleCommand_g(op *ContentStreamOperation, resources *PdfPageResources) error {
   447  	cs := NewPdfColorspaceDeviceGray()
   448  	if len(op.Params) != cs.GetNumComponents() {
   449  		common.Log.Debug("Invalid number of parameters for SC")
   450  		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   451  		return errors.New("Invalid number of parameters")
   452  	}
   453  
   454  	color, err := cs.ColorFromPdfObjects(op.Params)
   455  	if err != nil {
   456  		return err
   457  	}
   458  
   459  	this.graphicsState.ColorspaceNonStroking = cs
   460  	this.graphicsState.ColorNonStroking = color
   461  
   462  	return nil
   463  }
   464  
   465  // RG: Sets the stroking colorspace to DeviceRGB and the stroking color to r,g,b. [0-1] ranges.
   466  // r g b RG
   467  func (this *ContentStreamProcessor) handleCommand_RG(op *ContentStreamOperation, resources *PdfPageResources) error {
   468  	cs := NewPdfColorspaceDeviceRGB()
   469  	if len(op.Params) != cs.GetNumComponents() {
   470  		common.Log.Debug("Invalid number of parameters for SC")
   471  		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   472  		return errors.New("Invalid number of parameters")
   473  	}
   474  
   475  	color, err := cs.ColorFromPdfObjects(op.Params)
   476  	if err != nil {
   477  		return err
   478  	}
   479  
   480  	this.graphicsState.ColorspaceStroking = cs
   481  	this.graphicsState.ColorStroking = color
   482  
   483  	return nil
   484  }
   485  
   486  // rg: Same as RG but for non-stroking colorspace, color.
   487  func (this *ContentStreamProcessor) handleCommand_rg(op *ContentStreamOperation, resources *PdfPageResources) error {
   488  	cs := NewPdfColorspaceDeviceRGB()
   489  	if len(op.Params) != cs.GetNumComponents() {
   490  		common.Log.Debug("Invalid number of parameters for SC")
   491  		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   492  		return errors.New("Invalid number of parameters")
   493  	}
   494  
   495  	color, err := cs.ColorFromPdfObjects(op.Params)
   496  	if err != nil {
   497  		return err
   498  	}
   499  
   500  	this.graphicsState.ColorspaceNonStroking = cs
   501  	this.graphicsState.ColorNonStroking = color
   502  
   503  	return nil
   504  }
   505  
   506  // K: Sets the stroking colorspace to DeviceCMYK and the stroking color to c,m,y,k. [0-1] ranges.
   507  // c m y k K
   508  func (this *ContentStreamProcessor) handleCommand_K(op *ContentStreamOperation, resources *PdfPageResources) error {
   509  	cs := NewPdfColorspaceDeviceCMYK()
   510  	if len(op.Params) != cs.GetNumComponents() {
   511  		common.Log.Debug("Invalid number of parameters for SC")
   512  		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   513  		return errors.New("Invalid number of parameters")
   514  	}
   515  
   516  	color, err := cs.ColorFromPdfObjects(op.Params)
   517  	if err != nil {
   518  		return err
   519  	}
   520  
   521  	this.graphicsState.ColorspaceStroking = cs
   522  	this.graphicsState.ColorStroking = color
   523  
   524  	return nil
   525  }
   526  
   527  // k: Same as K but for non-stroking colorspace, color.
   528  func (this *ContentStreamProcessor) handleCommand_k(op *ContentStreamOperation, resources *PdfPageResources) error {
   529  	cs := NewPdfColorspaceDeviceCMYK()
   530  	if len(op.Params) != cs.GetNumComponents() {
   531  		common.Log.Debug("Invalid number of parameters for SC")
   532  		common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs)
   533  		return errors.New("Invalid number of parameters")
   534  	}
   535  
   536  	color, err := cs.ColorFromPdfObjects(op.Params)
   537  	if err != nil {
   538  		return err
   539  	}
   540  
   541  	this.graphicsState.ColorspaceNonStroking = cs
   542  	this.graphicsState.ColorNonStroking = color
   543  
   544  	return nil
   545  }