github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/contentstream/processor.go (about) 1 /* 2 * This file is subject to the terms and conditions defined in 3 * file 'LICENSE.md', which is part of this source code package. 4 */ 5 6 package contentstream 7 8 import ( 9 "errors" 10 11 "github.com/unidoc/unidoc/common" 12 . "github.com/unidoc/unidoc/pdf/core" 13 . "github.com/unidoc/unidoc/pdf/model" 14 ) 15 16 // Basic graphics state implementation. 17 // Initially only implementing and tracking a portion of the information specified. Easy to add more. 18 type GraphicsState struct { 19 ColorspaceStroking PdfColorspace 20 ColorspaceNonStroking PdfColorspace 21 ColorStroking PdfColor 22 ColorNonStroking PdfColor 23 } 24 25 type GraphicStateStack []GraphicsState 26 27 func (gsStack *GraphicStateStack) Push(gs GraphicsState) { 28 *gsStack = append(*gsStack, gs) 29 } 30 31 func (gsStack *GraphicStateStack) Pop() GraphicsState { 32 gs := (*gsStack)[len(*gsStack)-1] 33 *gsStack = (*gsStack)[:len(*gsStack)-1] 34 return gs 35 } 36 37 // ContentStreamProcessor defines a data structure and methods for processing a content stream, keeping track of the 38 // current graphics state, and allowing external handlers to define their own functions as a part of the processing, 39 // for example rendering or extracting certain information. 40 type ContentStreamProcessor struct { 41 graphicsStack GraphicStateStack 42 operations []*ContentStreamOperation 43 graphicsState GraphicsState 44 45 handlers []HandlerEntry 46 currentIndex int 47 } 48 49 type HandlerFunc func(op *ContentStreamOperation, gs GraphicsState, resources *PdfPageResources) error 50 51 type HandlerEntry struct { 52 Condition HandlerConditionEnum 53 Operand string 54 Handler HandlerFunc 55 } 56 57 type HandlerConditionEnum int 58 59 func (this HandlerConditionEnum) All() bool { 60 return this == HandlerConditionEnumAllOperands 61 } 62 63 func (this HandlerConditionEnum) Operand() bool { 64 return this == HandlerConditionEnumOperand 65 } 66 67 const ( 68 HandlerConditionEnumOperand HandlerConditionEnum = iota 69 HandlerConditionEnumAllOperands HandlerConditionEnum = iota 70 ) 71 72 func NewContentStreamProcessor(ops []*ContentStreamOperation) *ContentStreamProcessor { 73 csp := ContentStreamProcessor{} 74 csp.graphicsStack = GraphicStateStack{} 75 76 // Set defaults.. 77 gs := GraphicsState{} 78 79 csp.graphicsState = gs 80 81 csp.handlers = []HandlerEntry{} 82 csp.currentIndex = 0 83 csp.operations = ops 84 85 return &csp 86 } 87 88 func (csp *ContentStreamProcessor) AddHandler(condition HandlerConditionEnum, operand string, handler HandlerFunc) { 89 entry := HandlerEntry{} 90 entry.Condition = condition 91 entry.Operand = operand 92 entry.Handler = handler 93 csp.handlers = append(csp.handlers, entry) 94 } 95 96 func (csp *ContentStreamProcessor) getColorspace(name string, resources *PdfPageResources) (PdfColorspace, error) { 97 switch name { 98 case "DeviceGray": 99 return NewPdfColorspaceDeviceGray(), nil 100 case "DeviceRGB": 101 return NewPdfColorspaceDeviceRGB(), nil 102 case "DeviceCMYK": 103 return NewPdfColorspaceDeviceCMYK(), nil 104 case "Pattern": 105 return NewPdfColorspaceSpecialPattern(), nil 106 } 107 108 // Next check the colorspace dictionary. 109 cs, has := resources.ColorSpace.Colorspaces[name] 110 if has { 111 return cs, nil 112 } 113 114 // Lastly check other potential colormaps. 115 switch name { 116 case "CalGray": 117 return NewPdfColorspaceCalGray(), nil 118 case "CalRGB": 119 return NewPdfColorspaceCalRGB(), nil 120 case "Lab": 121 return NewPdfColorspaceLab(), nil 122 } 123 124 // Otherwise unsupported. 125 common.Log.Debug("Unknown colorspace requested: %s", name) 126 return nil, errors.New("Unsupported colorspace") 127 } 128 129 // Get initial color for a given colorspace. 130 func (csp *ContentStreamProcessor) getInitialColor(cs PdfColorspace) (PdfColor, error) { 131 switch cs := cs.(type) { 132 case *PdfColorspaceDeviceGray: 133 return NewPdfColorDeviceGray(0.0), nil 134 case *PdfColorspaceDeviceRGB: 135 return NewPdfColorDeviceRGB(0.0, 0.0, 0.0), nil 136 case *PdfColorspaceDeviceCMYK: 137 return NewPdfColorDeviceCMYK(0.0, 0.0, 0.0, 1.0), nil 138 case *PdfColorspaceCalGray: 139 return NewPdfColorCalGray(0.0), nil 140 case *PdfColorspaceCalRGB: 141 return NewPdfColorCalRGB(0.0, 0.0, 0.0), nil 142 case *PdfColorspaceLab: 143 l := 0.0 144 a := 0.0 145 b := 0.0 146 if cs.Range[0] > 0 { 147 l = cs.Range[0] 148 } 149 if cs.Range[2] > 0 { 150 a = cs.Range[2] 151 } 152 return NewPdfColorLab(l, a, b), nil 153 case *PdfColorspaceICCBased: 154 if cs.Alternate == nil { 155 // Alternate not defined. 156 // Try to fall back to DeviceGray, DeviceRGB or DeviceCMYK. 157 common.Log.Trace("ICC Based not defined - attempting fall back (N = %d)", cs.N) 158 if cs.N == 1 { 159 common.Log.Trace("Falling back to DeviceGray") 160 return csp.getInitialColor(NewPdfColorspaceDeviceGray()) 161 } else if cs.N == 3 { 162 common.Log.Trace("Falling back to DeviceRGB") 163 return csp.getInitialColor(NewPdfColorspaceDeviceRGB()) 164 } else if cs.N == 4 { 165 common.Log.Trace("Falling back to DeviceCMYK") 166 return csp.getInitialColor(NewPdfColorspaceDeviceCMYK()) 167 } else { 168 return nil, errors.New("Alternate space not defined for ICC") 169 } 170 } 171 return csp.getInitialColor(cs.Alternate) 172 case *PdfColorspaceSpecialIndexed: 173 if cs.Base == nil { 174 return nil, errors.New("Indexed base not specified") 175 } 176 return csp.getInitialColor(cs.Base) 177 case *PdfColorspaceSpecialSeparation: 178 if cs.AlternateSpace == nil { 179 return nil, errors.New("Alternate space not specified") 180 } 181 return csp.getInitialColor(cs.AlternateSpace) 182 case *PdfColorspaceDeviceN: 183 if cs.AlternateSpace == nil { 184 return nil, errors.New("Alternate space not specified") 185 } 186 return csp.getInitialColor(cs.AlternateSpace) 187 case *PdfColorspaceSpecialPattern: 188 // FIXME/check: A pattern does not have an initial color... 189 return nil, nil 190 } 191 192 common.Log.Debug("Unable to determine initial color for unknown colorspace: %T", cs) 193 return nil, errors.New("Unsupported colorspace") 194 } 195 196 // Process the entire operations. 197 func (this *ContentStreamProcessor) Process(resources *PdfPageResources) error { 198 // Initialize graphics state 199 this.graphicsState.ColorspaceStroking = NewPdfColorspaceDeviceGray() 200 this.graphicsState.ColorspaceNonStroking = NewPdfColorspaceDeviceGray() 201 this.graphicsState.ColorStroking = NewPdfColorDeviceGray(0) 202 this.graphicsState.ColorNonStroking = NewPdfColorDeviceGray(0) 203 204 for _, op := range this.operations { 205 var err error 206 207 // Internal handling. 208 switch op.Operand { 209 case "q": 210 this.graphicsStack.Push(this.graphicsState) 211 case "Q": 212 this.graphicsState = this.graphicsStack.Pop() 213 214 // Color operations (Table 74 p. 179) 215 case "CS": 216 err = this.handleCommand_CS(op, resources) 217 case "cs": 218 err = this.handleCommand_cs(op, resources) 219 case "SC": 220 err = this.handleCommand_SC(op, resources) 221 case "SCN": 222 err = this.handleCommand_SCN(op, resources) 223 case "sc": 224 err = this.handleCommand_sc(op, resources) 225 case "scn": 226 err = this.handleCommand_scn(op, resources) 227 case "G": 228 err = this.handleCommand_G(op, resources) 229 case "g": 230 err = this.handleCommand_g(op, resources) 231 case "RG": 232 err = this.handleCommand_RG(op, resources) 233 case "rg": 234 err = this.handleCommand_rg(op, resources) 235 case "K": 236 err = this.handleCommand_K(op, resources) 237 case "k": 238 err = this.handleCommand_k(op, resources) 239 } 240 if err != nil { 241 common.Log.Debug("Processor handling error (%s): %v", op.Operand, err) 242 common.Log.Debug("Operand: %#v", op.Operand) 243 return err 244 } 245 246 // Check if have external handler also, and process if so. 247 for _, entry := range this.handlers { 248 var err error 249 if entry.Condition.All() { 250 err = entry.Handler(op, this.graphicsState, resources) 251 } else if entry.Condition.Operand() && op.Operand == entry.Operand { 252 err = entry.Handler(op, this.graphicsState, resources) 253 } 254 if err != nil { 255 common.Log.Debug("Processor handler error: %v", err) 256 return err 257 } 258 } 259 } 260 261 return nil 262 } 263 264 // CS: Set the current color space for stroking operations. 265 func (csp *ContentStreamProcessor) handleCommand_CS(op *ContentStreamOperation, resources *PdfPageResources) error { 266 if len(op.Params) < 1 { 267 common.Log.Debug("Invalid cs command, skipping over") 268 return errors.New("Too few parameters") 269 } 270 if len(op.Params) > 1 { 271 common.Log.Debug("cs command with too many parameters - continuing") 272 return errors.New("Too many parameters") 273 } 274 name, ok := op.Params[0].(*PdfObjectName) 275 if !ok { 276 common.Log.Debug("ERROR: cs command with invalid parameter, skipping over") 277 return errors.New("Type check error") 278 } 279 // Set the current color space to use for stroking operations. 280 // Either device based or referring to resource dict. 281 cs, err := csp.getColorspace(string(*name), resources) 282 if err != nil { 283 return err 284 } 285 csp.graphicsState.ColorspaceStroking = cs 286 287 // Set initial color. 288 color, err := csp.getInitialColor(cs) 289 if err != nil { 290 return err 291 } 292 csp.graphicsState.ColorStroking = color 293 294 return nil 295 } 296 297 // cs: Set the current color space for non-stroking operations. 298 func (csp *ContentStreamProcessor) handleCommand_cs(op *ContentStreamOperation, resources *PdfPageResources) error { 299 if len(op.Params) < 1 { 300 common.Log.Debug("Invalid CS command, skipping over") 301 return errors.New("Too few parameters") 302 } 303 if len(op.Params) > 1 { 304 common.Log.Debug("CS command with too many parameters - continuing") 305 return errors.New("Too many parameters") 306 } 307 name, ok := op.Params[0].(*PdfObjectName) 308 if !ok { 309 common.Log.Debug("ERROR: CS command with invalid parameter, skipping over") 310 return errors.New("Type check error") 311 } 312 // Set the current color space to use for non-stroking operations. 313 // Either device based or referring to resource dict. 314 cs, err := csp.getColorspace(string(*name), resources) 315 if err != nil { 316 return err 317 } 318 csp.graphicsState.ColorspaceNonStroking = cs 319 320 // Set initial color. 321 color, err := csp.getInitialColor(cs) 322 if err != nil { 323 return err 324 } 325 csp.graphicsState.ColorNonStroking = color 326 327 return nil 328 } 329 330 // SC: Set the color to use for stroking operations in a device, CIE-based or Indexed colorspace. (not ICC based) 331 func (this *ContentStreamProcessor) handleCommand_SC(op *ContentStreamOperation, resources *PdfPageResources) error { 332 // For DeviceGray, CalGray, Indexed: one operand is required 333 // For DeviceRGB, CalRGB, Lab: 3 operands required 334 335 cs := this.graphicsState.ColorspaceStroking 336 if len(op.Params) != cs.GetNumComponents() { 337 common.Log.Debug("Invalid number of parameters for SC") 338 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 339 return errors.New("Invalid number of parameters") 340 } 341 342 color, err := cs.ColorFromPdfObjects(op.Params) 343 if err != nil { 344 return err 345 } 346 347 this.graphicsState.ColorStroking = color 348 return nil 349 } 350 351 func isPatternCS(cs PdfColorspace) bool { 352 _, isPattern := cs.(*PdfColorspaceSpecialPattern) 353 return isPattern 354 } 355 356 // SCN: Same as SC but also supports Pattern, Separation, DeviceN and ICCBased color spaces. 357 func (this *ContentStreamProcessor) handleCommand_SCN(op *ContentStreamOperation, resources *PdfPageResources) error { 358 cs := this.graphicsState.ColorspaceStroking 359 360 if !isPatternCS(cs) { 361 if len(op.Params) != cs.GetNumComponents() { 362 common.Log.Debug("Invalid number of parameters for SC") 363 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 364 return errors.New("Invalid number of parameters") 365 } 366 } 367 368 color, err := cs.ColorFromPdfObjects(op.Params) 369 if err != nil { 370 return err 371 } 372 373 this.graphicsState.ColorStroking = color 374 375 return nil 376 } 377 378 // sc: Same as SC except used for non-stroking operations. 379 func (this *ContentStreamProcessor) handleCommand_sc(op *ContentStreamOperation, resources *PdfPageResources) error { 380 cs := this.graphicsState.ColorspaceNonStroking 381 382 if !isPatternCS(cs) { 383 if len(op.Params) != cs.GetNumComponents() { 384 common.Log.Debug("Invalid number of parameters for SC") 385 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 386 return errors.New("Invalid number of parameters") 387 } 388 } 389 390 color, err := cs.ColorFromPdfObjects(op.Params) 391 if err != nil { 392 return err 393 } 394 395 this.graphicsState.ColorNonStroking = color 396 397 return nil 398 } 399 400 // scn: Same as SCN except used for non-stroking operations. 401 func (this *ContentStreamProcessor) handleCommand_scn(op *ContentStreamOperation, resources *PdfPageResources) error { 402 cs := this.graphicsState.ColorspaceNonStroking 403 404 if !isPatternCS(cs) { 405 if len(op.Params) != cs.GetNumComponents() { 406 common.Log.Debug("Invalid number of parameters for SC") 407 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 408 return errors.New("Invalid number of parameters") 409 } 410 } 411 412 color, err := cs.ColorFromPdfObjects(op.Params) 413 if err != nil { 414 common.Log.Debug("ERROR: Fail to get color from params: %+v (CS is %+v)", op.Params, cs) 415 return err 416 } 417 418 this.graphicsState.ColorNonStroking = color 419 420 return nil 421 } 422 423 // G: Set the stroking colorspace to DeviceGray, and the color to the specified graylevel (range [0-1]). 424 // gray G 425 func (this *ContentStreamProcessor) handleCommand_G(op *ContentStreamOperation, resources *PdfPageResources) error { 426 cs := NewPdfColorspaceDeviceGray() 427 if len(op.Params) != cs.GetNumComponents() { 428 common.Log.Debug("Invalid number of parameters for SC") 429 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 430 return errors.New("Invalid number of parameters") 431 } 432 433 color, err := cs.ColorFromPdfObjects(op.Params) 434 if err != nil { 435 return err 436 } 437 438 this.graphicsState.ColorspaceStroking = cs 439 this.graphicsState.ColorStroking = color 440 441 return nil 442 } 443 444 // g: Same as G, but for non-stroking colorspace and color (range [0-1]). 445 // gray g 446 func (this *ContentStreamProcessor) handleCommand_g(op *ContentStreamOperation, resources *PdfPageResources) error { 447 cs := NewPdfColorspaceDeviceGray() 448 if len(op.Params) != cs.GetNumComponents() { 449 common.Log.Debug("Invalid number of parameters for SC") 450 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 451 return errors.New("Invalid number of parameters") 452 } 453 454 color, err := cs.ColorFromPdfObjects(op.Params) 455 if err != nil { 456 return err 457 } 458 459 this.graphicsState.ColorspaceNonStroking = cs 460 this.graphicsState.ColorNonStroking = color 461 462 return nil 463 } 464 465 // RG: Sets the stroking colorspace to DeviceRGB and the stroking color to r,g,b. [0-1] ranges. 466 // r g b RG 467 func (this *ContentStreamProcessor) handleCommand_RG(op *ContentStreamOperation, resources *PdfPageResources) error { 468 cs := NewPdfColorspaceDeviceRGB() 469 if len(op.Params) != cs.GetNumComponents() { 470 common.Log.Debug("Invalid number of parameters for SC") 471 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 472 return errors.New("Invalid number of parameters") 473 } 474 475 color, err := cs.ColorFromPdfObjects(op.Params) 476 if err != nil { 477 return err 478 } 479 480 this.graphicsState.ColorspaceStroking = cs 481 this.graphicsState.ColorStroking = color 482 483 return nil 484 } 485 486 // rg: Same as RG but for non-stroking colorspace, color. 487 func (this *ContentStreamProcessor) handleCommand_rg(op *ContentStreamOperation, resources *PdfPageResources) error { 488 cs := NewPdfColorspaceDeviceRGB() 489 if len(op.Params) != cs.GetNumComponents() { 490 common.Log.Debug("Invalid number of parameters for SC") 491 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 492 return errors.New("Invalid number of parameters") 493 } 494 495 color, err := cs.ColorFromPdfObjects(op.Params) 496 if err != nil { 497 return err 498 } 499 500 this.graphicsState.ColorspaceNonStroking = cs 501 this.graphicsState.ColorNonStroking = color 502 503 return nil 504 } 505 506 // K: Sets the stroking colorspace to DeviceCMYK and the stroking color to c,m,y,k. [0-1] ranges. 507 // c m y k K 508 func (this *ContentStreamProcessor) handleCommand_K(op *ContentStreamOperation, resources *PdfPageResources) error { 509 cs := NewPdfColorspaceDeviceCMYK() 510 if len(op.Params) != cs.GetNumComponents() { 511 common.Log.Debug("Invalid number of parameters for SC") 512 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 513 return errors.New("Invalid number of parameters") 514 } 515 516 color, err := cs.ColorFromPdfObjects(op.Params) 517 if err != nil { 518 return err 519 } 520 521 this.graphicsState.ColorspaceStroking = cs 522 this.graphicsState.ColorStroking = color 523 524 return nil 525 } 526 527 // k: Same as K but for non-stroking colorspace, color. 528 func (this *ContentStreamProcessor) handleCommand_k(op *ContentStreamOperation, resources *PdfPageResources) error { 529 cs := NewPdfColorspaceDeviceCMYK() 530 if len(op.Params) != cs.GetNumComponents() { 531 common.Log.Debug("Invalid number of parameters for SC") 532 common.Log.Debug("Number %d not matching colorspace %T", len(op.Params), cs) 533 return errors.New("Invalid number of parameters") 534 } 535 536 color, err := cs.ColorFromPdfObjects(op.Params) 537 if err != nil { 538 return err 539 } 540 541 this.graphicsState.ColorspaceNonStroking = cs 542 this.graphicsState.ColorNonStroking = color 543 544 return nil 545 }