github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/contentstream/encoding.go (about)

     1  /*
     2   * This file is subject to the terms and conditions defined in
     3   * file 'LICENSE.md', which is part of this source code package.
     4   */
     5  
     6  package contentstream
     7  
     8  import (
     9  	"bytes"
    10  	"errors"
    11  	"fmt"
    12  	gocolor "image/color"
    13  	"image/jpeg"
    14  
    15  	"github.com/unidoc/unidoc/common"
    16  	"github.com/unidoc/unidoc/pdf/core"
    17  )
    18  
    19  // Creates the encoder for the inline image's Filter and DecodeParms.
    20  func newEncoderFromInlineImage(inlineImage *ContentStreamInlineImage) (core.StreamEncoder, error) {
    21  	if inlineImage.Filter == nil {
    22  		// No filter, return raw data back.
    23  		return core.NewRawEncoder(), nil
    24  	}
    25  
    26  	// The filter should be a name or an array with a list of filter names.
    27  	filterName, ok := inlineImage.Filter.(*core.PdfObjectName)
    28  	if !ok {
    29  		array, ok := inlineImage.Filter.(*core.PdfObjectArray)
    30  		if !ok {
    31  			return nil, fmt.Errorf("Filter not a Name or Array object")
    32  		}
    33  		if len(*array) == 0 {
    34  			// Empty array -> indicates raw filter (no filter).
    35  			return core.NewRawEncoder(), nil
    36  		}
    37  
    38  		if len(*array) != 1 {
    39  			menc, err := newMultiEncoderFromInlineImage(inlineImage)
    40  			if err != nil {
    41  				common.Log.Error("Failed creating multi encoder: %v", err)
    42  				return nil, err
    43  			}
    44  
    45  			common.Log.Trace("Multi enc: %s\n", menc)
    46  			return menc, nil
    47  		}
    48  
    49  		// Single element.
    50  		filterObj := (*array)[0]
    51  		filterName, ok = filterObj.(*core.PdfObjectName)
    52  		if !ok {
    53  			return nil, fmt.Errorf("Filter array member not a Name object")
    54  		}
    55  	}
    56  
    57  	// From Table 94 p. 224 (PDF32000_2008):
    58  	// Additional Abbreviations in an Inline Image Object:
    59  
    60  	switch *filterName {
    61  	case "AHx", "ASCIIHexDecode":
    62  		return core.NewASCIIHexEncoder(), nil
    63  	case "A85", "ASCII85Decode":
    64  		return core.NewASCII85Encoder(), nil
    65  	case "DCT", "DCTDecode":
    66  		return newDCTEncoderFromInlineImage(inlineImage)
    67  	case "Fl", "FlateDecode":
    68  		return newFlateEncoderFromInlineImage(inlineImage, nil)
    69  	case "LZW", "LZWDecode":
    70  		return newLZWEncoderFromInlineImage(inlineImage, nil)
    71  	case "CCF", "CCITTFaxDecode":
    72  		return core.NewCCITTFaxEncoder(), nil
    73  	case "RL", "RunLengthDecode":
    74  		return core.NewRunLengthEncoder(), nil
    75  	default:
    76  		common.Log.Debug("Unsupported inline image encoding filter name : %s", *filterName)
    77  		return nil, errors.New("Unsupported inline encoding method")
    78  	}
    79  }
    80  
    81  // Create a new flate decoder from an inline image object, getting all the encoding parameters
    82  // from the DecodeParms stream object dictionary entry that can be provided optionally, usually
    83  // only when a multi filter is used.
    84  func newFlateEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decodeParams *core.PdfObjectDictionary) (*core.FlateEncoder, error) {
    85  	encoder := core.NewFlateEncoder()
    86  
    87  	// If decodeParams not provided, see if we can get from the stream.
    88  	if decodeParams == nil {
    89  		obj := inlineImage.DecodeParms
    90  		if obj != nil {
    91  			dp, isDict := obj.(*core.PdfObjectDictionary)
    92  			if !isDict {
    93  				common.Log.Debug("Error: DecodeParms not a dictionary (%T)", obj)
    94  				return nil, fmt.Errorf("Invalid DecodeParms")
    95  			}
    96  			decodeParams = dp
    97  		}
    98  	}
    99  	if decodeParams == nil {
   100  		// Can safely return here if no decode params, as the following depend on the decode params.
   101  		return encoder, nil
   102  	}
   103  
   104  	common.Log.Trace("decode params: %s", decodeParams.String())
   105  	obj := decodeParams.Get("Predictor")
   106  	if obj == nil {
   107  		common.Log.Debug("Error: Predictor missing from DecodeParms - Continue with default (1)")
   108  	} else {
   109  		predictor, ok := obj.(*core.PdfObjectInteger)
   110  		if !ok {
   111  			common.Log.Debug("Error: Predictor specified but not numeric (%T)", obj)
   112  			return nil, fmt.Errorf("Invalid Predictor")
   113  		}
   114  		encoder.Predictor = int(*predictor)
   115  	}
   116  
   117  	// Bits per component.  Use default if not specified (8).
   118  	obj = decodeParams.Get("BitsPerComponent")
   119  	if obj != nil {
   120  		bpc, ok := obj.(*core.PdfObjectInteger)
   121  		if !ok {
   122  			common.Log.Debug("ERROR: Invalid BitsPerComponent")
   123  			return nil, fmt.Errorf("Invalid BitsPerComponent")
   124  		}
   125  		encoder.BitsPerComponent = int(*bpc)
   126  	}
   127  
   128  	if encoder.Predictor > 1 {
   129  		// Columns.
   130  		encoder.Columns = 1
   131  		obj = decodeParams.Get("Columns")
   132  		if obj != nil {
   133  			columns, ok := obj.(*core.PdfObjectInteger)
   134  			if !ok {
   135  				return nil, fmt.Errorf("Predictor column invalid")
   136  			}
   137  
   138  			encoder.Columns = int(*columns)
   139  		}
   140  
   141  		// Colors.
   142  		// Number of interleaved color components per sample (Default 1 if not specified)
   143  		encoder.Colors = 1
   144  		obj := decodeParams.Get("Colors")
   145  		if obj != nil {
   146  			colors, ok := obj.(*core.PdfObjectInteger)
   147  			if !ok {
   148  				return nil, fmt.Errorf("Predictor colors not an integer")
   149  			}
   150  			encoder.Colors = int(*colors)
   151  		}
   152  	}
   153  
   154  	return encoder, nil
   155  }
   156  
   157  // Create a new LZW encoder/decoder based on an inline image object, getting all the encoding parameters
   158  // from the DecodeParms stream object dictionary entry.
   159  func newLZWEncoderFromInlineImage(inlineImage *ContentStreamInlineImage, decodeParams *core.PdfObjectDictionary) (*core.LZWEncoder, error) {
   160  	// Start with default settings.
   161  	encoder := core.NewLZWEncoder()
   162  
   163  	// If decodeParams not provided, see if we can get from the inline image directly.
   164  	if decodeParams == nil {
   165  		if inlineImage.DecodeParms != nil {
   166  			dp, isDict := inlineImage.DecodeParms.(*core.PdfObjectDictionary)
   167  			if !isDict {
   168  				common.Log.Debug("Error: DecodeParms not a dictionary (%T)", inlineImage.DecodeParms)
   169  				return nil, fmt.Errorf("Invalid DecodeParms")
   170  			}
   171  			decodeParams = dp
   172  		}
   173  	}
   174  
   175  	if decodeParams == nil {
   176  		// No decode parameters. Can safely return here if not set as the following options
   177  		// are related to the decode Params.
   178  		return encoder, nil
   179  	}
   180  
   181  	// The EarlyChange indicates when to increase code length, as different
   182  	// implementations use a different mechanisms. Essentially this chooses
   183  	// which LZW implementation to use.
   184  	// The default is 1 (one code early)
   185  	//
   186  	// The EarlyChange parameter is specified in the object stream dictionary for regular streams,
   187  	// but it is not specified explicitly where to check for it in the case of inline images.
   188  	// We will check in the decodeParms for now, we can adjust later if we come across cases of this.
   189  	obj := decodeParams.Get("EarlyChange")
   190  	if obj != nil {
   191  		earlyChange, ok := obj.(*core.PdfObjectInteger)
   192  		if !ok {
   193  			common.Log.Debug("Error: EarlyChange specified but not numeric (%T)", obj)
   194  			return nil, fmt.Errorf("Invalid EarlyChange")
   195  		}
   196  		if *earlyChange != 0 && *earlyChange != 1 {
   197  			return nil, fmt.Errorf("Invalid EarlyChange value (not 0 or 1)")
   198  		}
   199  
   200  		encoder.EarlyChange = int(*earlyChange)
   201  	} else {
   202  		encoder.EarlyChange = 1 // default
   203  	}
   204  
   205  	obj = decodeParams.Get("Predictor")
   206  	if obj != nil {
   207  		predictor, ok := obj.(*core.PdfObjectInteger)
   208  		if !ok {
   209  			common.Log.Debug("Error: Predictor specified but not numeric (%T)", obj)
   210  			return nil, fmt.Errorf("Invalid Predictor")
   211  		}
   212  		encoder.Predictor = int(*predictor)
   213  	}
   214  
   215  	// Bits per component.  Use default if not specified (8).
   216  	obj = decodeParams.Get("BitsPerComponent")
   217  	if obj != nil {
   218  		bpc, ok := obj.(*core.PdfObjectInteger)
   219  		if !ok {
   220  			common.Log.Debug("ERROR: Invalid BitsPerComponent")
   221  			return nil, fmt.Errorf("Invalid BitsPerComponent")
   222  		}
   223  		encoder.BitsPerComponent = int(*bpc)
   224  	}
   225  
   226  	if encoder.Predictor > 1 {
   227  		// Columns.
   228  		encoder.Columns = 1
   229  		obj = decodeParams.Get("Columns")
   230  		if obj != nil {
   231  			columns, ok := obj.(*core.PdfObjectInteger)
   232  			if !ok {
   233  				return nil, fmt.Errorf("Predictor column invalid")
   234  			}
   235  
   236  			encoder.Columns = int(*columns)
   237  		}
   238  
   239  		// Colors.
   240  		// Number of interleaved color components per sample (Default 1 if not specified)
   241  		encoder.Colors = 1
   242  		obj = decodeParams.Get("Colors")
   243  		if obj != nil {
   244  			colors, ok := obj.(*core.PdfObjectInteger)
   245  			if !ok {
   246  				return nil, fmt.Errorf("Predictor colors not an integer")
   247  			}
   248  			encoder.Colors = int(*colors)
   249  		}
   250  	}
   251  
   252  	common.Log.Trace("decode params: %s", decodeParams.String())
   253  	return encoder, nil
   254  }
   255  
   256  // Create a new DCT encoder/decoder based on an inline image, getting all the encoding parameters
   257  // from the stream object dictionary entry and the image data itself.
   258  func newDCTEncoderFromInlineImage(inlineImage *ContentStreamInlineImage) (*core.DCTEncoder, error) {
   259  	// Start with default settings.
   260  	encoder := core.NewDCTEncoder()
   261  
   262  	bufReader := bytes.NewReader(inlineImage.stream)
   263  
   264  	cfg, err := jpeg.DecodeConfig(bufReader)
   265  	//img, _, err := goimage.Decode(bufReader)
   266  	if err != nil {
   267  		common.Log.Debug("Error decoding file: %s", err)
   268  		return nil, err
   269  	}
   270  
   271  	switch cfg.ColorModel {
   272  	case gocolor.RGBAModel:
   273  		encoder.BitsPerComponent = 8
   274  		encoder.ColorComponents = 3 // alpha is not included in pdf.
   275  	case gocolor.RGBA64Model:
   276  		encoder.BitsPerComponent = 16
   277  		encoder.ColorComponents = 3
   278  	case gocolor.GrayModel:
   279  		encoder.BitsPerComponent = 8
   280  		encoder.ColorComponents = 1
   281  	case gocolor.Gray16Model:
   282  		encoder.BitsPerComponent = 16
   283  		encoder.ColorComponents = 1
   284  	case gocolor.CMYKModel:
   285  		encoder.BitsPerComponent = 8
   286  		encoder.ColorComponents = 4
   287  	case gocolor.YCbCrModel:
   288  		// YCbCr is not supported by PDF, but it could be a different colorspace
   289  		// with 3 components.  Would be specified by the ColorSpace entry.
   290  		encoder.BitsPerComponent = 8
   291  		encoder.ColorComponents = 3
   292  	default:
   293  		return nil, errors.New("Unsupported color model")
   294  	}
   295  	encoder.Width = cfg.Width
   296  	encoder.Height = cfg.Height
   297  	common.Log.Trace("DCT Encoder: %+v", encoder)
   298  
   299  	return encoder, nil
   300  }
   301  
   302  // Create a new multi-filter encoder/decoder based on an inline image, getting all the encoding parameters
   303  // from the filter specification and the DecodeParms (DP) dictionaries.
   304  func newMultiEncoderFromInlineImage(inlineImage *ContentStreamInlineImage) (*core.MultiEncoder, error) {
   305  	mencoder := core.NewMultiEncoder()
   306  
   307  	// Prepare the decode params array (one for each filter type)
   308  	// Optional, not always present.
   309  	var decodeParamsDict *core.PdfObjectDictionary
   310  	decodeParamsArray := []core.PdfObject{}
   311  	if obj := inlineImage.DecodeParms; obj != nil {
   312  		// If it is a dictionary, assume it applies to all
   313  		dict, isDict := obj.(*core.PdfObjectDictionary)
   314  		if isDict {
   315  			decodeParamsDict = dict
   316  		}
   317  
   318  		// If it is an array, assume there is one for each
   319  		arr, isArray := obj.(*core.PdfObjectArray)
   320  		if isArray {
   321  			for _, dictObj := range *arr {
   322  				if dict, is := dictObj.(*core.PdfObjectDictionary); is {
   323  					decodeParamsArray = append(decodeParamsArray, dict)
   324  				} else {
   325  					decodeParamsArray = append(decodeParamsArray, nil)
   326  				}
   327  			}
   328  		}
   329  	}
   330  
   331  	obj := inlineImage.Filter
   332  	if obj == nil {
   333  		return nil, fmt.Errorf("Filter missing")
   334  	}
   335  
   336  	array, ok := obj.(*core.PdfObjectArray)
   337  	if !ok {
   338  		return nil, fmt.Errorf("Multi filter can only be made from array")
   339  	}
   340  
   341  	for idx, obj := range *array {
   342  		name, ok := obj.(*core.PdfObjectName)
   343  		if !ok {
   344  			return nil, fmt.Errorf("Multi filter array element not a name")
   345  		}
   346  
   347  		var dp core.PdfObject
   348  
   349  		// If decode params dict is set, use it.  Otherwise take from array..
   350  		if decodeParamsDict != nil {
   351  			dp = decodeParamsDict
   352  		} else {
   353  			// Only get the dp if provided.  Oftentimes there is no decode params dict
   354  			// provided.
   355  			if len(decodeParamsArray) > 0 {
   356  				if idx >= len(decodeParamsArray) {
   357  					return nil, fmt.Errorf("Missing elements in decode params array")
   358  				}
   359  				dp = decodeParamsArray[idx]
   360  			}
   361  		}
   362  
   363  		var dParams *core.PdfObjectDictionary
   364  		if dict, is := dp.(*core.PdfObjectDictionary); is {
   365  			dParams = dict
   366  		}
   367  
   368  		if *name == core.StreamEncodingFilterNameFlate || *name == "Fl" {
   369  			// XXX: need to separate out the DecodeParms..
   370  			encoder, err := newFlateEncoderFromInlineImage(inlineImage, dParams)
   371  			if err != nil {
   372  				return nil, err
   373  			}
   374  			mencoder.AddEncoder(encoder)
   375  		} else if *name == core.StreamEncodingFilterNameLZW {
   376  			encoder, err := newLZWEncoderFromInlineImage(inlineImage, dParams)
   377  			if err != nil {
   378  				return nil, err
   379  			}
   380  			mencoder.AddEncoder(encoder)
   381  		} else if *name == core.StreamEncodingFilterNameASCIIHex {
   382  			encoder := core.NewASCIIHexEncoder()
   383  			mencoder.AddEncoder(encoder)
   384  		} else if *name == core.StreamEncodingFilterNameASCII85 || *name == "A85" {
   385  			encoder := core.NewASCII85Encoder()
   386  			mencoder.AddEncoder(encoder)
   387  		} else {
   388  			common.Log.Error("Unsupported filter %s", *name)
   389  			return nil, fmt.Errorf("Invalid filter in multi filter array")
   390  		}
   391  	}
   392  
   393  	return mencoder, nil
   394  }