github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/core/primitives.go (about)

     1  /*
     2   * This file is subject to the terms and conditions defined in
     3   * file 'LICENSE.md', which is part of this source code package.
     4   */
     5  
     6  package core
     7  
     import (
     	"bytes"
     	"fmt"
     	"strconv"

     	"github.com/unidoc/unidoc/common"
     )
    14  
    // PdfObject is the interface that every primitive PDF object type must satisfy.
    type PdfObject interface {
    	// String returns a string representation of the primitive (for debugging).
    	fmt.Stringer

    	// DefaultWriteString returns the primitive in the form in which it is written to file, as expected by
    	// the standard.
    	DefaultWriteString() string
    }
    23  
    24  // PdfObjectBool represents the primitive PDF boolean object.
    25  type PdfObjectBool bool
    26  
    27  // PdfObjectInteger represents the primitive PDF integer numerical object.
    28  type PdfObjectInteger int64
    29  
    30  // PdfObjectFloat represents the primitive PDF floating point numerical object.
    31  type PdfObjectFloat float64
    32  
    33  // PdfObjectString represents the primitive PDF string object.
    34  // TODO (v3): Change to a struct and add a flag for hex/plaintext.
    35  type PdfObjectString string
    36  
    37  // PdfObjectName represents the primitive PDF name object.
    38  type PdfObjectName string
    39  
    40  // PdfObjectArray represents the primitive PDF array object.
    41  type PdfObjectArray []PdfObject
    42  
    43  // PdfObjectDictionary represents the primitive PDF dictionary/map object.
    44  type PdfObjectDictionary struct {
    45  	dict map[PdfObjectName]PdfObject
    46  	keys []PdfObjectName
    47  }
    48  
    // PdfObjectNull represents the primitive PDF null object. It carries no data.
    type PdfObjectNull struct{}
    51  
    // PdfObjectReference represents the primitive PDF reference object, identified by an object number and a
    // generation number.
    type PdfObjectReference struct {
    	ObjectNumber, GenerationNumber int64
    }
    57  
    58  // PdfIndirectObject represents the primitive PDF indirect object.
    59  type PdfIndirectObject struct {
    60  	PdfObjectReference
    61  	PdfObject
    62  }
    63  
    64  // PdfObjectStream represents the primitive PDF Object stream.
    65  type PdfObjectStream struct {
    66  	PdfObjectReference
    67  	*PdfObjectDictionary
    68  	Stream []byte
    69  }
    70  
    71  // MakeDict creates and returns an empty PdfObjectDictionary.
    72  func MakeDict() *PdfObjectDictionary {
    73  	d := &PdfObjectDictionary{}
    74  	d.dict = map[PdfObjectName]PdfObject{}
    75  	d.keys = []PdfObjectName{}
    76  	return d
    77  }
    78  
    79  // MakeName creates a PdfObjectName from a string.
    80  func MakeName(s string) *PdfObjectName {
    81  	name := PdfObjectName(s)
    82  	return &name
    83  }
    84  
    85  // MakeInteger creates a PdfObjectInteger from an int64.
    86  func MakeInteger(val int64) *PdfObjectInteger {
    87  	num := PdfObjectInteger(val)
    88  	return &num
    89  }
    90  
    91  // MakeArray creates an PdfObjectArray from a list of PdfObjects.
    92  func MakeArray(objects ...PdfObject) *PdfObjectArray {
    93  	array := PdfObjectArray{}
    94  	for _, obj := range objects {
    95  		array = append(array, obj)
    96  	}
    97  	return &array
    98  }
    99  
   100  // MakeArrayFromIntegers creates an PdfObjectArray from a slice of ints, where each array element is
   101  // an PdfObjectInteger.
   102  func MakeArrayFromIntegers(vals []int) *PdfObjectArray {
   103  	array := PdfObjectArray{}
   104  	for _, val := range vals {
   105  		array = append(array, MakeInteger(int64(val)))
   106  	}
   107  	return &array
   108  }
   109  
   110  // MakeArrayFromIntegers64 creates an PdfObjectArray from a slice of int64s, where each array element
   111  // is an PdfObjectInteger.
   112  func MakeArrayFromIntegers64(vals []int64) *PdfObjectArray {
   113  	array := PdfObjectArray{}
   114  	for _, val := range vals {
   115  		array = append(array, MakeInteger(val))
   116  	}
   117  	return &array
   118  }
   119  
   120  // MakeArrayFromFloats creates an PdfObjectArray from a slice of float64s, where each array element is an
   121  // PdfObjectFloat.
   122  func MakeArrayFromFloats(vals []float64) *PdfObjectArray {
   123  	array := PdfObjectArray{}
   124  	for _, val := range vals {
   125  		array = append(array, MakeFloat(val))
   126  	}
   127  	return &array
   128  }
   129  
   130  // MakeBool creates an PdfObjectBool from a bool.
   131  func MakeBool(val bool) *PdfObjectBool {
   132  	v := PdfObjectBool(val)
   133  	return &v
   134  }
   135  
   136  // MakeFloat creates an PdfObjectFloat from a float64.
   137  func MakeFloat(val float64) *PdfObjectFloat {
   138  	num := PdfObjectFloat(val)
   139  	return &num
   140  }
   141  
   142  // MakeString creates an PdfObjectString from a string.
   143  func MakeString(s string) *PdfObjectString {
   144  	str := PdfObjectString(s)
   145  	return &str
   146  }
   147  
   148  // MakeNull creates an PdfObjectNull.
   149  func MakeNull() *PdfObjectNull {
   150  	null := PdfObjectNull{}
   151  	return &null
   152  }
   153  
   154  // MakeIndirectObject creates an PdfIndirectObject with a specified direct object PdfObject.
   155  func MakeIndirectObject(obj PdfObject) *PdfIndirectObject {
   156  	ind := &PdfIndirectObject{}
   157  	ind.PdfObject = obj
   158  	return ind
   159  }
   160  
   161  // MakeStream creates an PdfObjectStream with specified contents and encoding. If encoding is nil, then raw encoding
   162  // will be used (i.e. no encoding applied).
   163  func MakeStream(contents []byte, encoder StreamEncoder) (*PdfObjectStream, error) {
   164  	stream := &PdfObjectStream{}
   165  
   166  	if encoder == nil {
   167  		encoder = NewRawEncoder()
   168  	}
   169  
   170  	stream.PdfObjectDictionary = encoder.MakeStreamDict()
   171  
   172  	encoded, err := encoder.EncodeBytes(contents)
   173  	if err != nil {
   174  		return nil, err
   175  	}
   176  	stream.PdfObjectDictionary.Set("Length", MakeInteger(int64(len(encoded))))
   177  
   178  	stream.Stream = encoded
   179  	return stream, nil
   180  }
   181  
   182  func (bool *PdfObjectBool) String() string {
   183  	if *bool {
   184  		return "true"
   185  	} else {
   186  		return "false"
   187  	}
   188  }
   189  
   190  // DefaultWriteString outputs the object as it is to be written to file.
   191  func (bool *PdfObjectBool) DefaultWriteString() string {
   192  	if *bool {
   193  		return "true"
   194  	} else {
   195  		return "false"
   196  	}
   197  }
   198  
   199  func (int *PdfObjectInteger) String() string {
   200  	return fmt.Sprintf("%d", *int)
   201  }
   202  
   203  // DefaultWriteString outputs the object as it is to be written to file.
   204  func (int *PdfObjectInteger) DefaultWriteString() string {
   205  	return fmt.Sprintf("%d", *int)
   206  }
   207  
   208  func (float *PdfObjectFloat) String() string {
   209  	return fmt.Sprintf("%f", *float)
   210  }
   211  
   212  // DefaultWriteString outputs the object as it is to be written to file.
   213  func (float *PdfObjectFloat) DefaultWriteString() string {
   214  	return fmt.Sprintf("%f", *float)
   215  }
   216  
   217  func (str *PdfObjectString) String() string {
   218  	return string(*str)
   219  }
   220  
   221  // DefaultWriteString outputs the object as it is to be written to file.
   222  func (str *PdfObjectString) DefaultWriteString() string {
   223  	var output bytes.Buffer
   224  
   225  	escapeSequences := map[byte]string{
   226  		'\n': "\\n",
   227  		'\r': "\\r",
   228  		'\t': "\\t",
   229  		'\b': "\\b",
   230  		'\f': "\\f",
   231  		'(':  "\\(",
   232  		')':  "\\)",
   233  		'\\': "\\\\",
   234  	}
   235  
   236  	output.WriteString("(")
   237  	for i := 0; i < len(*str); i++ {
   238  		char := (*str)[i]
   239  		if escStr, useEsc := escapeSequences[char]; useEsc {
   240  			output.WriteString(escStr)
   241  		} else {
   242  			output.WriteByte(char)
   243  		}
   244  	}
   245  	output.WriteString(")")
   246  
   247  	return output.String()
   248  }
   249  
   250  func (name *PdfObjectName) String() string {
   251  	return fmt.Sprintf("%s", string(*name))
   252  }
   253  
   254  // DefaultWriteString outputs the object as it is to be written to file.
   255  func (name *PdfObjectName) DefaultWriteString() string {
   256  	var output bytes.Buffer
   257  
   258  	if len(*name) > 127 {
   259  		common.Log.Debug("ERROR: Name too long (%s)", *name)
   260  	}
   261  
   262  	output.WriteString("/")
   263  	for i := 0; i < len(*name); i++ {
   264  		char := (*name)[i]
   265  		if !IsPrintable(char) || char == '#' || IsDelimiter(char) {
   266  			output.WriteString(fmt.Sprintf("#%.2x", char))
   267  		} else {
   268  			output.WriteByte(char)
   269  		}
   270  	}
   271  
   272  	return output.String()
   273  }
   274  
   275  // ToFloat64Array returns a slice of all elements in the array as a float64 slice.  An error is returned if the array
   276  // contains non-numeric objects (each element can be either PdfObjectInteger or PdfObjectFloat).
   277  func (array *PdfObjectArray) ToFloat64Array() ([]float64, error) {
   278  	vals := []float64{}
   279  
   280  	for _, obj := range *array {
   281  		if number, is := obj.(*PdfObjectInteger); is {
   282  			vals = append(vals, float64(*number))
   283  		} else if number, is := obj.(*PdfObjectFloat); is {
   284  			vals = append(vals, float64(*number))
   285  		} else {
   286  			return nil, fmt.Errorf("Type error")
   287  		}
   288  	}
   289  
   290  	return vals, nil
   291  }
   292  
   293  // ToIntegerArray returns a slice of all array elements as an int slice. An error is returned if the array contains
   294  // non-integer objects. Each element can only be PdfObjectInteger.
   295  func (array *PdfObjectArray) ToIntegerArray() ([]int, error) {
   296  	vals := []int{}
   297  
   298  	for _, obj := range *array {
   299  		if number, is := obj.(*PdfObjectInteger); is {
   300  			vals = append(vals, int(*number))
   301  		} else {
   302  			return nil, fmt.Errorf("Type error")
   303  		}
   304  	}
   305  
   306  	return vals, nil
   307  }
   308  
   309  func (array *PdfObjectArray) String() string {
   310  	outStr := "["
   311  	for ind, o := range *array {
   312  		outStr += o.String()
   313  		if ind < (len(*array) - 1) {
   314  			outStr += ", "
   315  		}
   316  	}
   317  	outStr += "]"
   318  	return outStr
   319  }
   320  
   321  // DefaultWriteString outputs the object as it is to be written to file.
   322  func (array *PdfObjectArray) DefaultWriteString() string {
   323  	outStr := "["
   324  	for ind, o := range *array {
   325  		outStr += o.DefaultWriteString()
   326  		if ind < (len(*array) - 1) {
   327  			outStr += " "
   328  		}
   329  	}
   330  	outStr += "]"
   331  	return outStr
   332  }
   333  
   334  // Append adds an PdfObject to the array.
   335  func (array *PdfObjectArray) Append(obj PdfObject) {
   336  	*array = append(*array, obj)
   337  }
   338  
   339  func getNumberAsFloat(obj PdfObject) (float64, error) {
   340  	if fObj, ok := obj.(*PdfObjectFloat); ok {
   341  		return float64(*fObj), nil
   342  	}
   343  
   344  	if iObj, ok := obj.(*PdfObjectInteger); ok {
   345  		return float64(*iObj), nil
   346  	}
   347  
   348  	return 0, fmt.Errorf("Not a number")
   349  }
   350  
   351  // GetAsFloat64Slice returns the array as []float64 slice.
   352  // Returns an error if not entirely numeric (only PdfObjectIntegers, PdfObjectFloats).
   353  func (array *PdfObjectArray) GetAsFloat64Slice() ([]float64, error) {
   354  	slice := []float64{}
   355  
   356  	for _, obj := range *array {
   357  		obj := TraceToDirectObject(obj)
   358  		number, err := getNumberAsFloat(obj)
   359  		if err != nil {
   360  			return nil, fmt.Errorf("Array element not a number")
   361  		}
   362  		slice = append(slice, number)
   363  	}
   364  
   365  	return slice, nil
   366  }
   367  
   368  // Merge merges in key/values from another dictionary. Overwriting if has same keys.
   369  func (d *PdfObjectDictionary) Merge(another *PdfObjectDictionary) {
   370  	if another != nil {
   371  		for _, key := range another.Keys() {
   372  			val := another.Get(key)
   373  			d.Set(key, val)
   374  		}
   375  	}
   376  }
   377  
   378  func (d *PdfObjectDictionary) String() string {
   379  	outStr := "Dict("
   380  	for _, k := range d.keys {
   381  		v := d.dict[k]
   382  		outStr += fmt.Sprintf("\"%s\": %s, ", k, v.String())
   383  	}
   384  	outStr += ")"
   385  	return outStr
   386  }
   387  
   388  // DefaultWriteString outputs the object as it is to be written to file.
   389  func (d *PdfObjectDictionary) DefaultWriteString() string {
   390  	outStr := "<<"
   391  	for _, k := range d.keys {
   392  		v := d.dict[k]
   393  		common.Log.Trace("Writing k: %s %T %v %v", k, v, k, v)
   394  		outStr += k.DefaultWriteString()
   395  		outStr += " "
   396  		outStr += v.DefaultWriteString()
   397  	}
   398  	outStr += ">>"
   399  	return outStr
   400  }
   401  
   402  // Set sets the dictionary's key -> val mapping entry. Overwrites if key already set.
   403  func (d *PdfObjectDictionary) Set(key PdfObjectName, val PdfObject) {
   404  	found := false
   405  	for _, k := range d.keys {
   406  		if k == key {
   407  			found = true
   408  			break
   409  		}
   410  	}
   411  
   412  	if !found {
   413  		d.keys = append(d.keys, key)
   414  	}
   415  
   416  	d.dict[key] = val
   417  }
   418  
   419  // Get returns the PdfObject corresponding to the specified key.
   420  // Returns a nil value if the key is not set.
   421  //
   422  // The design is such that we only return 1 value.
   423  // The reason is that, it will be easy to do type casts such as
   424  // name, ok := dict.Get("mykey").(*PdfObjectName)
   425  // if !ok ....
   426  func (d *PdfObjectDictionary) Get(key PdfObjectName) PdfObject {
   427  	val, has := d.dict[key]
   428  	if !has {
   429  		return nil
   430  	}
   431  	return val
   432  }
   433  
   434  // Keys returns the list of keys in the dictionary.
   435  func (d *PdfObjectDictionary) Keys() []PdfObjectName {
   436  	return d.keys
   437  }
   438  
   439  // Remove removes an element specified by key.
   440  func (d *PdfObjectDictionary) Remove(key PdfObjectName) {
   441  	idx := -1
   442  	for i, k := range d.keys {
   443  		if k == key {
   444  			idx = i
   445  			break
   446  		}
   447  	}
   448  
   449  	if idx >= 0 {
   450  		// Found. Remove from key list and map.
   451  		d.keys = append(d.keys[:idx], d.keys[idx+1:]...)
   452  		delete(d.dict, key)
   453  	}
   454  }
   455  
   456  // SetIfNotNil sets the dictionary's key -> val mapping entry -IF- val is not nil.
   457  // Note that we take care to perform a type switch.  Otherwise if we would supply a nil value
   458  // of another type, e.g. (PdfObjectArray*)(nil), then it would not be a PdfObject(nil) and thus
   459  // would get set.
   460  //
   461  func (d *PdfObjectDictionary) SetIfNotNil(key PdfObjectName, val PdfObject) {
   462  	if val != nil {
   463  		switch t := val.(type) {
   464  		case *PdfObjectName:
   465  			if t != nil {
   466  				d.Set(key, val)
   467  			}
   468  		case *PdfObjectDictionary:
   469  			if t != nil {
   470  				d.Set(key, val)
   471  			}
   472  		case *PdfObjectStream:
   473  			if t != nil {
   474  				d.Set(key, val)
   475  			}
   476  		case *PdfObjectString:
   477  			if t != nil {
   478  				d.Set(key, val)
   479  			}
   480  		case *PdfObjectNull:
   481  			if t != nil {
   482  				d.Set(key, val)
   483  			}
   484  		case *PdfObjectInteger:
   485  			if t != nil {
   486  				d.Set(key, val)
   487  			}
   488  		case *PdfObjectArray:
   489  			if t != nil {
   490  				d.Set(key, val)
   491  			}
   492  		case *PdfObjectBool:
   493  			if t != nil {
   494  				d.Set(key, val)
   495  			}
   496  		case *PdfObjectFloat:
   497  			if t != nil {
   498  				d.Set(key, val)
   499  			}
   500  		case *PdfObjectReference:
   501  			if t != nil {
   502  				d.Set(key, val)
   503  			}
   504  		case *PdfIndirectObject:
   505  			if t != nil {
   506  				d.Set(key, val)
   507  			}
   508  		default:
   509  			common.Log.Error("ERROR: Unknown type: %T - should never happen!", val)
   510  		}
   511  	}
   512  }
   513  
   514  func (ref *PdfObjectReference) String() string {
   515  	return fmt.Sprintf("Ref(%d %d)", ref.ObjectNumber, ref.GenerationNumber)
   516  }
   517  
   518  // DefaultWriteString outputs the object as it is to be written to file.
   519  func (ref *PdfObjectReference) DefaultWriteString() string {
   520  	return fmt.Sprintf("%d %d R", ref.ObjectNumber, ref.GenerationNumber)
   521  }
   522  
   523  func (ind *PdfIndirectObject) String() string {
   524  	// Avoid printing out the object, can cause problems with circular
   525  	// references.
   526  	return fmt.Sprintf("IObject:%d", (*ind).ObjectNumber)
   527  }
   528  
   529  // DefaultWriteString outputs the object as it is to be written to file.
   530  func (ind *PdfIndirectObject) DefaultWriteString() string {
   531  	outStr := fmt.Sprintf("%d 0 R", (*ind).ObjectNumber)
   532  	return outStr
   533  }
   534  
   535  func (stream *PdfObjectStream) String() string {
   536  	return fmt.Sprintf("Object stream %d: %s", stream.ObjectNumber, stream.PdfObjectDictionary)
   537  }
   538  
   539  // DefaultWriteString outputs the object as it is to be written to file.
   540  func (stream *PdfObjectStream) DefaultWriteString() string {
   541  	outStr := fmt.Sprintf("%d 0 R", (*stream).ObjectNumber)
   542  	return outStr
   543  }
   544  
   545  func (null *PdfObjectNull) String() string {
   546  	return "null"
   547  }
   548  
   549  // DefaultWriteString outputs the object as it is to be written to file.
   550  func (null *PdfObjectNull) DefaultWriteString() string {
   551  	return "null"
   552  }
   553  
   554  // Handy functions to work with primitive objects.
   555  
   556  // TraceMaxDepth specifies the maximum recursion depth allowed.
   557  const TraceMaxDepth = 20
   558  
   559  // TraceToDirectObject traces a PdfObject to a direct object.  For example direct objects contained
   560  // in indirect objects (can be double referenced even).
   561  //
   562  // Note: This function does not trace/resolve references. That needs to be done beforehand.
   563  func TraceToDirectObject(obj PdfObject) PdfObject {
   564  	iobj, isIndirectObj := obj.(*PdfIndirectObject)
   565  	depth := 0
   566  	for isIndirectObj == true {
   567  		obj = iobj.PdfObject
   568  		iobj, isIndirectObj = obj.(*PdfIndirectObject)
   569  		depth++
   570  		if depth > TraceMaxDepth {
   571  			common.Log.Error("ERROR: Trace depth level beyond %d - not going deeper!", TraceMaxDepth)
   572  			return nil
   573  		}
   574  	}
   575  	return obj
   576  }