github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/ps/parser.go (about)

     1  /*
     2   * This file is subject to the terms and conditions defined in
     3   * file 'LICENSE.md', which is part of this source code package.
     4   */
     5  
     6  package ps
     7  
     8  import (
     9  	"bufio"
    10  	"bytes"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"strconv"
    15  
    16  	"github.com/unidoc/unidoc/common"
    17  	pdfcore "github.com/unidoc/unidoc/pdf/core"
    18  )
    19  
    20  type PSParser struct {
    21  	reader *bufio.Reader
    22  }
    23  
    24  // Create a new instance of the PDF Postscript parser from input data.
    25  func NewPSParser(content []byte) *PSParser {
    26  	parser := PSParser{}
    27  
    28  	buffer := bytes.NewBuffer(content)
    29  	parser.reader = bufio.NewReader(buffer)
    30  
    31  	return &parser
    32  }
    33  
    34  // Parse the postscript and store as a program that can be executed.
    35  func (this *PSParser) Parse() (*PSProgram, error) {
    36  	this.skipSpaces()
    37  	bb, err := this.reader.Peek(2)
    38  	if err != nil {
    39  		return nil, err
    40  	}
    41  	if bb[0] != '{' {
    42  		return nil, fmt.Errorf("Invalid PS Program not starting with {")
    43  	}
    44  
    45  	program, err := this.parseFunction()
    46  	if err != nil && err != io.EOF {
    47  		return nil, err
    48  	}
    49  
    50  	return program, err
    51  }
    52  
    53  // Detect the signature at the current parse position and parse
    54  // the corresponding object.
    55  func (this *PSParser) parseFunction() (*PSProgram, error) {
    56  	c, _ := this.reader.ReadByte()
    57  	if c != '{' {
    58  		return nil, errors.New("Invalid function")
    59  	}
    60  
    61  	function := NewPSProgram()
    62  
    63  	for {
    64  		this.skipSpaces()
    65  		bb, err := this.reader.Peek(2)
    66  		if err != nil {
    67  			if err == io.EOF {
    68  				break
    69  			}
    70  			return nil, err
    71  		}
    72  
    73  		common.Log.Trace("Peek string: %s", string(bb))
    74  		// Determine type.
    75  		if bb[0] == '}' {
    76  			common.Log.Trace("EOF function")
    77  			this.reader.ReadByte()
    78  			break
    79  		} else if bb[0] == '{' {
    80  			common.Log.Trace("Function!")
    81  			inlineF, err := this.parseFunction()
    82  			if err != nil {
    83  				return nil, err
    84  			}
    85  			function.Append(inlineF)
    86  		} else if pdfcore.IsDecimalDigit(bb[0]) || (bb[0] == '-' && pdfcore.IsDecimalDigit(bb[1])) {
    87  			common.Log.Trace("->Number!")
    88  			number, err := this.parseNumber()
    89  			if err != nil {
    90  				return nil, err
    91  			}
    92  			function.Append(number)
    93  		} else {
    94  			common.Log.Trace("->Operand or bool?")
    95  			// Let's peek farther to find out.
    96  			bb, _ = this.reader.Peek(5)
    97  			peekStr := string(bb)
    98  			common.Log.Trace("Peek str: %s", peekStr)
    99  
   100  			if (len(peekStr) > 4) && (peekStr[:5] == "false") {
   101  				b, err := this.parseBool()
   102  				if err != nil {
   103  					return nil, err
   104  				}
   105  				function.Append(b)
   106  			} else if (len(peekStr) > 3) && (peekStr[:4] == "true") {
   107  				b, err := this.parseBool()
   108  				if err != nil {
   109  					return nil, err
   110  				}
   111  				function.Append(b)
   112  			} else {
   113  				operand, err := this.parseOperand()
   114  				if err != nil {
   115  					return nil, err
   116  				}
   117  				function.Append(operand)
   118  			}
   119  		}
   120  	}
   121  
   122  	return function, nil
   123  }
   124  
   125  // Skip over any spaces.  Returns the number of spaces skipped and
   126  // an error if any.
   127  func (this *PSParser) skipSpaces() (int, error) {
   128  	cnt := 0
   129  	for {
   130  		bb, err := this.reader.Peek(1)
   131  		if err != nil {
   132  			return 0, err
   133  		}
   134  		if pdfcore.IsWhiteSpace(bb[0]) {
   135  			this.reader.ReadByte()
   136  			cnt++
   137  		} else {
   138  			break
   139  		}
   140  	}
   141  
   142  	return cnt, nil
   143  }
   144  
   145  // Numeric objects.
   146  // Integer or Real numbers.
   147  func (this *PSParser) parseNumber() (PSObject, error) {
   148  	isFloat := false
   149  	allowSigns := true
   150  	numStr := ""
   151  	for {
   152  		common.Log.Trace("Parsing number \"%s\"", numStr)
   153  		bb, err := this.reader.Peek(1)
   154  		if err == io.EOF {
   155  			// GH: EOF handling.  Handle EOF like end of line.  Can happen with
   156  			// encoded object streams that the object is at the end.
   157  			// In other cases, we will get the EOF error elsewhere at any rate.
   158  			break // Handle like EOF
   159  		}
   160  		if err != nil {
   161  			common.Log.Error("ERROR %s", err)
   162  			return nil, err
   163  		}
   164  		if allowSigns && (bb[0] == '-' || bb[0] == '+') {
   165  			// Only appear in the beginning, otherwise serves as a delimiter.
   166  			b, _ := this.reader.ReadByte()
   167  			numStr += string(b)
   168  			allowSigns = false // Only allowed in beginning, and after e (exponential).
   169  		} else if pdfcore.IsDecimalDigit(bb[0]) {
   170  			b, _ := this.reader.ReadByte()
   171  			numStr += string(b)
   172  		} else if bb[0] == '.' {
   173  			b, _ := this.reader.ReadByte()
   174  			numStr += string(b)
   175  			isFloat = true
   176  		} else if bb[0] == 'e' {
   177  			// Exponential number format.
   178  			// XXX Is this supported in PS?
   179  			b, _ := this.reader.ReadByte()
   180  			numStr += string(b)
   181  			isFloat = true
   182  			allowSigns = true
   183  		} else {
   184  			break
   185  		}
   186  	}
   187  
   188  	if isFloat {
   189  		fVal, err := strconv.ParseFloat(numStr, 64)
   190  		o := MakeReal(fVal)
   191  		return o, err
   192  	} else {
   193  		intVal, err := strconv.ParseInt(numStr, 10, 64)
   194  		o := MakeInteger(int(intVal))
   195  		return o, err
   196  	}
   197  }
   198  
   199  // Parse bool object.
   200  func (this *PSParser) parseBool() (*PSBoolean, error) {
   201  	bb, err := this.reader.Peek(4)
   202  	if err != nil {
   203  		return MakeBool(false), err
   204  	}
   205  	if (len(bb) >= 4) && (string(bb[:4]) == "true") {
   206  		this.reader.Discard(4)
   207  		return MakeBool(true), nil
   208  	}
   209  
   210  	bb, err = this.reader.Peek(5)
   211  	if err != nil {
   212  		return MakeBool(false), err
   213  	}
   214  	if (len(bb) >= 5) && (string(bb[:5]) == "false") {
   215  		this.reader.Discard(5)
   216  		return MakeBool(false), nil
   217  	}
   218  
   219  	return MakeBool(false), errors.New("Unexpected boolean string")
   220  }
   221  
   222  // An operand is a text command represented by a word.
   223  func (this *PSParser) parseOperand() (*PSOperand, error) {
   224  	bytes := []byte{}
   225  	for {
   226  		bb, err := this.reader.Peek(1)
   227  		if err != nil {
   228  			if err == io.EOF {
   229  				break
   230  			}
   231  			return nil, err
   232  		}
   233  		if pdfcore.IsDelimiter(bb[0]) {
   234  			break
   235  		}
   236  		if pdfcore.IsWhiteSpace(bb[0]) {
   237  			break
   238  		}
   239  
   240  		b, _ := this.reader.ReadByte()
   241  		bytes = append(bytes, b)
   242  	}
   243  
   244  	if len(bytes) == 0 {
   245  		return nil, fmt.Errorf("Invalid operand (empty)")
   246  	}
   247  
   248  	return MakeOperand(string(bytes)), nil
   249  }