github.com/unidoc/unidoc@v2.2.0+incompatible/pdf/ps/parser.go (about) 1 /* 2 * This file is subject to the terms and conditions defined in 3 * file 'LICENSE.md', which is part of this source code package. 4 */ 5 6 package ps 7 8 import ( 9 "bufio" 10 "bytes" 11 "errors" 12 "fmt" 13 "io" 14 "strconv" 15 16 "github.com/unidoc/unidoc/common" 17 pdfcore "github.com/unidoc/unidoc/pdf/core" 18 ) 19 20 type PSParser struct { 21 reader *bufio.Reader 22 } 23 24 // Create a new instance of the PDF Postscript parser from input data. 25 func NewPSParser(content []byte) *PSParser { 26 parser := PSParser{} 27 28 buffer := bytes.NewBuffer(content) 29 parser.reader = bufio.NewReader(buffer) 30 31 return &parser 32 } 33 34 // Parse the postscript and store as a program that can be executed. 35 func (this *PSParser) Parse() (*PSProgram, error) { 36 this.skipSpaces() 37 bb, err := this.reader.Peek(2) 38 if err != nil { 39 return nil, err 40 } 41 if bb[0] != '{' { 42 return nil, fmt.Errorf("Invalid PS Program not starting with {") 43 } 44 45 program, err := this.parseFunction() 46 if err != nil && err != io.EOF { 47 return nil, err 48 } 49 50 return program, err 51 } 52 53 // Detect the signature at the current parse position and parse 54 // the corresponding object. 55 func (this *PSParser) parseFunction() (*PSProgram, error) { 56 c, _ := this.reader.ReadByte() 57 if c != '{' { 58 return nil, errors.New("Invalid function") 59 } 60 61 function := NewPSProgram() 62 63 for { 64 this.skipSpaces() 65 bb, err := this.reader.Peek(2) 66 if err != nil { 67 if err == io.EOF { 68 break 69 } 70 return nil, err 71 } 72 73 common.Log.Trace("Peek string: %s", string(bb)) 74 // Determine type. 75 if bb[0] == '}' { 76 common.Log.Trace("EOF function") 77 this.reader.ReadByte() 78 break 79 } else if bb[0] == '{' { 80 common.Log.Trace("Function!") 81 inlineF, err := this.parseFunction() 82 if err != nil { 83 return nil, err 84 } 85 function.Append(inlineF) 86 } else if pdfcore.IsDecimalDigit(bb[0]) || (bb[0] == '-' && pdfcore.IsDecimalDigit(bb[1])) { 87 common.Log.Trace("->Number!") 88 number, err := this.parseNumber() 89 if err != nil { 90 return nil, err 91 } 92 function.Append(number) 93 } else { 94 common.Log.Trace("->Operand or bool?") 95 // Let's peek farther to find out. 96 bb, _ = this.reader.Peek(5) 97 peekStr := string(bb) 98 common.Log.Trace("Peek str: %s", peekStr) 99 100 if (len(peekStr) > 4) && (peekStr[:5] == "false") { 101 b, err := this.parseBool() 102 if err != nil { 103 return nil, err 104 } 105 function.Append(b) 106 } else if (len(peekStr) > 3) && (peekStr[:4] == "true") { 107 b, err := this.parseBool() 108 if err != nil { 109 return nil, err 110 } 111 function.Append(b) 112 } else { 113 operand, err := this.parseOperand() 114 if err != nil { 115 return nil, err 116 } 117 function.Append(operand) 118 } 119 } 120 } 121 122 return function, nil 123 } 124 125 // Skip over any spaces. Returns the number of spaces skipped and 126 // an error if any. 127 func (this *PSParser) skipSpaces() (int, error) { 128 cnt := 0 129 for { 130 bb, err := this.reader.Peek(1) 131 if err != nil { 132 return 0, err 133 } 134 if pdfcore.IsWhiteSpace(bb[0]) { 135 this.reader.ReadByte() 136 cnt++ 137 } else { 138 break 139 } 140 } 141 142 return cnt, nil 143 } 144 145 // Numeric objects. 146 // Integer or Real numbers. 147 func (this *PSParser) parseNumber() (PSObject, error) { 148 isFloat := false 149 allowSigns := true 150 numStr := "" 151 for { 152 common.Log.Trace("Parsing number \"%s\"", numStr) 153 bb, err := this.reader.Peek(1) 154 if err == io.EOF { 155 // GH: EOF handling. Handle EOF like end of line. Can happen with 156 // encoded object streams that the object is at the end. 157 // In other cases, we will get the EOF error elsewhere at any rate. 158 break // Handle like EOF 159 } 160 if err != nil { 161 common.Log.Error("ERROR %s", err) 162 return nil, err 163 } 164 if allowSigns && (bb[0] == '-' || bb[0] == '+') { 165 // Only appear in the beginning, otherwise serves as a delimiter. 166 b, _ := this.reader.ReadByte() 167 numStr += string(b) 168 allowSigns = false // Only allowed in beginning, and after e (exponential). 169 } else if pdfcore.IsDecimalDigit(bb[0]) { 170 b, _ := this.reader.ReadByte() 171 numStr += string(b) 172 } else if bb[0] == '.' { 173 b, _ := this.reader.ReadByte() 174 numStr += string(b) 175 isFloat = true 176 } else if bb[0] == 'e' { 177 // Exponential number format. 178 // XXX Is this supported in PS? 179 b, _ := this.reader.ReadByte() 180 numStr += string(b) 181 isFloat = true 182 allowSigns = true 183 } else { 184 break 185 } 186 } 187 188 if isFloat { 189 fVal, err := strconv.ParseFloat(numStr, 64) 190 o := MakeReal(fVal) 191 return o, err 192 } else { 193 intVal, err := strconv.ParseInt(numStr, 10, 64) 194 o := MakeInteger(int(intVal)) 195 return o, err 196 } 197 } 198 199 // Parse bool object. 200 func (this *PSParser) parseBool() (*PSBoolean, error) { 201 bb, err := this.reader.Peek(4) 202 if err != nil { 203 return MakeBool(false), err 204 } 205 if (len(bb) >= 4) && (string(bb[:4]) == "true") { 206 this.reader.Discard(4) 207 return MakeBool(true), nil 208 } 209 210 bb, err = this.reader.Peek(5) 211 if err != nil { 212 return MakeBool(false), err 213 } 214 if (len(bb) >= 5) && (string(bb[:5]) == "false") { 215 this.reader.Discard(5) 216 return MakeBool(false), nil 217 } 218 219 return MakeBool(false), errors.New("Unexpected boolean string") 220 } 221 222 // An operand is a text command represented by a word. 223 func (this *PSParser) parseOperand() (*PSOperand, error) { 224 bytes := []byte{} 225 for { 226 bb, err := this.reader.Peek(1) 227 if err != nil { 228 if err == io.EOF { 229 break 230 } 231 return nil, err 232 } 233 if pdfcore.IsDelimiter(bb[0]) { 234 break 235 } 236 if pdfcore.IsWhiteSpace(bb[0]) { 237 break 238 } 239 240 b, _ := this.reader.ReadByte() 241 bytes = append(bytes, b) 242 } 243 244 if len(bytes) == 0 { 245 return nil, fmt.Errorf("Invalid operand (empty)") 246 } 247 248 return MakeOperand(string(bytes)), nil 249 }