github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/ast/scanner.go (about) 1 // Copyright 2017 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package ast 5 6 import ( 7 "bytes" 8 "encoding/hex" 9 "fmt" 10 "os" 11 "strconv" 12 ) 13 14 type token int 15 16 const ( 17 tokIllegal token = iota 18 tokComment 19 tokIdent 20 tokInclude 21 tokIncdir 22 tokDefine 23 tokResource 24 tokString 25 tokStringHex 26 tokCExpr 27 tokInt 28 29 tokNewLine 30 tokLParen 31 tokRParen 32 tokLBrack 33 tokRBrack 34 tokLBrace 35 tokRBrace 36 tokEq 37 tokComma 38 tokColon 39 tokBinAnd 40 tokCmpEq 41 tokCmpNeq 42 tokOr 43 44 tokEOF 45 ) 46 47 var punctuation = [256]token{ 48 '\n': tokNewLine, 49 '(': tokLParen, 50 ')': tokRParen, 51 '[': tokLBrack, 52 ']': tokRBrack, 53 '{': tokLBrace, 54 '}': tokRBrace, 55 '=': tokEq, 56 ',': tokComma, 57 ':': tokColon, 58 '&': tokBinAnd, 59 } 60 61 var tok2str = [...]string{ 62 tokIllegal: "ILLEGAL", 63 tokComment: "comment", 64 tokIdent: "identifier", 65 tokInclude: "include", 66 tokIncdir: "incdir", 67 tokDefine: "define", 68 tokResource: "resource", 69 tokString: "string", 70 tokStringHex: "hex string", 71 tokCExpr: "CEXPR", 72 tokInt: "int", 73 tokNewLine: "NEWLINE", 74 tokEOF: "EOF", 75 tokCmpEq: "==", 76 tokCmpNeq: "!=", 77 tokOr: "||", 78 } 79 80 func init() { 81 for ch, tok := range punctuation { 82 if tok == tokIllegal { 83 continue 84 } 85 tok2str[tok] = fmt.Sprintf("%q", ch) 86 } 87 } 88 89 var keywords = map[string]token{ 90 "include": tokInclude, 91 "incdir": tokIncdir, 92 "define": tokDefine, 93 "resource": tokResource, 94 } 95 96 func (tok token) String() string { 97 return tok2str[tok] 98 } 99 100 type scanner struct { 101 data []byte 102 filename string 103 errorHandler ErrorHandler 104 105 ch byte 106 off int 107 line int 108 col int 109 110 prev1 token 111 prev2 token 112 113 errors int 114 } 115 116 func newScanner(data []byte, filename string, errorHandler ErrorHandler) *scanner { 117 if errorHandler == nil { 118 errorHandler = LoggingHandler 119 } 120 s := &scanner{ 121 data: data, 122 filename: filename, 123 errorHandler: errorHandler, 124 off: -1, 125 } 126 s.next() 127 return s 128 } 129 130 type ErrorHandler func(pos Pos, msg string) 131 132 func LoggingHandler(pos Pos, msg string) { 133 fmt.Fprintf(os.Stderr, "%v: %v\n", pos, msg) 134 } 135 136 const BuiltinFile = "BUILTINS" 137 138 func (pos Pos) Builtin() bool { 139 return pos.File == BuiltinFile 140 } 141 142 func (pos Pos) String() string { 143 if pos.Builtin() { 144 return "builtins" 145 } 146 if pos.Col == 0 { 147 return fmt.Sprintf("%v:%v", pos.File, pos.Line) 148 } 149 return fmt.Sprintf("%v:%v:%v", pos.File, pos.Line, pos.Col) 150 } 151 152 func (pos Pos) less(other Pos) bool { 153 if pos.File != other.File { 154 return pos.File < other.File 155 } 156 if pos.Line != other.Line { 157 return pos.Line < other.Line 158 } 159 return pos.Col < other.Col 160 } 161 162 func (s *scanner) Scan() (tok token, lit string, pos Pos) { 163 s.skipWhitespace() 164 pos = s.pos() 165 switch { 166 case s.ch == 0: 167 tok = tokEOF 168 s.next() 169 case s.prev2 == tokDefine && s.prev1 == tokIdent: 170 tok = tokCExpr 171 for ; s.ch != '\n'; s.next() { 172 } 173 lit = string(s.data[pos.Off:s.off]) 174 case s.ch == '#': 175 tok = tokComment 176 for s.next(); s.ch != '\n'; s.next() { 177 } 178 lit = string(s.data[pos.Off+1 : s.off]) 179 case s.ch == '"' || s.ch == '<': 180 tok = tokString 181 lit = s.scanStr(pos) 182 case s.ch == '`': 183 tok = tokStringHex 184 lit = s.scanStr(pos) 185 case s.ch >= '0' && s.ch <= '9' || s.ch == '-': 186 tok = tokInt 187 lit = s.scanInt(pos) 188 case s.ch == '\'': 189 tok = tokInt 190 lit = s.scanChar(pos) 191 case s.ch == '_' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z': 192 tok, lit = s.scanIdent(pos) 193 case s.tryConsume("=="): 194 tok = tokCmpEq 195 case s.tryConsume("!="): 196 tok = tokCmpNeq 197 case s.tryConsume("||"): 198 tok = tokOr 199 default: 200 tok = punctuation[s.ch] 201 if tok == tokIllegal { 202 s.Errorf(pos, "illegal character %#U", s.ch) 203 } 204 s.next() 205 } 206 s.prev2 = s.prev1 207 s.prev1 = tok 208 return 209 } 210 211 func (s *scanner) scanStr(pos Pos) string { 212 // TODO(dvyukov): get rid of <...> strings, that's only includes 213 closing := s.ch 214 if s.ch == '<' { 215 closing = '>' 216 } 217 for s.next(); s.ch != closing; s.next() { 218 if s.ch == 0 || s.ch == '\n' { 219 s.Errorf(pos, "string literal is not terminated") 220 return "" 221 } 222 } 223 lit := string(s.data[pos.Off+1 : s.off]) 224 if i := IsValidStringLit(lit); i >= 0 { 225 pos1 := pos 226 pos1.Col += i + 1 227 pos1.Off += i + 1 228 s.Errorf(pos1, "illegal character %#U in string literal %q", lit[i], lit) 229 } 230 s.next() 231 if closing != '`' { 232 return lit 233 } 234 decoded, err := hex.DecodeString(lit) 235 if err != nil { 236 s.Errorf(pos, "bad hex string literal: %v", err) 237 } 238 return string(decoded) 239 } 240 241 func (s *scanner) scanInt(pos Pos) string { 242 for s.ch >= '0' && s.ch <= '9' || 243 s.ch >= 'a' && s.ch <= 'f' || 244 s.ch >= 'A' && s.ch <= 'F' || 245 s.ch == 'x' || s.ch == '-' { 246 s.next() 247 } 248 lit := string(s.data[pos.Off:s.off]) 249 if _, err := strconv.ParseUint(lit, 10, 64); err == nil { 250 return lit 251 } 252 if len(lit) > 1 && lit[0] == '-' { 253 if _, err := strconv.ParseInt(lit, 10, 64); err == nil { 254 return lit 255 } 256 } 257 if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' { 258 if _, err := strconv.ParseUint(lit[2:], 16, 64); err == nil { 259 return lit 260 } 261 } 262 s.Errorf(pos, "bad integer %q", lit) 263 return "0" 264 } 265 266 func (s *scanner) scanChar(pos Pos) string { 267 s.next() 268 s.next() 269 if s.ch != '\'' { 270 s.Errorf(pos, "char literal is not terminated") 271 return "0" 272 } 273 s.next() 274 return string(s.data[pos.Off : pos.Off+3]) 275 } 276 277 func (s *scanner) scanIdent(pos Pos) (tok token, lit string) { 278 tok = tokIdent 279 for s.ch == '_' || s.ch == '$' || 280 s.ch >= 'a' && s.ch <= 'z' || 281 s.ch >= 'A' && s.ch <= 'Z' || 282 s.ch >= '0' && s.ch <= '9' { 283 s.next() 284 } 285 lit = string(s.data[pos.Off:s.off]) 286 if key, ok := keywords[lit]; ok { 287 tok = key 288 } 289 return 290 } 291 292 func (s *scanner) Errorf(pos Pos, msg string, args ...interface{}) { 293 s.errors++ 294 s.errorHandler(pos, fmt.Sprintf(msg, args...)) 295 } 296 297 func (s *scanner) Ok() bool { 298 return s.errors == 0 299 } 300 301 func (s *scanner) next() { 302 s.off++ 303 for s.off < len(s.data) && s.data[s.off] == '\r' { 304 s.off++ 305 } 306 if s.off == len(s.data) { 307 // Always emit NEWLINE before EOF. 308 // Makes lots of things simpler as we always 309 // want to treat EOF as NEWLINE as well. 310 s.ch = '\n' 311 return 312 } 313 if s.off > len(s.data) { 314 s.ch = 0 315 return 316 } 317 if s.off == 0 || s.data[s.off-1] == '\n' { 318 s.line++ 319 s.col = 0 320 } 321 s.ch = s.data[s.off] 322 s.col++ 323 if s.ch == 0 { 324 s.Errorf(s.pos(), "illegal character \\x00") 325 } 326 } 327 328 func (s *scanner) tryConsume(str string) bool { 329 if !bytes.HasPrefix(s.data[s.off:], []byte(str)) { 330 return false 331 } 332 for i := 0; i < len(str); i++ { 333 s.next() 334 } 335 return true 336 } 337 338 func (s *scanner) skipWhitespace() { 339 for s.ch == ' ' || s.ch == '\t' { 340 s.next() 341 } 342 } 343 344 func (s *scanner) pos() Pos { 345 return Pos{ 346 File: s.filename, 347 Off: s.off, 348 Line: s.line, 349 Col: s.col, 350 } 351 } 352 353 func IsValidStringLit(lit string) int { 354 for i := 0; i < len(lit); i++ { 355 if lit[i] < 0x20 || lit[i] >= 0x80 { 356 return i 357 } 358 } 359 return -1 360 }