github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/ast/scanner.go (about) 1 // Copyright 2017 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package ast 5 6 import ( 7 "bytes" 8 "encoding/hex" 9 "fmt" 10 "os" 11 "strconv" 12 ) 13 14 type token int 15 16 const ( 17 tokIllegal token = iota 18 tokComment 19 tokIdent 20 tokInclude 21 tokIncdir 22 tokDefine 23 tokResource 24 tokString 25 tokStringHex 26 tokCExpr 27 tokInt 28 29 tokNewLine 30 tokLParen 31 tokRParen 32 tokLBrack 33 tokRBrack 34 tokLBrace 35 tokRBrace 36 tokEq 37 tokComma 38 tokColon 39 tokBinAnd 40 tokCmpEq 41 tokCmpNeq 42 43 tokEOF 44 ) 45 46 var punctuation = [256]token{ 47 '\n': tokNewLine, 48 '(': tokLParen, 49 ')': tokRParen, 50 '[': tokLBrack, 51 ']': tokRBrack, 52 '{': tokLBrace, 53 '}': tokRBrace, 54 '=': tokEq, 55 ',': tokComma, 56 ':': tokColon, 57 '&': tokBinAnd, 58 } 59 60 var tok2str = [...]string{ 61 tokIllegal: "ILLEGAL", 62 tokComment: "comment", 63 tokIdent: "identifier", 64 tokInclude: "include", 65 tokIncdir: "incdir", 66 tokDefine: "define", 67 tokResource: "resource", 68 tokString: "string", 69 tokStringHex: "hex string", 70 tokCExpr: "CEXPR", 71 tokInt: "int", 72 tokNewLine: "NEWLINE", 73 tokEOF: "EOF", 74 tokCmpEq: "==", 75 tokCmpNeq: "!=", 76 } 77 78 func init() { 79 for ch, tok := range punctuation { 80 if tok == tokIllegal { 81 continue 82 } 83 tok2str[tok] = fmt.Sprintf("%q", ch) 84 } 85 } 86 87 var keywords = map[string]token{ 88 "include": tokInclude, 89 "incdir": tokIncdir, 90 "define": tokDefine, 91 "resource": tokResource, 92 } 93 94 func (tok token) String() string { 95 return tok2str[tok] 96 } 97 98 type scanner struct { 99 data []byte 100 filename string 101 errorHandler ErrorHandler 102 103 ch byte 104 off int 105 line int 106 col int 107 108 prev1 token 109 prev2 token 110 111 errors int 112 } 113 114 func newScanner(data []byte, filename string, errorHandler ErrorHandler) *scanner { 115 if errorHandler == nil { 116 errorHandler = LoggingHandler 117 } 118 s := &scanner{ 119 data: data, 120 filename: filename, 121 errorHandler: errorHandler, 122 off: -1, 123 } 124 s.next() 125 return s 126 } 127 128 type ErrorHandler func(pos Pos, msg string) 129 130 func LoggingHandler(pos Pos, msg string) { 131 fmt.Fprintf(os.Stderr, "%v: %v\n", pos, msg) 132 } 133 134 const BuiltinFile = "BUILTINS" 135 136 func (pos Pos) Builtin() bool { 137 return pos.File == BuiltinFile 138 } 139 140 func (pos Pos) String() string { 141 if pos.Builtin() { 142 return "builtins" 143 } 144 if pos.Col == 0 { 145 return fmt.Sprintf("%v:%v", pos.File, pos.Line) 146 } 147 return fmt.Sprintf("%v:%v:%v", pos.File, pos.Line, pos.Col) 148 } 149 150 func (pos Pos) less(other Pos) bool { 151 if pos.File != other.File { 152 return pos.File < other.File 153 } 154 if pos.Line != other.Line { 155 return pos.Line < other.Line 156 } 157 return pos.Col < other.Col 158 } 159 160 func (s *scanner) Scan() (tok token, lit string, pos Pos) { 161 s.skipWhitespace() 162 pos = s.pos() 163 switch { 164 case s.ch == 0: 165 tok = tokEOF 166 s.next() 167 case s.prev2 == tokDefine && s.prev1 == tokIdent: 168 tok = tokCExpr 169 for ; s.ch != '\n'; s.next() { 170 } 171 lit = string(s.data[pos.Off:s.off]) 172 case s.ch == '#': 173 tok = tokComment 174 for s.next(); s.ch != '\n'; s.next() { 175 } 176 lit = string(s.data[pos.Off+1 : s.off]) 177 case s.ch == '"' || s.ch == '<': 178 tok = tokString 179 lit = s.scanStr(pos) 180 case s.ch == '`': 181 tok = tokStringHex 182 lit = s.scanStr(pos) 183 case s.ch >= '0' && s.ch <= '9' || s.ch == '-': 184 tok = tokInt 185 lit = s.scanInt(pos) 186 case s.ch == '\'': 187 tok = tokInt 188 lit = s.scanChar(pos) 189 case s.ch == '_' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z': 190 tok, lit = s.scanIdent(pos) 191 case s.tryConsume("=="): 192 tok = tokCmpEq 193 case s.tryConsume("!="): 194 tok = tokCmpNeq 195 default: 196 tok = punctuation[s.ch] 197 if tok == tokIllegal { 198 s.Error(pos, "illegal character %#U", s.ch) 199 } 200 s.next() 201 } 202 s.prev2 = s.prev1 203 s.prev1 = tok 204 return 205 } 206 207 func (s *scanner) scanStr(pos Pos) string { 208 // TODO(dvyukov): get rid of <...> strings, that's only includes 209 closing := s.ch 210 if s.ch == '<' { 211 closing = '>' 212 } 213 for s.next(); s.ch != closing; s.next() { 214 if s.ch == 0 || s.ch == '\n' { 215 s.Error(pos, "string literal is not terminated") 216 return "" 217 } 218 } 219 lit := string(s.data[pos.Off+1 : s.off]) 220 for i := 0; i < len(lit); i++ { 221 if lit[i] < 0x20 || lit[i] >= 0x80 { 222 pos1 := pos 223 pos1.Col += i + 1 224 pos1.Off += i + 1 225 s.Error(pos1, "illegal character %#U in string literal", lit[i]) 226 break 227 } 228 } 229 s.next() 230 if closing != '`' { 231 return lit 232 } 233 decoded, err := hex.DecodeString(lit) 234 if err != nil { 235 s.Error(pos, "bad hex string literal: %v", err) 236 } 237 return string(decoded) 238 } 239 240 func (s *scanner) scanInt(pos Pos) string { 241 for s.ch >= '0' && s.ch <= '9' || 242 s.ch >= 'a' && s.ch <= 'f' || 243 s.ch >= 'A' && s.ch <= 'F' || 244 s.ch == 'x' || s.ch == '-' { 245 s.next() 246 } 247 lit := string(s.data[pos.Off:s.off]) 248 if _, err := strconv.ParseUint(lit, 10, 64); err == nil { 249 return lit 250 } 251 if len(lit) > 1 && lit[0] == '-' { 252 if _, err := strconv.ParseInt(lit, 10, 64); err == nil { 253 return lit 254 } 255 } 256 if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' { 257 if _, err := strconv.ParseUint(lit[2:], 16, 64); err == nil { 258 return lit 259 } 260 } 261 s.Error(pos, fmt.Sprintf("bad integer %q", lit)) 262 return "0" 263 } 264 265 func (s *scanner) scanChar(pos Pos) string { 266 s.next() 267 s.next() 268 if s.ch != '\'' { 269 s.Error(pos, "char literal is not terminated") 270 return "0" 271 } 272 s.next() 273 return string(s.data[pos.Off : pos.Off+3]) 274 } 275 276 func (s *scanner) scanIdent(pos Pos) (tok token, lit string) { 277 tok = tokIdent 278 for s.ch == '_' || s.ch == '$' || 279 s.ch >= 'a' && s.ch <= 'z' || 280 s.ch >= 'A' && s.ch <= 'Z' || 281 s.ch >= '0' && s.ch <= '9' { 282 s.next() 283 } 284 lit = string(s.data[pos.Off:s.off]) 285 if key, ok := keywords[lit]; ok { 286 tok = key 287 } 288 return 289 } 290 291 func (s *scanner) Error(pos Pos, msg string, args ...interface{}) { 292 s.errors++ 293 s.errorHandler(pos, fmt.Sprintf(msg, args...)) 294 } 295 296 func (s *scanner) Ok() bool { 297 return s.errors == 0 298 } 299 300 func (s *scanner) next() { 301 s.off++ 302 for s.off < len(s.data) && s.data[s.off] == '\r' { 303 s.off++ 304 } 305 if s.off == len(s.data) { 306 // Always emit NEWLINE before EOF. 307 // Makes lots of things simpler as we always 308 // want to treat EOF as NEWLINE as well. 309 s.ch = '\n' 310 return 311 } 312 if s.off > len(s.data) { 313 s.ch = 0 314 return 315 } 316 if s.off == 0 || s.data[s.off-1] == '\n' { 317 s.line++ 318 s.col = 0 319 } 320 s.ch = s.data[s.off] 321 s.col++ 322 if s.ch == 0 { 323 s.Error(s.pos(), "illegal character \\x00") 324 } 325 } 326 327 func (s *scanner) tryConsume(str string) bool { 328 if !bytes.HasPrefix(s.data[s.off:], []byte(str)) { 329 return false 330 } 331 for i := 0; i < len(str); i++ { 332 s.next() 333 } 334 return true 335 } 336 337 func (s *scanner) skipWhitespace() { 338 for s.ch == ' ' || s.ch == '\t' { 339 s.next() 340 } 341 } 342 343 func (s *scanner) pos() Pos { 344 return Pos{ 345 File: s.filename, 346 Off: s.off, 347 Line: s.line, 348 Col: s.col, 349 } 350 }