github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/ast/scanner.go (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package ast
     5  
     6  import (
     7  	"bytes"
     8  	"encoding/hex"
     9  	"fmt"
    10  	"os"
    11  	"strconv"
    12  )
    13  
// token is the lexical class of an item returned by scanner.Scan.
type token int

// Token kinds. tokIllegal is the zero value, so entries that are left unset
// in the punctuation table read back as tokIllegal.
const (
	tokIllegal   token = iota // zero value; Scan reports an error for bytes that map to it
	tokComment                // '#' up to (not including) the end of line
	tokIdent                  // identifier that is not a keyword
	tokInclude                // keyword "include"
	tokIncdir                 // keyword "incdir"
	tokDefine                 // keyword "define"
	tokResource               // keyword "resource"
	tokString                 // "..." or <...> literal
	tokStringHex              // `...` literal, hex-decoded by scanStr
	tokCExpr                  // rest of the line following "define IDENT"
	tokInt                    // integer or character literal

	// Single-byte punctuation (see the punctuation table) and
	// multi-byte operators recognized directly in Scan.
	tokNewLine
	tokLParen
	tokRParen
	tokLBrack
	tokRBrack
	tokLBrace
	tokRBrace
	tokEq
	tokComma
	tokColon
	tokBinAnd
	tokCmpEq
	tokCmpNeq
	tokOr

	tokEOF
)
    46  
// punctuation maps an input byte to the token of the single-character
// punctuation it represents. Bytes without an entry hold the zero token,
// tokIllegal.
var punctuation = [256]token{
	'\n': tokNewLine,
	'(':  tokLParen,
	')':  tokRParen,
	'[':  tokLBrack,
	']':  tokRBrack,
	'{':  tokLBrace,
	'}':  tokRBrace,
	'=':  tokEq,
	',':  tokComma,
	':':  tokColon,
	'&':  tokBinAnd,
}
    60  
// tok2str holds the printable name of each token, indexed by token value.
// Tokens that appear in the punctuation table get their entry (re)written
// by init with the quoted character, so the tokNewLine value listed here is
// replaced at start-up.
var tok2str = [...]string{
	tokIllegal:   "ILLEGAL",
	tokComment:   "comment",
	tokIdent:     "identifier",
	tokInclude:   "include",
	tokIncdir:    "incdir",
	tokDefine:    "define",
	tokResource:  "resource",
	tokString:    "string",
	tokStringHex: "hex string",
	tokCExpr:     "CEXPR",
	tokInt:       "int",
	tokNewLine:   "NEWLINE",
	tokEOF:       "EOF",
	tokCmpEq:     "==",
	tokCmpNeq:    "!=",
	tokOr:        "||",
}
    79  
    80  func init() {
    81  	for ch, tok := range punctuation {
    82  		if tok == tokIllegal {
    83  			continue
    84  		}
    85  		tok2str[tok] = fmt.Sprintf("%q", ch)
    86  	}
    87  }
    88  
// keywords maps reserved words to their tokens. scanIdent looks every
// scanned identifier up here and returns the keyword token on a match.
var keywords = map[string]token{
	"include":  tokInclude,
	"incdir":   tokIncdir,
	"define":   tokDefine,
	"resource": tokResource,
}
    95  
    96  func (tok token) String() string {
    97  	return tok2str[tok]
    98  }
    99  
// scanner splits a byte slice into tokens; see Scan.
type scanner struct {
	data         []byte       // input being scanned
	filename     string       // file name copied into every Pos produced by pos()
	errorHandler ErrorHandler // receives each error reported through Errorf

	ch   byte // current byte; next() sets it to '\n' at end of data and to 0 past the end
	off  int  // offset of ch in data
	line int  // line of ch; incremented by next() on the first byte of a line
	col  int  // column of ch; reset at each new line and incremented per byte

	// prev1 is the most recent token returned by Scan and prev2 the one
	// before it; Scan uses them to detect the "define IDENT" prefix.
	prev1 token
	prev2 token

	errors int // number of errors reported through Errorf
}
   115  
   116  func newScanner(data []byte, filename string, errorHandler ErrorHandler) *scanner {
   117  	if errorHandler == nil {
   118  		errorHandler = LoggingHandler
   119  	}
   120  	s := &scanner{
   121  		data:         data,
   122  		filename:     filename,
   123  		errorHandler: errorHandler,
   124  		off:          -1,
   125  	}
   126  	s.next()
   127  	return s
   128  }
   129  
// ErrorHandler is called with the position and message of each reported error.
type ErrorHandler func(pos Pos, msg string)
   131  
   132  func LoggingHandler(pos Pos, msg string) {
   133  	fmt.Fprintf(os.Stderr, "%v: %v\n", pos, msg)
   134  }
   135  
// BuiltinFile is the file name that marks a position as builtin (see Pos.Builtin).
const BuiltinFile = "BUILTINS"
   137  
   138  func (pos Pos) Builtin() bool {
   139  	return pos.File == BuiltinFile
   140  }
   141  
   142  func (pos Pos) String() string {
   143  	if pos.Builtin() {
   144  		return "builtins"
   145  	}
   146  	if pos.Col == 0 {
   147  		return fmt.Sprintf("%v:%v", pos.File, pos.Line)
   148  	}
   149  	return fmt.Sprintf("%v:%v:%v", pos.File, pos.Line, pos.Col)
   150  }
   151  
   152  func (pos Pos) less(other Pos) bool {
   153  	if pos.File != other.File {
   154  		return pos.File < other.File
   155  	}
   156  	if pos.Line != other.Line {
   157  		return pos.Line < other.Line
   158  	}
   159  	return pos.Col < other.Col
   160  }
   161  
   162  func (s *scanner) Scan() (tok token, lit string, pos Pos) {
   163  	s.skipWhitespace()
   164  	pos = s.pos()
   165  	switch {
   166  	case s.ch == 0:
   167  		tok = tokEOF
   168  		s.next()
   169  	case s.prev2 == tokDefine && s.prev1 == tokIdent:
   170  		tok = tokCExpr
   171  		for ; s.ch != '\n'; s.next() {
   172  		}
   173  		lit = string(s.data[pos.Off:s.off])
   174  	case s.ch == '#':
   175  		tok = tokComment
   176  		for s.next(); s.ch != '\n'; s.next() {
   177  		}
   178  		lit = string(s.data[pos.Off+1 : s.off])
   179  	case s.ch == '"' || s.ch == '<':
   180  		tok = tokString
   181  		lit = s.scanStr(pos)
   182  	case s.ch == '`':
   183  		tok = tokStringHex
   184  		lit = s.scanStr(pos)
   185  	case s.ch >= '0' && s.ch <= '9' || s.ch == '-':
   186  		tok = tokInt
   187  		lit = s.scanInt(pos)
   188  	case s.ch == '\'':
   189  		tok = tokInt
   190  		lit = s.scanChar(pos)
   191  	case s.ch == '_' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z':
   192  		tok, lit = s.scanIdent(pos)
   193  	case s.tryConsume("=="):
   194  		tok = tokCmpEq
   195  	case s.tryConsume("!="):
   196  		tok = tokCmpNeq
   197  	case s.tryConsume("||"):
   198  		tok = tokOr
   199  	default:
   200  		tok = punctuation[s.ch]
   201  		if tok == tokIllegal {
   202  			s.Errorf(pos, "illegal character %#U", s.ch)
   203  		}
   204  		s.next()
   205  	}
   206  	s.prev2 = s.prev1
   207  	s.prev1 = tok
   208  	return
   209  }
   210  
   211  func (s *scanner) scanStr(pos Pos) string {
   212  	// TODO(dvyukov): get rid of <...> strings, that's only includes
   213  	closing := s.ch
   214  	if s.ch == '<' {
   215  		closing = '>'
   216  	}
   217  	for s.next(); s.ch != closing; s.next() {
   218  		if s.ch == 0 || s.ch == '\n' {
   219  			s.Errorf(pos, "string literal is not terminated")
   220  			return ""
   221  		}
   222  	}
   223  	lit := string(s.data[pos.Off+1 : s.off])
   224  	if i := IsValidStringLit(lit); i >= 0 {
   225  		pos1 := pos
   226  		pos1.Col += i + 1
   227  		pos1.Off += i + 1
   228  		s.Errorf(pos1, "illegal character %#U in string literal %q", lit[i], lit)
   229  	}
   230  	s.next()
   231  	if closing != '`' {
   232  		return lit
   233  	}
   234  	decoded, err := hex.DecodeString(lit)
   235  	if err != nil {
   236  		s.Errorf(pos, "bad hex string literal: %v", err)
   237  	}
   238  	return string(decoded)
   239  }
   240  
   241  func (s *scanner) scanInt(pos Pos) string {
   242  	for s.ch >= '0' && s.ch <= '9' ||
   243  		s.ch >= 'a' && s.ch <= 'f' ||
   244  		s.ch >= 'A' && s.ch <= 'F' ||
   245  		s.ch == 'x' || s.ch == '-' {
   246  		s.next()
   247  	}
   248  	lit := string(s.data[pos.Off:s.off])
   249  	if _, err := strconv.ParseUint(lit, 10, 64); err == nil {
   250  		return lit
   251  	}
   252  	if len(lit) > 1 && lit[0] == '-' {
   253  		if _, err := strconv.ParseInt(lit, 10, 64); err == nil {
   254  			return lit
   255  		}
   256  	}
   257  	if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' {
   258  		if _, err := strconv.ParseUint(lit[2:], 16, 64); err == nil {
   259  			return lit
   260  		}
   261  	}
   262  	s.Errorf(pos, "bad integer %q", lit)
   263  	return "0"
   264  }
   265  
   266  func (s *scanner) scanChar(pos Pos) string {
   267  	s.next()
   268  	s.next()
   269  	if s.ch != '\'' {
   270  		s.Errorf(pos, "char literal is not terminated")
   271  		return "0"
   272  	}
   273  	s.next()
   274  	return string(s.data[pos.Off : pos.Off+3])
   275  }
   276  
   277  func (s *scanner) scanIdent(pos Pos) (tok token, lit string) {
   278  	tok = tokIdent
   279  	for s.ch == '_' || s.ch == '$' ||
   280  		s.ch >= 'a' && s.ch <= 'z' ||
   281  		s.ch >= 'A' && s.ch <= 'Z' ||
   282  		s.ch >= '0' && s.ch <= '9' {
   283  		s.next()
   284  	}
   285  	lit = string(s.data[pos.Off:s.off])
   286  	if key, ok := keywords[lit]; ok {
   287  		tok = key
   288  	}
   289  	return
   290  }
   291  
   292  func (s *scanner) Errorf(pos Pos, msg string, args ...interface{}) {
   293  	s.errors++
   294  	s.errorHandler(pos, fmt.Sprintf(msg, args...))
   295  }
   296  
   297  func (s *scanner) Ok() bool {
   298  	return s.errors == 0
   299  }
   300  
   301  func (s *scanner) next() {
   302  	s.off++
   303  	for s.off < len(s.data) && s.data[s.off] == '\r' {
   304  		s.off++
   305  	}
   306  	if s.off == len(s.data) {
   307  		// Always emit NEWLINE before EOF.
   308  		// Makes lots of things simpler as we always
   309  		// want to treat EOF as NEWLINE as well.
   310  		s.ch = '\n'
   311  		return
   312  	}
   313  	if s.off > len(s.data) {
   314  		s.ch = 0
   315  		return
   316  	}
   317  	if s.off == 0 || s.data[s.off-1] == '\n' {
   318  		s.line++
   319  		s.col = 0
   320  	}
   321  	s.ch = s.data[s.off]
   322  	s.col++
   323  	if s.ch == 0 {
   324  		s.Errorf(s.pos(), "illegal character \\x00")
   325  	}
   326  }
   327  
   328  func (s *scanner) tryConsume(str string) bool {
   329  	if !bytes.HasPrefix(s.data[s.off:], []byte(str)) {
   330  		return false
   331  	}
   332  	for i := 0; i < len(str); i++ {
   333  		s.next()
   334  	}
   335  	return true
   336  }
   337  
   338  func (s *scanner) skipWhitespace() {
   339  	for s.ch == ' ' || s.ch == '\t' {
   340  		s.next()
   341  	}
   342  }
   343  
   344  func (s *scanner) pos() Pos {
   345  	return Pos{
   346  		File: s.filename,
   347  		Off:  s.off,
   348  		Line: s.line,
   349  		Col:  s.col,
   350  	}
   351  }
   352  
   353  func IsValidStringLit(lit string) int {
   354  	for i := 0; i < len(lit); i++ {
   355  		if lit[i] < 0x20 || lit[i] >= 0x80 {
   356  			return i
   357  		}
   358  	}
   359  	return -1
   360  }