github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/ast/scanner.go (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package ast
     5  
     6  import (
     7  	"bytes"
     8  	"encoding/hex"
     9  	"fmt"
    10  	"os"
    11  	"strconv"
    12  )
    13  
    14  type token int
    15  
    16  const (
    17  	tokIllegal token = iota
    18  	tokComment
    19  	tokIdent
    20  	tokInclude
    21  	tokIncdir
    22  	tokDefine
    23  	tokResource
    24  	tokString
    25  	tokStringHex
    26  	tokCExpr
    27  	tokInt
    28  
    29  	tokNewLine
    30  	tokLParen
    31  	tokRParen
    32  	tokLBrack
    33  	tokRBrack
    34  	tokLBrace
    35  	tokRBrace
    36  	tokEq
    37  	tokComma
    38  	tokColon
    39  	tokBinAnd
    40  	tokCmpEq
    41  	tokCmpNeq
    42  
    43  	tokEOF
    44  )
    45  
    46  var punctuation = [256]token{
    47  	'\n': tokNewLine,
    48  	'(':  tokLParen,
    49  	')':  tokRParen,
    50  	'[':  tokLBrack,
    51  	']':  tokRBrack,
    52  	'{':  tokLBrace,
    53  	'}':  tokRBrace,
    54  	'=':  tokEq,
    55  	',':  tokComma,
    56  	':':  tokColon,
    57  	'&':  tokBinAnd,
    58  }
    59  
    60  var tok2str = [...]string{
    61  	tokIllegal:   "ILLEGAL",
    62  	tokComment:   "comment",
    63  	tokIdent:     "identifier",
    64  	tokInclude:   "include",
    65  	tokIncdir:    "incdir",
    66  	tokDefine:    "define",
    67  	tokResource:  "resource",
    68  	tokString:    "string",
    69  	tokStringHex: "hex string",
    70  	tokCExpr:     "CEXPR",
    71  	tokInt:       "int",
    72  	tokNewLine:   "NEWLINE",
    73  	tokEOF:       "EOF",
    74  	tokCmpEq:     "==",
    75  	tokCmpNeq:    "!=",
    76  }
    77  
    78  func init() {
    79  	for ch, tok := range punctuation {
    80  		if tok == tokIllegal {
    81  			continue
    82  		}
    83  		tok2str[tok] = fmt.Sprintf("%q", ch)
    84  	}
    85  }
    86  
    87  var keywords = map[string]token{
    88  	"include":  tokInclude,
    89  	"incdir":   tokIncdir,
    90  	"define":   tokDefine,
    91  	"resource": tokResource,
    92  }
    93  
    94  func (tok token) String() string {
    95  	return tok2str[tok]
    96  }
    97  
    98  type scanner struct {
    99  	data         []byte
   100  	filename     string
   101  	errorHandler ErrorHandler
   102  
   103  	ch   byte
   104  	off  int
   105  	line int
   106  	col  int
   107  
   108  	prev1 token
   109  	prev2 token
   110  
   111  	errors int
   112  }
   113  
   114  func newScanner(data []byte, filename string, errorHandler ErrorHandler) *scanner {
   115  	if errorHandler == nil {
   116  		errorHandler = LoggingHandler
   117  	}
   118  	s := &scanner{
   119  		data:         data,
   120  		filename:     filename,
   121  		errorHandler: errorHandler,
   122  		off:          -1,
   123  	}
   124  	s.next()
   125  	return s
   126  }
   127  
   128  type ErrorHandler func(pos Pos, msg string)
   129  
   130  func LoggingHandler(pos Pos, msg string) {
   131  	fmt.Fprintf(os.Stderr, "%v: %v\n", pos, msg)
   132  }
   133  
   134  const BuiltinFile = "BUILTINS"
   135  
   136  func (pos Pos) Builtin() bool {
   137  	return pos.File == BuiltinFile
   138  }
   139  
   140  func (pos Pos) String() string {
   141  	if pos.Builtin() {
   142  		return "builtins"
   143  	}
   144  	if pos.Col == 0 {
   145  		return fmt.Sprintf("%v:%v", pos.File, pos.Line)
   146  	}
   147  	return fmt.Sprintf("%v:%v:%v", pos.File, pos.Line, pos.Col)
   148  }
   149  
   150  func (pos Pos) less(other Pos) bool {
   151  	if pos.File != other.File {
   152  		return pos.File < other.File
   153  	}
   154  	if pos.Line != other.Line {
   155  		return pos.Line < other.Line
   156  	}
   157  	return pos.Col < other.Col
   158  }
   159  
   160  func (s *scanner) Scan() (tok token, lit string, pos Pos) {
   161  	s.skipWhitespace()
   162  	pos = s.pos()
   163  	switch {
   164  	case s.ch == 0:
   165  		tok = tokEOF
   166  		s.next()
   167  	case s.prev2 == tokDefine && s.prev1 == tokIdent:
   168  		tok = tokCExpr
   169  		for ; s.ch != '\n'; s.next() {
   170  		}
   171  		lit = string(s.data[pos.Off:s.off])
   172  	case s.ch == '#':
   173  		tok = tokComment
   174  		for s.next(); s.ch != '\n'; s.next() {
   175  		}
   176  		lit = string(s.data[pos.Off+1 : s.off])
   177  	case s.ch == '"' || s.ch == '<':
   178  		tok = tokString
   179  		lit = s.scanStr(pos)
   180  	case s.ch == '`':
   181  		tok = tokStringHex
   182  		lit = s.scanStr(pos)
   183  	case s.ch >= '0' && s.ch <= '9' || s.ch == '-':
   184  		tok = tokInt
   185  		lit = s.scanInt(pos)
   186  	case s.ch == '\'':
   187  		tok = tokInt
   188  		lit = s.scanChar(pos)
   189  	case s.ch == '_' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z':
   190  		tok, lit = s.scanIdent(pos)
   191  	case s.tryConsume("=="):
   192  		tok = tokCmpEq
   193  	case s.tryConsume("!="):
   194  		tok = tokCmpNeq
   195  	default:
   196  		tok = punctuation[s.ch]
   197  		if tok == tokIllegal {
   198  			s.Error(pos, "illegal character %#U", s.ch)
   199  		}
   200  		s.next()
   201  	}
   202  	s.prev2 = s.prev1
   203  	s.prev1 = tok
   204  	return
   205  }
   206  
   207  func (s *scanner) scanStr(pos Pos) string {
   208  	// TODO(dvyukov): get rid of <...> strings, that's only includes
   209  	closing := s.ch
   210  	if s.ch == '<' {
   211  		closing = '>'
   212  	}
   213  	for s.next(); s.ch != closing; s.next() {
   214  		if s.ch == 0 || s.ch == '\n' {
   215  			s.Error(pos, "string literal is not terminated")
   216  			return ""
   217  		}
   218  	}
   219  	lit := string(s.data[pos.Off+1 : s.off])
   220  	for i := 0; i < len(lit); i++ {
   221  		if lit[i] < 0x20 || lit[i] >= 0x80 {
   222  			pos1 := pos
   223  			pos1.Col += i + 1
   224  			pos1.Off += i + 1
   225  			s.Error(pos1, "illegal character %#U in string literal", lit[i])
   226  			break
   227  		}
   228  	}
   229  	s.next()
   230  	if closing != '`' {
   231  		return lit
   232  	}
   233  	decoded, err := hex.DecodeString(lit)
   234  	if err != nil {
   235  		s.Error(pos, "bad hex string literal: %v", err)
   236  	}
   237  	return string(decoded)
   238  }
   239  
   240  func (s *scanner) scanInt(pos Pos) string {
   241  	for s.ch >= '0' && s.ch <= '9' ||
   242  		s.ch >= 'a' && s.ch <= 'f' ||
   243  		s.ch >= 'A' && s.ch <= 'F' ||
   244  		s.ch == 'x' || s.ch == '-' {
   245  		s.next()
   246  	}
   247  	lit := string(s.data[pos.Off:s.off])
   248  	if _, err := strconv.ParseUint(lit, 10, 64); err == nil {
   249  		return lit
   250  	}
   251  	if len(lit) > 1 && lit[0] == '-' {
   252  		if _, err := strconv.ParseInt(lit, 10, 64); err == nil {
   253  			return lit
   254  		}
   255  	}
   256  	if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' {
   257  		if _, err := strconv.ParseUint(lit[2:], 16, 64); err == nil {
   258  			return lit
   259  		}
   260  	}
   261  	s.Error(pos, fmt.Sprintf("bad integer %q", lit))
   262  	return "0"
   263  }
   264  
   265  func (s *scanner) scanChar(pos Pos) string {
   266  	s.next()
   267  	s.next()
   268  	if s.ch != '\'' {
   269  		s.Error(pos, "char literal is not terminated")
   270  		return "0"
   271  	}
   272  	s.next()
   273  	return string(s.data[pos.Off : pos.Off+3])
   274  }
   275  
   276  func (s *scanner) scanIdent(pos Pos) (tok token, lit string) {
   277  	tok = tokIdent
   278  	for s.ch == '_' || s.ch == '$' ||
   279  		s.ch >= 'a' && s.ch <= 'z' ||
   280  		s.ch >= 'A' && s.ch <= 'Z' ||
   281  		s.ch >= '0' && s.ch <= '9' {
   282  		s.next()
   283  	}
   284  	lit = string(s.data[pos.Off:s.off])
   285  	if key, ok := keywords[lit]; ok {
   286  		tok = key
   287  	}
   288  	return
   289  }
   290  
   291  func (s *scanner) Error(pos Pos, msg string, args ...interface{}) {
   292  	s.errors++
   293  	s.errorHandler(pos, fmt.Sprintf(msg, args...))
   294  }
   295  
   296  func (s *scanner) Ok() bool {
   297  	return s.errors == 0
   298  }
   299  
   300  func (s *scanner) next() {
   301  	s.off++
   302  	for s.off < len(s.data) && s.data[s.off] == '\r' {
   303  		s.off++
   304  	}
   305  	if s.off == len(s.data) {
   306  		// Always emit NEWLINE before EOF.
   307  		// Makes lots of things simpler as we always
   308  		// want to treat EOF as NEWLINE as well.
   309  		s.ch = '\n'
   310  		return
   311  	}
   312  	if s.off > len(s.data) {
   313  		s.ch = 0
   314  		return
   315  	}
   316  	if s.off == 0 || s.data[s.off-1] == '\n' {
   317  		s.line++
   318  		s.col = 0
   319  	}
   320  	s.ch = s.data[s.off]
   321  	s.col++
   322  	if s.ch == 0 {
   323  		s.Error(s.pos(), "illegal character \\x00")
   324  	}
   325  }
   326  
   327  func (s *scanner) tryConsume(str string) bool {
   328  	if !bytes.HasPrefix(s.data[s.off:], []byte(str)) {
   329  		return false
   330  	}
   331  	for i := 0; i < len(str); i++ {
   332  		s.next()
   333  	}
   334  	return true
   335  }
   336  
   337  func (s *scanner) skipWhitespace() {
   338  	for s.ch == ' ' || s.ch == '\t' {
   339  		s.next()
   340  	}
   341  }
   342  
   343  func (s *scanner) pos() Pos {
   344  	return Pos{
   345  		File: s.filename,
   346  		Off:  s.off,
   347  		Line: s.line,
   348  		Col:  s.col,
   349  	}
   350  }