modernc.org/cc@v1.0.1/lexer.go

// Copyright 2016 The CC Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cc // import "modernc.org/cc"

import (
	"bytes"
	"fmt"
	"go/token"
	"io"
	"strings"

	"modernc.org/golex/lex"
	"modernc.org/xc"
)

// Lexer states.
const (
	lsZero             = iota
	lsBOL              // Preprocessor: Beginning of line.
	lsDefine           // Preprocessor: Seen ^#define.
	lsSeekRParen       // Preprocessor: Seen ^#define identifier(
	lsTokens           // Preprocessor: Convert anything to PPOTHER until EOL.
	lsUndef            // Preprocessor: Seen ^#undef.
	lsConstExpr0       // Preprocessor: Constant expression parse requested, sentinel token not yet emitted.
	lsConstExpr        // Preprocessor: Parsing constant expression.
	lsTranslationUnit0 // Translation unit parse requested, sentinel token not yet emitted.
	lsTranslationUnit  // Parsing translation unit.
)
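
// For example (illustrative only), a directive line such as "#define N 10"
// drives the state machine lsBOL → lsDefine (after PPDEFINE) → lsTokens
// (after the macro name) and back to lsBOL at the terminating newline;
// function-like macros pass through lsSeekRParen until the ')' closing the
// parameter list is seen.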

// trigraphsReader wraps a lex.Lexer that optionally decodes trigraphs before
// the main lexer sees the input.
type trigraphsReader struct {
	*lex.Lexer           //
	pos0       token.Pos //
	sc         int       // Start condition.
}

// ReadRune only ever reports EOF; the lexer consumes this source through
// ReadChar instead.
func (t *trigraphsReader) ReadRune() (rune, int, error) { return lex.RuneEOF, 0, io.EOF }

// ReadChar returns the next character. A trigraph collapses into a single
// rune, so the returned size is the byte span computed from the position
// delta rather than the rune's UTF-8 length.
func (t *trigraphsReader) ReadChar() (c lex.Char, size int, err error) {
	r := rune(t.scan())
	pos0 := t.pos0
	pos := t.Lookahead().Pos()
	t.pos0 = pos
	c = lex.NewChar(t.First.Pos(), r)
	return c, int(pos - pos0), nil
}
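
// Hypothetical illustration: with trigraphs enabled, the input bytes "??="
// reach ReadChar as the single rune '#' reported with size 3, keeping the
// byte offsets of all following tokens accurate.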

// byteReader adapts an io.Reader to io.RuneReader, delivering the input one
// raw byte at a time with no UTF-8 decoding.
type byteReader struct {
	io.Reader
	b [1]byte
}

func (b *byteReader) ReadRune() (r rune, size int, err error) {
	if _, err = b.Read(b.b[:]); err != nil {
		return -1, 0, err
	}

	return rune(b.b[0]), 1, nil
}

// lexer tokenizes preprocessor and C input and implements yyLexer for the
// generated parsers.
type lexer struct {
	*lex.Lexer                             //
	ch                 chan []xc.Token     //
	commentPos0        token.Pos           //
	constExprToks      []xc.Token          //
	constantExpression *ConstantExpression //
	cpp                func([]xc.Token)    //
	encBuf             []byte              // PPTokens
	encBuf1            [30]byte            // Rune, position, optional value ID.
	encPos             token.Pos           // For delta pos encoding
	eof                lex.Char            //
	example            interface{}         //
	exampleRule        int                 //
	externs            map[int]*Declarator //
	file               *token.File         //
	finalNLInjected    bool                //
	fnDeclarator       *Declarator         //
	includePaths       []string            //
	injectFunc         []xc.Token          // [0], 6.4.2.2 (__func__).
	iota               int64               //
	isPreprocessing    bool                //
	last               xc.Token            //
	model              *Model              //
	preprocessingFile  *PreprocessingFile  //
	report             *xc.Report          //
	sc                 int                 // Start condition.
	scope              *Bindings           //
	scs                int                 // Start condition stack.
	state              int                 // Lexer state.
	sysIncludePaths    []string            //
	t                  *trigraphsReader    //
	textLine           []xc.Token          //
	toC                bool                // Whether to translate preprocessor identifiers to reserved C words.
	tokLast            xc.Token            //
	tokPrev            xc.Token            //
	toks               []xc.Token          // Parsing preprocessor constant expression.
	translationUnit    *TranslationUnit    //
	tweaks             *tweaks             //

	fsm struct {
		comment int
		pos     token.Pos
		state   int
	}
}

// newLexer returns a lexer reading from r. The input is routed through a
// trigraph-decoding pre-lexer before it reaches the main lexer.
func newLexer(nm string, sz int, r io.RuneReader, report *xc.Report, tweaks *tweaks, opts ...lex.Option) (*lexer, error) {
	file := fset.AddFile(nm, -1, sz)
	t := &trigraphsReader{}
	lx, err := lex.New(
		file,
		&byteReader{Reader: r.(io.Reader)},
		lex.ErrorFunc(func(pos token.Pos, msg string) {
			report.Err(pos, msg)
		}),
		lex.RuneClass(func(r rune) int { return int(r) }),
	)
	if err != nil {
		return nil, err
	}

	t.Lexer = lx
	t.pos0 = lx.Lookahead().Pos()
	if tweaks.enableTrigraphs {
		t.sc = scTRIGRAPHS
	}
	r = t

	scope := newBindings(nil, ScopeFile)
	lexer := &lexer{
		externs: map[int]*Declarator{},
		file:    file,
		report:  report,
		scope:   scope,
		scs:     -1, // Stack empty
		t:       t,
		tweaks:  tweaks,
	}
	if lexer.Lexer, err = lex.New(
		file,
		r,
		append(opts, lex.RuneClass(rune2class))...,
	); err != nil {
		return nil, err
	}

	return lexer, nil
}
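
// A minimal usage sketch (hypothetical; newLexer is unexported and is
// normally driven by this package's parse entry points):
//
//	src := strings.NewReader("int i;")
//	lx, err := newLexer("x.c", src.Len(), src, report, tweaks)
//	if err != nil { /* handle */ }
//	var lval yySymType
//	for lx.Lex(&lval) != 0 { /* consume lval.Token */ }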

// newSimpleLexer returns a lexer without a scanner; it serves tokens that
// were already produced elsewhere (l.toks, l.ch).
func newSimpleLexer(cpp func([]xc.Token), report *xc.Report, tweaks *tweaks) *lexer {
	return &lexer{
		cpp:     cpp,
		externs: map[int]*Declarator{},
		report:  report,
		scope:   newBindings(nil, ScopeFile),
		tweaks:  tweaks,
	}
}

// push saves the current start condition and switches to sc. The stack has
// depth one; the only tolerated nested push is a /*-style comment inside a
// line starting with #.
func (l *lexer) push(sc int) {
	if l.scs >= 0 { // Stack overflow.
		if l.sc != scDIRECTIVE || sc != scCOMMENT {
			panic("internal error")
		}

		// /*-style comment in a line starting with #
		l.pop()
	}

	l.scs = l.sc
	l.sc = sc
}

// pop restores the start condition saved by push.
func (l *lexer) pop() {
	if l.scs < 0 { // Stack underflow
		panic("internal error")
	}
	l.sc = l.scs
	l.scs = -1 // Stack empty.
}
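
// Illustration of the depth-one stack: push(scDIRECTIVE) on a # line saves
// scINITIAL; a subsequent push(scCOMMENT) first pops that saved condition,
// so the pop() at the end of the comment returns to scINITIAL rather than
// to scDIRECTIVE.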

// pushScope makes a new binding scope of the given kind current and returns
// the previous one.
func (l *lexer) pushScope(kind Scope) (old *Bindings) {
	old = l.scope
	l.scope = newBindings(old, kind)
	l.scope.maxAlign = 1
	return old
}

func (l *lexer) popScope(tok xc.Token) (old, new *Bindings) {
	return l.popScopePos(tok.Pos())
}

// popScopePos makes the parent scope current. Popping the file scope is an
// error reported at pos.
func (l *lexer) popScopePos(pos token.Pos) (old, new *Bindings) {
	old = l.scope
	new = l.scope.Parent
	if new == nil {
		l.report.Err(pos, "cannot pop scope")
		return nil, old
	}

	l.scope = new
	return old, new
}
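
// Sketch of the intended pairing (assuming the package's block-scope kind,
// ScopeBlock):
//
//	old := l.pushScope(ScopeBlock) // at '{'
//	// ... declarations bind in l.scope ...
//	l.popScope(rbrace)             // at '}', back to old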

const (
	fsmZero = iota
	fsmHasComment
)

var genCommentLeader = []byte("/*")

// comment records the text of the comment token just scanned. Comments that
// abut the previous one are coalesced into a single dictionary entry.
func (l *lexer) comment(general bool) {
	if l.tweaks.comments != nil {
		b := l.TokenBytes(nil)
		pos := l.First.Pos()
		if general {
			pos = l.commentPos0
			b = append(genCommentLeader, b...)
		}
		if l.Lookahead().Rune == '\n' {
			b = append(b, '\n')
		}

		switch fsm := &l.fsm; fsm.state {
		case fsmHasComment:
			if pos == fsm.pos+token.Pos(len(dict.S(l.fsm.comment))) {
				fsm.comment = dict.ID(append(dict.S(fsm.comment), b...))
				break
			}

			fallthrough
		case fsmZero:
			fsm.state = fsmHasComment
			fsm.comment = dict.ID(b)
			fsm.pos = pos
		}
	}
}
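
// For example, two stacked line comments
//
//	// first
//	// second
//
// abut positionally, so they are merged into one entry "// first\n// second\n"
// which scanChar later attaches, in tweaks.comments, to the position of the
// first token that follows them.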

// scanChar returns the next character, skipping insignificant blanks,
// resetting the lexer state at newlines and attaching any pending comment to
// the position of the token that follows it.
func (l *lexer) scanChar() (c lex.Char) {
again:
	r := rune(l.scan())
	switch r {
	case ' ':
		if l.state != lsTokens || l.tokLast.Rune == ' ' {
			goto again
		}
	case '\n':
		if l.state == lsTokens {
			l.encodeToken(xc.Token{Char: lex.NewChar(l.First.Pos(), ' '), Val: idSpace})
		}
		l.state = lsBOL
		l.sc = scINITIAL
		l.scs = -1 // Stack empty
	case PREPROCESSING_FILE:
		l.state = lsBOL
		l.isPreprocessing = true
	case CONSTANT_EXPRESSION, TRANSLATION_UNIT: //TODO- CONSTANT_EXPRESSION, then must add some manual yy:examples.
		l.toC = true
	}

	fp := l.First.Pos()
	if l.fsm.state == fsmHasComment {
		switch {
		case r == '\n' && fp == l.fsm.pos+token.Pos(len(dict.S(l.fsm.comment)))-1:
			// The newline belongs to the pending comment; keep accumulating.
		case r != '\n' && fp == l.fsm.pos+token.Pos(len(dict.S(l.fsm.comment))):
			// First token after the comment: attach the comment text here.
			l.tweaks.comments[fp] = dict.ID(bytes.TrimSpace(dict.S(l.fsm.comment)))
			l.fsm.state = fsmZero
		default:
			l.fsm.state = fsmZero
		}
	}

	return lex.NewChar(l.First.Pos(), r)
}

// scanToken returns the next token according to the current lexer state:
// sentinel injection for constant expression and translation unit parses,
// replay of queued tokens, or a fresh scan.
func (l *lexer) scanToken() (tok xc.Token) {
	switch l.state {
	case lsConstExpr0:
		tok = xc.Token{Char: lex.NewChar(0, CONSTANT_EXPRESSION)}
		l.state = lsConstExpr
	case lsConstExpr:
		if len(l.toks) == 0 {
			tok = xc.Token{Char: lex.NewChar(l.tokLast.Pos(), lex.RuneEOF)}
			break
		}

		tok = l.toks[0]
		l.toks = l.toks[1:]
	case lsTranslationUnit0:
		tok = xc.Token{Char: lex.NewChar(0, TRANSLATION_UNIT)}
		l.state = lsTranslationUnit
		l.toC = true
	case lsTranslationUnit:
	again:
		for len(l.textLine) == 0 {
			var ok bool
			if l.textLine, ok = <-l.ch; !ok {
				return xc.Token{Char: lex.NewChar(l.tokLast.Pos(), lex.RuneEOF)}
			}

			if l.cpp != nil {
				l.cpp(l.textLine)
			}
		}
		tok = l.textLine[0]
		l.textLine = l.textLine[1:]
		if tok.Rune == ' ' {
			goto again
		}

		tok = l.scope.lexerHack(tok, l.tokLast)
	default:
		c := l.scanChar()
		if c.Rune == ccEOF {
			c = lex.NewChar(c.Pos(), lex.RuneEOF)
			if l.isPreprocessing && l.last.Rune != '\n' && !l.finalNLInjected {
				// A preprocessing file must end in a newline; inject one
				// before delivering the final EOF if the source lacks it.
				l.finalNLInjected = true
				l.eof = c
				c.Rune = '\n'
				l.state = lsBOL
				return xc.Token{Char: c}
			}

			return xc.Token{Char: c}
		}

		val := 0
		if tokHasVal[c.Rune] {
			b := l.TokenBytes(nil)
			val = dict.ID(b)
			//TODO handle ID UCNs
			//TODO- chars := l.Token()
			//TODO- switch c.Rune {
			//TODO- case IDENTIFIER, IDENTIFIER_LPAREN:
			//TODO- 	b := l.TokenBytes(func(buf *bytes.Buffer) {
			//TODO- 		for i := 0; i < len(chars); {
			//TODO- 			switch c := chars[i]; {
			//TODO- 			case c.Rune == '$' && !l.tweaks.enableDlrInIdentifiers:
			//TODO- 				l.report.Err(c.Pos(), "identifier character set extension '$' not enabled")
			//TODO- 				i++
			//TODO- 			case c.Rune == '\\':
			//TODO- 				r, n := decodeUCN(chars[i:])
			//TODO- 				buf.WriteRune(r)
			//TODO- 				i += n
			//TODO- 			case c.Rune < 0x80: // ASCII
			//TODO- 				buf.WriteByte(byte(c.Rune))
			//TODO- 				i++
			//TODO- 			default:
			//TODO- 				panic("internal error")
			//TODO- 			}
			//TODO- 		}
			//TODO- 	})
			//TODO- 	val = dict.ID(b)
			//TODO- default:
			//TODO- 	panic("internal error: " + yySymName(int(c.Rune)))
			//TODO- }
		}
		tok = xc.Token{Char: c, Val: val}
		if !l.isPreprocessing {
			tok = l.scope.lexerHack(tok, l.tokLast)
		}
	}
	if l.toC {
		tok = toC(tok, l.tweaks)
	}
	l.tokPrev = l.tokLast
	l.tokLast = tok
	return tok
}
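
// In lsTranslationUnit mode tokens arrive over l.ch one preprocessed line at
// a time; space tokens are dropped and lexerHack consults the current scope
// so typedef names can be told apart from ordinary identifiers (the classic
// C "lexer hack").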

// Lex implements yyLexer.
func (l *lexer) Lex(lval *yySymType) int {
	var tok xc.Token
	if x := l.injectFunc; l.exampleRule == 0 && len(x) != 0 {
		tok = x[0]
		l.injectFunc = x[1:]
	} else {
		tok = l.scanToken()
	}
	//dbg("Lex %s", PrettyString(tok))
	if l.constExprToks != nil {
		l.constExprToks = append(l.constExprToks, tok)
	}
	l.last = tok
	if tok.Rune == lex.RuneEOF {
		lval.Token = tok
		return 0
	}

	switch l.state {
	case lsBOL:
		switch tok.Rune {
		case PREPROCESSING_FILE, '\n':
			// nop
		case '#':
			l.push(scDIRECTIVE)
			tok = l.scanToken()
			switch tok.Rune {
			case '\n':
				tok.Char = lex.NewChar(tok.Pos(), PPHASH_NL)
			case PPDEFINE:
				l.push(scDEFINE)
				l.state = lsDefine
			case PPELIF, PPENDIF, PPERROR, PPIF, PPLINE, PPPRAGMA:
				l.sc = scINITIAL
				l.state = lsTokens
			case PPELSE, PPIFDEF, PPIFNDEF:
				l.state = lsZero
			case PPUNDEF:
				l.state = lsUndef
			case PPINCLUDE:
				l.sc = scHEADER
				l.state = lsTokens
			case PPINCLUDE_NEXT:
				if l.tweaks.enableIncludeNext {
					l.sc = scHEADER
					l.state = lsTokens
					break
				}

				l.state = lsTokens
				tok.Char = lex.NewChar(tok.Pos(), PPNONDIRECTIVE)
				tok.Val = xc.Dict.SID("include_next")
			default:
				l.state = lsTokens
				tok.Char = lex.NewChar(tok.Pos(), PPNONDIRECTIVE)
				l.pop()
			}
		default:
			l.encodeToken(tok)
			tok.Char = lex.NewChar(tok.Pos(), PPOTHER)
			l.state = lsTokens
		}
	case lsDefine:
		l.pop()
		switch tok.Rune {
		case IDENTIFIER:
			l.state = lsTokens
		case IDENTIFIER_LPAREN:
			l.state = lsSeekRParen
		default:
			l.state = lsZero
		}
	case lsSeekRParen:
		if tok.Rune == ')' {
			l.state = lsTokens
		}
	case lsTokens:
		l.encodeToken(tok)
		tok.Char = lex.NewChar(tok.Pos(), PPOTHER)
	case lsUndef:
		l.state = lsTokens
	}

	lval.Token = tok
	return int(tok.Char.Rune)
}
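
// Token-stream example (illustrative, interleaved space tokens omitted): for
// the line "#define N 10" the parser receives PPDEFINE, IDENTIFIER "N", then
// "10" re-typed as PPOTHER, and the '\n' that resets the state to lsBOL; the
// leading '#' itself is consumed while dispatching on the directive name.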

// Error implements yyLexer.
func (l *lexer) Error(msg string) {
	msg = strings.Replace(msg, "$end", "EOF", -1)
	t := l.last
	parts := strings.Split(msg, ", expected ")
	if len(parts) == 2 && strings.HasPrefix(parts[0], "unexpected ") && tokHasVal[t.Rune] {
		msg = fmt.Sprintf("%s %s, expected %s", parts[0], t.S(), parts[1])
	}
	l.report.ErrTok(t, "%s", msg)
}
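
// For example, the parser message "unexpected IDENTIFIER, expected ';'" for
// the identifier foo is reported as
// "unexpected IDENTIFIER foo, expected ';'", and "$end" renders as "EOF".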

// Reduced implements yyLexerEx.
func (l *lexer) Reduced(rule, state int, lval *yySymType) (stop bool) {
	if n := l.exampleRule; n >= 0 && rule != n {
		return false
	}

	switch x := lval.node.(type) {
	case interface {
		fragment() interface{}
	}:
		l.example = x.fragment()
	default:
		l.example = x
	}
	return true
}

// parsePPConstExpr0 macro-expands list and parses it as a preprocessor
// constant expression, returning its value and type.
func (l *lexer) parsePPConstExpr0(list PPTokenList, p *pp) (interface{}, Type) {
	l.toks = l.toks[:0]
	p.expand(&tokenBuf{decodeTokens(list, nil, true)}, true, func(toks []xc.Token) {
		l.toks = append(l.toks, toks...)
	})
	w := 0
	for _, tok := range l.toks {
		switch tok.Rune {
		case ' ':
			// nop
		case IDENTIFIER:
			if p.macros.m[tok.Val] != nil {
				l.report.ErrTok(tok, "expected constant expression")
				return nil, nil
			}

			// Identifiers remaining after macro expansion evaluate as 0
			// ([0], 6.10.1).
			tok.Rune = INTCONST
			tok.Val = id0
			fallthrough
		default:
			l.toks[w] = tok
			w++
		}
	}
	l.toks = l.toks[:w]
	l.state = lsConstExpr0
	if yyParse(l) == 0 {
		e := l.constantExpression
		return e.Value, e.Type
	}

	return nil, nil
}

// parsePPConstExpr reports whether the preprocessor constant expression in
// list evaluates to a non-zero value.
func (l *lexer) parsePPConstExpr(list PPTokenList, p *pp) bool {
	if v, _ := l.parsePPConstExpr0(list, p); v != nil {
		return isNonZero(v)
	}

	return false
}
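
// Putting it together (hypothetical driver): when the preprocessor meets
// "#if EXPR" it calls parsePPConstExpr with EXPR's token list; the tokens
// are macro-expanded, leftover identifiers become 0, and the #if branch is
// taken iff the parsed constant expression is non-zero.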