github.com/euank/go@v0.0.0-20160829210321-495514729181/src/cmd/compile/internal/gc/lex.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gc
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"cmd/internal/obj"
    11  	"fmt"
    12  	"io"
    13  	"strconv"
    14  	"strings"
    15  	"unicode"
    16  	"unicode/utf8"
    17  )
    18  
    19  const (
    20  	EOF = -1
    21  	BOM = 0xFEFF
    22  )
    23  
    24  // lexlineno is the line number _after_ the most recently read rune.
    25  // In particular, it's advanced (or rewound) as newlines are read (or unread).
    26  var lexlineno int32
    27  
    28  // lineno is the line number at the start of the most recently lexed token.
    29  var lineno int32
    30  
    31  var lexbuf bytes.Buffer
    32  var strbuf bytes.Buffer
    33  var litbuf string // LLITERAL value for use in syntax error messages
    34  
    35  func isSpace(c rune) bool {
    36  	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
    37  }
    38  
    39  func isLetter(c rune) bool {
    40  	return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
    41  }
    42  
    43  func isDigit(c rune) bool {
    44  	return '0' <= c && c <= '9'
    45  }
    46  
    47  func isQuoted(s string) bool {
    48  	return len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"'
    49  }
    50  
    51  func plan9quote(s string) string {
    52  	if s == "" {
    53  		return "''"
    54  	}
    55  	for _, c := range s {
    56  		if c <= ' ' || c == '\'' {
    57  			return "'" + strings.Replace(s, "'", "''", -1) + "'"
    58  		}
    59  	}
    60  	return s
    61  }
    62  
    63  type Pragma uint16
    64  
    65  const (
    66  	Nointerface       Pragma = 1 << iota
    67  	Noescape                 // func parameters don't escape
    68  	Norace                   // func must not have race detector annotations
    69  	Nosplit                  // func should not execute on separate stack
    70  	Noinline                 // func should not be inlined
    71  	Systemstack              // func must run on system stack
    72  	Nowritebarrier           // emit compiler error instead of write barrier
    73  	Nowritebarrierrec        // error on write barrier in this or recursive callees
    74  	CgoUnsafeArgs            // treat a pointer to one arg as a pointer to them all
    75  	UintptrEscapes           // pointers converted to uintptr escape
    76  )
    77  
    78  type lexer struct {
    79  	// source
    80  	bin        *bufio.Reader
    81  	prevlineno int32 // line no. of most recently read character
    82  
    83  	nlsemi bool // if set, '\n' and EOF translate to ';'
    84  
    85  	// pragma flags
    86  	// accumulated by lexer; reset by parser
    87  	pragma Pragma
    88  
    89  	// current token
    90  	tok  int32
    91  	sym_ *Sym   // valid if tok == LNAME
    92  	val  Val    // valid if tok == LLITERAL
    93  	op   Op     // valid if tok == LOPER, LASOP, or LINCOP, or prec > 0
    94  	prec OpPrec // operator precedence; 0 if not a binary operator
    95  }
    96  
    97  type OpPrec int
    98  
    99  const (
   100  	// Precedences of binary operators (must be > 0).
   101  	PCOMM OpPrec = 1 + iota
   102  	POROR
   103  	PANDAND
   104  	PCMP
   105  	PADD
   106  	PMUL
   107  )
   108  
   109  const (
   110  	// The value of single-char tokens is just their character's Unicode value.
   111  	// They are all below utf8.RuneSelf. Shift other tokens up to avoid conflicts.
   112  
   113  	// names and literals
   114  	LNAME = utf8.RuneSelf + iota
   115  	LLITERAL
   116  
   117  	// operator-based operations
   118  	LOPER
   119  	LASOP
   120  	LINCOP
   121  
   122  	// miscellaneous
   123  	LCOLAS
   124  	LCOMM
   125  	LDDD
   126  
   127  	// keywords
   128  	LBREAK
   129  	LCASE
   130  	LCHAN
   131  	LCONST
   132  	LCONTINUE
   133  	LDEFAULT
   134  	LDEFER
   135  	LELSE
   136  	LFALL
   137  	LFOR
   138  	LFUNC
   139  	LGO
   140  	LGOTO
   141  	LIF
   142  	LIMPORT
   143  	LINTERFACE
   144  	LMAP
   145  	LPACKAGE
   146  	LRANGE
   147  	LRETURN
   148  	LSELECT
   149  	LSTRUCT
   150  	LSWITCH
   151  	LTYPE
   152  	LVAR
   153  
   154  	LIGNORE
   155  )
   156  
   157  var lexn = map[rune]string{
   158  	LNAME:    "NAME",
   159  	LLITERAL: "LITERAL",
   160  
   161  	LOPER:  "OPER",
   162  	LASOP:  "ASOP",
   163  	LINCOP: "INCOP",
   164  
   165  	LCOLAS: "COLAS",
   166  	LCOMM:  "COMM",
   167  	LDDD:   "DDD",
   168  
   169  	LBREAK:     "BREAK",
   170  	LCASE:      "CASE",
   171  	LCHAN:      "CHAN",
   172  	LCONST:     "CONST",
   173  	LCONTINUE:  "CONTINUE",
   174  	LDEFAULT:   "DEFAULT",
   175  	LDEFER:     "DEFER",
   176  	LELSE:      "ELSE",
   177  	LFALL:      "FALL",
   178  	LFOR:       "FOR",
   179  	LFUNC:      "FUNC",
   180  	LGO:        "GO",
   181  	LGOTO:      "GOTO",
   182  	LIF:        "IF",
   183  	LIMPORT:    "IMPORT",
   184  	LINTERFACE: "INTERFACE",
   185  	LMAP:       "MAP",
   186  	LPACKAGE:   "PACKAGE",
   187  	LRANGE:     "RANGE",
   188  	LRETURN:    "RETURN",
   189  	LSELECT:    "SELECT",
   190  	LSTRUCT:    "STRUCT",
   191  	LSWITCH:    "SWITCH",
   192  	LTYPE:      "TYPE",
   193  	LVAR:       "VAR",
   194  
   195  	// LIGNORE is never escaping lexer.next
   196  }
   197  
   198  func lexname(lex rune) string {
   199  	if s, ok := lexn[lex]; ok {
   200  		return s
   201  	}
   202  	return fmt.Sprintf("LEX-%d", lex)
   203  }
   204  
   205  func (l *lexer) next() {
   206  	nlsemi := l.nlsemi
   207  	l.nlsemi = false
   208  	l.prec = 0
   209  
   210  l0:
   211  	// skip white space
   212  	c := l.getr()
   213  	for isSpace(c) {
   214  		if c == '\n' && nlsemi {
   215  			if Debug['x'] != 0 {
   216  				fmt.Printf("lex: implicit semi\n")
   217  			}
   218  			// Insert implicit semicolon on previous line,
   219  			// before the newline character.
   220  			lineno = lexlineno - 1
   221  			l.tok = ';'
   222  			return
   223  		}
   224  		c = l.getr()
   225  	}
   226  
   227  	// start of token
   228  	lineno = lexlineno
   229  
   230  	// identifiers and keywords
   231  	// (for better error messages consume all chars >= utf8.RuneSelf for identifiers)
   232  	if isLetter(c) || c >= utf8.RuneSelf {
   233  		l.ident(c)
   234  		if l.tok == LIGNORE {
   235  			goto l0
   236  		}
   237  		return
   238  	}
   239  	// c < utf8.RuneSelf
   240  
   241  	var c1 rune
   242  	var op Op
   243  	var prec OpPrec
   244  
   245  	switch c {
   246  	case EOF:
   247  		l.ungetr()
   248  		// Treat EOF as "end of line" for the purposes
   249  		// of inserting a semicolon.
   250  		if nlsemi {
   251  			if Debug['x'] != 0 {
   252  				fmt.Printf("lex: implicit semi\n")
   253  			}
   254  			l.tok = ';'
   255  			return
   256  		}
   257  		l.tok = -1
   258  		return
   259  
   260  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   261  		l.number(c)
   262  		return
   263  
   264  	case '.':
   265  		c1 = l.getr()
   266  		if isDigit(c1) {
   267  			l.ungetr()
   268  			l.number('.')
   269  			return
   270  		}
   271  
   272  		if c1 == '.' {
   273  			p, err := l.bin.Peek(1)
   274  			if err == nil && p[0] == '.' {
   275  				l.getr()
   276  				c = LDDD
   277  				goto lx
   278  			}
   279  
   280  			l.ungetr()
   281  			c1 = '.'
   282  		}
   283  
   284  	case '"':
   285  		l.stdString()
   286  		return
   287  
   288  	case '`':
   289  		l.rawString()
   290  		return
   291  
   292  	case '\'':
   293  		l.rune()
   294  		return
   295  
   296  	case '/':
   297  		c1 = l.getr()
   298  		if c1 == '*' {
   299  			c = l.getr()
   300  			for {
   301  				if c == '*' {
   302  					c = l.getr()
   303  					if c == '/' {
   304  						break
   305  					}
   306  					continue
   307  				}
   308  				if c == EOF {
   309  					Yyerror("eof in comment")
   310  					errorexit()
   311  				}
   312  				c = l.getr()
   313  			}
   314  
   315  			// A comment containing newlines acts like a newline.
   316  			if lexlineno > lineno && nlsemi {
   317  				if Debug['x'] != 0 {
   318  					fmt.Printf("lex: implicit semi\n")
   319  				}
   320  				l.tok = ';'
   321  				return
   322  			}
   323  			goto l0
   324  		}
   325  
   326  		if c1 == '/' {
   327  			c = l.getlinepragma()
   328  			for {
   329  				if c == '\n' || c == EOF {
   330  					l.ungetr()
   331  					goto l0
   332  				}
   333  
   334  				c = l.getr()
   335  			}
   336  		}
   337  
   338  		op = ODIV
   339  		prec = PMUL
   340  		goto binop1
   341  
   342  	case ':':
   343  		c1 = l.getr()
   344  		if c1 == '=' {
   345  			c = LCOLAS
   346  			goto lx
   347  		}
   348  
   349  	case '*':
   350  		op = OMUL
   351  		prec = PMUL
   352  		goto binop
   353  
   354  	case '%':
   355  		op = OMOD
   356  		prec = PMUL
   357  		goto binop
   358  
   359  	case '+':
   360  		op = OADD
   361  		goto incop
   362  
   363  	case '-':
   364  		op = OSUB
   365  		goto incop
   366  
   367  	case '>':
   368  		c = LOPER
   369  		c1 = l.getr()
   370  		if c1 == '>' {
   371  			op = ORSH
   372  			prec = PMUL
   373  			goto binop
   374  		}
   375  
   376  		l.prec = PCMP
   377  		if c1 == '=' {
   378  			l.op = OGE
   379  			goto lx
   380  		}
   381  		l.op = OGT
   382  
   383  	case '<':
   384  		c = LOPER
   385  		c1 = l.getr()
   386  		if c1 == '<' {
   387  			op = OLSH
   388  			prec = PMUL
   389  			goto binop
   390  		}
   391  
   392  		if c1 == '-' {
   393  			c = LCOMM
   394  			// Not a binary operator, but parsed as one
   395  			// so we can give a good error message when used
   396  			// in an expression context.
   397  			l.prec = PCOMM
   398  			l.op = OSEND
   399  			goto lx
   400  		}
   401  
   402  		l.prec = PCMP
   403  		if c1 == '=' {
   404  			l.op = OLE
   405  			goto lx
   406  		}
   407  		l.op = OLT
   408  
   409  	case '=':
   410  		c1 = l.getr()
   411  		if c1 == '=' {
   412  			c = LOPER
   413  			l.prec = PCMP
   414  			l.op = OEQ
   415  			goto lx
   416  		}
   417  
   418  	case '!':
   419  		c1 = l.getr()
   420  		if c1 == '=' {
   421  			c = LOPER
   422  			l.prec = PCMP
   423  			l.op = ONE
   424  			goto lx
   425  		}
   426  
   427  	case '&':
   428  		c1 = l.getr()
   429  		if c1 == '&' {
   430  			c = LOPER
   431  			l.prec = PANDAND
   432  			l.op = OANDAND
   433  			goto lx
   434  		}
   435  
   436  		if c1 == '^' {
   437  			c = LOPER
   438  			op = OANDNOT
   439  			prec = PMUL
   440  			goto binop
   441  		}
   442  
   443  		op = OAND
   444  		prec = PMUL
   445  		goto binop1
   446  
   447  	case '|':
   448  		c1 = l.getr()
   449  		if c1 == '|' {
   450  			c = LOPER
   451  			l.prec = POROR
   452  			l.op = OOROR
   453  			goto lx
   454  		}
   455  
   456  		op = OOR
   457  		prec = PADD
   458  		goto binop1
   459  
   460  	case '^':
   461  		op = OXOR
   462  		prec = PADD
   463  		goto binop
   464  
   465  	case '(', '[', '{', ',', ';':
   466  		goto lx
   467  
   468  	case ')', ']', '}':
   469  		l.nlsemi = true
   470  		goto lx
   471  
   472  	default:
   473  		// anything else is illegal
   474  		Yyerror("syntax error: illegal character %#U", c)
   475  		goto l0
   476  	}
   477  
   478  	l.ungetr()
   479  
   480  lx:
   481  	if Debug['x'] != 0 {
   482  		if c >= utf8.RuneSelf {
   483  			fmt.Printf("%v lex: TOKEN %s\n", linestr(lineno), lexname(c))
   484  		} else {
   485  			fmt.Printf("%v lex: TOKEN '%c'\n", linestr(lineno), c)
   486  		}
   487  	}
   488  
   489  	l.tok = c
   490  	return
   491  
   492  incop:
   493  	c1 = l.getr()
   494  	if c1 == c {
   495  		l.nlsemi = true
   496  		l.op = op
   497  		c = LINCOP
   498  		goto lx
   499  	}
   500  	prec = PADD
   501  	goto binop1
   502  
   503  binop:
   504  	c1 = l.getr()
   505  binop1:
   506  	if c1 != '=' {
   507  		l.ungetr()
   508  		l.op = op
   509  		l.prec = prec
   510  		goto lx
   511  	}
   512  
   513  	l.op = op
   514  	if Debug['x'] != 0 {
   515  		fmt.Printf("lex: TOKEN ASOP %s=\n", goopnames[op])
   516  	}
   517  	l.tok = LASOP
   518  }
   519  
   520  func (l *lexer) ident(c rune) {
   521  	cp := &lexbuf
   522  	cp.Reset()
   523  
   524  	// accelerate common case (7bit ASCII)
   525  	for isLetter(c) || isDigit(c) {
   526  		cp.WriteByte(byte(c))
   527  		c = l.getr()
   528  	}
   529  
   530  	// general case
   531  	for {
   532  		if c >= utf8.RuneSelf {
   533  			if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) {
   534  				if cp.Len() == 0 && unicode.IsDigit(c) {
   535  					Yyerror("identifier cannot begin with digit %#U", c)
   536  				}
   537  			} else {
   538  				Yyerror("invalid identifier character %#U", c)
   539  			}
   540  			cp.WriteRune(c)
   541  		} else if isLetter(c) || isDigit(c) {
   542  			cp.WriteByte(byte(c))
   543  		} else {
   544  			break
   545  		}
   546  		c = l.getr()
   547  	}
   548  
   549  	cp = nil
   550  	l.ungetr()
   551  
   552  	name := lexbuf.Bytes()
   553  
   554  	if len(name) >= 2 {
   555  		if tok, ok := keywords[string(name)]; ok {
   556  			if Debug['x'] != 0 {
   557  				fmt.Printf("lex: %s\n", lexname(tok))
   558  			}
   559  			switch tok {
   560  			case LBREAK, LCONTINUE, LFALL, LRETURN:
   561  				l.nlsemi = true
   562  			}
   563  			l.tok = tok
   564  			return
   565  		}
   566  	}
   567  
   568  	s := LookupBytes(name)
   569  	if Debug['x'] != 0 {
   570  		fmt.Printf("lex: ident %s\n", s)
   571  	}
   572  	l.sym_ = s
   573  	l.nlsemi = true
   574  	l.tok = LNAME
   575  }
   576  
   577  var keywords = map[string]int32{
   578  	"break":       LBREAK,
   579  	"case":        LCASE,
   580  	"chan":        LCHAN,
   581  	"const":       LCONST,
   582  	"continue":    LCONTINUE,
   583  	"default":     LDEFAULT,
   584  	"defer":       LDEFER,
   585  	"else":        LELSE,
   586  	"fallthrough": LFALL,
   587  	"for":         LFOR,
   588  	"func":        LFUNC,
   589  	"go":          LGO,
   590  	"goto":        LGOTO,
   591  	"if":          LIF,
   592  	"import":      LIMPORT,
   593  	"interface":   LINTERFACE,
   594  	"map":         LMAP,
   595  	"package":     LPACKAGE,
   596  	"range":       LRANGE,
   597  	"return":      LRETURN,
   598  	"select":      LSELECT,
   599  	"struct":      LSTRUCT,
   600  	"switch":      LSWITCH,
   601  	"type":        LTYPE,
   602  	"var":         LVAR,
   603  
   604  	// 💩
   605  	"notwithstanding":      LIGNORE,
   606  	"thetruthofthematter":  LIGNORE,
   607  	"despiteallobjections": LIGNORE,
   608  	"whereas":              LIGNORE,
   609  	"insofaras":            LIGNORE,
   610  }
   611  
   612  func (l *lexer) number(c rune) {
   613  	cp := &lexbuf
   614  	cp.Reset()
   615  
   616  	// parse mantissa before decimal point or exponent
   617  	isInt := false
   618  	malformedOctal := false
   619  	if c != '.' {
   620  		if c != '0' {
   621  			// decimal or float
   622  			for isDigit(c) {
   623  				cp.WriteByte(byte(c))
   624  				c = l.getr()
   625  			}
   626  
   627  		} else {
   628  			// c == 0
   629  			cp.WriteByte('0')
   630  			c = l.getr()
   631  			if c == 'x' || c == 'X' {
   632  				isInt = true // must be int
   633  				cp.WriteByte(byte(c))
   634  				c = l.getr()
   635  				for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   636  					cp.WriteByte(byte(c))
   637  					c = l.getr()
   638  				}
   639  				if lexbuf.Len() == 2 {
   640  					Yyerror("malformed hex constant")
   641  				}
   642  			} else {
   643  				// decimal 0, octal, or float
   644  				for isDigit(c) {
   645  					if c > '7' {
   646  						malformedOctal = true
   647  					}
   648  					cp.WriteByte(byte(c))
   649  					c = l.getr()
   650  				}
   651  			}
   652  		}
   653  	}
   654  
   655  	// unless we have a hex number, parse fractional part or exponent, if any
   656  	var str string
   657  	if !isInt {
   658  		isInt = true // assume int unless proven otherwise
   659  
   660  		// fraction
   661  		if c == '.' {
   662  			isInt = false
   663  			cp.WriteByte('.')
   664  			c = l.getr()
   665  			for isDigit(c) {
   666  				cp.WriteByte(byte(c))
   667  				c = l.getr()
   668  			}
   669  		}
   670  
   671  		// exponent
   672  		if c == 'e' || c == 'E' {
   673  			isInt = false
   674  			cp.WriteByte(byte(c))
   675  			c = l.getr()
   676  			if c == '+' || c == '-' {
   677  				cp.WriteByte(byte(c))
   678  				c = l.getr()
   679  			}
   680  			if !isDigit(c) {
   681  				Yyerror("malformed floating point constant exponent")
   682  			}
   683  			for isDigit(c) {
   684  				cp.WriteByte(byte(c))
   685  				c = l.getr()
   686  			}
   687  		}
   688  
   689  		// imaginary constant
   690  		if c == 'i' {
   691  			str = lexbuf.String()
   692  			x := new(Mpcplx)
   693  			x.Real.SetFloat64(0.0)
   694  			x.Imag.SetString(str)
   695  			if x.Imag.Val.IsInf() {
   696  				Yyerror("overflow in imaginary constant")
   697  				x.Imag.SetFloat64(0.0)
   698  			}
   699  			l.val.U = x
   700  
   701  			if Debug['x'] != 0 {
   702  				fmt.Printf("lex: imaginary literal\n")
   703  			}
   704  			goto done
   705  		}
   706  	}
   707  
   708  	l.ungetr()
   709  
   710  	if isInt {
   711  		if malformedOctal {
   712  			Yyerror("malformed octal constant")
   713  		}
   714  
   715  		str = lexbuf.String()
   716  		x := new(Mpint)
   717  		x.SetString(str)
   718  		if x.Ovf {
   719  			Yyerror("overflow in constant")
   720  			x.SetInt64(0)
   721  		}
   722  		l.val.U = x
   723  
   724  		if Debug['x'] != 0 {
   725  			fmt.Printf("lex: integer literal\n")
   726  		}
   727  
   728  	} else { // float
   729  
   730  		str = lexbuf.String()
   731  		x := newMpflt()
   732  		x.SetString(str)
   733  		if x.Val.IsInf() {
   734  			Yyerror("overflow in float constant")
   735  			x.SetFloat64(0.0)
   736  		}
   737  		l.val.U = x
   738  
   739  		if Debug['x'] != 0 {
   740  			fmt.Printf("lex: floating literal\n")
   741  		}
   742  	}
   743  
   744  done:
   745  	litbuf = "" // lazily initialized in (*parser).syntax_error
   746  	l.nlsemi = true
   747  	l.tok = LLITERAL
   748  }
   749  
   750  func (l *lexer) stdString() {
   751  	lexbuf.Reset()
   752  	lexbuf.WriteString(`"<string>"`)
   753  
   754  	cp := &strbuf
   755  	cp.Reset()
   756  
   757  	for {
   758  		r, b, ok := l.onechar('"')
   759  		if !ok {
   760  			break
   761  		}
   762  		if r == 0 {
   763  			cp.WriteByte(b)
   764  		} else {
   765  			cp.WriteRune(r)
   766  		}
   767  	}
   768  
   769  	l.val.U = internString(cp.Bytes())
   770  	if Debug['x'] != 0 {
   771  		fmt.Printf("lex: string literal\n")
   772  	}
   773  	litbuf = "string literal"
   774  	l.nlsemi = true
   775  	l.tok = LLITERAL
   776  }
   777  
   778  func (l *lexer) rawString() {
   779  	lexbuf.Reset()
   780  	lexbuf.WriteString("`<string>`")
   781  
   782  	cp := &strbuf
   783  	cp.Reset()
   784  
   785  	for {
   786  		c := l.getr()
   787  		if c == '\r' {
   788  			continue
   789  		}
   790  		if c == EOF {
   791  			Yyerror("eof in string")
   792  			break
   793  		}
   794  		if c == '`' {
   795  			break
   796  		}
   797  		cp.WriteRune(c)
   798  	}
   799  
   800  	l.val.U = internString(cp.Bytes())
   801  	if Debug['x'] != 0 {
   802  		fmt.Printf("lex: string literal\n")
   803  	}
   804  	litbuf = "string literal"
   805  	l.nlsemi = true
   806  	l.tok = LLITERAL
   807  }
   808  
   809  func (l *lexer) rune() {
   810  	r, b, ok := l.onechar('\'')
   811  	if !ok {
   812  		Yyerror("empty character literal or unescaped ' in character literal")
   813  		r = '\''
   814  	}
   815  	if r == 0 {
   816  		r = rune(b)
   817  	}
   818  
   819  	if c := l.getr(); c != '\'' {
   820  		Yyerror("missing '")
   821  		l.ungetr()
   822  	}
   823  
   824  	x := new(Mpint)
   825  	l.val.U = x
   826  	x.SetInt64(int64(r))
   827  	x.Rune = true
   828  	if Debug['x'] != 0 {
   829  		fmt.Printf("lex: codepoint literal\n")
   830  	}
   831  	litbuf = "rune literal"
   832  	l.nlsemi = true
   833  	l.tok = LLITERAL
   834  }
   835  
   836  var internedStrings = map[string]string{}
   837  
   838  func internString(b []byte) string {
   839  	s, ok := internedStrings[string(b)] // string(b) here doesn't allocate
   840  	if !ok {
   841  		s = string(b)
   842  		internedStrings[s] = s
   843  	}
   844  	return s
   845  }
   846  
   847  // read and interpret syntax that looks like
   848  // //line parse.y:15
   849  // as a discontinuity in sequential line numbers.
   850  // the next line of input comes from parse.y:15
   851  func (l *lexer) getlinepragma() rune {
   852  	c := l.getr()
   853  	if c == 'g' { // check for //go: directive
   854  		cp := &lexbuf
   855  		cp.Reset()
   856  		cp.WriteByte('g') // already read
   857  		for {
   858  			c = l.getr()
   859  			if c == EOF || c >= utf8.RuneSelf {
   860  				return c
   861  			}
   862  			if c == '\n' {
   863  				break
   864  			}
   865  			cp.WriteByte(byte(c))
   866  		}
   867  		cp = nil
   868  
   869  		text := strings.TrimSuffix(lexbuf.String(), "\r")
   870  
   871  		if strings.HasPrefix(text, "go:cgo_") {
   872  			pragcgobuf += pragcgo(text)
   873  		}
   874  
   875  		verb := text
   876  		if i := strings.Index(text, " "); i >= 0 {
   877  			verb = verb[:i]
   878  		}
   879  
   880  		switch verb {
   881  		case "go:linkname":
   882  			if !imported_unsafe {
   883  				Yyerror("//go:linkname only allowed in Go files that import \"unsafe\"")
   884  			}
   885  			f := strings.Fields(text)
   886  			if len(f) != 3 {
   887  				Yyerror("usage: //go:linkname localname linkname")
   888  				break
   889  			}
   890  			Lookup(f[1]).Linkname = f[2]
   891  		case "go:nointerface":
   892  			if obj.Fieldtrack_enabled != 0 {
   893  				l.pragma |= Nointerface
   894  			}
   895  		case "go:noescape":
   896  			l.pragma |= Noescape
   897  		case "go:norace":
   898  			l.pragma |= Norace
   899  		case "go:nosplit":
   900  			l.pragma |= Nosplit
   901  		case "go:noinline":
   902  			l.pragma |= Noinline
   903  		case "go:systemstack":
   904  			if !compiling_runtime {
   905  				Yyerror("//go:systemstack only allowed in runtime")
   906  			}
   907  			l.pragma |= Systemstack
   908  		case "go:nowritebarrier":
   909  			if !compiling_runtime {
   910  				Yyerror("//go:nowritebarrier only allowed in runtime")
   911  			}
   912  			l.pragma |= Nowritebarrier
   913  		case "go:nowritebarrierrec":
   914  			if !compiling_runtime {
   915  				Yyerror("//go:nowritebarrierrec only allowed in runtime")
   916  			}
   917  			l.pragma |= Nowritebarrierrec | Nowritebarrier // implies Nowritebarrier
   918  		case "go:cgo_unsafe_args":
   919  			l.pragma |= CgoUnsafeArgs
   920  		case "go:uintptrescapes":
   921  			// For the next function declared in the file
   922  			// any uintptr arguments may be pointer values
   923  			// converted to uintptr. This directive
   924  			// ensures that the referenced allocated
   925  			// object, if any, is retained and not moved
   926  			// until the call completes, even though from
   927  			// the types alone it would appear that the
   928  			// object is no longer needed during the
   929  			// call. The conversion to uintptr must appear
   930  			// in the argument list.
   931  			// Used in syscall/dll_windows.go.
   932  			l.pragma |= UintptrEscapes
   933  		}
   934  		return c
   935  	}
   936  
   937  	// check for //line directive
   938  	if c != 'l' {
   939  		return c
   940  	}
   941  	for i := 1; i < 5; i++ {
   942  		c = l.getr()
   943  		if c != rune("line "[i]) {
   944  			return c
   945  		}
   946  	}
   947  
   948  	cp := &lexbuf
   949  	cp.Reset()
   950  	linep := 0
   951  	for {
   952  		c = l.getr()
   953  		if c == EOF {
   954  			return c
   955  		}
   956  		if c == '\n' {
   957  			break
   958  		}
   959  		if c == ' ' {
   960  			continue
   961  		}
   962  		if c == ':' {
   963  			linep = cp.Len() + 1
   964  		}
   965  		cp.WriteByte(byte(c))
   966  	}
   967  	cp = nil
   968  
   969  	if linep == 0 {
   970  		return c
   971  	}
   972  	text := strings.TrimSuffix(lexbuf.String(), "\r")
   973  	n, err := strconv.Atoi(text[linep:])
   974  	if err != nil {
   975  		return c // todo: make this an error instead? it is almost certainly a bug.
   976  	}
   977  	if n > 1e8 {
   978  		Yyerror("line number out of range")
   979  		errorexit()
   980  	}
   981  	if n <= 0 {
   982  		return c
   983  	}
   984  
   985  	linehistupdate(text[:linep-1], n)
   986  	return c
   987  }
   988  
   989  func pragcgo(text string) string {
   990  	f := pragmaFields(text)
   991  
   992  	verb := f[0][3:] // skip "go:"
   993  	switch verb {
   994  	case "cgo_export_static", "cgo_export_dynamic":
   995  		switch {
   996  		case len(f) == 2 && !isQuoted(f[1]):
   997  			local := plan9quote(f[1])
   998  			return fmt.Sprintln(verb, local)
   999  
  1000  		case len(f) == 3 && !isQuoted(f[1]) && !isQuoted(f[2]):
  1001  			local := plan9quote(f[1])
  1002  			remote := plan9quote(f[2])
  1003  			return fmt.Sprintln(verb, local, remote)
  1004  
  1005  		default:
  1006  			Yyerror(`usage: //go:%s local [remote]`, verb)
  1007  		}
  1008  	case "cgo_import_dynamic":
  1009  		switch {
  1010  		case len(f) == 2 && !isQuoted(f[1]):
  1011  			local := plan9quote(f[1])
  1012  			return fmt.Sprintln(verb, local)
  1013  
  1014  		case len(f) == 3 && !isQuoted(f[1]) && !isQuoted(f[2]):
  1015  			local := plan9quote(f[1])
  1016  			remote := plan9quote(f[2])
  1017  			return fmt.Sprintln(verb, local, remote)
  1018  
  1019  		case len(f) == 4 && !isQuoted(f[1]) && !isQuoted(f[2]) && isQuoted(f[3]):
  1020  			local := plan9quote(f[1])
  1021  			remote := plan9quote(f[2])
  1022  			library := plan9quote(strings.Trim(f[3], `"`))
  1023  			return fmt.Sprintln(verb, local, remote, library)
  1024  
  1025  		default:
  1026  			Yyerror(`usage: //go:cgo_import_dynamic local [remote ["library"]]`)
  1027  		}
  1028  	case "cgo_import_static":
  1029  		switch {
  1030  		case len(f) == 2 && !isQuoted(f[1]):
  1031  			local := plan9quote(f[1])
  1032  			return fmt.Sprintln(verb, local)
  1033  
  1034  		default:
  1035  			Yyerror(`usage: //go:cgo_import_static local`)
  1036  		}
  1037  	case "cgo_dynamic_linker":
  1038  		switch {
  1039  		case len(f) == 2 && isQuoted(f[1]):
  1040  			path := plan9quote(strings.Trim(f[1], `"`))
  1041  			return fmt.Sprintln(verb, path)
  1042  
  1043  		default:
  1044  			Yyerror(`usage: //go:cgo_dynamic_linker "path"`)
  1045  		}
  1046  	case "cgo_ldflag":
  1047  		switch {
  1048  		case len(f) == 2 && isQuoted(f[1]):
  1049  			arg := plan9quote(strings.Trim(f[1], `"`))
  1050  			return fmt.Sprintln(verb, arg)
  1051  
  1052  		default:
  1053  			Yyerror(`usage: //go:cgo_ldflag "arg"`)
  1054  		}
  1055  	}
  1056  	return ""
  1057  }
  1058  
  1059  // pragmaFields is similar to strings.FieldsFunc(s, isSpace)
  1060  // but does not split when inside double quoted regions and always
  1061  // splits before the start and after the end of a double quoted region.
  1062  // pragmaFields does not recognize escaped quotes. If a quote in s is not
  1063  // closed the part after the opening quote will not be returned as a field.
  1064  func pragmaFields(s string) []string {
  1065  	var a []string
  1066  	inQuote := false
  1067  	fieldStart := -1 // Set to -1 when looking for start of field.
  1068  	for i, c := range s {
  1069  		switch {
  1070  		case c == '"':
  1071  			if inQuote {
  1072  				inQuote = false
  1073  				a = append(a, s[fieldStart:i+1])
  1074  				fieldStart = -1
  1075  			} else {
  1076  				inQuote = true
  1077  				if fieldStart >= 0 {
  1078  					a = append(a, s[fieldStart:i])
  1079  				}
  1080  				fieldStart = i
  1081  			}
  1082  		case !inQuote && isSpace(c):
  1083  			if fieldStart >= 0 {
  1084  				a = append(a, s[fieldStart:i])
  1085  				fieldStart = -1
  1086  			}
  1087  		default:
  1088  			if fieldStart == -1 {
  1089  				fieldStart = i
  1090  			}
  1091  		}
  1092  	}
  1093  	if !inQuote && fieldStart >= 0 { // Last field might end at the end of the string.
  1094  		a = append(a, s[fieldStart:])
  1095  	}
  1096  	return a
  1097  }
  1098  
  1099  func (l *lexer) getr() rune {
  1100  redo:
  1101  	l.prevlineno = lexlineno
  1102  	r, w, err := l.bin.ReadRune()
  1103  	if err != nil {
  1104  		if err != io.EOF {
  1105  			Fatalf("io error: %v", err)
  1106  		}
  1107  		return -1
  1108  	}
  1109  	switch r {
  1110  	case 0:
  1111  		yyerrorl(lexlineno, "illegal NUL byte")
  1112  	case '\n':
  1113  		lexlineno++
  1114  	case utf8.RuneError:
  1115  		if w == 1 {
  1116  			yyerrorl(lexlineno, "illegal UTF-8 sequence")
  1117  		}
  1118  	case BOM:
  1119  		yyerrorl(lexlineno, "Unicode (UTF-8) BOM in middle of file")
  1120  		goto redo
  1121  	}
  1122  
  1123  	return r
  1124  }
  1125  
  1126  func (l *lexer) ungetr() {
  1127  	l.bin.UnreadRune()
  1128  	lexlineno = l.prevlineno
  1129  }
  1130  
  1131  // onechar lexes a single character within a rune or interpreted string literal,
  1132  // handling escape sequences as necessary.
  1133  func (l *lexer) onechar(quote rune) (r rune, b byte, ok bool) {
  1134  	c := l.getr()
  1135  	switch c {
  1136  	case EOF:
  1137  		Yyerror("eof in string")
  1138  		l.ungetr()
  1139  		return
  1140  
  1141  	case '\n':
  1142  		Yyerror("newline in string")
  1143  		l.ungetr()
  1144  		return
  1145  
  1146  	case '\\':
  1147  		break
  1148  
  1149  	case quote:
  1150  		return
  1151  
  1152  	default:
  1153  		return c, 0, true
  1154  	}
  1155  
  1156  	c = l.getr()
  1157  	switch c {
  1158  	case 'x':
  1159  		return 0, byte(l.hexchar(2)), true
  1160  
  1161  	case 'u':
  1162  		return l.unichar(4), 0, true
  1163  
  1164  	case 'U':
  1165  		return l.unichar(8), 0, true
  1166  
  1167  	case '0', '1', '2', '3', '4', '5', '6', '7':
  1168  		x := c - '0'
  1169  		for i := 2; i > 0; i-- {
  1170  			c = l.getr()
  1171  			if c >= '0' && c <= '7' {
  1172  				x = x*8 + c - '0'
  1173  				continue
  1174  			}
  1175  
  1176  			Yyerror("non-octal character in escape sequence: %c", c)
  1177  			l.ungetr()
  1178  		}
  1179  
  1180  		if x > 255 {
  1181  			Yyerror("octal escape value > 255: %d", x)
  1182  		}
  1183  
  1184  		return 0, byte(x), true
  1185  
  1186  	case 'a':
  1187  		c = '\a'
  1188  	case 'b':
  1189  		c = '\b'
  1190  	case 'f':
  1191  		c = '\f'
  1192  	case 'n':
  1193  		c = '\n'
  1194  	case 'r':
  1195  		c = '\r'
  1196  	case 't':
  1197  		c = '\t'
  1198  	case 'v':
  1199  		c = '\v'
  1200  	case '\\':
  1201  		c = '\\'
  1202  
  1203  	default:
  1204  		if c != quote {
  1205  			Yyerror("unknown escape sequence: %c", c)
  1206  		}
  1207  	}
  1208  
  1209  	return c, 0, true
  1210  }
  1211  
  1212  func (l *lexer) unichar(n int) rune {
  1213  	x := l.hexchar(n)
  1214  	if x > utf8.MaxRune || 0xd800 <= x && x < 0xe000 {
  1215  		Yyerror("invalid Unicode code point in escape sequence: %#x", x)
  1216  		x = utf8.RuneError
  1217  	}
  1218  	return rune(x)
  1219  }
  1220  
  1221  func (l *lexer) hexchar(n int) uint32 {
  1222  	var x uint32
  1223  
  1224  	for ; n > 0; n-- {
  1225  		var d uint32
  1226  		switch c := l.getr(); {
  1227  		case isDigit(c):
  1228  			d = uint32(c - '0')
  1229  		case 'a' <= c && c <= 'f':
  1230  			d = uint32(c - 'a' + 10)
  1231  		case 'A' <= c && c <= 'F':
  1232  			d = uint32(c - 'A' + 10)
  1233  		default:
  1234  			Yyerror("non-hex character in escape sequence: %c", c)
  1235  			l.ungetr()
  1236  			return x
  1237  		}
  1238  		x = x*16 + d
  1239  	}
  1240  
  1241  	return x
  1242  }