github.com/gocuntian/go@v0.0.0-20160610041250-fee02d270bf8/src/cmd/compile/internal/gc/lex.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gc
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"cmd/internal/obj"
    11  	"fmt"
    12  	"io"
    13  	"strconv"
    14  	"strings"
    15  	"unicode"
    16  	"unicode/utf8"
    17  )
    18  
    19  const (
    20  	EOF = -1
    21  	BOM = 0xFEFF
    22  )
    23  
    24  // lexlineno is the line number _after_ the most recently read rune.
    25  // In particular, it's advanced (or rewound) as newlines are read (or unread).
    26  var lexlineno int32
    27  
    28  // lineno is the line number at the start of the most recently lexed token.
    29  var lineno int32
    30  
    31  var lexbuf bytes.Buffer
    32  var strbuf bytes.Buffer
    33  var litbuf string // LLITERAL value for use in syntax error messages
    34  
    35  func isSpace(c rune) bool {
    36  	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
    37  }
    38  
    39  func isLetter(c rune) bool {
    40  	return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
    41  }
    42  
    43  func isDigit(c rune) bool {
    44  	return '0' <= c && c <= '9'
    45  }
    46  
    47  func isQuoted(s string) bool {
    48  	return len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"'
    49  }
    50  
    51  func plan9quote(s string) string {
    52  	if s == "" {
    53  		return "''"
    54  	}
    55  	for _, c := range s {
    56  		if c <= ' ' || c == '\'' {
    57  			return "'" + strings.Replace(s, "'", "''", -1) + "'"
    58  		}
    59  	}
    60  	return s
    61  }
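
// Illustrative behavior of plan9quote, derived from the loop above:
//
//	plan9quote("libc.so")    == `libc.so`     // nothing to quote
//	plan9quote("my lib.so")  == `'my lib.so'` // space forces quoting
//	plan9quote("it's")       == `'it''s'`     // embedded quote is doubled
//	plan9quote("")           == `''`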
    62  
    63  type Pragma uint16
    64  
    65  const (
    66  	Nointerface       Pragma = 1 << iota
    67  	Noescape                 // func parameters don't escape
    68  	Norace                   // func must not have race detector annotations
    69  	Nosplit                  // func should not execute on separate stack
    70  	Noinline                 // func should not be inlined
    71  	Systemstack              // func must run on system stack
    72  	Nowritebarrier           // emit compiler error instead of write barrier
    73  	Nowritebarrierrec        // error on write barrier in this or recursive callees
    74  	CgoUnsafeArgs            // treat a pointer to one arg as a pointer to them all
    75  )
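
// These bits are ORed into lexer.pragma by getlinepragma as //go: directives
// are read, and the parser consumes and resets them at the next declaration.
// As a rough, hypothetical example, a declaration preceded by
//
//	//go:nosplit
//	//go:noinline
//	func f()
//
// reaches the parser with the accumulated value Nosplit|Noinline.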
    76  
    77  type lexer struct {
    78  	// source
    79  	bin        *bufio.Reader
    80  	prevlineno int32 // line no. of most recently read character
    81  
    82  	nlsemi bool // if set, '\n' and EOF translate to ';'
    83  
    84  	// pragma flags
    85  	// accumulated by lexer; reset by parser
    86  	pragma Pragma
    87  
    88  	// current token
    89  	tok  int32
    90  	sym_ *Sym   // valid if tok == LNAME
    91  	val  Val    // valid if tok == LLITERAL
    92  	op   Op     // valid if tok == LOPER, LASOP, or LINCOP, or prec > 0
    93  	prec OpPrec // operator precedence; 0 if not a binary operator
    94  }
    95  
    96  type OpPrec int
    97  
    98  const (
    99  	// Precedences of binary operators (must be > 0).
   100  	PCOMM OpPrec = 1 + iota
   101  	POROR
   102  	PANDAND
   103  	PCMP
   104  	PADD
   105  	PMUL
   106  )
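
// For reference, the precedence each operator receives in (*lexer).next below:
//
//	PCOMM:   <-
//	POROR:   ||
//	PANDAND: &&
//	PCMP:    ==  !=  <  <=  >  >=
//	PADD:    +  -  |  ^
//	PMUL:    *  /  %  <<  >>  &  &^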
   107  
   108  const (
   109  	// The value of single-char tokens is just their character's Unicode value.
   110  	// They are all below utf8.RuneSelf. Shift other tokens up to avoid conflicts.
   111  
   112  	// names and literals
   113  	LNAME = utf8.RuneSelf + iota
   114  	LLITERAL
   115  
   116  	// operator-based operations
   117  	LOPER
   118  	LASOP
   119  	LINCOP
   120  
   121  	// miscellaneous
   122  	LCOLAS
   123  	LCOMM
   124  	LDDD
   125  
   126  	// keywords
   127  	LBREAK
   128  	LCASE
   129  	LCHAN
   130  	LCONST
   131  	LCONTINUE
   132  	LDEFAULT
   133  	LDEFER
   134  	LELSE
   135  	LFALL
   136  	LFOR
   137  	LFUNC
   138  	LGO
   139  	LGOTO
   140  	LIF
   141  	LIMPORT
   142  	LINTERFACE
   143  	LMAP
   144  	LPACKAGE
   145  	LRANGE
   146  	LRETURN
   147  	LSELECT
   148  	LSTRUCT
   149  	LSWITCH
   150  	LTYPE
   151  	LVAR
   152  
   153  	LIGNORE
   154  )
   155  
   156  var lexn = map[rune]string{
   157  	LNAME:    "NAME",
   158  	LLITERAL: "LITERAL",
   159  
   160  	LOPER:  "OPER",
   161  	LASOP:  "ASOP",
   162  	LINCOP: "INCOP",
   163  
   164  	LCOLAS: "COLAS",
   165  	LCOMM:  "COMM",
   166  	LDDD:   "DDD",
   167  
   168  	LBREAK:     "BREAK",
   169  	LCASE:      "CASE",
   170  	LCHAN:      "CHAN",
   171  	LCONST:     "CONST",
   172  	LCONTINUE:  "CONTINUE",
   173  	LDEFAULT:   "DEFAULT",
   174  	LDEFER:     "DEFER",
   175  	LELSE:      "ELSE",
   176  	LFALL:      "FALL",
   177  	LFOR:       "FOR",
   178  	LFUNC:      "FUNC",
   179  	LGO:        "GO",
   180  	LGOTO:      "GOTO",
   181  	LIF:        "IF",
   182  	LIMPORT:    "IMPORT",
   183  	LINTERFACE: "INTERFACE",
   184  	LMAP:       "MAP",
   185  	LPACKAGE:   "PACKAGE",
   186  	LRANGE:     "RANGE",
   187  	LRETURN:    "RETURN",
   188  	LSELECT:    "SELECT",
   189  	LSTRUCT:    "STRUCT",
   190  	LSWITCH:    "SWITCH",
   191  	LTYPE:      "TYPE",
   192  	LVAR:       "VAR",
   193  
    194  	// LIGNORE never escapes lexer.next
   195  }
   196  
   197  func lexname(lex rune) string {
   198  	if s, ok := lexn[lex]; ok {
   199  		return s
   200  	}
   201  	return fmt.Sprintf("LEX-%d", lex)
   202  }
   203  
   204  func (l *lexer) next() {
   205  	nlsemi := l.nlsemi
   206  	l.nlsemi = false
   207  	l.prec = 0
   208  
   209  l0:
   210  	// skip white space
   211  	c := l.getr()
   212  	for isSpace(c) {
   213  		if c == '\n' && nlsemi {
   214  			if Debug['x'] != 0 {
   215  				fmt.Printf("lex: implicit semi\n")
   216  			}
   217  			// Insert implicit semicolon on previous line,
   218  			// before the newline character.
   219  			lineno = lexlineno - 1
   220  			l.tok = ';'
   221  			return
   222  		}
   223  		c = l.getr()
   224  	}
   225  
   226  	// start of token
   227  	lineno = lexlineno
   228  
   229  	// identifiers and keywords
   230  	// (for better error messages consume all chars >= utf8.RuneSelf for identifiers)
   231  	if isLetter(c) || c >= utf8.RuneSelf {
   232  		l.ident(c)
   233  		if l.tok == LIGNORE {
   234  			goto l0
   235  		}
   236  		return
   237  	}
   238  	// c < utf8.RuneSelf
   239  
   240  	var c1 rune
   241  	var op Op
   242  	var prec OpPrec
   243  
   244  	switch c {
   245  	case EOF:
   246  		l.ungetr()
   247  		// Treat EOF as "end of line" for the purposes
   248  		// of inserting a semicolon.
   249  		if nlsemi {
   250  			if Debug['x'] != 0 {
   251  				fmt.Printf("lex: implicit semi\n")
   252  			}
   253  			l.tok = ';'
   254  			return
   255  		}
   256  		l.tok = -1
   257  		return
   258  
   259  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
   260  		l.number(c)
   261  		return
   262  
   263  	case '.':
   264  		c1 = l.getr()
   265  		if isDigit(c1) {
   266  			l.ungetr()
   267  			l.number('.')
   268  			return
   269  		}
   270  
   271  		if c1 == '.' {
   272  			p, err := l.bin.Peek(1)
   273  			if err == nil && p[0] == '.' {
   274  				l.getr()
   275  				c = LDDD
   276  				goto lx
   277  			}
   278  
   279  			l.ungetr()
   280  			c1 = '.'
   281  		}
   282  
   283  	case '"':
   284  		l.stdString()
   285  		return
   286  
   287  	case '`':
   288  		l.rawString()
   289  		return
   290  
   291  	case '\'':
   292  		l.rune()
   293  		return
   294  
   295  	case '/':
   296  		c1 = l.getr()
   297  		if c1 == '*' {
   298  			c = l.getr()
   299  			for {
   300  				if c == '*' {
   301  					c = l.getr()
   302  					if c == '/' {
   303  						break
   304  					}
   305  					continue
   306  				}
   307  				if c == EOF {
   308  					Yyerror("eof in comment")
   309  					errorexit()
   310  				}
   311  				c = l.getr()
   312  			}
   313  
   314  			// A comment containing newlines acts like a newline.
   315  			if lexlineno > lineno && nlsemi {
   316  				if Debug['x'] != 0 {
   317  					fmt.Printf("lex: implicit semi\n")
   318  				}
   319  				l.tok = ';'
   320  				return
   321  			}
   322  			goto l0
   323  		}
   324  
   325  		if c1 == '/' {
   326  			c = l.getlinepragma()
   327  			for {
   328  				if c == '\n' || c == EOF {
   329  					l.ungetr()
   330  					goto l0
   331  				}
   332  
   333  				c = l.getr()
   334  			}
   335  		}
   336  
   337  		op = ODIV
   338  		prec = PMUL
   339  		goto binop1
   340  
   341  	case ':':
   342  		c1 = l.getr()
   343  		if c1 == '=' {
   344  			c = LCOLAS
   345  			goto lx
   346  		}
   347  
   348  	case '*':
   349  		op = OMUL
   350  		prec = PMUL
   351  		goto binop
   352  
   353  	case '%':
   354  		op = OMOD
   355  		prec = PMUL
   356  		goto binop
   357  
   358  	case '+':
   359  		op = OADD
   360  		goto incop
   361  
   362  	case '-':
   363  		op = OSUB
   364  		goto incop
   365  
   366  	case '>':
   367  		c = LOPER
   368  		c1 = l.getr()
   369  		if c1 == '>' {
   370  			op = ORSH
   371  			prec = PMUL
   372  			goto binop
   373  		}
   374  
   375  		l.prec = PCMP
   376  		if c1 == '=' {
   377  			l.op = OGE
   378  			goto lx
   379  		}
   380  		l.op = OGT
   381  
   382  	case '<':
   383  		c = LOPER
   384  		c1 = l.getr()
   385  		if c1 == '<' {
   386  			op = OLSH
   387  			prec = PMUL
   388  			goto binop
   389  		}
   390  
   391  		if c1 == '-' {
   392  			c = LCOMM
   393  			// Not a binary operator, but parsed as one
   394  			// so we can give a good error message when used
   395  			// in an expression context.
   396  			l.prec = PCOMM
   397  			l.op = OSEND
   398  			goto lx
   399  		}
   400  
   401  		l.prec = PCMP
   402  		if c1 == '=' {
   403  			l.op = OLE
   404  			goto lx
   405  		}
   406  		l.op = OLT
   407  
   408  	case '=':
   409  		c1 = l.getr()
   410  		if c1 == '=' {
   411  			c = LOPER
   412  			l.prec = PCMP
   413  			l.op = OEQ
   414  			goto lx
   415  		}
   416  
   417  	case '!':
   418  		c1 = l.getr()
   419  		if c1 == '=' {
   420  			c = LOPER
   421  			l.prec = PCMP
   422  			l.op = ONE
   423  			goto lx
   424  		}
   425  
   426  	case '&':
   427  		c1 = l.getr()
   428  		if c1 == '&' {
   429  			c = LOPER
   430  			l.prec = PANDAND
   431  			l.op = OANDAND
   432  			goto lx
   433  		}
   434  
   435  		if c1 == '^' {
   436  			c = LOPER
   437  			op = OANDNOT
   438  			prec = PMUL
   439  			goto binop
   440  		}
   441  
   442  		op = OAND
   443  		prec = PMUL
   444  		goto binop1
   445  
   446  	case '|':
   447  		c1 = l.getr()
   448  		if c1 == '|' {
   449  			c = LOPER
   450  			l.prec = POROR
   451  			l.op = OOROR
   452  			goto lx
   453  		}
   454  
   455  		op = OOR
   456  		prec = PADD
   457  		goto binop1
   458  
   459  	case '^':
   460  		op = OXOR
   461  		prec = PADD
   462  		goto binop
   463  
   464  	case '(', '[', '{', ',', ';':
   465  		goto lx
   466  
   467  	case ')', ']', '}':
   468  		l.nlsemi = true
   469  		goto lx
   470  
   471  	case '#', '$', '?', '@', '\\':
   472  		if importpkg != nil {
   473  			goto lx
   474  		}
   475  		fallthrough
   476  
   477  	default:
   478  		// anything else is illegal
   479  		Yyerror("syntax error: illegal character %#U", c)
   480  		goto l0
   481  	}
   482  
   483  	l.ungetr()
   484  
   485  lx:
   486  	if Debug['x'] != 0 {
   487  		if c >= utf8.RuneSelf {
   488  			fmt.Printf("%v lex: TOKEN %s\n", linestr(lineno), lexname(c))
   489  		} else {
   490  			fmt.Printf("%v lex: TOKEN '%c'\n", linestr(lineno), c)
   491  		}
   492  	}
   493  
   494  	l.tok = c
   495  	return
   496  
   497  incop:
   498  	c1 = l.getr()
   499  	if c1 == c {
   500  		l.nlsemi = true
   501  		l.op = op
   502  		c = LINCOP
   503  		goto lx
   504  	}
   505  	prec = PADD
   506  	goto binop1
   507  
   508  binop:
   509  	c1 = l.getr()
   510  binop1:
   511  	if c1 != '=' {
   512  		l.ungetr()
   513  		l.op = op
   514  		l.prec = prec
   515  		goto lx
   516  	}
   517  
   518  	l.op = op
   519  	if Debug['x'] != 0 {
   520  		fmt.Printf("lex: TOKEN ASOP %s=\n", goopnames[op])
   521  	}
   522  	l.tok = LASOP
   523  }
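
// A small illustration of next in action: lexing the input "a += 1\n"
// produces, in order,
//
//	LNAME    (sym "a")
//	LASOP    (op OADD, for "+=")
//	LLITERAL (integer 1)
//	';'      (implicit semicolon inserted at the newline, since nlsemi was set)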
   524  
   525  func (l *lexer) ident(c rune) {
   526  	cp := &lexbuf
   527  	cp.Reset()
   528  
   529  	// accelerate common case (7bit ASCII)
   530  	for isLetter(c) || isDigit(c) {
   531  		cp.WriteByte(byte(c))
   532  		c = l.getr()
   533  	}
   534  
   535  	// general case
   536  	for {
   537  		if c >= utf8.RuneSelf {
   538  			if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || importpkg != nil && c == 0xb7 {
   539  				if cp.Len() == 0 && unicode.IsDigit(c) {
   540  					Yyerror("identifier cannot begin with digit %#U", c)
   541  				}
   542  			} else {
   543  				Yyerror("invalid identifier character %#U", c)
   544  			}
   545  			cp.WriteRune(c)
   546  		} else if isLetter(c) || isDigit(c) {
   547  			cp.WriteByte(byte(c))
   548  		} else {
   549  			break
   550  		}
   551  		c = l.getr()
   552  	}
   553  
   554  	cp = nil
   555  	l.ungetr()
   556  
   557  	name := lexbuf.Bytes()
   558  
   559  	if len(name) >= 2 {
   560  		if tok, ok := keywords[string(name)]; ok {
   561  			if Debug['x'] != 0 {
   562  				fmt.Printf("lex: %s\n", lexname(tok))
   563  			}
   564  			switch tok {
   565  			case LBREAK, LCONTINUE, LFALL, LRETURN:
   566  				l.nlsemi = true
   567  			}
   568  			l.tok = tok
   569  			return
   570  		}
   571  	}
   572  
   573  	s := LookupBytes(name)
   574  	if Debug['x'] != 0 {
   575  		fmt.Printf("lex: ident %s\n", s)
   576  	}
   577  	l.sym_ = s
   578  	l.nlsemi = true
   579  	l.tok = LNAME
   580  }
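
// Sketch of ident's outcomes: "fallthrough" hits the keyword table and yields
// LFALL (with nlsemi set); "x" is too short for the keyword lookup and becomes
// an LNAME, as does a non-ASCII identifier such as "π"; a name starting with a
// Unicode digit is reported as an error but still consumed.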
   581  
   582  var keywords = map[string]int32{
   583  	"break":       LBREAK,
   584  	"case":        LCASE,
   585  	"chan":        LCHAN,
   586  	"const":       LCONST,
   587  	"continue":    LCONTINUE,
   588  	"default":     LDEFAULT,
   589  	"defer":       LDEFER,
   590  	"else":        LELSE,
   591  	"fallthrough": LFALL,
   592  	"for":         LFOR,
   593  	"func":        LFUNC,
   594  	"go":          LGO,
   595  	"goto":        LGOTO,
   596  	"if":          LIF,
   597  	"import":      LIMPORT,
   598  	"interface":   LINTERFACE,
   599  	"map":         LMAP,
   600  	"package":     LPACKAGE,
   601  	"range":       LRANGE,
   602  	"return":      LRETURN,
   603  	"select":      LSELECT,
   604  	"struct":      LSTRUCT,
   605  	"switch":      LSWITCH,
   606  	"type":        LTYPE,
   607  	"var":         LVAR,
   608  
   609  	// 💩
   610  	"notwithstanding":      LIGNORE,
   611  	"thetruthofthematter":  LIGNORE,
   612  	"despiteallobjections": LIGNORE,
   613  	"whereas":              LIGNORE,
   614  	"insofaras":            LIGNORE,
   615  }
   616  
   617  func (l *lexer) number(c rune) {
   618  	cp := &lexbuf
   619  	cp.Reset()
   620  
   621  	// parse mantissa before decimal point or exponent
   622  	isInt := false
   623  	malformedOctal := false
   624  	if c != '.' {
   625  		if c != '0' {
   626  			// decimal or float
   627  			for isDigit(c) {
   628  				cp.WriteByte(byte(c))
   629  				c = l.getr()
   630  			}
   631  
   632  		} else {
    633  			// c == '0'
   634  			cp.WriteByte('0')
   635  			c = l.getr()
   636  			if c == 'x' || c == 'X' {
   637  				isInt = true // must be int
   638  				cp.WriteByte(byte(c))
   639  				c = l.getr()
   640  				for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
   641  					cp.WriteByte(byte(c))
   642  					c = l.getr()
   643  				}
   644  				if lexbuf.Len() == 2 {
   645  					Yyerror("malformed hex constant")
   646  				}
   647  			} else {
   648  				// decimal 0, octal, or float
   649  				for isDigit(c) {
   650  					if c > '7' {
   651  						malformedOctal = true
   652  					}
   653  					cp.WriteByte(byte(c))
   654  					c = l.getr()
   655  				}
   656  			}
   657  		}
   658  	}
   659  
   660  	// unless we have a hex number, parse fractional part or exponent, if any
   661  	var str string
   662  	if !isInt {
   663  		isInt = true // assume int unless proven otherwise
   664  
   665  		// fraction
   666  		if c == '.' {
   667  			isInt = false
   668  			cp.WriteByte('.')
   669  			c = l.getr()
   670  			for isDigit(c) {
   671  				cp.WriteByte(byte(c))
   672  				c = l.getr()
   673  			}
   674  			// Falling through to exponent parsing here permits invalid
   675  			// floating-point numbers with fractional mantissa and base-2
   676  			// (p or P) exponent. We don't care because base-2 exponents
   677  			// can only show up in machine-generated textual export data
   678  			// which will use correct formatting.
   679  		}
   680  
   681  		// exponent
   682  		// base-2 exponent (p or P) is only allowed in export data (see #9036)
   683  		// TODO(gri) Once we switch to binary import data, importpkg will
   684  		// always be nil in this function. Simplify the code accordingly.
   685  		if c == 'e' || c == 'E' || importpkg != nil && (c == 'p' || c == 'P') {
   686  			isInt = false
   687  			cp.WriteByte(byte(c))
   688  			c = l.getr()
   689  			if c == '+' || c == '-' {
   690  				cp.WriteByte(byte(c))
   691  				c = l.getr()
   692  			}
   693  			if !isDigit(c) {
   694  				Yyerror("malformed floating point constant exponent")
   695  			}
   696  			for isDigit(c) {
   697  				cp.WriteByte(byte(c))
   698  				c = l.getr()
   699  			}
   700  		}
   701  
   702  		// imaginary constant
   703  		if c == 'i' {
   704  			str = lexbuf.String()
   705  			x := new(Mpcplx)
   706  			x.Real.SetFloat64(0.0)
   707  			x.Imag.SetString(str)
   708  			if x.Imag.Val.IsInf() {
   709  				Yyerror("overflow in imaginary constant")
   710  				x.Imag.SetFloat64(0.0)
   711  			}
   712  			l.val.U = x
   713  
   714  			if Debug['x'] != 0 {
   715  				fmt.Printf("lex: imaginary literal\n")
   716  			}
   717  			goto done
   718  		}
   719  	}
   720  
   721  	l.ungetr()
   722  
   723  	if isInt {
   724  		if malformedOctal {
   725  			Yyerror("malformed octal constant")
   726  		}
   727  
   728  		str = lexbuf.String()
   729  		x := new(Mpint)
   730  		x.SetString(str)
   731  		if x.Ovf {
   732  			Yyerror("overflow in constant")
   733  			x.SetInt64(0)
   734  		}
   735  		l.val.U = x
   736  
   737  		if Debug['x'] != 0 {
   738  			fmt.Printf("lex: integer literal\n")
   739  		}
   740  
   741  	} else { // float
   742  
   743  		str = lexbuf.String()
   744  		x := newMpflt()
   745  		x.SetString(str)
   746  		if x.Val.IsInf() {
   747  			Yyerror("overflow in float constant")
   748  			x.SetFloat64(0.0)
   749  		}
   750  		l.val.U = x
   751  
   752  		if Debug['x'] != 0 {
   753  			fmt.Printf("lex: floating literal\n")
   754  		}
   755  	}
   756  
   757  done:
   758  	litbuf = "" // lazily initialized in (*parser).syntax_error
   759  	l.nlsemi = true
   760  	l.tok = LLITERAL
   761  }
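
// A few examples of what number accepts, traced from the code above:
//
//	"42"    -> integer literal (Mpint)
//	"0x1F"  -> hex integer
//	"0755"  -> octal integer
//	"089"   -> "malformed octal constant" (digit > 7 in an octal int)
//	"089.5" -> valid float: the '.' makes it a float, so the octal check is skipped
//	"1.5e3" -> float literal (Mpflt)
//	"2i"    -> imaginary literal (Mpcplx)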
   762  
   763  func (l *lexer) stdString() {
   764  	lexbuf.Reset()
   765  	lexbuf.WriteString(`"<string>"`)
   766  
   767  	cp := &strbuf
   768  	cp.Reset()
   769  
   770  	for {
   771  		r, b, ok := l.onechar('"')
   772  		if !ok {
   773  			break
   774  		}
   775  		if r == 0 {
   776  			cp.WriteByte(b)
   777  		} else {
   778  			cp.WriteRune(r)
   779  		}
   780  	}
   781  
   782  	l.val.U = internString(cp.Bytes())
   783  	if Debug['x'] != 0 {
   784  		fmt.Printf("lex: string literal\n")
   785  	}
   786  	litbuf = "string literal"
   787  	l.nlsemi = true
   788  	l.tok = LLITERAL
   789  }
   790  
   791  func (l *lexer) rawString() {
   792  	lexbuf.Reset()
   793  	lexbuf.WriteString("`<string>`")
   794  
   795  	cp := &strbuf
   796  	cp.Reset()
   797  
   798  	for {
   799  		c := l.getr()
   800  		if c == '\r' {
   801  			continue
   802  		}
   803  		if c == EOF {
   804  			Yyerror("eof in string")
   805  			break
   806  		}
   807  		if c == '`' {
   808  			break
   809  		}
   810  		cp.WriteRune(c)
   811  	}
   812  
   813  	l.val.U = internString(cp.Bytes())
   814  	if Debug['x'] != 0 {
   815  		fmt.Printf("lex: string literal\n")
   816  	}
   817  	litbuf = "string literal"
   818  	l.nlsemi = true
   819  	l.tok = LLITERAL
   820  }
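
// Unlike stdString, rawString performs no escape processing: in `a\nb` the
// backslash and 'n' are kept as two literal characters. Carriage returns are
// dropped, so a raw string spanning CRLF-terminated lines contains only '\n'.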
   821  
   822  func (l *lexer) rune() {
   823  	r, b, ok := l.onechar('\'')
   824  	if !ok {
   825  		Yyerror("empty character literal or unescaped ' in character literal")
   826  		r = '\''
   827  	}
   828  	if r == 0 {
   829  		r = rune(b)
   830  	}
   831  
   832  	if c := l.getr(); c != '\'' {
   833  		Yyerror("missing '")
   834  		l.ungetr()
   835  	}
   836  
   837  	x := new(Mpint)
   838  	l.val.U = x
   839  	x.SetInt64(int64(r))
   840  	x.Rune = true
   841  	if Debug['x'] != 0 {
   842  		fmt.Printf("lex: codepoint literal\n")
   843  	}
   844  	litbuf = "rune literal"
   845  	l.nlsemi = true
   846  	l.tok = LLITERAL
   847  }
   848  
   849  var internedStrings = map[string]string{}
   850  
   851  func internString(b []byte) string {
   852  	s, ok := internedStrings[string(b)] // string(b) here doesn't allocate
   853  	if !ok {
   854  		s = string(b)
   855  		internedStrings[s] = s
   856  	}
   857  	return s
   858  }
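
// The lookup above relies on the fact that a []byte converted to string purely
// for map indexing does not allocate. A minimal sketch of the intended effect:
//
//	b1 := []byte("runtime")
//	b2 := []byte("runtime")
//	s1 := internString(b1)
//	s2 := internString(b2)
//	// s1 == s2, and only the first call copied the bytes into a new string.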
   859  
    860  // getlinepragma reads and interprets syntax that looks like
    861  // //line parse.y:15
    862  // as a discontinuity in sequential line numbers.
    863  // The next line of input comes from parse.y:15.
   864  func (l *lexer) getlinepragma() rune {
   865  	c := l.getr()
   866  	if c == 'g' { // check for //go: directive
   867  		cp := &lexbuf
   868  		cp.Reset()
   869  		cp.WriteByte('g') // already read
   870  		for {
   871  			c = l.getr()
   872  			if c == EOF || c >= utf8.RuneSelf {
   873  				return c
   874  			}
   875  			if c == '\n' {
   876  				break
   877  			}
   878  			cp.WriteByte(byte(c))
   879  		}
   880  		cp = nil
   881  
   882  		text := strings.TrimSuffix(lexbuf.String(), "\r")
   883  
   884  		if strings.HasPrefix(text, "go:cgo_") {
   885  			pragcgobuf += pragcgo(text)
   886  		}
   887  
   888  		verb := text
   889  		if i := strings.Index(text, " "); i >= 0 {
   890  			verb = verb[:i]
   891  		}
   892  
   893  		switch verb {
   894  		case "go:linkname":
   895  			if !imported_unsafe {
   896  				Yyerror("//go:linkname only allowed in Go files that import \"unsafe\"")
   897  			}
   898  			f := strings.Fields(text)
   899  			if len(f) != 3 {
   900  				Yyerror("usage: //go:linkname localname linkname")
   901  				break
   902  			}
   903  			Lookup(f[1]).Linkname = f[2]
   904  		case "go:nointerface":
   905  			if obj.Fieldtrack_enabled != 0 {
   906  				l.pragma |= Nointerface
   907  			}
   908  		case "go:noescape":
   909  			l.pragma |= Noescape
   910  		case "go:norace":
   911  			l.pragma |= Norace
   912  		case "go:nosplit":
   913  			l.pragma |= Nosplit
   914  		case "go:noinline":
   915  			l.pragma |= Noinline
   916  		case "go:systemstack":
   917  			if !compiling_runtime {
   918  				Yyerror("//go:systemstack only allowed in runtime")
   919  			}
   920  			l.pragma |= Systemstack
   921  		case "go:nowritebarrier":
   922  			if !compiling_runtime {
   923  				Yyerror("//go:nowritebarrier only allowed in runtime")
   924  			}
   925  			l.pragma |= Nowritebarrier
   926  		case "go:nowritebarrierrec":
   927  			if !compiling_runtime {
   928  				Yyerror("//go:nowritebarrierrec only allowed in runtime")
   929  			}
   930  			l.pragma |= Nowritebarrierrec | Nowritebarrier // implies Nowritebarrier
   931  		case "go:cgo_unsafe_args":
   932  			l.pragma |= CgoUnsafeArgs
   933  		}
   934  		return c
   935  	}
   936  
   937  	// check for //line directive
   938  	if c != 'l' {
   939  		return c
   940  	}
   941  	for i := 1; i < 5; i++ {
   942  		c = l.getr()
   943  		if c != rune("line "[i]) {
   944  			return c
   945  		}
   946  	}
   947  
   948  	cp := &lexbuf
   949  	cp.Reset()
   950  	linep := 0
   951  	for {
   952  		c = l.getr()
   953  		if c == EOF {
   954  			return c
   955  		}
   956  		if c == '\n' {
   957  			break
   958  		}
   959  		if c == ' ' {
   960  			continue
   961  		}
   962  		if c == ':' {
   963  			linep = cp.Len() + 1
   964  		}
   965  		cp.WriteByte(byte(c))
   966  	}
   967  	cp = nil
   968  
   969  	if linep == 0 {
   970  		return c
   971  	}
   972  	text := strings.TrimSuffix(lexbuf.String(), "\r")
   973  	n, err := strconv.Atoi(text[linep:])
   974  	if err != nil {
   975  		return c // todo: make this an error instead? it is almost certainly a bug.
   976  	}
   977  	if n > 1e8 {
   978  		Yyerror("line number out of range")
   979  		errorexit()
   980  	}
   981  	if n <= 0 {
   982  		return c
   983  	}
   984  
   985  	linehistupdate(text[:linep-1], n)
   986  	return c
   987  }
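
// Example of the //line form handled above: after the comment
//
//	//line parse.y:15
//
// linehistupdate records that the next input line should be reported as
// parse.y:15. Spaces are dropped while scanning, and linep tracks the last
// ':' seen, so a path such as c:/foo.y:10 still splits at the final colon.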
   988  
   989  func pragcgo(text string) string {
   990  	f := pragmaFields(text)
   991  
   992  	verb := f[0][3:] // skip "go:"
   993  	switch verb {
   994  	case "cgo_export_static", "cgo_export_dynamic":
   995  		switch {
   996  		case len(f) == 2 && !isQuoted(f[1]):
   997  			local := plan9quote(f[1])
   998  			return fmt.Sprintln(verb, local)
   999  
  1000  		case len(f) == 3 && !isQuoted(f[1]) && !isQuoted(f[2]):
  1001  			local := plan9quote(f[1])
  1002  			remote := plan9quote(f[2])
  1003  			return fmt.Sprintln(verb, local, remote)
  1004  
  1005  		default:
  1006  			Yyerror(`usage: //go:%s local [remote]`, verb)
  1007  		}
  1008  	case "cgo_import_dynamic":
  1009  		switch {
  1010  		case len(f) == 2 && !isQuoted(f[1]):
  1011  			local := plan9quote(f[1])
  1012  			return fmt.Sprintln(verb, local)
  1013  
  1014  		case len(f) == 3 && !isQuoted(f[1]) && !isQuoted(f[2]):
  1015  			local := plan9quote(f[1])
  1016  			remote := plan9quote(f[2])
  1017  			return fmt.Sprintln(verb, local, remote)
  1018  
  1019  		case len(f) == 4 && !isQuoted(f[1]) && !isQuoted(f[2]) && isQuoted(f[3]):
  1020  			local := plan9quote(f[1])
  1021  			remote := plan9quote(f[2])
  1022  			library := plan9quote(strings.Trim(f[3], `"`))
  1023  			return fmt.Sprintln(verb, local, remote, library)
  1024  
  1025  		default:
  1026  			Yyerror(`usage: //go:cgo_import_dynamic local [remote ["library"]]`)
  1027  		}
  1028  	case "cgo_import_static":
  1029  		switch {
  1030  		case len(f) == 2 && !isQuoted(f[1]):
  1031  			local := plan9quote(f[1])
  1032  			return fmt.Sprintln(verb, local)
  1033  
  1034  		default:
  1035  			Yyerror(`usage: //go:cgo_import_static local`)
  1036  		}
  1037  	case "cgo_dynamic_linker":
  1038  		switch {
  1039  		case len(f) == 2 && isQuoted(f[1]):
  1040  			path := plan9quote(strings.Trim(f[1], `"`))
  1041  			return fmt.Sprintln(verb, path)
  1042  
  1043  		default:
  1044  			Yyerror(`usage: //go:cgo_dynamic_linker "path"`)
  1045  		}
  1046  	case "cgo_ldflag":
  1047  		switch {
  1048  		case len(f) == 2 && isQuoted(f[1]):
  1049  			arg := plan9quote(strings.Trim(f[1], `"`))
  1050  			return fmt.Sprintln(verb, arg)
  1051  
  1052  		default:
  1053  			Yyerror(`usage: //go:cgo_ldflag "arg"`)
  1054  		}
  1055  	}
  1056  	return ""
  1057  }
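
// For instance, following the cases above,
//
//	pragcgo(`go:cgo_import_dynamic puts puts#GLIBC_2.2.5 "libc.so.6"`)
//
// returns "cgo_import_dynamic puts puts#GLIBC_2.2.5 libc.so.6\n", which
// getlinepragma appends to pragcgobuf.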
  1058  
  1059  // pragmaFields is similar to strings.FieldsFunc(s, isSpace)
  1060  // but does not split when inside double quoted regions and always
  1061  // splits before the start and after the end of a double quoted region.
  1062  // pragmaFields does not recognize escaped quotes. If a quote in s is not
   1063  // closed, the part after the opening quote will not be returned as a field.
  1064  func pragmaFields(s string) []string {
  1065  	var a []string
  1066  	inQuote := false
  1067  	fieldStart := -1 // Set to -1 when looking for start of field.
  1068  	for i, c := range s {
  1069  		switch {
  1070  		case c == '"':
  1071  			if inQuote {
  1072  				inQuote = false
  1073  				a = append(a, s[fieldStart:i+1])
  1074  				fieldStart = -1
  1075  			} else {
  1076  				inQuote = true
  1077  				if fieldStart >= 0 {
  1078  					a = append(a, s[fieldStart:i])
  1079  				}
  1080  				fieldStart = i
  1081  			}
  1082  		case !inQuote && isSpace(c):
  1083  			if fieldStart >= 0 {
  1084  				a = append(a, s[fieldStart:i])
  1085  				fieldStart = -1
  1086  			}
  1087  		default:
  1088  			if fieldStart == -1 {
  1089  				fieldStart = i
  1090  			}
  1091  		}
  1092  	}
  1093  	if !inQuote && fieldStart >= 0 { // Last field might end at the end of the string.
  1094  		a = append(a, s[fieldStart:])
  1095  	}
  1096  	return a
  1097  }
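
// Example of the splitting behavior described above:
//
//	pragmaFields(`go:cgo_import_dynamic puts puts#GLIBC_2.2.5 "libc.so.6"`)
//
// yields the four fields go:cgo_import_dynamic, puts, puts#GLIBC_2.2.5 and
// "libc.so.6" (quotes retained), while an unclosed quote would cause the
// trailing partial field to be dropped.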
  1098  
  1099  func (l *lexer) getr() rune {
  1100  redo:
  1101  	l.prevlineno = lexlineno
  1102  	r, w, err := l.bin.ReadRune()
  1103  	if err != nil {
  1104  		if err != io.EOF {
  1105  			Fatalf("io error: %v", err)
  1106  		}
  1107  		return -1
  1108  	}
  1109  	switch r {
  1110  	case 0:
  1111  		yyerrorl(lexlineno, "illegal NUL byte")
  1112  	case '\n':
  1113  		if importpkg == nil {
  1114  			lexlineno++
  1115  		}
  1116  	case utf8.RuneError:
  1117  		if w == 1 {
  1118  			yyerrorl(lexlineno, "illegal UTF-8 sequence")
  1119  		}
  1120  	case BOM:
  1121  		yyerrorl(lexlineno, "Unicode (UTF-8) BOM in middle of file")
  1122  		goto redo
  1123  	}
  1124  
  1125  	return r
  1126  }
  1127  
  1128  func (l *lexer) ungetr() {
  1129  	l.bin.UnreadRune()
  1130  	lexlineno = l.prevlineno
  1131  }
  1132  
  1133  // onechar lexes a single character within a rune or interpreted string literal,
  1134  // handling escape sequences as necessary.
  1135  func (l *lexer) onechar(quote rune) (r rune, b byte, ok bool) {
  1136  	c := l.getr()
  1137  	switch c {
  1138  	case EOF:
  1139  		Yyerror("eof in string")
  1140  		l.ungetr()
  1141  		return
  1142  
  1143  	case '\n':
  1144  		Yyerror("newline in string")
  1145  		l.ungetr()
  1146  		return
  1147  
  1148  	case '\\':
  1149  		break
  1150  
  1151  	case quote:
  1152  		return
  1153  
  1154  	default:
  1155  		return c, 0, true
  1156  	}
  1157  
  1158  	c = l.getr()
  1159  	switch c {
  1160  	case 'x':
  1161  		return 0, byte(l.hexchar(2)), true
  1162  
  1163  	case 'u':
  1164  		return l.unichar(4), 0, true
  1165  
  1166  	case 'U':
  1167  		return l.unichar(8), 0, true
  1168  
  1169  	case '0', '1', '2', '3', '4', '5', '6', '7':
  1170  		x := c - '0'
  1171  		for i := 2; i > 0; i-- {
  1172  			c = l.getr()
  1173  			if c >= '0' && c <= '7' {
  1174  				x = x*8 + c - '0'
  1175  				continue
  1176  			}
  1177  
  1178  			Yyerror("non-octal character in escape sequence: %c", c)
  1179  			l.ungetr()
  1180  		}
  1181  
  1182  		if x > 255 {
  1183  			Yyerror("octal escape value > 255: %d", x)
  1184  		}
  1185  
  1186  		return 0, byte(x), true
  1187  
  1188  	case 'a':
  1189  		c = '\a'
  1190  	case 'b':
  1191  		c = '\b'
  1192  	case 'f':
  1193  		c = '\f'
  1194  	case 'n':
  1195  		c = '\n'
  1196  	case 'r':
  1197  		c = '\r'
  1198  	case 't':
  1199  		c = '\t'
  1200  	case 'v':
  1201  		c = '\v'
  1202  	case '\\':
  1203  		c = '\\'
  1204  
  1205  	default:
  1206  		if c != quote {
  1207  			Yyerror("unknown escape sequence: %c", c)
  1208  		}
  1209  	}
  1210  
  1211  	return c, 0, true
  1212  }
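
// Behavior of onechar on a few inputs inside an interpreted string or rune
// literal: `\n` yields (r='\n', ok), `\x41` yields (b=0x41, r=0, meaning
// "raw byte"), `\101` likewise yields the byte 65, and the closing quote
// itself yields ok=false so callers know the literal has ended.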
  1213  
  1214  func (l *lexer) unichar(n int) rune {
  1215  	x := l.hexchar(n)
  1216  	if x > utf8.MaxRune || 0xd800 <= x && x < 0xe000 {
  1217  		Yyerror("invalid Unicode code point in escape sequence: %#x", x)
  1218  		x = utf8.RuneError
  1219  	}
  1220  	return rune(x)
  1221  }
  1222  
  1223  func (l *lexer) hexchar(n int) uint32 {
  1224  	var x uint32
  1225  
  1226  	for ; n > 0; n-- {
  1227  		var d uint32
  1228  		switch c := l.getr(); {
  1229  		case isDigit(c):
  1230  			d = uint32(c - '0')
  1231  		case 'a' <= c && c <= 'f':
  1232  			d = uint32(c - 'a' + 10)
  1233  		case 'A' <= c && c <= 'F':
  1234  			d = uint32(c - 'A' + 10)
  1235  		default:
  1236  			Yyerror("non-hex character in escape sequence: %c", c)
  1237  			l.ungetr()
  1238  			return x
  1239  		}
  1240  		x = x*16 + d
  1241  	}
  1242  
  1243  	return x
  1244  }