github.com/4ad/go@v0.0.0-20161219182952-69a12818b605/src/cmd/compile/internal/gc/lex.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gc
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"cmd/internal/obj"
    11  	"fmt"
    12  	"io"
    13  	"strconv"
    14  	"strings"
    15  	"unicode"
    16  	"unicode/utf8"
    17  )
    18  
const (
	// EOF is the rune value the lexer uses for end of input;
	// getr returns -1 when the underlying reader reports io.EOF.
	EOF = -1
	// BOM is the Unicode byte order mark. getr reports an error for a
	// BOM it reads and skips over it.
	BOM = 0xFEFF
)
    23  
// lexlineno is the line number _after_ the most recently read rune.
// In particular, it's advanced (or rewound) as newlines are read (or unread).
var lexlineno int32

// lineno is the line number at the start of the most recently lexed token.
var lineno int32

// lexbuf is scratch space for the text of the identifier, number, or
// comment directive being scanned. The string scanners store a fixed
// placeholder in it instead of the literal's text.
var lexbuf bytes.Buffer

// strbuf accumulates the decoded contents of the string literal
// being scanned.
var strbuf bytes.Buffer

var litbuf string // LLITERAL value for use in syntax error messages
    34  
    35  func isSpace(c rune) bool {
    36  	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
    37  }
    38  
    39  func isLetter(c rune) bool {
    40  	return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
    41  }
    42  
    43  func isDigit(c rune) bool {
    44  	return '0' <= c && c <= '9'
    45  }
    46  
    47  func isQuoted(s string) bool {
    48  	return len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"'
    49  }
    50  
    51  func plan9quote(s string) string {
    52  	if s == "" {
    53  		return "''"
    54  	}
    55  	for _, c := range s {
    56  		if c <= ' ' || c == '\'' {
    57  			return "'" + strings.Replace(s, "'", "''", -1) + "'"
    58  		}
    59  	}
    60  	return s
    61  }
    62  
// Pragma is a bit set of //go: comment directives. The lexer
// accumulates these flags in lexer.pragma as getlinepragma
// recognizes the corresponding directives.
type Pragma uint16

const (
	// Each flag below occupies its own bit (1 << iota) so that flags
	// can be combined with |.
	Nointerface       Pragma = 1 << iota
	Noescape                 // func parameters don't escape
	Norace                   // func must not have race detector annotations
	Nosplit                  // func should not execute on separate stack
	Noinline                 // func should not be inlined
	Systemstack              // func must run on system stack
	Nowritebarrier           // emit compiler error instead of write barrier
	Nowritebarrierrec        // error on write barrier in this or recursive callees
	CgoUnsafeArgs            // treat a pointer to one arg as a pointer to them all
	UintptrEscapes           // pointers converted to uintptr escape
)
    77  
// lexer holds the scanner state: the input source, the pending
// implicit-semicolon flag, the pragma flags collected from comments,
// and the most recently scanned token with its attributes.
type lexer struct {
	// source
	bin        *bufio.Reader
	prevlineno int32 // value of lexlineno saved by getr before each read; restored by ungetr

	nlsemi bool // if set, '\n' and EOF translate to ';'

	// pragma flags
	// accumulated by lexer; reset by parser
	pragma Pragma

	// current token
	tok  int32
	sym_ *Sym   // valid if tok == LNAME
	val  Val    // valid if tok == LLITERAL
	op   Op     // valid if tok == LOPER, LASOP, or LINCOP, or prec > 0
	prec OpPrec // operator precedence; 0 if not a binary operator
}
    96  
// OpPrec is the precedence of a binary operator token.
// lexer.next resets it to 0 for tokens that are not binary operators.
type OpPrec int

const (
	// Precedences of binary operators (must be > 0).
	// Listed from lowest to highest, with the operators that
	// lexer.next assigns to each level:
	//
	//	PCOMM    <-
	//	POROR    ||
	//	PANDAND  &&
	//	PCMP     ==  !=  <  <=  >  >=
	//	PADD     +  -  |  ^
	//	PMUL     *  /  %  &  &^  <<  >>
	PCOMM OpPrec = 1 + iota
	POROR
	PANDAND
	PCMP
	PADD
	PMUL
)
   108  
// Token codes stored in lexer.tok by the scanner.
const (
	// The value of single-char tokens is just their character's Unicode value.
	// They are all below utf8.RuneSelf. Shift other tokens up to avoid conflicts.

	// names and literals
	LNAME = utf8.RuneSelf + iota
	LLITERAL

	// operator-based operations
	LOPER
	LASOP
	LINCOP

	// miscellaneous
	LCOLAS
	LCOMM
	LDDD

	// keywords
	LBREAK
	LCASE
	LCHAN
	LCONST
	LCONTINUE
	LDEFAULT
	LDEFER
	LELSE
	LFALL
	LFOR
	LFUNC
	LGO
	LGOTO
	LIF
	LIMPORT
	LINTERFACE
	LMAP
	LPACKAGE
	LRANGE
	LRETURN
	LSELECT
	LSTRUCT
	LSWITCH
	LTYPE
	LVAR

	// LIGNORE marks words that lexer.next skips instead of returning.
	LIGNORE
)
   156  
// lexn maps the multi-character token codes to the names that
// lexname returns for them (used in the lexer's debug output).
var lexn = map[rune]string{
	LNAME:    "NAME",
	LLITERAL: "LITERAL",

	LOPER:  "OPER",
	LASOP:  "ASOP",
	LINCOP: "INCOP",

	LCOLAS: "COLAS",
	LCOMM:  "COMM",
	LDDD:   "DDD",

	LBREAK:     "BREAK",
	LCASE:      "CASE",
	LCHAN:      "CHAN",
	LCONST:     "CONST",
	LCONTINUE:  "CONTINUE",
	LDEFAULT:   "DEFAULT",
	LDEFER:     "DEFER",
	LELSE:      "ELSE",
	LFALL:      "FALL",
	LFOR:       "FOR",
	LFUNC:      "FUNC",
	LGO:        "GO",
	LGOTO:      "GOTO",
	LIF:        "IF",
	LIMPORT:    "IMPORT",
	LINTERFACE: "INTERFACE",
	LMAP:       "MAP",
	LPACKAGE:   "PACKAGE",
	LRANGE:     "RANGE",
	LRETURN:    "RETURN",
	LSELECT:    "SELECT",
	LSTRUCT:    "STRUCT",
	LSWITCH:    "SWITCH",
	LTYPE:      "TYPE",
	LVAR:       "VAR",

	// LIGNORE never escapes lexer.next, so it needs no name here.
}
   197  
   198  func lexname(lex rune) string {
   199  	if s, ok := lexn[lex]; ok {
   200  		return s
   201  	}
   202  	return fmt.Sprintf("LEX-%d", lex)
   203  }
   204  
// next scans the next token from the input and records it in l.tok.
// Depending on the token, l.sym_, l.val, l.op, and l.prec are set as
// well, either here or by the helper that scans the token (ident,
// number, stdString, rawString, rune).
//
// The l.nlsemi flag left by the previous token is consumed on entry:
// if it was set, a newline, EOF, or a /* */ comment containing a
// newline produces an implicit ';' token. Tokens after which a
// semicolon may be inserted set l.nlsemi again before returning.
//
// White space and comments are skipped. The text after "//" is first
// handed to getlinepragma so that comment directives are processed.
func (l *lexer) next() {
	nlsemi := l.nlsemi
	l.nlsemi = false
	l.prec = 0

l0:
	// skip white space
	c := l.getr()
	for isSpace(c) {
		if c == '\n' && nlsemi {
			if Debug['x'] != 0 {
				fmt.Printf("lex: implicit semi\n")
			}
			// Insert implicit semicolon on previous line,
			// before the newline character.
			lineno = lexlineno - 1
			l.tok = ';'
			return
		}
		c = l.getr()
	}

	// start of token
	lineno = lexlineno

	// identifiers and keywords
	// (for better error messages consume all chars >= utf8.RuneSelf for identifiers)
	if isLetter(c) || c >= utf8.RuneSelf {
		l.ident(c)
		if l.tok == LIGNORE {
			// ignored word: scan again
			goto l0
		}
		return
	}
	// c < utf8.RuneSelf

	// c1 holds the lookahead rune; op and prec are staged here and
	// copied into l.op and l.prec at the binop1 label.
	var c1 rune
	var op Op
	var prec OpPrec

	switch c {
	case EOF:
		l.ungetr()
		// Treat EOF as "end of line" for the purposes
		// of inserting a semicolon.
		if nlsemi {
			if Debug['x'] != 0 {
				fmt.Printf("lex: implicit semi\n")
			}
			l.tok = ';'
			return
		}
		l.tok = -1
		return

	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		l.number(c)
		return

	case '.':
		c1 = l.getr()
		if isDigit(c1) {
			// number starting with '.': put the digit back and let
			// number scan the fraction
			l.ungetr()
			l.number('.')
			return
		}

		if c1 == '.' {
			// Two dots so far; peek at the next byte to see
			// whether this is "...".
			p, err := l.bin.Peek(1)
			if err == nil && p[0] == '.' {
				l.getr()
				c = LDDD
				goto lx
			}

			l.ungetr()
			c1 = '.'
		}

	case '"':
		l.stdString()
		return

	case '`':
		l.rawString()
		return

	case '\'':
		l.rune()
		return

	case '/':
		c1 = l.getr()
		if c1 == '*' {
			// general comment: skip up to and including the closing "*/"
			c = l.getr()
			for {
				if c == '*' {
					c = l.getr()
					if c == '/' {
						break
					}
					continue
				}
				if c == EOF {
					Yyerror("eof in comment")
					errorexit()
				}
				c = l.getr()
			}

			// A comment containing newlines acts like a newline.
			if lexlineno > lineno && nlsemi {
				if Debug['x'] != 0 {
					fmt.Printf("lex: implicit semi\n")
				}
				l.tok = ';'
				return
			}
			goto l0
		}

		if c1 == '/' {
			// line comment: getlinepragma handles directives and returns
			// the last rune it read; skip the rest of the line from there
			c = l.getlinepragma()
			for {
				if c == '\n' || c == EOF {
					// leave the newline/EOF for the white space loop
					// above so implicit semicolons still work
					l.ungetr()
					goto l0
				}

				c = l.getr()
			}
		}

		op = ODIV
		prec = PMUL
		goto binop1

	case ':':
		c1 = l.getr()
		if c1 == '=' {
			c = LCOLAS
			goto lx
		}

	case '*':
		op = OMUL
		prec = PMUL
		goto binop

	case '%':
		op = OMOD
		prec = PMUL
		goto binop

	case '+':
		op = OADD
		goto incop

	case '-':
		op = OSUB
		goto incop

	case '>':
		c = LOPER
		c1 = l.getr()
		if c1 == '>' {
			op = ORSH
			prec = PMUL
			goto binop
		}

		l.prec = PCMP
		if c1 == '=' {
			l.op = OGE
			goto lx
		}
		l.op = OGT

	case '<':
		c = LOPER
		c1 = l.getr()
		if c1 == '<' {
			op = OLSH
			prec = PMUL
			goto binop
		}

		if c1 == '-' {
			c = LCOMM
			// Not a binary operator, but parsed as one
			// so we can give a good error message when used
			// in an expression context.
			l.prec = PCOMM
			l.op = OSEND
			goto lx
		}

		l.prec = PCMP
		if c1 == '=' {
			l.op = OLE
			goto lx
		}
		l.op = OLT

	case '=':
		c1 = l.getr()
		if c1 == '=' {
			c = LOPER
			l.prec = PCMP
			l.op = OEQ
			goto lx
		}

	case '!':
		c1 = l.getr()
		if c1 == '=' {
			c = LOPER
			l.prec = PCMP
			l.op = ONE
			goto lx
		}

	case '&':
		c1 = l.getr()
		if c1 == '&' {
			c = LOPER
			l.prec = PANDAND
			l.op = OANDAND
			goto lx
		}

		if c1 == '^' {
			c = LOPER
			op = OANDNOT
			prec = PMUL
			goto binop
		}

		op = OAND
		prec = PMUL
		goto binop1

	case '|':
		c1 = l.getr()
		if c1 == '|' {
			c = LOPER
			l.prec = POROR
			l.op = OOROR
			goto lx
		}

		op = OOR
		prec = PADD
		goto binop1

	case '^':
		op = OXOR
		prec = PADD
		goto binop

	case '(', '[', '{', ',', ';':
		goto lx

	case ')', ']', '}':
		l.nlsemi = true
		goto lx

	case '#', '$', '?', '@', '\\':
		// accepted as single-character tokens only while importpkg
		// is set; otherwise they are illegal like anything else
		if importpkg != nil {
			goto lx
		}
		fallthrough

	default:
		// anything else is illegal
		Yyerror("syntax error: illegal character %#U", c)
		goto l0
	}

	// Cases that fall out of the switch read one rune of lookahead
	// (c1) that is not part of the token; put it back.
	l.ungetr()

lx:
	// c is the final token code: report it (in debug mode) and store it.
	if Debug['x'] != 0 {
		if c >= utf8.RuneSelf {
			fmt.Printf("%v lex: TOKEN %s\n", linestr(lineno), lexname(c))
		} else {
			fmt.Printf("%v lex: TOKEN '%c'\n", linestr(lineno), c)
		}
	}

	l.tok = c
	return

incop:
	// c is '+' or '-': a doubled character is an increment/decrement
	// token; otherwise it is a PADD operator that may be followed by '='.
	c1 = l.getr()
	if c1 == c {
		l.nlsemi = true
		l.op = op
		c = LINCOP
		goto lx
	}
	prec = PADD
	goto binop1

binop:
	// read the lookahead rune for a possible assignment operator
	c1 = l.getr()
binop1:
	// c1 is the rune following the operator
	if c1 != '=' {
		// plain binary operator: put the lookahead back
		l.ungetr()
		l.op = op
		l.prec = prec
		goto lx
	}

	// operator followed by '=': assignment operation
	l.op = op
	if Debug['x'] != 0 {
		fmt.Printf("lex: TOKEN ASOP %s=\n", goopnames[op])
	}
	l.tok = LASOP
}
   525  
   526  func (l *lexer) ident(c rune) {
   527  	cp := &lexbuf
   528  	cp.Reset()
   529  
   530  	// accelerate common case (7bit ASCII)
   531  	for isLetter(c) || isDigit(c) {
   532  		cp.WriteByte(byte(c))
   533  		c = l.getr()
   534  	}
   535  
   536  	// general case
   537  	for {
   538  		if c >= utf8.RuneSelf {
   539  			if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || importpkg != nil && c == 0xb7 {
   540  				if cp.Len() == 0 && unicode.IsDigit(c) {
   541  					Yyerror("identifier cannot begin with digit %#U", c)
   542  				}
   543  			} else {
   544  				Yyerror("invalid identifier character %#U", c)
   545  			}
   546  			cp.WriteRune(c)
   547  		} else if isLetter(c) || isDigit(c) {
   548  			cp.WriteByte(byte(c))
   549  		} else {
   550  			break
   551  		}
   552  		c = l.getr()
   553  	}
   554  
   555  	cp = nil
   556  	l.ungetr()
   557  
   558  	name := lexbuf.Bytes()
   559  
   560  	if len(name) >= 2 {
   561  		if tok, ok := keywords[string(name)]; ok {
   562  			if Debug['x'] != 0 {
   563  				fmt.Printf("lex: %s\n", lexname(tok))
   564  			}
   565  			switch tok {
   566  			case LBREAK, LCONTINUE, LFALL, LRETURN:
   567  				l.nlsemi = true
   568  			}
   569  			l.tok = tok
   570  			return
   571  		}
   572  	}
   573  
   574  	s := LookupBytes(name)
   575  	if Debug['x'] != 0 {
   576  		fmt.Printf("lex: ident %s\n", s)
   577  	}
   578  	l.sym_ = s
   579  	l.nlsemi = true
   580  	l.tok = LNAME
   581  }
   582  
// keywords maps reserved words to their token codes; lexer.ident
// consults it for every scanned name of at least two bytes.
// Words mapped to LIGNORE are skipped by lexer.next.
var keywords = map[string]int32{
	"break":       LBREAK,
	"case":        LCASE,
	"chan":        LCHAN,
	"const":       LCONST,
	"continue":    LCONTINUE,
	"default":     LDEFAULT,
	"defer":       LDEFER,
	"else":        LELSE,
	"fallthrough": LFALL,
	"for":         LFOR,
	"func":        LFUNC,
	"go":          LGO,
	"goto":        LGOTO,
	"if":          LIF,
	"import":      LIMPORT,
	"interface":   LINTERFACE,
	"map":         LMAP,
	"package":     LPACKAGE,
	"range":       LRANGE,
	"return":      LRETURN,
	"select":      LSELECT,
	"struct":      LSTRUCT,
	"switch":      LSWITCH,
	"type":        LTYPE,
	"var":         LVAR,

	// 💩
	"notwithstanding":      LIGNORE,
	"thetruthofthematter":  LIGNORE,
	"despiteallobjections": LIGNORE,
	"whereas":              LIGNORE,
	"insofaras":            LIGNORE,
}
   617  
// number scans a numeric literal. c is the first character of the
// literal, already read: either a digit or '.' (lexer.next passes '.'
// when the literal starts with a decimal point followed by a digit).
//
// The literal's text is collected in lexbuf and converted into an
// integer (*Mpint), a float (from newMpflt), or an imaginary constant
// (*Mpcplx with a zero real part), which is stored in l.val.U.
// Malformed or overflowing literals are reported with Yyerror;
// an overflowing value is replaced by zero.
// On return l.tok is LLITERAL and l.nlsemi is set.
func (l *lexer) number(c rune) {
	cp := &lexbuf
	cp.Reset()

	// parse mantissa before decimal point or exponent
	isInt := false
	malformedOctal := false
	if c != '.' {
		if c != '0' {
			// decimal or float
			for isDigit(c) {
				cp.WriteByte(byte(c))
				c = l.getr()
			}

		} else {
			// c == '0'
			cp.WriteByte('0')
			c = l.getr()
			if c == 'x' || c == 'X' {
				isInt = true // must be int
				cp.WriteByte(byte(c))
				c = l.getr()
				for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
					cp.WriteByte(byte(c))
					c = l.getr()
				}
				// only "0x" was collected: no hex digits followed
				if lexbuf.Len() == 2 {
					Yyerror("malformed hex constant")
				}
			} else {
				// decimal 0, octal, or float
				for isDigit(c) {
					// Remember digits that are invalid in an octal
					// constant; this is only an error if the literal
					// turns out to be an integer (see below).
					if c > '7' {
						malformedOctal = true
					}
					cp.WriteByte(byte(c))
					c = l.getr()
				}
			}
		}
	}

	// unless we have a hex number, parse fractional part or exponent, if any
	var str string
	if !isInt {
		isInt = true // assume int unless proven otherwise

		// fraction
		if c == '.' {
			isInt = false
			cp.WriteByte('.')
			c = l.getr()
			for isDigit(c) {
				cp.WriteByte(byte(c))
				c = l.getr()
			}
			// Falling through to exponent parsing here permits invalid
			// floating-point numbers with fractional mantissa and base-2
			// (p or P) exponent. We don't care because base-2 exponents
			// can only show up in machine-generated textual export data
			// which will use correct formatting.
		}

		// exponent
		// base-2 exponent (p or P) is only allowed in export data (see #9036)
		// TODO(gri) Once we switch to binary import data, importpkg will
		// always be nil in this function. Simplify the code accordingly.
		if c == 'e' || c == 'E' || importpkg != nil && (c == 'p' || c == 'P') {
			isInt = false
			cp.WriteByte(byte(c))
			c = l.getr()
			if c == '+' || c == '-' {
				cp.WriteByte(byte(c))
				c = l.getr()
			}
			if !isDigit(c) {
				Yyerror("malformed floating point constant exponent")
			}
			for isDigit(c) {
				cp.WriteByte(byte(c))
				c = l.getr()
			}
		}

		// imaginary constant
		if c == 'i' {
			// The 'i' is consumed (not written to lexbuf and not unread);
			// the collected text becomes the imaginary part.
			str = lexbuf.String()
			x := new(Mpcplx)
			x.Real.SetFloat64(0.0)
			x.Imag.SetString(str)
			if x.Imag.Val.IsInf() {
				Yyerror("overflow in imaginary constant")
				x.Imag.SetFloat64(0.0)
			}
			l.val.U = x

			if Debug['x'] != 0 {
				fmt.Printf("lex: imaginary literal\n")
			}
			goto done
		}
	}

	// c is the first rune after the literal; put it back.
	l.ungetr()

	if isInt {
		if malformedOctal {
			Yyerror("malformed octal constant")
		}

		str = lexbuf.String()
		x := new(Mpint)
		x.SetString(str)
		if x.Ovf {
			Yyerror("overflow in constant")
			x.SetInt64(0)
		}
		l.val.U = x

		if Debug['x'] != 0 {
			fmt.Printf("lex: integer literal\n")
		}

	} else { // float

		str = lexbuf.String()
		x := newMpflt()
		x.SetString(str)
		if x.Val.IsInf() {
			Yyerror("overflow in float constant")
			x.SetFloat64(0.0)
		}
		l.val.U = x

		if Debug['x'] != 0 {
			fmt.Printf("lex: floating literal\n")
		}
	}

done:
	litbuf = "" // lazily initialized in (*parser).syntax_error
	l.nlsemi = true
	l.tok = LLITERAL
}
   763  
   764  func (l *lexer) stdString() {
   765  	lexbuf.Reset()
   766  	lexbuf.WriteString(`"<string>"`)
   767  
   768  	cp := &strbuf
   769  	cp.Reset()
   770  
   771  	for {
   772  		r, b, ok := l.onechar('"')
   773  		if !ok {
   774  			break
   775  		}
   776  		if r == 0 {
   777  			cp.WriteByte(b)
   778  		} else {
   779  			cp.WriteRune(r)
   780  		}
   781  	}
   782  
   783  	l.val.U = internString(cp.Bytes())
   784  	if Debug['x'] != 0 {
   785  		fmt.Printf("lex: string literal\n")
   786  	}
   787  	litbuf = "string literal"
   788  	l.nlsemi = true
   789  	l.tok = LLITERAL
   790  }
   791  
   792  func (l *lexer) rawString() {
   793  	lexbuf.Reset()
   794  	lexbuf.WriteString("`<string>`")
   795  
   796  	cp := &strbuf
   797  	cp.Reset()
   798  
   799  	for {
   800  		c := l.getr()
   801  		if c == '\r' {
   802  			continue
   803  		}
   804  		if c == EOF {
   805  			Yyerror("eof in string")
   806  			break
   807  		}
   808  		if c == '`' {
   809  			break
   810  		}
   811  		cp.WriteRune(c)
   812  	}
   813  
   814  	l.val.U = internString(cp.Bytes())
   815  	if Debug['x'] != 0 {
   816  		fmt.Printf("lex: string literal\n")
   817  	}
   818  	litbuf = "string literal"
   819  	l.nlsemi = true
   820  	l.tok = LLITERAL
   821  }
   822  
   823  func (l *lexer) rune() {
   824  	r, b, ok := l.onechar('\'')
   825  	if !ok {
   826  		Yyerror("empty character literal or unescaped ' in character literal")
   827  		r = '\''
   828  	}
   829  	if r == 0 {
   830  		r = rune(b)
   831  	}
   832  
   833  	if c := l.getr(); c != '\'' {
   834  		Yyerror("missing '")
   835  		l.ungetr()
   836  	}
   837  
   838  	x := new(Mpint)
   839  	l.val.U = x
   840  	x.SetInt64(int64(r))
   841  	x.Rune = true
   842  	if Debug['x'] != 0 {
   843  		fmt.Printf("lex: codepoint literal\n")
   844  	}
   845  	litbuf = "rune literal"
   846  	l.nlsemi = true
   847  	l.tok = LLITERAL
   848  }
   849  
   850  var internedStrings = map[string]string{}
   851  
   852  func internString(b []byte) string {
   853  	s, ok := internedStrings[string(b)] // string(b) here doesn't allocate
   854  	if !ok {
   855  		s = string(b)
   856  		internedStrings[s] = s
   857  	}
   858  	return s
   859  }
   860  
   861  // read and interpret syntax that looks like
   862  // //line parse.y:15
   863  // as a discontinuity in sequential line numbers.
   864  // the next line of input comes from parse.y:15
   865  func (l *lexer) getlinepragma() rune {
   866  	c := l.getr()
   867  	if c == 'g' { // check for //go: directive
   868  		cp := &lexbuf
   869  		cp.Reset()
   870  		cp.WriteByte('g') // already read
   871  		for {
   872  			c = l.getr()
   873  			if c == EOF || c >= utf8.RuneSelf {
   874  				return c
   875  			}
   876  			if c == '\n' {
   877  				break
   878  			}
   879  			cp.WriteByte(byte(c))
   880  		}
   881  		cp = nil
   882  
   883  		text := strings.TrimSuffix(lexbuf.String(), "\r")
   884  
   885  		if strings.HasPrefix(text, "go:cgo_") {
   886  			pragcgobuf += pragcgo(text)
   887  		}
   888  
   889  		verb := text
   890  		if i := strings.Index(text, " "); i >= 0 {
   891  			verb = verb[:i]
   892  		}
   893  
   894  		switch verb {
   895  		case "go:linkname":
   896  			if !imported_unsafe {
   897  				Yyerror("//go:linkname only allowed in Go files that import \"unsafe\"")
   898  			}
   899  			f := strings.Fields(text)
   900  			if len(f) != 3 {
   901  				Yyerror("usage: //go:linkname localname linkname")
   902  				break
   903  			}
   904  			Lookup(f[1]).Linkname = f[2]
   905  		case "go:nointerface":
   906  			if obj.Fieldtrack_enabled != 0 {
   907  				l.pragma |= Nointerface
   908  			}
   909  		case "go:noescape":
   910  			l.pragma |= Noescape
   911  		case "go:norace":
   912  			l.pragma |= Norace
   913  		case "go:nosplit":
   914  			l.pragma |= Nosplit
   915  		case "go:noinline":
   916  			l.pragma |= Noinline
   917  		case "go:systemstack":
   918  			if !compiling_runtime {
   919  				Yyerror("//go:systemstack only allowed in runtime")
   920  			}
   921  			l.pragma |= Systemstack
   922  		case "go:nowritebarrier":
   923  			if !compiling_runtime {
   924  				Yyerror("//go:nowritebarrier only allowed in runtime")
   925  			}
   926  			l.pragma |= Nowritebarrier
   927  		case "go:nowritebarrierrec":
   928  			if !compiling_runtime {
   929  				Yyerror("//go:nowritebarrierrec only allowed in runtime")
   930  			}
   931  			l.pragma |= Nowritebarrierrec | Nowritebarrier // implies Nowritebarrier
   932  		case "go:cgo_unsafe_args":
   933  			l.pragma |= CgoUnsafeArgs
   934  		case "go:uintptrescapes":
   935  			// For the next function declared in the file
   936  			// any uintptr arguments may be pointer values
   937  			// converted to uintptr. This directive
   938  			// ensures that the referenced allocated
   939  			// object, if any, is retained and not moved
   940  			// until the call completes, even though from
   941  			// the types alone it would appear that the
   942  			// object is no longer needed during the
   943  			// call. The conversion to uintptr must appear
   944  			// in the argument list.
   945  			// Used in syscall/dll_windows.go.
   946  			l.pragma |= UintptrEscapes
   947  		}
   948  		return c
   949  	}
   950  
   951  	// check for //line directive
   952  	if c != 'l' {
   953  		return c
   954  	}
   955  	for i := 1; i < 5; i++ {
   956  		c = l.getr()
   957  		if c != rune("line "[i]) {
   958  			return c
   959  		}
   960  	}
   961  
   962  	cp := &lexbuf
   963  	cp.Reset()
   964  	linep := 0
   965  	for {
   966  		c = l.getr()
   967  		if c == EOF {
   968  			return c
   969  		}
   970  		if c == '\n' {
   971  			break
   972  		}
   973  		if c == ' ' {
   974  			continue
   975  		}
   976  		if c == ':' {
   977  			linep = cp.Len() + 1
   978  		}
   979  		cp.WriteByte(byte(c))
   980  	}
   981  	cp = nil
   982  
   983  	if linep == 0 {
   984  		return c
   985  	}
   986  	text := strings.TrimSuffix(lexbuf.String(), "\r")
   987  	n, err := strconv.Atoi(text[linep:])
   988  	if err != nil {
   989  		return c // todo: make this an error instead? it is almost certainly a bug.
   990  	}
   991  	if n > 1e8 {
   992  		Yyerror("line number out of range")
   993  		errorexit()
   994  	}
   995  	if n <= 0 {
   996  		return c
   997  	}
   998  
   999  	linehistupdate(text[:linep-1], n)
  1000  	return c
  1001  }
  1002  
  1003  func pragcgo(text string) string {
  1004  	f := pragmaFields(text)
  1005  
  1006  	verb := f[0][3:] // skip "go:"
  1007  	switch verb {
  1008  	case "cgo_export_static", "cgo_export_dynamic":
  1009  		switch {
  1010  		case len(f) == 2 && !isQuoted(f[1]):
  1011  			local := plan9quote(f[1])
  1012  			return fmt.Sprintln(verb, local)
  1013  
  1014  		case len(f) == 3 && !isQuoted(f[1]) && !isQuoted(f[2]):
  1015  			local := plan9quote(f[1])
  1016  			remote := plan9quote(f[2])
  1017  			return fmt.Sprintln(verb, local, remote)
  1018  
  1019  		default:
  1020  			Yyerror(`usage: //go:%s local [remote]`, verb)
  1021  		}
  1022  	case "cgo_import_dynamic":
  1023  		switch {
  1024  		case len(f) == 2 && !isQuoted(f[1]):
  1025  			local := plan9quote(f[1])
  1026  			return fmt.Sprintln(verb, local)
  1027  
  1028  		case len(f) == 3 && !isQuoted(f[1]) && !isQuoted(f[2]):
  1029  			local := plan9quote(f[1])
  1030  			remote := plan9quote(f[2])
  1031  			return fmt.Sprintln(verb, local, remote)
  1032  
  1033  		case len(f) == 4 && !isQuoted(f[1]) && !isQuoted(f[2]) && isQuoted(f[3]):
  1034  			local := plan9quote(f[1])
  1035  			remote := plan9quote(f[2])
  1036  			library := plan9quote(strings.Trim(f[3], `"`))
  1037  			return fmt.Sprintln(verb, local, remote, library)
  1038  
  1039  		default:
  1040  			Yyerror(`usage: //go:cgo_import_dynamic local [remote ["library"]]`)
  1041  		}
  1042  	case "cgo_import_static":
  1043  		switch {
  1044  		case len(f) == 2 && !isQuoted(f[1]):
  1045  			local := plan9quote(f[1])
  1046  			return fmt.Sprintln(verb, local)
  1047  
  1048  		default:
  1049  			Yyerror(`usage: //go:cgo_import_static local`)
  1050  		}
  1051  	case "cgo_dynamic_linker":
  1052  		switch {
  1053  		case len(f) == 2 && isQuoted(f[1]):
  1054  			path := plan9quote(strings.Trim(f[1], `"`))
  1055  			return fmt.Sprintln(verb, path)
  1056  
  1057  		default:
  1058  			Yyerror(`usage: //go:cgo_dynamic_linker "path"`)
  1059  		}
  1060  	case "cgo_ldflag":
  1061  		switch {
  1062  		case len(f) == 2 && isQuoted(f[1]):
  1063  			arg := plan9quote(strings.Trim(f[1], `"`))
  1064  			return fmt.Sprintln(verb, arg)
  1065  
  1066  		default:
  1067  			Yyerror(`usage: //go:cgo_ldflag "arg"`)
  1068  		}
  1069  	}
  1070  	return ""
  1071  }
  1072  
  1073  // pragmaFields is similar to strings.FieldsFunc(s, isSpace)
  1074  // but does not split when inside double quoted regions and always
  1075  // splits before the start and after the end of a double quoted region.
  1076  // pragmaFields does not recognize escaped quotes. If a quote in s is not
  1077  // closed the part after the opening quote will not be returned as a field.
  1078  func pragmaFields(s string) []string {
  1079  	var a []string
  1080  	inQuote := false
  1081  	fieldStart := -1 // Set to -1 when looking for start of field.
  1082  	for i, c := range s {
  1083  		switch {
  1084  		case c == '"':
  1085  			if inQuote {
  1086  				inQuote = false
  1087  				a = append(a, s[fieldStart:i+1])
  1088  				fieldStart = -1
  1089  			} else {
  1090  				inQuote = true
  1091  				if fieldStart >= 0 {
  1092  					a = append(a, s[fieldStart:i])
  1093  				}
  1094  				fieldStart = i
  1095  			}
  1096  		case !inQuote && isSpace(c):
  1097  			if fieldStart >= 0 {
  1098  				a = append(a, s[fieldStart:i])
  1099  				fieldStart = -1
  1100  			}
  1101  		default:
  1102  			if fieldStart == -1 {
  1103  				fieldStart = i
  1104  			}
  1105  		}
  1106  	}
  1107  	if !inQuote && fieldStart >= 0 { // Last field might end at the end of the string.
  1108  		a = append(a, s[fieldStart:])
  1109  	}
  1110  	return a
  1111  }
  1112  
  1113  func (l *lexer) getr() rune {
  1114  redo:
  1115  	l.prevlineno = lexlineno
  1116  	r, w, err := l.bin.ReadRune()
  1117  	if err != nil {
  1118  		if err != io.EOF {
  1119  			Fatalf("io error: %v", err)
  1120  		}
  1121  		return -1
  1122  	}
  1123  	switch r {
  1124  	case 0:
  1125  		yyerrorl(lexlineno, "illegal NUL byte")
  1126  	case '\n':
  1127  		if importpkg == nil {
  1128  			lexlineno++
  1129  		}
  1130  	case utf8.RuneError:
  1131  		if w == 1 {
  1132  			yyerrorl(lexlineno, "illegal UTF-8 sequence")
  1133  		}
  1134  	case BOM:
  1135  		yyerrorl(lexlineno, "Unicode (UTF-8) BOM in middle of file")
  1136  		goto redo
  1137  	}
  1138  
  1139  	return r
  1140  }
  1141  
  1142  func (l *lexer) ungetr() {
  1143  	l.bin.UnreadRune()
  1144  	lexlineno = l.prevlineno
  1145  }
  1146  
  1147  // onechar lexes a single character within a rune or interpreted string literal,
  1148  // handling escape sequences as necessary.
  1149  func (l *lexer) onechar(quote rune) (r rune, b byte, ok bool) {
  1150  	c := l.getr()
  1151  	switch c {
  1152  	case EOF:
  1153  		Yyerror("eof in string")
  1154  		l.ungetr()
  1155  		return
  1156  
  1157  	case '\n':
  1158  		Yyerror("newline in string")
  1159  		l.ungetr()
  1160  		return
  1161  
  1162  	case '\\':
  1163  		break
  1164  
  1165  	case quote:
  1166  		return
  1167  
  1168  	default:
  1169  		return c, 0, true
  1170  	}
  1171  
  1172  	c = l.getr()
  1173  	switch c {
  1174  	case 'x':
  1175  		return 0, byte(l.hexchar(2)), true
  1176  
  1177  	case 'u':
  1178  		return l.unichar(4), 0, true
  1179  
  1180  	case 'U':
  1181  		return l.unichar(8), 0, true
  1182  
  1183  	case '0', '1', '2', '3', '4', '5', '6', '7':
  1184  		x := c - '0'
  1185  		for i := 2; i > 0; i-- {
  1186  			c = l.getr()
  1187  			if c >= '0' && c <= '7' {
  1188  				x = x*8 + c - '0'
  1189  				continue
  1190  			}
  1191  
  1192  			Yyerror("non-octal character in escape sequence: %c", c)
  1193  			l.ungetr()
  1194  		}
  1195  
  1196  		if x > 255 {
  1197  			Yyerror("octal escape value > 255: %d", x)
  1198  		}
  1199  
  1200  		return 0, byte(x), true
  1201  
  1202  	case 'a':
  1203  		c = '\a'
  1204  	case 'b':
  1205  		c = '\b'
  1206  	case 'f':
  1207  		c = '\f'
  1208  	case 'n':
  1209  		c = '\n'
  1210  	case 'r':
  1211  		c = '\r'
  1212  	case 't':
  1213  		c = '\t'
  1214  	case 'v':
  1215  		c = '\v'
  1216  	case '\\':
  1217  		c = '\\'
  1218  
  1219  	default:
  1220  		if c != quote {
  1221  			Yyerror("unknown escape sequence: %c", c)
  1222  		}
  1223  	}
  1224  
  1225  	return c, 0, true
  1226  }
  1227  
  1228  func (l *lexer) unichar(n int) rune {
  1229  	x := l.hexchar(n)
  1230  	if x > utf8.MaxRune || 0xd800 <= x && x < 0xe000 {
  1231  		Yyerror("invalid Unicode code point in escape sequence: %#x", x)
  1232  		x = utf8.RuneError
  1233  	}
  1234  	return rune(x)
  1235  }
  1236  
  1237  func (l *lexer) hexchar(n int) uint32 {
  1238  	var x uint32
  1239  
  1240  	for ; n > 0; n-- {
  1241  		var d uint32
  1242  		switch c := l.getr(); {
  1243  		case isDigit(c):
  1244  			d = uint32(c - '0')
  1245  		case 'a' <= c && c <= 'f':
  1246  			d = uint32(c - 'a' + 10)
  1247  		case 'A' <= c && c <= 'F':
  1248  			d = uint32(c - 'A' + 10)
  1249  		default:
  1250  			Yyerror("non-hex character in escape sequence: %c", c)
  1251  			l.ungetr()
  1252  			return x
  1253  		}
  1254  		x = x*16 + d
  1255  	}
  1256  
  1257  	return x
  1258  }