github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/cmd/compile/internal/gc/lex.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gc
     6  
     7  import (
     8  	"bufio"
     9  	"cmd/internal/obj"
    10  	"fmt"
    11  	"io"
    12  	"strconv"
    13  	"strings"
    14  	"unicode"
    15  	"unicode/utf8"
    16  )
    17  
// Special rune values used by the lexer.
const (
	// EOF is the value lexer.getr returns at end of input.
	EOF = -1
	// BOM is the Unicode byte order mark; lexer.getr reports an error
	// for it and skips to the next rune.
	BOM = 0xFEFF
)
    22  
    23  func isSpace(c rune) bool {
    24  	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
    25  }
    26  
    27  func isLetter(c rune) bool {
    28  	return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
    29  }
    30  
    31  func isDigit(c rune) bool {
    32  	return '0' <= c && c <= '9'
    33  }
    34  
    35  func plan9quote(s string) string {
    36  	if s == "" {
    37  		return "''"
    38  	}
    39  	for _, c := range s {
    40  		if c <= ' ' || c == '\'' {
    41  			return "'" + strings.Replace(s, "'", "''", -1) + "'"
    42  		}
    43  	}
    44  	return s
    45  }
    46  
// Pragma is a bit set of flags. The lexer sets bits in it when it
// encounters the corresponding //go: directive comments
// (see lexer.getlinepragma).
type Pragma uint16

const (
	// Nointerface is set by //go:nointerface, and only when
	// obj.Fieldtrack_enabled is non-zero.
	Nointerface       Pragma = 1 << iota
	Noescape                 // func parameters don't escape
	Norace                   // func must not have race detector annotations
	Nosplit                  // func should not execute on separate stack
	Noinline                 // func should not be inlined
	Systemstack              // func must run on system stack
	Nowritebarrier           // emit compiler error instead of write barrier
	Nowritebarrierrec        // error on write barrier in this or recursive callees
	CgoUnsafeArgs            // treat a pointer to one arg as a pointer to them all
)
    60  
// lexer holds the scanner state: the buffered input, the flag that
// controls implicit semicolon insertion, the pragma bits collected
// from //go: directives, and the most recently scanned token with its
// associated values.
type lexer struct {
	// source
	bin        *bufio.Reader
	prevlineno int32 // line no. of most recently read character

	nlsemi bool // if set, '\n' and EOF translate to ';'

	// pragma flags
	// accumulated by lexer; reset by parser
	pragma Pragma

	// current token
	tok  int32
	sym_ *Sym   // valid if tok == LNAME
	val  Val    // valid if tok == LLITERAL
	op   Op     // valid if tok == LOPER, LASOP, or LINCOP, or prec > 0
	prec OpPrec // operator precedence; 0 if not a binary operator
}
    79  
// OpPrec is the precedence of a binary operator as recorded in
// lexer.prec. The zero value means "not a binary operator".
type OpPrec int

const (
	// Precedences of binary operators (must be > 0).
	// Listed in increasing order of binding strength.
	PCOMM OpPrec = 1 + iota
	POROR
	PANDAND
	PCMP
	PADD
	PMUL
)
    91  
// Token values stored in lexer.tok.
const (
	// The value of single-char tokens is just their character's Unicode value.
	// They are all below utf8.RuneSelf. Shift other tokens up to avoid conflicts.

	// names and literals
	LNAME = utf8.RuneSelf + iota
	LLITERAL

	// operator-based operations
	LOPER
	LASOP
	LINCOP

	// miscellaneous
	LCOLAS
	LCOMM
	LDDD

	// keywords
	LBREAK
	LCASE
	LCHAN
	LCONST
	LCONTINUE
	LDEFAULT
	LDEFER
	LELSE
	LFALL
	LFOR
	LFUNC
	LGO
	LGOTO
	LIF
	LIMPORT
	LINTERFACE
	LMAP
	LPACKAGE
	LRANGE
	LRETURN
	LSELECT
	LSTRUCT
	LSWITCH
	LTYPE
	LVAR

	// LIGNORE marks words in the keywords table that lexer.next
	// skips without returning a token.
	LIGNORE
)
   139  
// lexn maps the multi-character token values to the names printed
// by lexname.
var lexn = map[rune]string{
	LNAME:    "NAME",
	LLITERAL: "LITERAL",

	LOPER:  "OPER",
	LASOP:  "ASOP",
	LINCOP: "INCOP",

	LCOLAS: "COLAS",
	LCOMM:  "COMM",
	LDDD:   "DDD",

	LBREAK:     "BREAK",
	LCASE:      "CASE",
	LCHAN:      "CHAN",
	LCONST:     "CONST",
	LCONTINUE:  "CONTINUE",
	LDEFAULT:   "DEFAULT",
	LDEFER:     "DEFER",
	LELSE:      "ELSE",
	LFALL:      "FALL",
	LFOR:       "FOR",
	LFUNC:      "FUNC",
	LGO:        "GO",
	LGOTO:      "GOTO",
	LIF:        "IF",
	LIMPORT:    "IMPORT",
	LINTERFACE: "INTERFACE",
	LMAP:       "MAP",
	LPACKAGE:   "PACKAGE",
	LRANGE:     "RANGE",
	LRETURN:    "RETURN",
	LSELECT:    "SELECT",
	LSTRUCT:    "STRUCT",
	LSWITCH:    "SWITCH",
	LTYPE:      "TYPE",
	LVAR:       "VAR",

	// LIGNORE has no entry: it never escapes lexer.next.
}
   180  
   181  func lexname(lex rune) string {
   182  	if s, ok := lexn[lex]; ok {
   183  		return s
   184  	}
   185  	return fmt.Sprintf("LEX-%d", lex)
   186  }
   187  
// next scans the next token from the input and records it in l.tok.
// Depending on the token it also sets l.sym_, l.val, l.op and l.prec
// (via the helper methods ident, number, stdString, rawString and rune,
// or directly for operators).
//
// If the previous token set l.nlsemi, a newline, a general comment that
// spans lines, or EOF is reported as an implicit ';' token. l.nlsemi and
// l.prec are cleared on entry and set again only by tokens that need them.
func (l *lexer) next() {
	nlsemi := l.nlsemi
	l.nlsemi = false
	l.prec = 0

l0:
	// skip white space
	c := l.getr()
	for isSpace(c) {
		if c == '\n' && nlsemi {
			if Debug['x'] != 0 {
				fmt.Printf("lex: implicit semi\n")
			}
			// Insert implicit semicolon on previous line,
			// before the newline character.
			lineno = lexlineno - 1
			l.tok = ';'
			return
		}
		c = l.getr()
	}

	// start of token
	lineno = lexlineno

	// identifiers and keywords
	// (for better error messages consume all chars >= utf8.RuneSelf for identifiers)
	if isLetter(c) || c >= utf8.RuneSelf {
		l.ident(c)
		// Words mapped to LIGNORE in the keywords table are skipped.
		if l.tok == LIGNORE {
			goto l0
		}
		return
	}
	// c < utf8.RuneSelf

	var c1 rune
	var op Op
	var prec OpPrec

	// Cases that fall out of the bottom of this switch have read one
	// character of lookahead into c1 that is not part of the token;
	// it is pushed back by the l.ungetr call after the switch.
	switch c {
	case EOF:
		l.ungetr()
		// Treat EOF as "end of line" for the purposes
		// of inserting a semicolon.
		if nlsemi {
			if Debug['x'] != 0 {
				fmt.Printf("lex: implicit semi\n")
			}
			l.tok = ';'
			return
		}
		l.tok = -1
		return

	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
		l.number(c)
		return

	case '.':
		c1 = l.getr()
		if isDigit(c1) {
			// ".5" style literal: hand the digit back and let number
			// start from the '.'.
			l.ungetr()
			l.number('.')
			return
		}

		if c1 == '.' {
			// "..." is LDDD; for a bare ".." the second '.' is pushed
			// back and only a single '.' token is returned.
			p, err := l.bin.Peek(1)
			if err == nil && p[0] == '.' {
				l.getr()
				c = LDDD
				goto lx
			}

			l.ungetr()
			c1 = '.'
		}

	case '"':
		l.stdString()
		return

	case '`':
		l.rawString()
		return

	case '\'':
		l.rune()
		return

	case '/':
		c1 = l.getr()
		if c1 == '*' {
			// general comment: skip up to and including the closing "*/"
			c = l.getr()
			for {
				if c == '*' {
					c = l.getr()
					if c == '/' {
						break
					}
					continue
				}
				if c == EOF {
					Yyerror("eof in comment")
					errorexit()
				}
				c = l.getr()
			}

			// A comment containing newlines acts like a newline.
			if lexlineno > lineno && nlsemi {
				if Debug['x'] != 0 {
					fmt.Printf("lex: implicit semi\n")
				}
				l.tok = ';'
				return
			}
			goto l0
		}

		if c1 == '/' {
			// line comment: let getlinepragma look for directives, then
			// skip the rest of the line. The terminating newline (or EOF)
			// is pushed back so that the white space loop sees it.
			c = l.getlinepragma()
			for {
				if c == '\n' || c == EOF {
					l.ungetr()
					goto l0
				}

				c = l.getr()
			}
		}

		op = ODIV
		prec = PMUL
		goto binop1

	case ':':
		c1 = l.getr()
		if c1 == '=' {
			c = LCOLAS
			goto lx
		}

	case '*':
		op = OMUL
		prec = PMUL
		goto binop

	case '%':
		op = OMOD
		prec = PMUL
		goto binop

	case '+':
		op = OADD
		goto incop

	case '-':
		op = OSUB
		goto incop

	case '>':
		c = LOPER
		c1 = l.getr()
		if c1 == '>' {
			op = ORSH
			prec = PMUL
			goto binop
		}

		l.prec = PCMP
		if c1 == '=' {
			l.op = OGE
			goto lx
		}
		l.op = OGT

	case '<':
		c = LOPER
		c1 = l.getr()
		if c1 == '<' {
			op = OLSH
			prec = PMUL
			goto binop
		}

		if c1 == '-' {
			c = LCOMM
			// Not a binary operator, but parsed as one
			// so we can give a good error message when used
			// in an expression context.
			l.prec = PCOMM
			l.op = OSEND
			goto lx
		}

		l.prec = PCMP
		if c1 == '=' {
			l.op = OLE
			goto lx
		}
		l.op = OLT

	case '=':
		c1 = l.getr()
		if c1 == '=' {
			c = LOPER
			l.prec = PCMP
			l.op = OEQ
			goto lx
		}

	case '!':
		c1 = l.getr()
		if c1 == '=' {
			c = LOPER
			l.prec = PCMP
			l.op = ONE
			goto lx
		}

	case '&':
		c1 = l.getr()
		if c1 == '&' {
			c = LOPER
			l.prec = PANDAND
			l.op = OANDAND
			goto lx
		}

		if c1 == '^' {
			c = LOPER
			op = OANDNOT
			prec = PMUL
			goto binop
		}

		op = OAND
		prec = PMUL
		goto binop1

	case '|':
		c1 = l.getr()
		if c1 == '|' {
			c = LOPER
			l.prec = POROR
			l.op = OOROR
			goto lx
		}

		op = OOR
		prec = PADD
		goto binop1

	case '^':
		op = OXOR
		prec = PADD
		goto binop

	case '(', '[', '{', ',', ';':
		goto lx

	case ')', ']', '}':
		l.nlsemi = true
		goto lx

	case '#', '$', '?', '@', '\\':
		// These characters are accepted as tokens only while
		// importpkg is non-nil.
		if importpkg != nil {
			goto lx
		}
		fallthrough

	default:
		// anything else is illegal
		Yyerror("syntax error: illegal character %#U", c)
		goto l0
	}

	l.ungetr()

lx:
	// c is the final token value.
	if Debug['x'] != 0 {
		if c >= utf8.RuneSelf {
			fmt.Printf("%v lex: TOKEN %s\n", linestr(lineno), lexname(c))
		} else {
			fmt.Printf("%v lex: TOKEN '%c'\n", linestr(lineno), c)
		}
	}

	l.tok = c
	return

incop:
	// c is '+' or '-': a repeated character is LINCOP (++ or --),
	// otherwise it is an additive operator, possibly followed by '='.
	c1 = l.getr()
	if c1 == c {
		l.nlsemi = true
		l.op = op
		c = LINCOP
		goto lx
	}
	prec = PADD
	goto binop1

binop:
	// read the character following the operator
	c1 = l.getr()
binop1:
	// c1 is the character following the operator. Anything but '='
	// is pushed back and the plain binary operator is returned;
	// '=' turns the operator into an assignment operation (LASOP).
	if c1 != '=' {
		l.ungetr()
		l.op = op
		l.prec = prec
		goto lx
	}

	l.op = op
	if Debug['x'] != 0 {
		fmt.Printf("lex: TOKEN ASOP %s=\n", goopnames[op])
	}
	l.tok = LASOP
}
   508  
   509  func (l *lexer) ident(c rune) {
   510  	cp := &lexbuf
   511  	cp.Reset()
   512  
   513  	// accelerate common case (7bit ASCII)
   514  	for isLetter(c) || isDigit(c) {
   515  		cp.WriteByte(byte(c))
   516  		c = l.getr()
   517  	}
   518  
   519  	// general case
   520  	for {
   521  		if c >= utf8.RuneSelf {
   522  			if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || importpkg != nil && c == 0xb7 {
   523  				if cp.Len() == 0 && unicode.IsDigit(c) {
   524  					Yyerror("identifier cannot begin with digit %#U", c)
   525  				}
   526  			} else {
   527  				Yyerror("invalid identifier character %#U", c)
   528  			}
   529  			cp.WriteRune(c)
   530  		} else if isLetter(c) || isDigit(c) {
   531  			cp.WriteByte(byte(c))
   532  		} else {
   533  			break
   534  		}
   535  		c = l.getr()
   536  	}
   537  
   538  	cp = nil
   539  	l.ungetr()
   540  
   541  	name := lexbuf.Bytes()
   542  
   543  	if len(name) >= 2 {
   544  		if tok, ok := keywords[string(name)]; ok {
   545  			if Debug['x'] != 0 {
   546  				fmt.Printf("lex: %s\n", lexname(tok))
   547  			}
   548  			switch tok {
   549  			case LBREAK, LCONTINUE, LFALL, LRETURN:
   550  				l.nlsemi = true
   551  			}
   552  			l.tok = tok
   553  			return
   554  		}
   555  	}
   556  
   557  	s := LookupBytes(name)
   558  	if Debug['x'] != 0 {
   559  		fmt.Printf("lex: ident %s\n", s)
   560  	}
   561  	l.sym_ = s
   562  	l.nlsemi = true
   563  	l.tok = LNAME
   564  }
   565  
// keywords maps the spelling of each keyword to its token value.
// lexer.ident consults it for every word of two or more characters.
var keywords = map[string]int32{
	"break":       LBREAK,
	"case":        LCASE,
	"chan":        LCHAN,
	"const":       LCONST,
	"continue":    LCONTINUE,
	"default":     LDEFAULT,
	"defer":       LDEFER,
	"else":        LELSE,
	"fallthrough": LFALL,
	"for":         LFOR,
	"func":        LFUNC,
	"go":          LGO,
	"goto":        LGOTO,
	"if":          LIF,
	"import":      LIMPORT,
	"interface":   LINTERFACE,
	"map":         LMAP,
	"package":     LPACKAGE,
	"range":       LRANGE,
	"return":      LRETURN,
	"select":      LSELECT,
	"struct":      LSTRUCT,
	"switch":      LSWITCH,
	"type":        LTYPE,
	"var":         LVAR,

	// Words the lexer discards: lexer.next skips any word mapped
	// to LIGNORE and continues with the following token.
	"notwithstanding":      LIGNORE,
	"thetruthofthematter":  LIGNORE,
	"despiteallobjections": LIGNORE,
	"whereas":              LIGNORE,
	"insofaras":            LIGNORE,
}
   600  
// number scans a numeric literal whose first character c has already
// been read. c is either a digit or '.'; lexer.next passes '.' only
// after pushing back a digit that follows it.
//
// The literal's text is collected in lexbuf and converted into an
// integer (Mpint), floating-point (newMpflt) or imaginary (Mpcplx)
// value stored in l.val.U. Malformed or overflowing constants are
// reported with Yyerror, and overflowing values are replaced by zero.
// On return l.tok is LLITERAL, l.nlsemi is set, and litbuf describes
// the literal.
func (l *lexer) number(c rune) {
	cp := &lexbuf
	cp.Reset()

	// parse mantissa before decimal point or exponent
	isInt := false
	malformedOctal := false
	if c != '.' {
		if c != '0' {
			// decimal or float
			for isDigit(c) {
				cp.WriteByte(byte(c))
				c = l.getr()
			}

		} else {
			// c == 0
			cp.WriteByte('0')
			c = l.getr()
			if c == 'x' || c == 'X' {
				isInt = true // must be int
				cp.WriteByte(byte(c))
				c = l.getr()
				for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
					cp.WriteByte(byte(c))
					c = l.getr()
				}
				// Only "0x" or "0X" was collected: no hex digits followed.
				if lexbuf.Len() == 2 {
					Yyerror("malformed hex constant")
				}
			} else {
				// decimal 0, octal, or float
				// An 8 or 9 is only an error if the literal turns out to
				// be an integer, so just remember it for now.
				for isDigit(c) {
					if c > '7' {
						malformedOctal = true
					}
					cp.WriteByte(byte(c))
					c = l.getr()
				}
			}
		}
	}

	// unless we have a hex number, parse fractional part or exponent, if any
	var str string
	if !isInt {
		isInt = true // assume int unless proven otherwise

		// fraction
		if c == '.' {
			isInt = false
			cp.WriteByte('.')
			c = l.getr()
			for isDigit(c) {
				cp.WriteByte(byte(c))
				c = l.getr()
			}
			// Falling through to exponent parsing here permits invalid
			// floating-point numbers with fractional mantissa and base-2
			// (p or P) exponent. We don't care because base-2 exponents
			// can only show up in machine-generated textual export data
			// which will use correct formatting.
		}

		// exponent
		// base-2 exponent (p or P) is only allowed in export data (see #9036)
		// TODO(gri) Once we switch to binary import data, importpkg will
		// always be nil in this function. Simplify the code accordingly.
		if c == 'e' || c == 'E' || importpkg != nil && (c == 'p' || c == 'P') {
			isInt = false
			cp.WriteByte(byte(c))
			c = l.getr()
			if c == '+' || c == '-' {
				cp.WriteByte(byte(c))
				c = l.getr()
			}
			if !isDigit(c) {
				Yyerror("malformed floating point constant exponent")
			}
			for isDigit(c) {
				cp.WriteByte(byte(c))
				c = l.getr()
			}
		}

		// imaginary constant
		// The trailing 'i' is consumed but not added to the buffer, and
		// the jump to done skips the l.ungetr call below.
		if c == 'i' {
			str = lexbuf.String()
			x := new(Mpcplx)
			x.Real.SetFloat64(0.0)
			x.Imag.SetString(str)
			if x.Imag.Val.IsInf() {
				Yyerror("overflow in imaginary constant")
				x.Imag.SetFloat64(0.0)
			}
			l.val.U = x

			if Debug['x'] != 0 {
				fmt.Printf("lex: imaginary literal\n")
			}
			goto done
		}
	}

	// c is the first character after the literal; push it back.
	l.ungetr()

	if isInt {
		if malformedOctal {
			Yyerror("malformed octal constant")
		}

		str = lexbuf.String()
		x := new(Mpint)
		x.SetString(str)
		if x.Ovf {
			Yyerror("overflow in constant")
			x.SetInt64(0)
		}
		l.val.U = x

		if Debug['x'] != 0 {
			fmt.Printf("lex: integer literal\n")
		}

	} else { // float

		str = lexbuf.String()
		x := newMpflt()
		x.SetString(str)
		if x.Val.IsInf() {
			Yyerror("overflow in float constant")
			x.SetFloat64(0.0)
		}
		l.val.U = x

		if Debug['x'] != 0 {
			fmt.Printf("lex: floating literal\n")
		}
	}

done:
	litbuf = "literal " + str
	l.nlsemi = true
	l.tok = LLITERAL
}
   746  
   747  func (l *lexer) stdString() {
   748  	lexbuf.Reset()
   749  	lexbuf.WriteString(`"<string>"`)
   750  
   751  	cp := &strbuf
   752  	cp.Reset()
   753  
   754  	for {
   755  		r, b, ok := l.onechar('"')
   756  		if !ok {
   757  			break
   758  		}
   759  		if r == 0 {
   760  			cp.WriteByte(b)
   761  		} else {
   762  			cp.WriteRune(r)
   763  		}
   764  	}
   765  
   766  	l.val.U = internString(cp.Bytes())
   767  	if Debug['x'] != 0 {
   768  		fmt.Printf("lex: string literal\n")
   769  	}
   770  	litbuf = "string literal"
   771  	l.nlsemi = true
   772  	l.tok = LLITERAL
   773  }
   774  
   775  func (l *lexer) rawString() {
   776  	lexbuf.Reset()
   777  	lexbuf.WriteString("`<string>`")
   778  
   779  	cp := &strbuf
   780  	cp.Reset()
   781  
   782  	for {
   783  		c := l.getr()
   784  		if c == '\r' {
   785  			continue
   786  		}
   787  		if c == EOF {
   788  			Yyerror("eof in string")
   789  			break
   790  		}
   791  		if c == '`' {
   792  			break
   793  		}
   794  		cp.WriteRune(c)
   795  	}
   796  
   797  	l.val.U = internString(cp.Bytes())
   798  	if Debug['x'] != 0 {
   799  		fmt.Printf("lex: string literal\n")
   800  	}
   801  	litbuf = "string literal"
   802  	l.nlsemi = true
   803  	l.tok = LLITERAL
   804  }
   805  
   806  func (l *lexer) rune() {
   807  	r, b, ok := l.onechar('\'')
   808  	if !ok {
   809  		Yyerror("empty character literal or unescaped ' in character literal")
   810  		r = '\''
   811  	}
   812  	if r == 0 {
   813  		r = rune(b)
   814  	}
   815  
   816  	if c := l.getr(); c != '\'' {
   817  		Yyerror("missing '")
   818  		l.ungetr()
   819  	}
   820  
   821  	x := new(Mpint)
   822  	l.val.U = x
   823  	x.SetInt64(int64(r))
   824  	x.Rune = true
   825  	if Debug['x'] != 0 {
   826  		fmt.Printf("lex: codepoint literal\n")
   827  	}
   828  	litbuf = "rune literal"
   829  	l.nlsemi = true
   830  	l.tok = LLITERAL
   831  }
   832  
   833  var internedStrings = map[string]string{}
   834  
   835  func internString(b []byte) string {
   836  	s, ok := internedStrings[string(b)] // string(b) here doesn't allocate
   837  	if !ok {
   838  		s = string(b)
   839  		internedStrings[s] = s
   840  	}
   841  	return s
   842  }
   843  
   844  func more(pp *string) bool {
   845  	p := *pp
   846  	for p != "" && isSpace(rune(p[0])) {
   847  		p = p[1:]
   848  	}
   849  	*pp = p
   850  	return p != ""
   851  }
   852  
   853  // read and interpret syntax that looks like
   854  // //line parse.y:15
   855  // as a discontinuity in sequential line numbers.
   856  // the next line of input comes from parse.y:15
   857  func (l *lexer) getlinepragma() rune {
   858  	c := l.getr()
   859  	if c == 'g' { // check for //go: directive
   860  		cp := &lexbuf
   861  		cp.Reset()
   862  		cp.WriteByte('g') // already read
   863  		for {
   864  			c = l.getr()
   865  			if c == EOF || c >= utf8.RuneSelf {
   866  				return c
   867  			}
   868  			if c == '\n' {
   869  				break
   870  			}
   871  			cp.WriteByte(byte(c))
   872  		}
   873  		cp = nil
   874  
   875  		text := strings.TrimSuffix(lexbuf.String(), "\r")
   876  
   877  		if strings.HasPrefix(text, "go:cgo_") {
   878  			pragcgo(text)
   879  		}
   880  
   881  		verb := text
   882  		if i := strings.Index(text, " "); i >= 0 {
   883  			verb = verb[:i]
   884  		}
   885  
   886  		switch verb {
   887  		case "go:linkname":
   888  			if !imported_unsafe {
   889  				Yyerror("//go:linkname only allowed in Go files that import \"unsafe\"")
   890  			}
   891  			f := strings.Fields(text)
   892  			if len(f) != 3 {
   893  				Yyerror("usage: //go:linkname localname linkname")
   894  				break
   895  			}
   896  			Lookup(f[1]).Linkname = f[2]
   897  		case "go:nointerface":
   898  			if obj.Fieldtrack_enabled != 0 {
   899  				l.pragma |= Nointerface
   900  			}
   901  		case "go:noescape":
   902  			l.pragma |= Noescape
   903  		case "go:norace":
   904  			l.pragma |= Norace
   905  		case "go:nosplit":
   906  			l.pragma |= Nosplit
   907  		case "go:noinline":
   908  			l.pragma |= Noinline
   909  		case "go:systemstack":
   910  			if compiling_runtime == 0 {
   911  				Yyerror("//go:systemstack only allowed in runtime")
   912  			}
   913  			l.pragma |= Systemstack
   914  		case "go:nowritebarrier":
   915  			if compiling_runtime == 0 {
   916  				Yyerror("//go:nowritebarrier only allowed in runtime")
   917  			}
   918  			l.pragma |= Nowritebarrier
   919  		case "go:nowritebarrierrec":
   920  			if compiling_runtime == 0 {
   921  				Yyerror("//go:nowritebarrierrec only allowed in runtime")
   922  			}
   923  			l.pragma |= Nowritebarrierrec | Nowritebarrier // implies Nowritebarrier
   924  		case "go:cgo_unsafe_args":
   925  			l.pragma |= CgoUnsafeArgs
   926  		}
   927  		return c
   928  	}
   929  
   930  	// check for //line directive
   931  	if c != 'l' {
   932  		return c
   933  	}
   934  	for i := 1; i < 5; i++ {
   935  		c = l.getr()
   936  		if c != rune("line "[i]) {
   937  			return c
   938  		}
   939  	}
   940  
   941  	cp := &lexbuf
   942  	cp.Reset()
   943  	linep := 0
   944  	for {
   945  		c = l.getr()
   946  		if c == EOF {
   947  			return c
   948  		}
   949  		if c == '\n' {
   950  			break
   951  		}
   952  		if c == ' ' {
   953  			continue
   954  		}
   955  		if c == ':' {
   956  			linep = cp.Len() + 1
   957  		}
   958  		cp.WriteByte(byte(c))
   959  	}
   960  	cp = nil
   961  
   962  	if linep == 0 {
   963  		return c
   964  	}
   965  	text := strings.TrimSuffix(lexbuf.String(), "\r")
   966  	n, err := strconv.Atoi(text[linep:])
   967  	if err != nil {
   968  		return c // todo: make this an error instead? it is almost certainly a bug.
   969  	}
   970  	if n > 1e8 {
   971  		Yyerror("line number out of range")
   972  		errorexit()
   973  	}
   974  	if n <= 0 {
   975  		return c
   976  	}
   977  
   978  	linehistupdate(text[:linep-1], n)
   979  	return c
   980  }
   981  
   982  func getimpsym(pp *string) string {
   983  	more(pp) // skip spaces
   984  	p := *pp
   985  	if p == "" || p[0] == '"' {
   986  		return ""
   987  	}
   988  	i := 0
   989  	for i < len(p) && !isSpace(rune(p[i])) && p[i] != '"' {
   990  		i++
   991  	}
   992  	sym := p[:i]
   993  	*pp = p[i:]
   994  	return sym
   995  }
   996  
   997  func getquoted(pp *string) (string, bool) {
   998  	more(pp) // skip spaces
   999  	p := *pp
  1000  	if p == "" || p[0] != '"' {
  1001  		return "", false
  1002  	}
  1003  	p = p[1:]
  1004  	i := strings.Index(p, `"`)
  1005  	if i < 0 {
  1006  		return "", false
  1007  	}
  1008  	*pp = p[i+1:]
  1009  	return p[:i], true
  1010  }
  1011  
  1012  // Copied nearly verbatim from the C compiler's #pragma parser.
  1013  // TODO: Rewrite more cleanly once the compiler is written in Go.
  1014  func pragcgo(text string) {
  1015  	var q string
  1016  
  1017  	if i := strings.Index(text, " "); i >= 0 {
  1018  		text, q = text[:i], text[i:]
  1019  	}
  1020  
  1021  	verb := text[3:] // skip "go:"
  1022  
  1023  	if verb == "cgo_dynamic_linker" || verb == "dynlinker" {
  1024  		p, ok := getquoted(&q)
  1025  		if !ok {
  1026  			Yyerror("usage: //go:cgo_dynamic_linker \"path\"")
  1027  			return
  1028  		}
  1029  		pragcgobuf += fmt.Sprintf("cgo_dynamic_linker %v\n", plan9quote(p))
  1030  		return
  1031  
  1032  	}
  1033  
  1034  	if verb == "dynexport" {
  1035  		verb = "cgo_export_dynamic"
  1036  	}
  1037  	if verb == "cgo_export_static" || verb == "cgo_export_dynamic" {
  1038  		local := getimpsym(&q)
  1039  		var remote string
  1040  		if local == "" {
  1041  			goto err2
  1042  		}
  1043  		if !more(&q) {
  1044  			pragcgobuf += fmt.Sprintf("%s %v\n", verb, plan9quote(local))
  1045  			return
  1046  		}
  1047  
  1048  		remote = getimpsym(&q)
  1049  		if remote == "" {
  1050  			goto err2
  1051  		}
  1052  		pragcgobuf += fmt.Sprintf("%s %v %v\n", verb, plan9quote(local), plan9quote(remote))
  1053  		return
  1054  
  1055  	err2:
  1056  		Yyerror("usage: //go:%s local [remote]", verb)
  1057  		return
  1058  	}
  1059  
  1060  	if verb == "cgo_import_dynamic" || verb == "dynimport" {
  1061  		var ok bool
  1062  		local := getimpsym(&q)
  1063  		var p string
  1064  		var remote string
  1065  		if local == "" {
  1066  			goto err3
  1067  		}
  1068  		if !more(&q) {
  1069  			pragcgobuf += fmt.Sprintf("cgo_import_dynamic %v\n", plan9quote(local))
  1070  			return
  1071  		}
  1072  
  1073  		remote = getimpsym(&q)
  1074  		if remote == "" {
  1075  			goto err3
  1076  		}
  1077  		if !more(&q) {
  1078  			pragcgobuf += fmt.Sprintf("cgo_import_dynamic %v %v\n", plan9quote(local), plan9quote(remote))
  1079  			return
  1080  		}
  1081  
  1082  		p, ok = getquoted(&q)
  1083  		if !ok {
  1084  			goto err3
  1085  		}
  1086  		pragcgobuf += fmt.Sprintf("cgo_import_dynamic %v %v %v\n", plan9quote(local), plan9quote(remote), plan9quote(p))
  1087  		return
  1088  
  1089  	err3:
  1090  		Yyerror("usage: //go:cgo_import_dynamic local [remote [\"library\"]]")
  1091  		return
  1092  	}
  1093  
  1094  	if verb == "cgo_import_static" {
  1095  		local := getimpsym(&q)
  1096  		if local == "" || more(&q) {
  1097  			Yyerror("usage: //go:cgo_import_static local")
  1098  			return
  1099  		}
  1100  		pragcgobuf += fmt.Sprintf("cgo_import_static %v\n", plan9quote(local))
  1101  		return
  1102  
  1103  	}
  1104  
  1105  	if verb == "cgo_ldflag" {
  1106  		p, ok := getquoted(&q)
  1107  		if !ok {
  1108  			Yyerror("usage: //go:cgo_ldflag \"arg\"")
  1109  			return
  1110  		}
  1111  		pragcgobuf += fmt.Sprintf("cgo_ldflag %v\n", plan9quote(p))
  1112  		return
  1113  
  1114  	}
  1115  }
  1116  
  1117  func (l *lexer) getr() rune {
  1118  redo:
  1119  	l.prevlineno = lexlineno
  1120  	r, w, err := l.bin.ReadRune()
  1121  	if err != nil {
  1122  		if err != io.EOF {
  1123  			Fatalf("io error: %v", err)
  1124  		}
  1125  		return -1
  1126  	}
  1127  	switch r {
  1128  	case 0:
  1129  		yyerrorl(lexlineno, "illegal NUL byte")
  1130  	case '\n':
  1131  		if importpkg == nil {
  1132  			lexlineno++
  1133  		}
  1134  	case utf8.RuneError:
  1135  		if w == 1 {
  1136  			yyerrorl(lexlineno, "illegal UTF-8 sequence")
  1137  		}
  1138  	case BOM:
  1139  		yyerrorl(lexlineno, "Unicode (UTF-8) BOM in middle of file")
  1140  		goto redo
  1141  	}
  1142  
  1143  	return r
  1144  }
  1145  
  1146  func (l *lexer) ungetr() {
  1147  	l.bin.UnreadRune()
  1148  	lexlineno = l.prevlineno
  1149  }
  1150  
  1151  // onechar lexes a single character within a rune or interpreted string literal,
  1152  // handling escape sequences as necessary.
  1153  func (l *lexer) onechar(quote rune) (r rune, b byte, ok bool) {
  1154  	c := l.getr()
  1155  	switch c {
  1156  	case EOF:
  1157  		Yyerror("eof in string")
  1158  		l.ungetr()
  1159  		return
  1160  
  1161  	case '\n':
  1162  		Yyerror("newline in string")
  1163  		l.ungetr()
  1164  		return
  1165  
  1166  	case '\\':
  1167  		break
  1168  
  1169  	case quote:
  1170  		return
  1171  
  1172  	default:
  1173  		return c, 0, true
  1174  	}
  1175  
  1176  	c = l.getr()
  1177  	switch c {
  1178  	case 'x':
  1179  		return 0, byte(l.hexchar(2)), true
  1180  
  1181  	case 'u':
  1182  		return l.unichar(4), 0, true
  1183  
  1184  	case 'U':
  1185  		return l.unichar(8), 0, true
  1186  
  1187  	case '0', '1', '2', '3', '4', '5', '6', '7':
  1188  		x := c - '0'
  1189  		for i := 2; i > 0; i-- {
  1190  			c = l.getr()
  1191  			if c >= '0' && c <= '7' {
  1192  				x = x*8 + c - '0'
  1193  				continue
  1194  			}
  1195  
  1196  			Yyerror("non-octal character in escape sequence: %c", c)
  1197  			l.ungetr()
  1198  		}
  1199  
  1200  		if x > 255 {
  1201  			Yyerror("octal escape value > 255: %d", x)
  1202  		}
  1203  
  1204  		return 0, byte(x), true
  1205  
  1206  	case 'a':
  1207  		c = '\a'
  1208  	case 'b':
  1209  		c = '\b'
  1210  	case 'f':
  1211  		c = '\f'
  1212  	case 'n':
  1213  		c = '\n'
  1214  	case 'r':
  1215  		c = '\r'
  1216  	case 't':
  1217  		c = '\t'
  1218  	case 'v':
  1219  		c = '\v'
  1220  	case '\\':
  1221  		c = '\\'
  1222  
  1223  	default:
  1224  		if c != quote {
  1225  			Yyerror("unknown escape sequence: %c", c)
  1226  		}
  1227  	}
  1228  
  1229  	return c, 0, true
  1230  }
  1231  
  1232  func (l *lexer) unichar(n int) rune {
  1233  	x := l.hexchar(n)
  1234  	if x > utf8.MaxRune || 0xd800 <= x && x < 0xe000 {
  1235  		Yyerror("invalid Unicode code point in escape sequence: %#x", x)
  1236  		x = utf8.RuneError
  1237  	}
  1238  	return rune(x)
  1239  }
  1240  
  1241  func (l *lexer) hexchar(n int) uint32 {
  1242  	var x uint32
  1243  
  1244  	for ; n > 0; n-- {
  1245  		var d uint32
  1246  		switch c := l.getr(); {
  1247  		case isDigit(c):
  1248  			d = uint32(c - '0')
  1249  		case 'a' <= c && c <= 'f':
  1250  			d = uint32(c - 'a' + 10)
  1251  		case 'A' <= c && c <= 'F':
  1252  			d = uint32(c - 'A' + 10)
  1253  		default:
  1254  			Yyerror("non-hex character in escape sequence: %c", c)
  1255  			l.ungetr()
  1256  			return x
  1257  		}
  1258  		x = x*16 + d
  1259  	}
  1260  
  1261  	return x
  1262  }