gitee.com/lonely0422/gometalinter.git@v3.0.1-0.20190307123442-32416ab75314+incompatible/_linters/src/github.com/BurntSushi/toml/lex.go (about)

     1  package toml
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  	"unicode"
     7  	"unicode/utf8"
     8  )
     9  
// itemType identifies the kind of an item produced by the lexer.
type itemType int

// The item kinds. Values are assigned sequentially by iota, starting with
// itemError at zero. itemError is what errorf emits, and itemEOF is emitted
// by lexTop and lexTopEnd when the input is exhausted.
const (
	itemError itemType = iota
	itemNIL            // used in the parser to indicate no type
	itemEOF
	itemText
	itemString
	itemRawString
	itemMultilineString
	itemRawMultilineString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArray // the start of an array
	itemArrayEnd
	itemTableStart
	itemTableEnd
	itemArrayTableStart
	itemArrayTableEnd
	itemKeyStart
	itemCommentStart
	itemInlineTableStart
	itemInlineTableEnd
)
    36  
// Runes that have a special meaning to the lexer.
//
// eof is the sentinel returned by next once the input is exhausted. Several
// names deliberately share a rune: tableStart, arrayTableStart and arrayStart
// are all '[', and their *End counterparts are all ']'. Likewise stringStart
// and stringEnd are both '"', and rawStringStart and rawStringEnd are both
// '\''. Which meaning applies depends on the state the lexer is in.
const (
	eof              = 0
	comma            = ','
	tableStart       = '['
	tableEnd         = ']'
	arrayTableStart  = '['
	arrayTableEnd    = ']'
	tableSep         = '.'
	keySep           = '='
	arrayStart       = '['
	arrayEnd         = ']'
	commentStart     = '#'
	stringStart      = '"'
	stringEnd        = '"'
	rawStringStart   = '\''
	rawStringEnd     = '\''
	inlineTableStart = '{'
	inlineTableEnd   = '}'
)
    56  
// stateFn is one state of the lexer's state machine. It consumes some input
// from lx (possibly emitting items) and returns the state to run next.
// States return nil after emitting itemEOF or an error.
type stateFn func(lx *lexer) stateFn
    58  
// lexer holds the scanning state: the input and the position within it, the
// state function to run next, the queue of emitted items, and the bookkeeping
// needed for backing up and for returning to an enclosing context.
type lexer struct {
	input string    // the text being lexed
	start int       // byte offset in input where the pending item begins
	pos   int       // byte offset in input of the next rune to read
	line  int       // current line number; set to 1 by lex and adjusted on '\n'
	state stateFn   // state function that nextItem runs next
	items chan item // buffered channel of emitted items, drained by nextItem

	// Allow for backing up up to three runes.
	// This is necessary because TOML contains 3-rune tokens (""" and ''').
	prevWidths [3]int
	nprev      int // how many of prevWidths are in use
	// If we emit an eof, we can still back up, but it is not OK to call
	// next again.
	atEOF bool

	// A stack of state functions used to maintain context.
	// The idea is to reuse parts of the state machine in various places.
	// For example, values can appear at the top level or within arbitrarily
	// nested arrays. The last state on the stack is used after a value has
	// been lexed. Similarly for comments.
	stack []stateFn
}
    82  
// item is a single token produced by the lexer.
type item struct {
	typ  itemType // kind of token
	val  string   // token text, or the error message for itemError
	line int      // value of the lexer's line counter when the item was emitted
}
    88  
    89  func (lx *lexer) nextItem() item {
    90  	for {
    91  		select {
    92  		case item := <-lx.items:
    93  			return item
    94  		default:
    95  			lx.state = lx.state(lx)
    96  		}
    97  	}
    98  }
    99  
   100  func lex(input string) *lexer {
   101  	lx := &lexer{
   102  		input: input,
   103  		state: lexTop,
   104  		line:  1,
   105  		items: make(chan item, 10),
   106  		stack: make([]stateFn, 0, 10),
   107  	}
   108  	return lx
   109  }
   110  
// push saves state on top of the lexer's state stack so that a later pop can
// return to it.
func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}
   114  
   115  func (lx *lexer) pop() stateFn {
   116  	if len(lx.stack) == 0 {
   117  		return lx.errorf("BUG in lexer: no states to pop")
   118  	}
   119  	last := lx.stack[len(lx.stack)-1]
   120  	lx.stack = lx.stack[0 : len(lx.stack)-1]
   121  	return last
   122  }
   123  
// current returns the pending input: the bytes between lx.start and lx.pos,
// i.e. everything consumed since the last emit or ignore.
func (lx *lexer) current() string {
	return lx.input[lx.start:lx.pos]
}
   127  
   128  func (lx *lexer) emit(typ itemType) {
   129  	lx.items <- item{typ, lx.current(), lx.line}
   130  	lx.start = lx.pos
   131  }
   132  
   133  func (lx *lexer) emitTrim(typ itemType) {
   134  	lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
   135  	lx.start = lx.pos
   136  }
   137  
   138  func (lx *lexer) next() (r rune) {
   139  	if lx.atEOF {
   140  		panic("next called after EOF")
   141  	}
   142  	if lx.pos >= len(lx.input) {
   143  		lx.atEOF = true
   144  		return eof
   145  	}
   146  
   147  	if lx.input[lx.pos] == '\n' {
   148  		lx.line++
   149  	}
   150  	lx.prevWidths[2] = lx.prevWidths[1]
   151  	lx.prevWidths[1] = lx.prevWidths[0]
   152  	if lx.nprev < 3 {
   153  		lx.nprev++
   154  	}
   155  	r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
   156  	lx.prevWidths[0] = w
   157  	lx.pos += w
   158  	return r
   159  }
   160  
// ignore discards the pending input by moving lx.start up to lx.pos, so the
// runes consumed so far will not be part of the next emitted item.
func (lx *lexer) ignore() {
	lx.start = lx.pos
}
   165  
   166  // backup steps back one rune. Can be called only twice between calls to next.
   167  func (lx *lexer) backup() {
   168  	if lx.atEOF {
   169  		lx.atEOF = false
   170  		return
   171  	}
   172  	if lx.nprev < 1 {
   173  		panic("backed up too far")
   174  	}
   175  	w := lx.prevWidths[0]
   176  	lx.prevWidths[0] = lx.prevWidths[1]
   177  	lx.prevWidths[1] = lx.prevWidths[2]
   178  	lx.nprev--
   179  	lx.pos -= w
   180  	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
   181  		lx.line--
   182  	}
   183  }
   184  
   185  // accept consumes the next rune if it's equal to `valid`.
   186  func (lx *lexer) accept(valid rune) bool {
   187  	if lx.next() == valid {
   188  		return true
   189  	}
   190  	lx.backup()
   191  	return false
   192  }
   193  
   194  // peek returns but does not consume the next rune in the input.
   195  func (lx *lexer) peek() rune {
   196  	r := lx.next()
   197  	lx.backup()
   198  	return r
   199  }
   200  
   201  // skip ignores all input that matches the given predicate.
   202  func (lx *lexer) skip(pred func(rune) bool) {
   203  	for {
   204  		r := lx.next()
   205  		if pred(r) {
   206  			continue
   207  		}
   208  		lx.backup()
   209  		lx.ignore()
   210  		return
   211  	}
   212  }
   213  
   214  // errorf stops all lexing by emitting an error and returning `nil`.
   215  // Note that any value that is a character is escaped if it's a special
   216  // character (newlines, tabs, etc.).
   217  func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
   218  	lx.items <- item{
   219  		itemError,
   220  		fmt.Sprintf(format, values...),
   221  		lx.line,
   222  	}
   223  	return nil
   224  }
   225  
   226  // lexTop consumes elements at the top level of TOML data.
   227  func lexTop(lx *lexer) stateFn {
   228  	r := lx.next()
   229  	if isWhitespace(r) || isNL(r) {
   230  		return lexSkip(lx, lexTop)
   231  	}
   232  	switch r {
   233  	case commentStart:
   234  		lx.push(lexTop)
   235  		return lexCommentStart
   236  	case tableStart:
   237  		return lexTableStart
   238  	case eof:
   239  		if lx.pos > lx.start {
   240  			return lx.errorf("unexpected EOF")
   241  		}
   242  		lx.emit(itemEOF)
   243  		return nil
   244  	}
   245  
   246  	// At this point, the only valid item can be a key, so we back up
   247  	// and let the key lexer do the rest.
   248  	lx.backup()
   249  	lx.push(lexTopEnd)
   250  	return lexKeyStart
   251  }
   252  
   253  // lexTopEnd is entered whenever a top-level item has been consumed. (A value
   254  // or a table.) It must see only whitespace, and will turn back to lexTop
   255  // upon a newline. If it sees EOF, it will quit the lexer successfully.
   256  func lexTopEnd(lx *lexer) stateFn {
   257  	r := lx.next()
   258  	switch {
   259  	case r == commentStart:
   260  		// a comment will read to a newline for us.
   261  		lx.push(lexTop)
   262  		return lexCommentStart
   263  	case isWhitespace(r):
   264  		return lexTopEnd
   265  	case isNL(r):
   266  		lx.ignore()
   267  		return lexTop
   268  	case r == eof:
   269  		lx.emit(itemEOF)
   270  		return nil
   271  	}
   272  	return lx.errorf("expected a top-level item to end with a newline, "+
   273  		"comment, or EOF, but got %q instead", r)
   274  }
   275  
   276  // lexTable lexes the beginning of a table. Namely, it makes sure that
   277  // it starts with a character other than '.' and ']'.
   278  // It assumes that '[' has already been consumed.
   279  // It also handles the case that this is an item in an array of tables.
   280  // e.g., '[[name]]'.
   281  func lexTableStart(lx *lexer) stateFn {
   282  	if lx.peek() == arrayTableStart {
   283  		lx.next()
   284  		lx.emit(itemArrayTableStart)
   285  		lx.push(lexArrayTableEnd)
   286  	} else {
   287  		lx.emit(itemTableStart)
   288  		lx.push(lexTableEnd)
   289  	}
   290  	return lexTableNameStart
   291  }
   292  
// lexTableEnd finishes a plain table header. It is pushed by lexTableStart
// and popped by lexTableNameEnd once the closing ']' has been consumed. It
// emits itemTableEnd and moves on to lexTopEnd.
func lexTableEnd(lx *lexer) stateFn {
	lx.emit(itemTableEnd)
	return lexTopEnd
}
   297  
   298  func lexArrayTableEnd(lx *lexer) stateFn {
   299  	if r := lx.next(); r != arrayTableEnd {
   300  		return lx.errorf("expected end of table array name delimiter %q, "+
   301  			"but got %q instead", arrayTableEnd, r)
   302  	}
   303  	lx.emit(itemArrayTableEnd)
   304  	return lexTopEnd
   305  }
   306  
   307  func lexTableNameStart(lx *lexer) stateFn {
   308  	lx.skip(isWhitespace)
   309  	switch r := lx.peek(); {
   310  	case r == tableEnd || r == eof:
   311  		return lx.errorf("unexpected end of table name " +
   312  			"(table names cannot be empty)")
   313  	case r == tableSep:
   314  		return lx.errorf("unexpected table separator " +
   315  			"(table names cannot be empty)")
   316  	case r == stringStart || r == rawStringStart:
   317  		lx.ignore()
   318  		lx.push(lexTableNameEnd)
   319  		return lexValue // reuse string lexing
   320  	default:
   321  		return lexBareTableName
   322  	}
   323  }
   324  
   325  // lexBareTableName lexes the name of a table. It assumes that at least one
   326  // valid character for the table has already been read.
   327  func lexBareTableName(lx *lexer) stateFn {
   328  	r := lx.next()
   329  	if isBareKeyChar(r) {
   330  		return lexBareTableName
   331  	}
   332  	lx.backup()
   333  	lx.emit(itemText)
   334  	return lexTableNameEnd
   335  }
   336  
   337  // lexTableNameEnd reads the end of a piece of a table name, optionally
   338  // consuming whitespace.
   339  func lexTableNameEnd(lx *lexer) stateFn {
   340  	lx.skip(isWhitespace)
   341  	switch r := lx.next(); {
   342  	case isWhitespace(r):
   343  		return lexTableNameEnd
   344  	case r == tableSep:
   345  		lx.ignore()
   346  		return lexTableNameStart
   347  	case r == tableEnd:
   348  		return lx.pop()
   349  	default:
   350  		return lx.errorf("expected '.' or ']' to end table name, "+
   351  			"but got %q instead", r)
   352  	}
   353  }
   354  
   355  // lexKeyStart consumes a key name up until the first non-whitespace character.
   356  // lexKeyStart will ignore whitespace.
   357  func lexKeyStart(lx *lexer) stateFn {
   358  	r := lx.peek()
   359  	switch {
   360  	case r == keySep:
   361  		return lx.errorf("unexpected key separator %q", keySep)
   362  	case isWhitespace(r) || isNL(r):
   363  		lx.next()
   364  		return lexSkip(lx, lexKeyStart)
   365  	case r == stringStart || r == rawStringStart:
   366  		lx.ignore()
   367  		lx.emit(itemKeyStart)
   368  		lx.push(lexKeyEnd)
   369  		return lexValue // reuse string lexing
   370  	default:
   371  		lx.ignore()
   372  		lx.emit(itemKeyStart)
   373  		return lexBareKey
   374  	}
   375  }
   376  
   377  // lexBareKey consumes the text of a bare key. Assumes that the first character
   378  // (which is not whitespace) has not yet been consumed.
   379  func lexBareKey(lx *lexer) stateFn {
   380  	switch r := lx.next(); {
   381  	case isBareKeyChar(r):
   382  		return lexBareKey
   383  	case isWhitespace(r):
   384  		lx.backup()
   385  		lx.emit(itemText)
   386  		return lexKeyEnd
   387  	case r == keySep:
   388  		lx.backup()
   389  		lx.emit(itemText)
   390  		return lexKeyEnd
   391  	default:
   392  		return lx.errorf("bare keys cannot contain %q", r)
   393  	}
   394  }
   395  
   396  // lexKeyEnd consumes the end of a key and trims whitespace (up to the key
   397  // separator).
   398  func lexKeyEnd(lx *lexer) stateFn {
   399  	switch r := lx.next(); {
   400  	case r == keySep:
   401  		return lexSkip(lx, lexValue)
   402  	case isWhitespace(r):
   403  		return lexSkip(lx, lexKeyEnd)
   404  	default:
   405  		return lx.errorf("expected key separator %q, but got %q instead",
   406  			keySep, r)
   407  	}
   408  }
   409  
   410  // lexValue starts the consumption of a value anywhere a value is expected.
   411  // lexValue will ignore whitespace.
   412  // After a value is lexed, the last state on the next is popped and returned.
   413  func lexValue(lx *lexer) stateFn {
   414  	// We allow whitespace to precede a value, but NOT newlines.
   415  	// In array syntax, the array states are responsible for ignoring newlines.
   416  	r := lx.next()
   417  	switch {
   418  	case isWhitespace(r):
   419  		return lexSkip(lx, lexValue)
   420  	case isDigit(r):
   421  		lx.backup() // avoid an extra state and use the same as above
   422  		return lexNumberOrDateStart
   423  	}
   424  	switch r {
   425  	case arrayStart:
   426  		lx.ignore()
   427  		lx.emit(itemArray)
   428  		return lexArrayValue
   429  	case inlineTableStart:
   430  		lx.ignore()
   431  		lx.emit(itemInlineTableStart)
   432  		return lexInlineTableValue
   433  	case stringStart:
   434  		if lx.accept(stringStart) {
   435  			if lx.accept(stringStart) {
   436  				lx.ignore() // Ignore """
   437  				return lexMultilineString
   438  			}
   439  			lx.backup()
   440  		}
   441  		lx.ignore() // ignore the '"'
   442  		return lexString
   443  	case rawStringStart:
   444  		if lx.accept(rawStringStart) {
   445  			if lx.accept(rawStringStart) {
   446  				lx.ignore() // Ignore """
   447  				return lexMultilineRawString
   448  			}
   449  			lx.backup()
   450  		}
   451  		lx.ignore() // ignore the "'"
   452  		return lexRawString
   453  	case '+', '-':
   454  		return lexNumberStart
   455  	case '.': // special error case, be kind to users
   456  		return lx.errorf("floats must start with a digit, not '.'")
   457  	}
   458  	if unicode.IsLetter(r) {
   459  		// Be permissive here; lexBool will give a nice error if the
   460  		// user wrote something like
   461  		//   x = foo
   462  		// (i.e. not 'true' or 'false' but is something else word-like.)
   463  		lx.backup()
   464  		return lexBool
   465  	}
   466  	return lx.errorf("expected value but found %q instead", r)
   467  }
   468  
   469  // lexArrayValue consumes one value in an array. It assumes that '[' or ','
   470  // have already been consumed. All whitespace and newlines are ignored.
   471  func lexArrayValue(lx *lexer) stateFn {
   472  	r := lx.next()
   473  	switch {
   474  	case isWhitespace(r) || isNL(r):
   475  		return lexSkip(lx, lexArrayValue)
   476  	case r == commentStart:
   477  		lx.push(lexArrayValue)
   478  		return lexCommentStart
   479  	case r == comma:
   480  		return lx.errorf("unexpected comma")
   481  	case r == arrayEnd:
   482  		// NOTE(caleb): The spec isn't clear about whether you can have
   483  		// a trailing comma or not, so we'll allow it.
   484  		return lexArrayEnd
   485  	}
   486  
   487  	lx.backup()
   488  	lx.push(lexArrayValueEnd)
   489  	return lexValue
   490  }
   491  
   492  // lexArrayValueEnd consumes everything between the end of an array value and
   493  // the next value (or the end of the array): it ignores whitespace and newlines
   494  // and expects either a ',' or a ']'.
   495  func lexArrayValueEnd(lx *lexer) stateFn {
   496  	r := lx.next()
   497  	switch {
   498  	case isWhitespace(r) || isNL(r):
   499  		return lexSkip(lx, lexArrayValueEnd)
   500  	case r == commentStart:
   501  		lx.push(lexArrayValueEnd)
   502  		return lexCommentStart
   503  	case r == comma:
   504  		lx.ignore()
   505  		return lexArrayValue // move on to the next value
   506  	case r == arrayEnd:
   507  		return lexArrayEnd
   508  	}
   509  	return lx.errorf(
   510  		"expected a comma or array terminator %q, but got %q instead",
   511  		arrayEnd, r,
   512  	)
   513  }
   514  
// lexArrayEnd finishes the lexing of an array.
// It assumes that a ']' has just been consumed. The pending input is
// discarded first, so itemArrayEnd is emitted with an empty value; lexing
// then resumes with the state popped from the stack.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}
   522  
   523  // lexInlineTableValue consumes one key/value pair in an inline table.
   524  // It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
   525  func lexInlineTableValue(lx *lexer) stateFn {
   526  	r := lx.next()
   527  	switch {
   528  	case isWhitespace(r):
   529  		return lexSkip(lx, lexInlineTableValue)
   530  	case isNL(r):
   531  		return lx.errorf("newlines not allowed within inline tables")
   532  	case r == commentStart:
   533  		lx.push(lexInlineTableValue)
   534  		return lexCommentStart
   535  	case r == comma:
   536  		return lx.errorf("unexpected comma")
   537  	case r == inlineTableEnd:
   538  		return lexInlineTableEnd
   539  	}
   540  	lx.backup()
   541  	lx.push(lexInlineTableValueEnd)
   542  	return lexKeyStart
   543  }
   544  
   545  // lexInlineTableValueEnd consumes everything between the end of an inline table
   546  // key/value pair and the next pair (or the end of the table):
   547  // it ignores whitespace and expects either a ',' or a '}'.
   548  func lexInlineTableValueEnd(lx *lexer) stateFn {
   549  	r := lx.next()
   550  	switch {
   551  	case isWhitespace(r):
   552  		return lexSkip(lx, lexInlineTableValueEnd)
   553  	case isNL(r):
   554  		return lx.errorf("newlines not allowed within inline tables")
   555  	case r == commentStart:
   556  		lx.push(lexInlineTableValueEnd)
   557  		return lexCommentStart
   558  	case r == comma:
   559  		lx.ignore()
   560  		return lexInlineTableValue
   561  	case r == inlineTableEnd:
   562  		return lexInlineTableEnd
   563  	}
   564  	return lx.errorf("expected a comma or an inline table terminator %q, "+
   565  		"but got %q instead", inlineTableEnd, r)
   566  }
   567  
// lexInlineTableEnd finishes the lexing of an inline table.
// It assumes that a '}' has just been consumed. The pending input is
// discarded first, so itemInlineTableEnd is emitted with an empty value;
// lexing then resumes with the state popped from the stack.
func lexInlineTableEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemInlineTableEnd)
	return lx.pop()
}
   575  
   576  // lexString consumes the inner contents of a string. It assumes that the
   577  // beginning '"' has already been consumed and ignored.
   578  func lexString(lx *lexer) stateFn {
   579  	r := lx.next()
   580  	switch {
   581  	case r == eof:
   582  		return lx.errorf("unexpected EOF")
   583  	case isNL(r):
   584  		return lx.errorf("strings cannot contain newlines")
   585  	case r == '\\':
   586  		lx.push(lexString)
   587  		return lexStringEscape
   588  	case r == stringEnd:
   589  		lx.backup()
   590  		lx.emit(itemString)
   591  		lx.next()
   592  		lx.ignore()
   593  		return lx.pop()
   594  	}
   595  	return lexString
   596  }
   597  
   598  // lexMultilineString consumes the inner contents of a string. It assumes that
   599  // the beginning '"""' has already been consumed and ignored.
   600  func lexMultilineString(lx *lexer) stateFn {
   601  	switch lx.next() {
   602  	case eof:
   603  		return lx.errorf("unexpected EOF")
   604  	case '\\':
   605  		return lexMultilineStringEscape
   606  	case stringEnd:
   607  		if lx.accept(stringEnd) {
   608  			if lx.accept(stringEnd) {
   609  				lx.backup()
   610  				lx.backup()
   611  				lx.backup()
   612  				lx.emit(itemMultilineString)
   613  				lx.next()
   614  				lx.next()
   615  				lx.next()
   616  				lx.ignore()
   617  				return lx.pop()
   618  			}
   619  			lx.backup()
   620  		}
   621  	}
   622  	return lexMultilineString
   623  }
   624  
   625  // lexRawString consumes a raw string. Nothing can be escaped in such a string.
   626  // It assumes that the beginning "'" has already been consumed and ignored.
   627  func lexRawString(lx *lexer) stateFn {
   628  	r := lx.next()
   629  	switch {
   630  	case r == eof:
   631  		return lx.errorf("unexpected EOF")
   632  	case isNL(r):
   633  		return lx.errorf("strings cannot contain newlines")
   634  	case r == rawStringEnd:
   635  		lx.backup()
   636  		lx.emit(itemRawString)
   637  		lx.next()
   638  		lx.ignore()
   639  		return lx.pop()
   640  	}
   641  	return lexRawString
   642  }
   643  
   644  // lexMultilineRawString consumes a raw string. Nothing can be escaped in such
   645  // a string. It assumes that the beginning "'''" has already been consumed and
   646  // ignored.
   647  func lexMultilineRawString(lx *lexer) stateFn {
   648  	switch lx.next() {
   649  	case eof:
   650  		return lx.errorf("unexpected EOF")
   651  	case rawStringEnd:
   652  		if lx.accept(rawStringEnd) {
   653  			if lx.accept(rawStringEnd) {
   654  				lx.backup()
   655  				lx.backup()
   656  				lx.backup()
   657  				lx.emit(itemRawMultilineString)
   658  				lx.next()
   659  				lx.next()
   660  				lx.next()
   661  				lx.ignore()
   662  				return lx.pop()
   663  			}
   664  			lx.backup()
   665  		}
   666  	}
   667  	return lexMultilineRawString
   668  }
   669  
   670  // lexMultilineStringEscape consumes an escaped character. It assumes that the
   671  // preceding '\\' has already been consumed.
   672  func lexMultilineStringEscape(lx *lexer) stateFn {
   673  	// Handle the special case first:
   674  	if isNL(lx.next()) {
   675  		return lexMultilineString
   676  	}
   677  	lx.backup()
   678  	lx.push(lexMultilineString)
   679  	return lexStringEscape(lx)
   680  }
   681  
   682  func lexStringEscape(lx *lexer) stateFn {
   683  	r := lx.next()
   684  	switch r {
   685  	case 'b':
   686  		fallthrough
   687  	case 't':
   688  		fallthrough
   689  	case 'n':
   690  		fallthrough
   691  	case 'f':
   692  		fallthrough
   693  	case 'r':
   694  		fallthrough
   695  	case '"':
   696  		fallthrough
   697  	case '\\':
   698  		return lx.pop()
   699  	case 'u':
   700  		return lexShortUnicodeEscape
   701  	case 'U':
   702  		return lexLongUnicodeEscape
   703  	}
   704  	return lx.errorf("invalid escape character %q; only the following "+
   705  		"escape characters are allowed: "+
   706  		`\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
   707  }
   708  
   709  func lexShortUnicodeEscape(lx *lexer) stateFn {
   710  	var r rune
   711  	for i := 0; i < 4; i++ {
   712  		r = lx.next()
   713  		if !isHexadecimal(r) {
   714  			return lx.errorf(`expected four hexadecimal digits after '\u', `+
   715  				"but got %q instead", lx.current())
   716  		}
   717  	}
   718  	return lx.pop()
   719  }
   720  
   721  func lexLongUnicodeEscape(lx *lexer) stateFn {
   722  	var r rune
   723  	for i := 0; i < 8; i++ {
   724  		r = lx.next()
   725  		if !isHexadecimal(r) {
   726  			return lx.errorf(`expected eight hexadecimal digits after '\U', `+
   727  				"but got %q instead", lx.current())
   728  		}
   729  	}
   730  	return lx.pop()
   731  }
   732  
   733  // lexNumberOrDateStart consumes either an integer, a float, or datetime.
   734  func lexNumberOrDateStart(lx *lexer) stateFn {
   735  	r := lx.next()
   736  	if isDigit(r) {
   737  		return lexNumberOrDate
   738  	}
   739  	switch r {
   740  	case '_':
   741  		return lexNumber
   742  	case 'e', 'E':
   743  		return lexFloat
   744  	case '.':
   745  		return lx.errorf("floats must start with a digit, not '.'")
   746  	}
   747  	return lx.errorf("expected a digit but got %q", r)
   748  }
   749  
   750  // lexNumberOrDate consumes either an integer, float or datetime.
   751  func lexNumberOrDate(lx *lexer) stateFn {
   752  	r := lx.next()
   753  	if isDigit(r) {
   754  		return lexNumberOrDate
   755  	}
   756  	switch r {
   757  	case '-':
   758  		return lexDatetime
   759  	case '_':
   760  		return lexNumber
   761  	case '.', 'e', 'E':
   762  		return lexFloat
   763  	}
   764  
   765  	lx.backup()
   766  	lx.emit(itemInteger)
   767  	return lx.pop()
   768  }
   769  
   770  // lexDatetime consumes a Datetime, to a first approximation.
   771  // The parser validates that it matches one of the accepted formats.
   772  func lexDatetime(lx *lexer) stateFn {
   773  	r := lx.next()
   774  	if isDigit(r) {
   775  		return lexDatetime
   776  	}
   777  	switch r {
   778  	case '-', 'T', ':', '.', 'Z', '+':
   779  		return lexDatetime
   780  	}
   781  
   782  	lx.backup()
   783  	lx.emit(itemDatetime)
   784  	return lx.pop()
   785  }
   786  
   787  // lexNumberStart consumes either an integer or a float. It assumes that a sign
   788  // has already been read, but that *no* digits have been consumed.
   789  // lexNumberStart will move to the appropriate integer or float states.
   790  func lexNumberStart(lx *lexer) stateFn {
   791  	// We MUST see a digit. Even floats have to start with a digit.
   792  	r := lx.next()
   793  	if !isDigit(r) {
   794  		if r == '.' {
   795  			return lx.errorf("floats must start with a digit, not '.'")
   796  		}
   797  		return lx.errorf("expected a digit but got %q", r)
   798  	}
   799  	return lexNumber
   800  }
   801  
   802  // lexNumber consumes an integer or a float after seeing the first digit.
   803  func lexNumber(lx *lexer) stateFn {
   804  	r := lx.next()
   805  	if isDigit(r) {
   806  		return lexNumber
   807  	}
   808  	switch r {
   809  	case '_':
   810  		return lexNumber
   811  	case '.', 'e', 'E':
   812  		return lexFloat
   813  	}
   814  
   815  	lx.backup()
   816  	lx.emit(itemInteger)
   817  	return lx.pop()
   818  }
   819  
   820  // lexFloat consumes the elements of a float. It allows any sequence of
   821  // float-like characters, so floats emitted by the lexer are only a first
   822  // approximation and must be validated by the parser.
   823  func lexFloat(lx *lexer) stateFn {
   824  	r := lx.next()
   825  	if isDigit(r) {
   826  		return lexFloat
   827  	}
   828  	switch r {
   829  	case '_', '.', '-', '+', 'e', 'E':
   830  		return lexFloat
   831  	}
   832  
   833  	lx.backup()
   834  	lx.emit(itemFloat)
   835  	return lx.pop()
   836  }
   837  
   838  // lexBool consumes a bool string: 'true' or 'false.
   839  func lexBool(lx *lexer) stateFn {
   840  	var rs []rune
   841  	for {
   842  		r := lx.next()
   843  		if !unicode.IsLetter(r) {
   844  			lx.backup()
   845  			break
   846  		}
   847  		rs = append(rs, r)
   848  	}
   849  	s := string(rs)
   850  	switch s {
   851  	case "true", "false":
   852  		lx.emit(itemBool)
   853  		return lx.pop()
   854  	}
   855  	return lx.errorf("expected value but found %q instead", s)
   856  }
   857  
// lexCommentStart begins the lexing of a comment. It discards the pending
// input, emits itemCommentStart (with an empty value as a result) and passes
// control to lexComment without consuming any characters itself.
func lexCommentStart(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemCommentStart)
	return lexComment
}
   865  
   866  // lexComment lexes an entire comment. It assumes that '#' has been consumed.
   867  // It will consume *up to* the first newline character, and pass control
   868  // back to the last state on the stack.
   869  func lexComment(lx *lexer) stateFn {
   870  	r := lx.peek()
   871  	if isNL(r) || r == eof {
   872  		lx.emit(itemText)
   873  		return lx.pop()
   874  	}
   875  	lx.next()
   876  	return lexComment
   877  }
   878  
   879  // lexSkip ignores all slurped input and moves on to the next state.
   880  func lexSkip(lx *lexer, nextState stateFn) stateFn {
   881  	return func(lx *lexer) stateFn {
   882  		lx.ignore()
   883  		return nextState
   884  	}
   885  }
   886  
   887  // isWhitespace returns true if `r` is a whitespace character according
   888  // to the spec.
   889  func isWhitespace(r rune) bool {
   890  	return r == '\t' || r == ' '
   891  }
   892  
   893  func isNL(r rune) bool {
   894  	return r == '\n' || r == '\r'
   895  }
   896  
   897  func isDigit(r rune) bool {
   898  	return r >= '0' && r <= '9'
   899  }
   900  
   901  func isHexadecimal(r rune) bool {
   902  	return (r >= '0' && r <= '9') ||
   903  		(r >= 'a' && r <= 'f') ||
   904  		(r >= 'A' && r <= 'F')
   905  }
   906  
   907  func isBareKeyChar(r rune) bool {
   908  	return (r >= 'A' && r <= 'Z') ||
   909  		(r >= 'a' && r <= 'z') ||
   910  		(r >= '0' && r <= '9') ||
   911  		r == '_' ||
   912  		r == '-'
   913  }
   914  
   915  func (itype itemType) String() string {
   916  	switch itype {
   917  	case itemError:
   918  		return "Error"
   919  	case itemNIL:
   920  		return "NIL"
   921  	case itemEOF:
   922  		return "EOF"
   923  	case itemText:
   924  		return "Text"
   925  	case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
   926  		return "String"
   927  	case itemBool:
   928  		return "Bool"
   929  	case itemInteger:
   930  		return "Integer"
   931  	case itemFloat:
   932  		return "Float"
   933  	case itemDatetime:
   934  		return "DateTime"
   935  	case itemTableStart:
   936  		return "TableStart"
   937  	case itemTableEnd:
   938  		return "TableEnd"
   939  	case itemKeyStart:
   940  		return "KeyStart"
   941  	case itemArray:
   942  		return "Array"
   943  	case itemArrayEnd:
   944  		return "ArrayEnd"
   945  	case itemCommentStart:
   946  		return "CommentStart"
   947  	}
   948  	panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
   949  }
   950  
   951  func (item item) String() string {
   952  	return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
   953  }