get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/conf/lex.go (about)

     1  // Copyright 2013-2018 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  // Customized heavily from
    15  // https://github.com/BurntSushi/toml/blob/master/lex.go, which is based on
    16  // Rob Pike's talk: http://cuddle.googlecode.com/hg/talk/lex.html
    17  
    18  // The format supported is less restrictive than today's formats.
    19  // Supports mixed Arrays [], nested Maps {}, multiple comment types (# and //)
    20  // Also supports key value assignments using '=' or ':' or whiteSpace()
    21  //   e.g. foo = 2, foo : 2, foo 2
    22  // maps can be assigned with no key separator as well
    23  // semicolons as value terminators in key/value assignments are optional
    24  //
    25  // see lex_test.go for more examples.
    26  
    27  package conf
    28  
    29  import (
    30  	"encoding/hex"
    31  	"fmt"
    32  	"strings"
    33  	"unicode"
    34  	"unicode/utf8"
    35  )
    36  
// itemType identifies the kind of item (token) the lexer hands to the parser.
type itemType int

// Item kinds, numbered sequentially from itemError (0) via iota.
//
// Within this part of the lexer: itemError is produced by errorf, itemEOF at
// the end of the input, itemKey by the key states, itemString / itemBool /
// itemVariable by the string states, itemInteger by the number state,
// itemInclude by the include states, and the Array/Map Start/End kinds when
// the corresponding bracket is consumed.
const (
	itemError itemType = iota
	itemNIL            // used in the parser to indicate no type
	itemEOF
	itemKey
	itemText
	itemString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArrayStart
	itemArrayEnd
	itemMapStart
	itemMapEnd
	itemCommentStart
	itemVariable
	itemInclude
)
    58  
// Runes (and one string) that carry syntactic meaning for the lexer.
//
// eof is the sentinel returned by next once the input is exhausted. Several
// names deliberately share a character: arrayValTerm, mapValTerm and
// topOptValTerm are all ',', while topOptStart/topOptTerm are the same braces
// as mapStart/mapEnd. commentSlashStart is a single '/'; the states treat it
// as a comment only when it is immediately followed by a second '/'.
const (
	eof               = 0
	mapStart          = '{'
	mapEnd            = '}'
	keySepEqual       = '='
	keySepColon       = ':'
	arrayStart        = '['
	arrayEnd          = ']'
	arrayValTerm      = ','
	mapValTerm        = ','
	commentHashStart  = '#'
	commentSlashStart = '/'
	dqStringStart     = '"'
	dqStringEnd       = '"'
	sqStringStart     = '\''
	sqStringEnd       = '\''
	optValTerm        = ';'
	topOptStart       = '{'
	topOptValTerm     = ','
	topOptTerm        = '}'
	blockStart        = '('
	blockEnd          = ')'
	mapEndString      = string(mapEnd)
)
    83  
// stateFn is one state of the lexer's state machine: it consumes some input
// from lx and returns the state to run next. errorf and the EOF paths return
// nil after emitting their final item.
type stateFn func(lx *lexer) stateFn
    85  
// lexer holds the scanning state shared by all state functions.
type lexer struct {
	input string    // full text being lexed
	start int       // byte offset where the pending (not yet emitted) item begins
	pos   int       // byte offset of the next rune to be read
	width int       // byte width of the rune last returned by next; 0 at end of input
	line  int       // current line number; lex initializes it to 1
	state stateFn   // state function that nextItem runs next
	items chan item // buffered queue of emitted items drained by nextItem

	// A stack of state functions used to maintain context.
	// The idea is to reuse parts of the state machine in various places.
	// For example, values can appear at the top level or within arbitrarily
	// nested arrays. The last state on the stack is used after a value has
	// been lexed. Similarly for comments.
	stack []stateFn

	// Used for processing escapable substrings in double-quoted and raw strings
	stringParts   []string
	stringStateFn stateFn

	// lstart is the start position of the current line. next sets it to the
	// byte offset of the most recent '\n' it consumed.
	lstart int

	// ilstart is the start position of the line from the current item. It is
	// refreshed from lstart whenever an item is emitted or input is ignored.
	ilstart int
}
   112  
// item is a single lexed token as delivered to the consumer of nextItem.
type item struct {
	typ  itemType // kind of item
	val  string   // item text; for itemError, the formatted error message
	line int      // lexer line number at the time the item was emitted
	pos  int      // byte offset within the line (error position for itemError)
}
   119  
   120  func (lx *lexer) nextItem() item {
   121  	for {
   122  		select {
   123  		case item := <-lx.items:
   124  			return item
   125  		default:
   126  			lx.state = lx.state(lx)
   127  		}
   128  	}
   129  }
   130  
   131  func lex(input string) *lexer {
   132  	lx := &lexer{
   133  		input:       input,
   134  		state:       lexTop,
   135  		line:        1,
   136  		items:       make(chan item, 10),
   137  		stack:       make([]stateFn, 0, 10),
   138  		stringParts: []string{},
   139  	}
   140  	return lx
   141  }
   142  
// push saves state on top of the state stack so that a later pop can resume
// it, e.g. once a nested value or a comment has been lexed.
func (lx *lexer) push(state stateFn) {
	lx.stack = append(lx.stack, state)
}
   146  
   147  func (lx *lexer) pop() stateFn {
   148  	if len(lx.stack) == 0 {
   149  		return lx.errorf("BUG in lexer: no states to pop.")
   150  	}
   151  	li := len(lx.stack) - 1
   152  	last := lx.stack[li]
   153  	lx.stack = lx.stack[0:li]
   154  	return last
   155  }
   156  
   157  func (lx *lexer) emit(typ itemType) {
   158  	val := strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos]
   159  
   160  	// Position of item in line where it started.
   161  	pos := lx.pos - lx.ilstart - len(val)
   162  	lx.items <- item{typ, val, lx.line, pos}
   163  	lx.start = lx.pos
   164  	lx.ilstart = lx.lstart
   165  }
   166  
   167  func (lx *lexer) emitString() {
   168  	var finalString string
   169  	if len(lx.stringParts) > 0 {
   170  		finalString = strings.Join(lx.stringParts, "") + lx.input[lx.start:lx.pos]
   171  		lx.stringParts = []string{}
   172  	} else {
   173  		finalString = lx.input[lx.start:lx.pos]
   174  	}
   175  	// Position of string in line where it started.
   176  	pos := lx.pos - lx.ilstart - len(finalString)
   177  	lx.items <- item{itemString, finalString, lx.line, pos}
   178  	lx.start = lx.pos
   179  	lx.ilstart = lx.lstart
   180  }
   181  
   182  func (lx *lexer) addCurrentStringPart(offset int) {
   183  	lx.stringParts = append(lx.stringParts, lx.input[lx.start:lx.pos-offset])
   184  	lx.start = lx.pos
   185  }
   186  
   187  func (lx *lexer) addStringPart(s string) stateFn {
   188  	lx.stringParts = append(lx.stringParts, s)
   189  	lx.start = lx.pos
   190  	return lx.stringStateFn
   191  }
   192  
   193  func (lx *lexer) hasEscapedParts() bool {
   194  	return len(lx.stringParts) > 0
   195  }
   196  
   197  func (lx *lexer) next() (r rune) {
   198  	if lx.pos >= len(lx.input) {
   199  		lx.width = 0
   200  		return eof
   201  	}
   202  
   203  	if lx.input[lx.pos] == '\n' {
   204  		lx.line++
   205  
   206  		// Mark start position of current line.
   207  		lx.lstart = lx.pos
   208  	}
   209  	r, lx.width = utf8.DecodeRuneInString(lx.input[lx.pos:])
   210  	lx.pos += lx.width
   211  
   212  	return r
   213  }
   214  
   215  // ignore skips over the pending input before this point.
   216  func (lx *lexer) ignore() {
   217  	lx.start = lx.pos
   218  	lx.ilstart = lx.lstart
   219  }
   220  
   221  // backup steps back one rune. Can be called only once per call of next.
   222  func (lx *lexer) backup() {
   223  	lx.pos -= lx.width
   224  	if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
   225  		lx.line--
   226  	}
   227  }
   228  
   229  // peek returns but does not consume the next rune in the input.
   230  func (lx *lexer) peek() rune {
   231  	r := lx.next()
   232  	lx.backup()
   233  	return r
   234  }
   235  
   236  // errorf stops all lexing by emitting an error and returning `nil`.
   237  // Note that any value that is a character is escaped if it's a special
   238  // character (new lines, tabs, etc.).
   239  func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
   240  	for i, value := range values {
   241  		if v, ok := value.(rune); ok {
   242  			values[i] = escapeSpecial(v)
   243  		}
   244  	}
   245  
   246  	// Position of error in current line.
   247  	pos := lx.pos - lx.lstart
   248  	lx.items <- item{
   249  		itemError,
   250  		fmt.Sprintf(format, values...),
   251  		lx.line,
   252  		pos,
   253  	}
   254  	return nil
   255  }
   256  
// lexTop consumes elements at the top level of data structure.
//
// Whitespace and an opening '{' (topOptStart) are handed to lexSkip, which
// resumes in lexTop. A '#' or "//" starts a comment, after which lexing
// returns to lexTop. At the end of the input itemEOF is emitted and lexing
// stops, unless unconsumed input is still pending (lx.pos > lx.start), which
// is reported as "Unexpected EOF.". Anything else is taken as the start of a
// key: lexTopValueEnd is pushed so that it runs once the key's value has been
// lexed, and control passes to lexKeyStart.
func lexTop(lx *lexer) stateFn {
	r := lx.next()
	if unicode.IsSpace(r) {
		return lexSkip(lx, lexTop)
	}

	switch r {
	case topOptStart:
		return lexSkip(lx, lexTop)
	case commentHashStart:
		lx.push(lexTop)
		return lexCommentStart
	case commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexTop)
			return lexCommentStart
		}
		lx.backup()
		// A single '/' is not a comment. Falling through shares the EOF
		// branch below; since the '/' is still pending (pos > start) that
		// branch reports "Unexpected EOF." instead of starting a key.
		fallthrough
	case eof:
		if lx.pos > lx.start {
			return lx.errorf("Unexpected EOF.")
		}
		lx.emit(itemEOF)
		return nil
	}

	// At this point, the only valid item can be a key, so we back up
	// and let the key lexer do the rest.
	lx.backup()
	lx.push(lexTopValueEnd)
	return lexKeyStart
}
   292  
// lexTopValueEnd is entered whenever a top-level value has been consumed.
// It must see only whitespace, and will turn back to lexTop upon a new line.
// If it sees EOF, it hands over to lexTop, which emits itemEOF.
//
// Besides a new line or EOF, a ';' (optValTerm), ',' (topOptValTerm) or '}'
// (topOptTerm) also ends the value. A '#' or "//" comment is lexed with
// lexTop as the state to resume afterwards. Any other rune is an error.
func lexTopValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case r == commentHashStart:
		// a comment will read to a new line for us.
		lx.push(lexTop)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexTop)
			return lexCommentStart
		}
		lx.backup()
		// A single '/' falls through to the whitespace branch: it stays
		// consumed and the state simply keeps looking for the value's end.
		fallthrough
	case isWhitespace(r):
		return lexTopValueEnd
	case isNL(r) || r == eof || r == optValTerm || r == topOptValTerm || r == topOptTerm:
		lx.ignore()
		return lexTop
	}
	return lx.errorf("Expected a top-level value to end with a new line, "+
		"comment or EOF, but got '%v' instead.", r)
}
   320  
   321  // lexKeyStart consumes a key name up until the first non-whitespace character.
   322  // lexKeyStart will ignore whitespace. It will also eat enclosing quotes.
   323  func lexKeyStart(lx *lexer) stateFn {
   324  	r := lx.peek()
   325  	switch {
   326  	case isKeySeparator(r):
   327  		return lx.errorf("Unexpected key separator '%v'", r)
   328  	case unicode.IsSpace(r):
   329  		lx.next()
   330  		return lexSkip(lx, lexKeyStart)
   331  	case r == dqStringStart:
   332  		lx.next()
   333  		return lexSkip(lx, lexDubQuotedKey)
   334  	case r == sqStringStart:
   335  		lx.next()
   336  		return lexSkip(lx, lexQuotedKey)
   337  	}
   338  	lx.ignore()
   339  	lx.next()
   340  	return lexKey
   341  }
   342  
   343  // lexDubQuotedKey consumes the text of a key between quotes.
   344  func lexDubQuotedKey(lx *lexer) stateFn {
   345  	r := lx.peek()
   346  	if r == dqStringEnd {
   347  		lx.emit(itemKey)
   348  		lx.next()
   349  		return lexSkip(lx, lexKeyEnd)
   350  	} else if r == eof {
   351  		if lx.pos > lx.start {
   352  			return lx.errorf("Unexpected EOF.")
   353  		}
   354  		lx.emit(itemEOF)
   355  		return nil
   356  	}
   357  	lx.next()
   358  	return lexDubQuotedKey
   359  }
   360  
   361  // lexQuotedKey consumes the text of a key between quotes.
   362  func lexQuotedKey(lx *lexer) stateFn {
   363  	r := lx.peek()
   364  	if r == sqStringEnd {
   365  		lx.emit(itemKey)
   366  		lx.next()
   367  		return lexSkip(lx, lexKeyEnd)
   368  	} else if r == eof {
   369  		if lx.pos > lx.start {
   370  			return lx.errorf("Unexpected EOF.")
   371  		}
   372  		lx.emit(itemEOF)
   373  		return nil
   374  	}
   375  	lx.next()
   376  	return lexQuotedKey
   377  }
   378  
   379  // keyCheckKeyword will check for reserved keywords as the key value when the key is
   380  // separated with a space.
   381  func (lx *lexer) keyCheckKeyword(fallThrough, push stateFn) stateFn {
   382  	key := strings.ToLower(lx.input[lx.start:lx.pos])
   383  	switch key {
   384  	case "include":
   385  		lx.ignore()
   386  		if push != nil {
   387  			lx.push(push)
   388  		}
   389  		return lexIncludeStart
   390  	}
   391  	lx.emit(itemKey)
   392  	return fallThrough
   393  }
   394  
   395  // lexIncludeStart will consume the whitespace til the start of the value.
   396  func lexIncludeStart(lx *lexer) stateFn {
   397  	r := lx.next()
   398  	if isWhitespace(r) {
   399  		return lexSkip(lx, lexIncludeStart)
   400  	}
   401  	lx.backup()
   402  	return lexInclude
   403  }
   404  
   405  // lexIncludeQuotedString consumes the inner contents of a string. It assumes that the
   406  // beginning '"' has already been consumed and ignored. It will not interpret any
   407  // internal contents.
   408  func lexIncludeQuotedString(lx *lexer) stateFn {
   409  	r := lx.next()
   410  	switch {
   411  	case r == sqStringEnd:
   412  		lx.backup()
   413  		lx.emit(itemInclude)
   414  		lx.next()
   415  		lx.ignore()
   416  		return lx.pop()
   417  	case r == eof:
   418  		return lx.errorf("Unexpected EOF in quoted include")
   419  	}
   420  	return lexIncludeQuotedString
   421  }
   422  
   423  // lexIncludeDubQuotedString consumes the inner contents of a string. It assumes that the
   424  // beginning '"' has already been consumed and ignored. It will not interpret any
   425  // internal contents.
   426  func lexIncludeDubQuotedString(lx *lexer) stateFn {
   427  	r := lx.next()
   428  	switch {
   429  	case r == dqStringEnd:
   430  		lx.backup()
   431  		lx.emit(itemInclude)
   432  		lx.next()
   433  		lx.ignore()
   434  		return lx.pop()
   435  	case r == eof:
   436  		return lx.errorf("Unexpected EOF in double quoted include")
   437  	}
   438  	return lexIncludeDubQuotedString
   439  }
   440  
   441  // lexIncludeString consumes the inner contents of a raw string.
   442  func lexIncludeString(lx *lexer) stateFn {
   443  	r := lx.next()
   444  	switch {
   445  	case isNL(r) || r == eof || r == optValTerm || r == mapEnd || isWhitespace(r):
   446  		lx.backup()
   447  		lx.emit(itemInclude)
   448  		return lx.pop()
   449  	case r == sqStringEnd:
   450  		lx.backup()
   451  		lx.emit(itemInclude)
   452  		lx.next()
   453  		lx.ignore()
   454  		return lx.pop()
   455  	}
   456  	return lexIncludeString
   457  }
   458  
   459  // lexInclude will consume the include value.
   460  func lexInclude(lx *lexer) stateFn {
   461  	r := lx.next()
   462  	switch {
   463  	case r == sqStringStart:
   464  		lx.ignore() // ignore the " or '
   465  		return lexIncludeQuotedString
   466  	case r == dqStringStart:
   467  		lx.ignore() // ignore the " or '
   468  		return lexIncludeDubQuotedString
   469  	case r == arrayStart:
   470  		return lx.errorf("Expected include value but found start of an array")
   471  	case r == mapStart:
   472  		return lx.errorf("Expected include value but found start of a map")
   473  	case r == blockStart:
   474  		return lx.errorf("Expected include value but found start of a block")
   475  	case unicode.IsDigit(r), r == '-':
   476  		return lx.errorf("Expected include value but found start of a number")
   477  	case r == '\\':
   478  		return lx.errorf("Expected include value but found escape sequence")
   479  	case isNL(r):
   480  		return lx.errorf("Expected include value but found new line")
   481  	}
   482  	lx.backup()
   483  	return lexIncludeString
   484  }
   485  
   486  // lexKey consumes the text of a key. Assumes that the first character (which
   487  // is not whitespace) has already been consumed.
   488  func lexKey(lx *lexer) stateFn {
   489  	r := lx.peek()
   490  	if unicode.IsSpace(r) {
   491  		// Spaces signal we could be looking at a keyword, e.g. include.
   492  		// Keywords will eat the keyword and set the appropriate return stateFn.
   493  		return lx.keyCheckKeyword(lexKeyEnd, nil)
   494  	} else if isKeySeparator(r) || r == eof {
   495  		lx.emit(itemKey)
   496  		return lexKeyEnd
   497  	}
   498  	lx.next()
   499  	return lexKey
   500  }
   501  
   502  // lexKeyEnd consumes the end of a key (up to the key separator).
   503  // Assumes that the first whitespace character after a key (or the '=' or ':'
   504  // separator) has NOT been consumed.
   505  func lexKeyEnd(lx *lexer) stateFn {
   506  	r := lx.next()
   507  	switch {
   508  	case unicode.IsSpace(r):
   509  		return lexSkip(lx, lexKeyEnd)
   510  	case isKeySeparator(r):
   511  		return lexSkip(lx, lexValue)
   512  	case r == eof:
   513  		lx.emit(itemEOF)
   514  		return nil
   515  	}
   516  	// We start the value here
   517  	lx.backup()
   518  	return lexValue
   519  }
   520  
   521  // lexValue starts the consumption of a value anywhere a value is expected.
   522  // lexValue will ignore whitespace.
   523  // After a value is lexed, the last state on the next is popped and returned.
   524  func lexValue(lx *lexer) stateFn {
   525  	// We allow whitespace to precede a value, but NOT new lines.
   526  	// In array syntax, the array states are responsible for ignoring new lines.
   527  	r := lx.next()
   528  	if isWhitespace(r) {
   529  		return lexSkip(lx, lexValue)
   530  	}
   531  
   532  	switch {
   533  	case r == arrayStart:
   534  		lx.ignore()
   535  		lx.emit(itemArrayStart)
   536  		return lexArrayValue
   537  	case r == mapStart:
   538  		lx.ignore()
   539  		lx.emit(itemMapStart)
   540  		return lexMapKeyStart
   541  	case r == sqStringStart:
   542  		lx.ignore() // ignore the " or '
   543  		return lexQuotedString
   544  	case r == dqStringStart:
   545  		lx.ignore() // ignore the " or '
   546  		lx.stringStateFn = lexDubQuotedString
   547  		return lexDubQuotedString
   548  	case r == '-':
   549  		return lexNegNumberStart
   550  	case r == blockStart:
   551  		lx.ignore()
   552  		return lexBlock
   553  	case unicode.IsDigit(r):
   554  		lx.backup() // avoid an extra state and use the same as above
   555  		return lexNumberOrDateOrStringOrIPStart
   556  	case r == '.': // special error case, be kind to users
   557  		return lx.errorf("Floats must start with a digit")
   558  	case isNL(r):
   559  		return lx.errorf("Expected value but found new line")
   560  	}
   561  	lx.backup()
   562  	lx.stringStateFn = lexString
   563  	return lexString
   564  }
   565  
// lexArrayValue consumes one value in an array. It assumes that '[' or ','
// have already been consumed. All whitespace and new lines are ignored.
//
// Comments ('#' or "//") are lexed with lexArrayValue as the state to resume.
// A ']' ends the array. A ',' where a value is expected is an error. Anything
// else is backed up and lexed by lexValue, with lexArrayValueEnd pushed to
// run once the value is complete.
func lexArrayValue(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case unicode.IsSpace(r):
		return lexSkip(lx, lexArrayValue)
	case r == commentHashStart:
		lx.push(lexArrayValue)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexArrayValue)
			return lexCommentStart
		}
		lx.backup()
		// A single '/' is not a comment; it falls through and is reported
		// with the same error as an unexpected ','.
		fallthrough
	case r == arrayValTerm:
		return lx.errorf("Unexpected array value terminator '%v'.", arrayValTerm)
	case r == arrayEnd:
		return lexArrayEnd
	}

	lx.backup()
	lx.push(lexArrayValueEnd)
	return lexValue
}
   594  
// lexArrayValueEnd consumes the cruft between values of an array. Namely,
// it ignores whitespace and expects either a ',' or a ']'.
//
// A new line is accepted as a value separator as well, and comments ('#' or
// "//") are lexed with lexArrayValueEnd as the state to resume. Any other
// rune is an error.
func lexArrayValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexArrayValueEnd)
	case r == commentHashStart:
		lx.push(lexArrayValueEnd)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexArrayValueEnd)
			return lexCommentStart
		}
		lx.backup()
		// A single '/' falls through and is handled like a value separator.
		fallthrough
	case r == arrayValTerm || isNL(r):
		return lexSkip(lx, lexArrayValue) // Move onto next
	case r == arrayEnd:
		return lexArrayEnd
	}
	return lx.errorf("Expected an array value terminator %q or an array "+
		"terminator %q, but got '%v' instead.", arrayValTerm, arrayEnd, r)
}
   621  
// lexArrayEnd finishes the lexing of an array. It assumes that a ']' has
// just been consumed. The bracket is discarded, an itemArrayEnd with an empty
// value is emitted, and the state saved before the array began is resumed.
func lexArrayEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemArrayEnd)
	return lx.pop()
}
   629  
// lexMapKeyStart consumes a key name up until the first non-whitespace
// character.
// lexMapKeyStart will ignore whitespace.
//
// It also recognizes the end of the map ('}'), comments ('#' or "//", lexed
// with lexMapKeyStart as the state to resume) and opening quotes, which are
// consumed before handing over to the matching quoted-key state. A key
// separator, a ']' or EOF at this point is an error.
func lexMapKeyStart(lx *lexer) stateFn {
	r := lx.peek()
	switch {
	case isKeySeparator(r):
		return lx.errorf("Unexpected key separator '%v'.", r)
	case r == arrayEnd:
		return lx.errorf("Unexpected array end '%v' processing map.", r)
	case unicode.IsSpace(r):
		lx.next()
		return lexSkip(lx, lexMapKeyStart)
	case r == mapEnd:
		lx.next()
		return lexSkip(lx, lexMapEnd)
	case r == commentHashStart:
		lx.next()
		lx.push(lexMapKeyStart)
		return lexCommentStart
	case r == commentSlashStart:
		lx.next()
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexMapKeyStart)
			return lexCommentStart
		}
		lx.backup()
		// NOTE(review): a single '/' is not a comment. Control leaves the
		// switch with the '/' already consumed, so the ignore() below drops
		// it and the key starts at the following character. Confirm that
		// silently discarding the '/' is intended.
	case r == sqStringStart:
		lx.next()
		return lexSkip(lx, lexMapQuotedKey)
	case r == dqStringStart:
		lx.next()
		return lexSkip(lx, lexMapDubQuotedKey)
	case r == eof:
		return lx.errorf("Unexpected EOF processing map.")
	}
	// Bare key: drop anything pending, consume its first character and let
	// lexMapKey read the rest.
	lx.ignore()
	lx.next()
	return lexMapKey
}
   671  
   672  // lexMapQuotedKey consumes the text of a key between quotes.
   673  func lexMapQuotedKey(lx *lexer) stateFn {
   674  	if r := lx.peek(); r == eof {
   675  		return lx.errorf("Unexpected EOF processing quoted map key.")
   676  	} else if r == sqStringEnd {
   677  		lx.emit(itemKey)
   678  		lx.next()
   679  		return lexSkip(lx, lexMapKeyEnd)
   680  	}
   681  	lx.next()
   682  	return lexMapQuotedKey
   683  }
   684  
   685  // lexMapDubQuotedKey consumes the text of a key between quotes.
   686  func lexMapDubQuotedKey(lx *lexer) stateFn {
   687  	if r := lx.peek(); r == eof {
   688  		return lx.errorf("Unexpected EOF processing double quoted map key.")
   689  	} else if r == dqStringEnd {
   690  		lx.emit(itemKey)
   691  		lx.next()
   692  		return lexSkip(lx, lexMapKeyEnd)
   693  	}
   694  	lx.next()
   695  	return lexMapDubQuotedKey
   696  }
   697  
   698  // lexMapKey consumes the text of a key. Assumes that the first character (which
   699  // is not whitespace) has already been consumed.
   700  func lexMapKey(lx *lexer) stateFn {
   701  	if r := lx.peek(); r == eof {
   702  		return lx.errorf("Unexpected EOF processing map key.")
   703  	} else if unicode.IsSpace(r) {
   704  		// Spaces signal we could be looking at a keyword, e.g. include.
   705  		// Keywords will eat the keyword and set the appropriate return stateFn.
   706  		return lx.keyCheckKeyword(lexMapKeyEnd, lexMapValueEnd)
   707  	} else if isKeySeparator(r) {
   708  		lx.emit(itemKey)
   709  		return lexMapKeyEnd
   710  	}
   711  	lx.next()
   712  	return lexMapKey
   713  }
   714  
   715  // lexMapKeyEnd consumes the end of a key (up to the key separator).
   716  // Assumes that the first whitespace character after a key (or the '='
   717  // separator) has NOT been consumed.
   718  func lexMapKeyEnd(lx *lexer) stateFn {
   719  	r := lx.next()
   720  	switch {
   721  	case unicode.IsSpace(r):
   722  		return lexSkip(lx, lexMapKeyEnd)
   723  	case isKeySeparator(r):
   724  		return lexSkip(lx, lexMapValue)
   725  	}
   726  	// We start the value here
   727  	lx.backup()
   728  	return lexMapValue
   729  }
   730  
   731  // lexMapValue consumes one value in a map. It assumes that '{' or ','
   732  // have already been consumed. All whitespace and new lines are ignored.
   733  // Map values can be separated by ',' or simple NLs.
   734  func lexMapValue(lx *lexer) stateFn {
   735  	r := lx.next()
   736  	switch {
   737  	case unicode.IsSpace(r):
   738  		return lexSkip(lx, lexMapValue)
   739  	case r == mapValTerm:
   740  		return lx.errorf("Unexpected map value terminator %q.", mapValTerm)
   741  	case r == mapEnd:
   742  		return lexSkip(lx, lexMapEnd)
   743  	}
   744  	lx.backup()
   745  	lx.push(lexMapValueEnd)
   746  	return lexValue
   747  }
   748  
// lexMapValueEnd consumes the cruft between values of a map. Namely,
// it ignores whitespace and expects either a ',' or a '}'.
//
// A ';' (optValTerm) or a new line is accepted as a value terminator as well,
// and comments ('#' or "//") are lexed with lexMapValueEnd as the state to
// resume. Any other rune is an error.
func lexMapValueEnd(lx *lexer) stateFn {
	r := lx.next()
	switch {
	case isWhitespace(r):
		return lexSkip(lx, lexMapValueEnd)
	case r == commentHashStart:
		lx.push(lexMapValueEnd)
		return lexCommentStart
	case r == commentSlashStart:
		rn := lx.next()
		if rn == commentSlashStart {
			lx.push(lexMapValueEnd)
			return lexCommentStart
		}
		lx.backup()
		// A single '/' falls through and is handled like a value terminator.
		fallthrough
	case r == optValTerm || r == mapValTerm || isNL(r):
		return lexSkip(lx, lexMapKeyStart) // Move onto next
	case r == mapEnd:
		return lexSkip(lx, lexMapEnd)
	}
	return lx.errorf("Expected a map value terminator %q or a map "+
		"terminator %q, but got '%v' instead.", mapValTerm, mapEnd, r)
}
   775  
// lexMapEnd finishes the lexing of a map. It assumes that a '}' has
// just been consumed. Pending input is discarded, an itemMapEnd with an empty
// value is emitted, and the state saved before the map began is resumed.
func lexMapEnd(lx *lexer) stateFn {
	lx.ignore()
	lx.emit(itemMapEnd)
	return lx.pop()
}
   783  
   784  // Checks if the unquoted string was actually a boolean
   785  func (lx *lexer) isBool() bool {
   786  	str := strings.ToLower(lx.input[lx.start:lx.pos])
   787  	return str == "true" || str == "false" ||
   788  		str == "on" || str == "off" ||
   789  		str == "yes" || str == "no"
   790  }
   791  
   792  // Check if the unquoted string is a variable reference, starting with $.
   793  func (lx *lexer) isVariable() bool {
   794  	if lx.start >= len(lx.input) {
   795  		return false
   796  	}
   797  	if lx.input[lx.start] == '$' {
   798  		lx.start += 1
   799  		return true
   800  	}
   801  	return false
   802  }
   803  
   804  // lexQuotedString consumes the inner contents of a string. It assumes that the
   805  // beginning '"' has already been consumed and ignored. It will not interpret any
   806  // internal contents.
   807  func lexQuotedString(lx *lexer) stateFn {
   808  	r := lx.next()
   809  	switch {
   810  	case r == sqStringEnd:
   811  		lx.backup()
   812  		lx.emit(itemString)
   813  		lx.next()
   814  		lx.ignore()
   815  		return lx.pop()
   816  	case r == eof:
   817  		if lx.pos > lx.start {
   818  			return lx.errorf("Unexpected EOF.")
   819  		}
   820  		lx.emit(itemEOF)
   821  		return nil
   822  	}
   823  	return lexQuotedString
   824  }
   825  
   826  // lexDubQuotedString consumes the inner contents of a string. It assumes that the
   827  // beginning '"' has already been consumed and ignored. It will not interpret any
   828  // internal contents.
   829  func lexDubQuotedString(lx *lexer) stateFn {
   830  	r := lx.next()
   831  	switch {
   832  	case r == '\\':
   833  		lx.addCurrentStringPart(1)
   834  		return lexStringEscape
   835  	case r == dqStringEnd:
   836  		lx.backup()
   837  		lx.emitString()
   838  		lx.next()
   839  		lx.ignore()
   840  		return lx.pop()
   841  	case r == eof:
   842  		if lx.pos > lx.start {
   843  			return lx.errorf("Unexpected EOF.")
   844  		}
   845  		lx.emit(itemEOF)
   846  		return nil
   847  	}
   848  	return lexDubQuotedString
   849  }
   850  
   851  // lexString consumes the inner contents of a raw string.
   852  func lexString(lx *lexer) stateFn {
   853  	r := lx.next()
   854  	switch {
   855  	case r == '\\':
   856  		lx.addCurrentStringPart(1)
   857  		return lexStringEscape
   858  	// Termination of non-quoted strings
   859  	case isNL(r) || r == eof || r == optValTerm ||
   860  		r == arrayValTerm || r == arrayEnd || r == mapEnd ||
   861  		isWhitespace(r):
   862  
   863  		lx.backup()
   864  		if lx.hasEscapedParts() {
   865  			lx.emitString()
   866  		} else if lx.isBool() {
   867  			lx.emit(itemBool)
   868  		} else if lx.isVariable() {
   869  			lx.emit(itemVariable)
   870  		} else {
   871  			lx.emitString()
   872  		}
   873  		return lx.pop()
   874  	case r == sqStringEnd:
   875  		lx.backup()
   876  		lx.emitString()
   877  		lx.next()
   878  		lx.ignore()
   879  		return lx.pop()
   880  	}
   881  	return lexString
   882  }
   883  
   884  // lexBlock consumes the inner contents as a string. It assumes that the
   885  // beginning '(' has already been consumed and ignored. It will continue
   886  // processing until it finds a ')' on a new line by itself.
   887  func lexBlock(lx *lexer) stateFn {
   888  	r := lx.next()
   889  	switch {
   890  	case r == blockEnd:
   891  		lx.backup()
   892  		lx.backup()
   893  
   894  		// Looking for a ')' character on a line by itself, if the previous
   895  		// character isn't a new line, then break so we keep processing the block.
   896  		if lx.next() != '\n' {
   897  			lx.next()
   898  			break
   899  		}
   900  		lx.next()
   901  
   902  		// Make sure the next character is a new line or an eof. We want a ')' on a
   903  		// bare line by itself.
   904  		switch lx.next() {
   905  		case '\n', eof:
   906  			lx.backup()
   907  			lx.backup()
   908  			lx.emit(itemString)
   909  			lx.next()
   910  			lx.ignore()
   911  			return lx.pop()
   912  		}
   913  		lx.backup()
   914  	case r == eof:
   915  		return lx.errorf("Unexpected EOF processing block.")
   916  	}
   917  	return lexBlock
   918  }
   919  
   920  // lexStringEscape consumes an escaped character. It assumes that the preceding
   921  // '\\' has already been consumed.
   922  func lexStringEscape(lx *lexer) stateFn {
   923  	r := lx.next()
   924  	switch r {
   925  	case 'x':
   926  		return lexStringBinary
   927  	case 't':
   928  		return lx.addStringPart("\t")
   929  	case 'n':
   930  		return lx.addStringPart("\n")
   931  	case 'r':
   932  		return lx.addStringPart("\r")
   933  	case '"':
   934  		return lx.addStringPart("\"")
   935  	case '\\':
   936  		return lx.addStringPart("\\")
   937  	}
   938  	return lx.errorf("Invalid escape character '%v'. Only the following "+
   939  		"escape characters are allowed: \\xXX, \\t, \\n, \\r, \\\", \\\\.", r)
   940  }
   941  
   942  // lexStringBinary consumes two hexadecimal digits following '\x'. It assumes
   943  // that the '\x' has already been consumed.
   944  func lexStringBinary(lx *lexer) stateFn {
   945  	r := lx.next()
   946  	if isNL(r) {
   947  		return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
   948  	}
   949  	r = lx.next()
   950  	if isNL(r) {
   951  		return lx.errorf("Expected two hexadecimal digits after '\\x', but hit end of line")
   952  	}
   953  	offset := lx.pos - 2
   954  	byteString, err := hex.DecodeString(lx.input[offset:lx.pos])
   955  	if err != nil {
   956  		return lx.errorf("Expected two hexadecimal digits after '\\x', but got '%s'", lx.input[offset:lx.pos])
   957  	}
   958  	lx.addStringPart(string(byteString))
   959  	return lx.stringStateFn
   960  }
   961  
   962  // lexNumberOrDateOrStringOrIPStart consumes either a (positive)
   963  // integer, a float, a datetime, or IP, or String that started with a
   964  // number.  It assumes that NO negative sign has been consumed, that
   965  // is triggered above.
   966  func lexNumberOrDateOrStringOrIPStart(lx *lexer) stateFn {
   967  	r := lx.next()
   968  	if !unicode.IsDigit(r) {
   969  		if r == '.' {
   970  			return lx.errorf("Floats must start with a digit, not '.'.")
   971  		}
   972  		return lx.errorf("Expected a digit but got '%v'.", r)
   973  	}
   974  	return lexNumberOrDateOrStringOrIP
   975  }
   976  
   977  // lexNumberOrDateOrStringOrIP consumes either a (positive) integer,
   978  // float, datetime, IP or string without quotes that starts with a
   979  // number.
   980  func lexNumberOrDateOrStringOrIP(lx *lexer) stateFn {
   981  	r := lx.next()
   982  	switch {
   983  	case r == '-':
   984  		if lx.pos-lx.start != 5 {
   985  			return lx.errorf("All ISO8601 dates must be in full Zulu form.")
   986  		}
   987  		return lexDateAfterYear
   988  	case unicode.IsDigit(r):
   989  		return lexNumberOrDateOrStringOrIP
   990  	case r == '.':
   991  		// Assume float at first, but could be IP
   992  		return lexFloatStart
   993  	case isNumberSuffix(r):
   994  		return lexConvenientNumber
   995  	case !(isNL(r) || r == eof || r == mapEnd || r == optValTerm || r == mapValTerm || isWhitespace(r) || unicode.IsDigit(r)):
   996  		// Treat it as a string value once we get a rune that
   997  		// is not a number.
   998  		lx.stringStateFn = lexString
   999  		return lexString
  1000  	}
  1001  	lx.backup()
  1002  	lx.emit(itemInteger)
  1003  	return lx.pop()
  1004  }
  1005  
  1006  // lexConvenientNumber is when we have a suffix, e.g. 1k or 1Mb
  1007  func lexConvenientNumber(lx *lexer) stateFn {
  1008  	r := lx.next()
  1009  	switch {
  1010  	case r == 'b' || r == 'B' || r == 'i' || r == 'I':
  1011  		return lexConvenientNumber
  1012  	}
  1013  	lx.backup()
  1014  	if isNL(r) || r == eof || r == mapEnd || r == optValTerm || r == mapValTerm || isWhitespace(r) || unicode.IsDigit(r) {
  1015  		lx.emit(itemInteger)
  1016  		return lx.pop()
  1017  	}
  1018  	// This is not a number, so treat it as a string.
  1019  	lx.stringStateFn = lexString
  1020  	return lexString
  1021  }
  1022  
  1023  // lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
  1024  // It assumes that "YYYY-" has already been consumed.
  1025  func lexDateAfterYear(lx *lexer) stateFn {
  1026  	formats := []rune{
  1027  		// digits are '0'.
  1028  		// everything else is direct equality.
  1029  		'0', '0', '-', '0', '0',
  1030  		'T',
  1031  		'0', '0', ':', '0', '0', ':', '0', '0',
  1032  		'Z',
  1033  	}
  1034  	for _, f := range formats {
  1035  		r := lx.next()
  1036  		if f == '0' {
  1037  			if !unicode.IsDigit(r) {
  1038  				return lx.errorf("Expected digit in ISO8601 datetime, "+
  1039  					"but found '%v' instead.", r)
  1040  			}
  1041  		} else if f != r {
  1042  			return lx.errorf("Expected '%v' in ISO8601 datetime, "+
  1043  				"but found '%v' instead.", f, r)
  1044  		}
  1045  	}
  1046  	lx.emit(itemDatetime)
  1047  	return lx.pop()
  1048  }
  1049  
  1050  // lexNegNumberStart consumes either an integer or a float. It assumes that a
  1051  // negative sign has already been read, but that *no* digits have been consumed.
  1052  // lexNegNumberStart will move to the appropriate integer or float states.
  1053  func lexNegNumberStart(lx *lexer) stateFn {
  1054  	// we MUST see a digit. Even floats have to start with a digit.
  1055  	r := lx.next()
  1056  	if !unicode.IsDigit(r) {
  1057  		if r == '.' {
  1058  			return lx.errorf("Floats must start with a digit, not '.'.")
  1059  		}
  1060  		return lx.errorf("Expected a digit but got '%v'.", r)
  1061  	}
  1062  	return lexNegNumber
  1063  }
  1064  
  1065  // lexNegNumber consumes a negative integer or a float after seeing the first digit.
  1066  func lexNegNumber(lx *lexer) stateFn {
  1067  	r := lx.next()
  1068  	switch {
  1069  	case unicode.IsDigit(r):
  1070  		return lexNegNumber
  1071  	case r == '.':
  1072  		return lexFloatStart
  1073  	case isNumberSuffix(r):
  1074  		return lexConvenientNumber
  1075  	}
  1076  	lx.backup()
  1077  	lx.emit(itemInteger)
  1078  	return lx.pop()
  1079  }
  1080  
  1081  // lexFloatStart starts the consumption of digits of a float after a '.'.
  1082  // Namely, at least one digit is required.
  1083  func lexFloatStart(lx *lexer) stateFn {
  1084  	r := lx.next()
  1085  	if !unicode.IsDigit(r) {
  1086  		return lx.errorf("Floats must have a digit after the '.', but got "+
  1087  			"'%v' instead.", r)
  1088  	}
  1089  	return lexFloat
  1090  }
  1091  
  1092  // lexFloat consumes the digits of a float after a '.'.
  1093  // Assumes that one digit has been consumed after a '.' already.
  1094  func lexFloat(lx *lexer) stateFn {
  1095  	r := lx.next()
  1096  	if unicode.IsDigit(r) {
  1097  		return lexFloat
  1098  	}
  1099  
  1100  	// Not a digit, if its another '.', need to see if we falsely assumed a float.
  1101  	if r == '.' {
  1102  		return lexIPAddr
  1103  	}
  1104  
  1105  	lx.backup()
  1106  	lx.emit(itemFloat)
  1107  	return lx.pop()
  1108  }
  1109  
  1110  // lexIPAddr consumes IP addrs, like 127.0.0.1:4222
  1111  func lexIPAddr(lx *lexer) stateFn {
  1112  	r := lx.next()
  1113  	if unicode.IsDigit(r) || r == '.' || r == ':' || r == '-' {
  1114  		return lexIPAddr
  1115  	}
  1116  	lx.backup()
  1117  	lx.emit(itemString)
  1118  	return lx.pop()
  1119  }
  1120  
  1121  // lexCommentStart begins the lexing of a comment. It will emit
  1122  // itemCommentStart and consume no characters, passing control to lexComment.
  1123  func lexCommentStart(lx *lexer) stateFn {
  1124  	lx.ignore()
  1125  	lx.emit(itemCommentStart)
  1126  	return lexComment
  1127  }
  1128  
  1129  // lexComment lexes an entire comment. It assumes that '#' has been consumed.
  1130  // It will consume *up to* the first new line character, and pass control
  1131  // back to the last state on the stack.
  1132  func lexComment(lx *lexer) stateFn {
  1133  	r := lx.peek()
  1134  	if isNL(r) || r == eof {
  1135  		lx.emit(itemText)
  1136  		return lx.pop()
  1137  	}
  1138  	lx.next()
  1139  	return lexComment
  1140  }
  1141  
  1142  // lexSkip ignores all slurped input and moves on to the next state.
  1143  func lexSkip(lx *lexer, nextState stateFn) stateFn {
  1144  	return func(lx *lexer) stateFn {
  1145  		lx.ignore()
  1146  		return nextState
  1147  	}
  1148  }
  1149  
  1150  // Tests to see if we have a number suffix
  1151  func isNumberSuffix(r rune) bool {
  1152  	return r == 'k' || r == 'K' || r == 'm' || r == 'M' || r == 'g' || r == 'G' || r == 't' || r == 'T' || r == 'p' || r == 'P' || r == 'e' || r == 'E'
  1153  }
  1154  
  1155  // Tests for both key separators
  1156  func isKeySeparator(r rune) bool {
  1157  	return r == keySepEqual || r == keySepColon
  1158  }
  1159  
  1160  // isWhitespace returns true if `r` is a whitespace character according
  1161  // to the spec.
  1162  func isWhitespace(r rune) bool {
  1163  	return r == '\t' || r == ' '
  1164  }
  1165  
  1166  func isNL(r rune) bool {
  1167  	return r == '\n' || r == '\r'
  1168  }
  1169  
  1170  func (itype itemType) String() string {
  1171  	switch itype {
  1172  	case itemError:
  1173  		return "Error"
  1174  	case itemNIL:
  1175  		return "NIL"
  1176  	case itemEOF:
  1177  		return "EOF"
  1178  	case itemText:
  1179  		return "Text"
  1180  	case itemString:
  1181  		return "String"
  1182  	case itemBool:
  1183  		return "Bool"
  1184  	case itemInteger:
  1185  		return "Integer"
  1186  	case itemFloat:
  1187  		return "Float"
  1188  	case itemDatetime:
  1189  		return "DateTime"
  1190  	case itemKey:
  1191  		return "Key"
  1192  	case itemArrayStart:
  1193  		return "ArrayStart"
  1194  	case itemArrayEnd:
  1195  		return "ArrayEnd"
  1196  	case itemMapStart:
  1197  		return "MapStart"
  1198  	case itemMapEnd:
  1199  		return "MapEnd"
  1200  	case itemCommentStart:
  1201  		return "CommentStart"
  1202  	case itemVariable:
  1203  		return "Variable"
  1204  	case itemInclude:
  1205  		return "Include"
  1206  	}
  1207  	panic(fmt.Sprintf("BUG: Unknown type '%s'.", itype.String()))
  1208  }
  1209  
  1210  func (item item) String() string {
  1211  	return fmt.Sprintf("(%s, '%s', %d, %d)", item.typ.String(), item.val, item.line, item.pos)
  1212  }
  1213  
  1214  func escapeSpecial(c rune) string {
  1215  	switch c {
  1216  	case '\n':
  1217  		return "\\n"
  1218  	}
  1219  	return string(c)
  1220  }