git.sr.ht/~pingoo/stdx@v0.0.0-20240218134121-094174641f6e/toml/parse.go (about)

     1  package toml
     2  
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  	"strings"
     7  	"time"
     8  	"unicode/utf8"
     9  
    10  	"git.sr.ht/~pingoo/stdx/toml/internal"
    11  )
    12  
// parser holds the state used while turning the lexer's item stream into a
// nested map of values (mapping) plus per-key metadata (keyInfo, implicits,
// ordered).
type parser struct {
	lx         *lexer
	context    Key      // Full key for the current hash in scope.
	currentKey string   // Base key name for everything except hashes.
	pos        Position // Current position in the TOML file.

	ordered []Key // List of keys in the order that they appear in the TOML data.

	keyInfo   map[string]keyInfo     // Map keyname → info about the TOML key.
	mapping   map[string]interface{} // Map keyname → key value.
	implicits map[string]struct{}    // Record implicit keys (e.g. "key.group.names").
}
    25  
// keyInfo is the metadata stored per key by setType: the position that was
// passed in for the key and its TOML type.
type keyInfo struct {
	pos      Position
	tomlType tomlType
}
    30  
    31  func parse(data string) (p *parser, err error) {
    32  	defer func() {
    33  		if r := recover(); r != nil {
    34  			if pErr, ok := r.(ParseError); ok {
    35  				pErr.input = data
    36  				err = pErr
    37  				return
    38  			}
    39  			panic(r)
    40  		}
    41  	}()
    42  
    43  	// Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
    44  	// which mangles stuff.
    45  	if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") {
    46  		data = data[2:]
    47  	}
    48  
    49  	// Examine first few bytes for NULL bytes; this probably means it's a UTF-16
    50  	// file (second byte in surrogate pair being NULL). Again, do this here to
    51  	// avoid having to deal with UTF-8/16 stuff in the lexer.
    52  	ex := 6
    53  	if len(data) < 6 {
    54  		ex = len(data)
    55  	}
    56  	if i := strings.IndexRune(data[:ex], 0); i > -1 {
    57  		return nil, ParseError{
    58  			Message:  "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8",
    59  			Position: Position{Line: 1, Start: i, Len: 1},
    60  			Line:     1,
    61  			input:    data,
    62  		}
    63  	}
    64  
    65  	p = &parser{
    66  		keyInfo:   make(map[string]keyInfo),
    67  		mapping:   make(map[string]interface{}),
    68  		lx:        lex(data),
    69  		ordered:   make([]Key, 0),
    70  		implicits: make(map[string]struct{}),
    71  	}
    72  	for {
    73  		item := p.next()
    74  		if item.typ == itemEOF {
    75  			break
    76  		}
    77  		p.topLevel(item)
    78  	}
    79  
    80  	return p, nil
    81  }
    82  
    83  func (p *parser) panicErr(it item, err error) {
    84  	panic(ParseError{
    85  		err:      err,
    86  		Position: it.pos,
    87  		Line:     it.pos.Len,
    88  		LastKey:  p.current(),
    89  	})
    90  }
    91  
    92  func (p *parser) panicItemf(it item, format string, v ...interface{}) {
    93  	panic(ParseError{
    94  		Message:  fmt.Sprintf(format, v...),
    95  		Position: it.pos,
    96  		Line:     it.pos.Len,
    97  		LastKey:  p.current(),
    98  	})
    99  }
   100  
   101  func (p *parser) panicf(format string, v ...interface{}) {
   102  	panic(ParseError{
   103  		Message:  fmt.Sprintf(format, v...),
   104  		Position: p.pos,
   105  		Line:     p.pos.Line,
   106  		LastKey:  p.current(),
   107  	})
   108  }
   109  
   110  func (p *parser) next() item {
   111  	it := p.lx.nextItem()
   112  	//fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val)
   113  	if it.typ == itemError {
   114  		if it.err != nil {
   115  			panic(ParseError{
   116  				Position: it.pos,
   117  				Line:     it.pos.Line,
   118  				LastKey:  p.current(),
   119  				err:      it.err,
   120  			})
   121  		}
   122  
   123  		p.panicItemf(it, "%s", it.val)
   124  	}
   125  	return it
   126  }
   127  
   128  func (p *parser) nextPos() item {
   129  	it := p.next()
   130  	p.pos = it.pos
   131  	return it
   132  }
   133  
   134  func (p *parser) bug(format string, v ...interface{}) {
   135  	panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
   136  }
   137  
   138  func (p *parser) expect(typ itemType) item {
   139  	it := p.next()
   140  	p.assertEqual(typ, it.typ)
   141  	return it
   142  }
   143  
   144  func (p *parser) assertEqual(expected, got itemType) {
   145  	if expected != got {
   146  		p.bug("Expected '%s' but got '%s'.", expected, got)
   147  	}
   148  }
   149  
// topLevel handles one top-level item: a comment, a [table] header, an
// [[array-of-tables]] header, or a key/value pair. Any other item type is
// reported through p.bug.
func (p *parser) topLevel(item item) {
	switch item.typ {
	case itemCommentStart: // # ..
		p.expect(itemText)
	case itemTableStart: // [ .. ]
		name := p.nextPos()

		// Collect every part of the (possibly dotted) table name.
		var key Key
		for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
			key = append(key, p.keyString(name))
		}
		p.assertEqual(itemTableEnd, name.typ)

		// Make the table the current context and record its type, position
		// and order of appearance.
		p.addContext(key, false)
		p.setType("", tomlHash, item.pos)
		p.ordered = append(p.ordered, key)
	case itemArrayTableStart: // [[ .. ]]
		name := p.nextPos()

		// Collect every part of the (possibly dotted) table name.
		var key Key
		for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
			key = append(key, p.keyString(name))
		}
		p.assertEqual(itemArrayTableEnd, name.typ)

		// Same as for [table], but addContext appends a new table to the
		// array of tables at this key.
		p.addContext(key, true)
		p.setType("", tomlArrayHash, item.pos)
		p.ordered = append(p.ordered, key)
	case itemKeyStart: // key = ..
		// Remember the context so dotted-key parts added below can be undone.
		outerContext := p.context
		/// Read all the key parts (e.g. 'a' and 'b' in 'a.b')
		k := p.nextPos()
		var key Key
		for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
			key = append(key, p.keyString(k))
		}
		p.assertEqual(itemKeyEnd, k.typ)

		/// The current key is the last part.
		// NOTE(review): assumes the lexer always yields at least one key part
		// here; an empty key would make this index panic — confirm.
		p.currentKey = key[len(key)-1]

		/// All the other parts (if any) are the context; need to set each part
		/// as implicit.
		context := key[:len(key)-1]
		for i := range context {
			p.addImplicitContext(append(p.context, context[i:i+1]...))
		}

		/// Set value.
		vItem := p.next()
		val, typ := p.value(vItem, false)
		p.set(p.currentKey, val, typ, vItem.pos)
		p.ordered = append(p.ordered, p.context.add(p.currentKey))

		/// Remove the context we added (preserving any context from [tbl] lines).
		p.context = outerContext
		p.currentKey = ""
	default:
		p.bug("Unexpected type at top level: %s", item.typ)
	}
}
   211  
   212  // Gets a string for a key (or part of a key in a table name).
   213  func (p *parser) keyString(it item) string {
   214  	switch it.typ {
   215  	case itemText:
   216  		return it.val
   217  	case itemString, itemMultilineString,
   218  		itemRawString, itemRawMultilineString:
   219  		s, _ := p.value(it, false)
   220  		return s.(string)
   221  	default:
   222  		p.bug("Unexpected key type: %s", it.typ)
   223  	}
   224  	panic("unreachable")
   225  }
   226  
// datetimeRepl normalizes datetime text before it is handed to the time
// package: lowercase "z" and "t" become uppercase, and a space is replaced
// with "T".
var datetimeRepl = strings.NewReplacer(
	"z", "Z",
	"t", "T",
	" ", "T")
   231  
   232  // value translates an expected value from the lexer into a Go value wrapped
   233  // as an empty interface.
   234  func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
   235  	switch it.typ {
   236  	case itemString:
   237  		return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
   238  	case itemMultilineString:
   239  		return p.replaceEscapes(it, stripFirstNewline(p.stripEscapedNewlines(it.val))), p.typeOfPrimitive(it)
   240  	case itemRawString:
   241  		return it.val, p.typeOfPrimitive(it)
   242  	case itemRawMultilineString:
   243  		return stripFirstNewline(it.val), p.typeOfPrimitive(it)
   244  	case itemInteger:
   245  		return p.valueInteger(it)
   246  	case itemFloat:
   247  		return p.valueFloat(it)
   248  	case itemBool:
   249  		switch it.val {
   250  		case "true":
   251  			return true, p.typeOfPrimitive(it)
   252  		case "false":
   253  			return false, p.typeOfPrimitive(it)
   254  		default:
   255  			p.bug("Expected boolean value, but got '%s'.", it.val)
   256  		}
   257  	case itemDatetime:
   258  		return p.valueDatetime(it)
   259  	case itemArray:
   260  		return p.valueArray(it)
   261  	case itemInlineTableStart:
   262  		return p.valueInlineTable(it, parentIsArray)
   263  	default:
   264  		p.bug("Unexpected value type: %s", it.typ)
   265  	}
   266  	panic("unreachable")
   267  }
   268  
   269  func (p *parser) valueInteger(it item) (interface{}, tomlType) {
   270  	if !numUnderscoresOK(it.val) {
   271  		p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val)
   272  	}
   273  	if numHasLeadingZero(it.val) {
   274  		p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val)
   275  	}
   276  
   277  	num, err := strconv.ParseInt(it.val, 0, 64)
   278  	if err != nil {
   279  		// Distinguish integer values. Normally, it'd be a bug if the lexer
   280  		// provides an invalid integer, but it's possible that the number is
   281  		// out of range of valid values (which the lexer cannot determine).
   282  		// So mark the former as a bug but the latter as a legitimate user
   283  		// error.
   284  		if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
   285  			p.panicErr(it, errParseRange{i: it.val, size: "int64"})
   286  		} else {
   287  			p.bug("Expected integer value, but got '%s'.", it.val)
   288  		}
   289  	}
   290  	return num, p.typeOfPrimitive(it)
   291  }
   292  
   293  func (p *parser) valueFloat(it item) (interface{}, tomlType) {
   294  	parts := strings.FieldsFunc(it.val, func(r rune) bool {
   295  		switch r {
   296  		case '.', 'e', 'E':
   297  			return true
   298  		}
   299  		return false
   300  	})
   301  	for _, part := range parts {
   302  		if !numUnderscoresOK(part) {
   303  			p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val)
   304  		}
   305  	}
   306  	if len(parts) > 0 && numHasLeadingZero(parts[0]) {
   307  		p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val)
   308  	}
   309  	if !numPeriodsOK(it.val) {
   310  		// As a special case, numbers like '123.' or '1.e2',
   311  		// which are valid as far as Go/strconv are concerned,
   312  		// must be rejected because TOML says that a fractional
   313  		// part consists of '.' followed by 1+ digits.
   314  		p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val)
   315  	}
   316  	val := strings.Replace(it.val, "_", "", -1)
   317  	if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does.
   318  		val = "nan"
   319  	}
   320  	num, err := strconv.ParseFloat(val, 64)
   321  	if err != nil {
   322  		if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
   323  			p.panicErr(it, errParseRange{i: it.val, size: "float64"})
   324  		} else {
   325  			p.panicItemf(it, "Invalid float value: %q", it.val)
   326  		}
   327  	}
   328  	return num, p.typeOfPrimitive(it)
   329  }
   330  
// dtTypes lists the layouts valueDatetime tries, in order, together with the
// location passed to time.ParseInLocation for each layout.
var dtTypes = []struct {
	fmt  string
	zone *time.Location
}{
	{time.RFC3339Nano, time.Local},
	{"2006-01-02T15:04:05.999999999", internal.LocalDatetime},
	{"2006-01-02", internal.LocalDate},
	{"15:04:05.999999999", internal.LocalTime},
}
   340  
   341  func (p *parser) valueDatetime(it item) (interface{}, tomlType) {
   342  	it.val = datetimeRepl.Replace(it.val)
   343  	var (
   344  		t   time.Time
   345  		ok  bool
   346  		err error
   347  	)
   348  	for _, dt := range dtTypes {
   349  		t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
   350  		if err == nil {
   351  			ok = true
   352  			break
   353  		}
   354  	}
   355  	if !ok {
   356  		p.panicItemf(it, "Invalid TOML Datetime: %q.", it.val)
   357  	}
   358  	return t, p.typeOfPrimitive(it)
   359  }
   360  
   361  func (p *parser) valueArray(it item) (interface{}, tomlType) {
   362  	p.setType(p.currentKey, tomlArray, it.pos)
   363  
   364  	var (
   365  		types []tomlType
   366  
   367  		// Initialize to a non-nil empty slice. This makes it consistent with
   368  		// how S = [] decodes into a non-nil slice inside something like struct
   369  		// { S []string }. See #338
   370  		array = []interface{}{}
   371  	)
   372  	for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
   373  		if it.typ == itemCommentStart {
   374  			p.expect(itemText)
   375  			continue
   376  		}
   377  
   378  		val, typ := p.value(it, true)
   379  		array = append(array, val)
   380  		types = append(types, typ)
   381  
   382  		// XXX: types isn't used here, we need it to record the accurate type
   383  		// information.
   384  		//
   385  		// Not entirely sure how to best store this; could use "key[0]",
   386  		// "key[1]" notation, or maybe store it on the Array type?
   387  	}
   388  	return array, tomlArray
   389  }
   390  
// valueInlineTable parses an inline table ({ k = v, ... }) and returns it as
// a map[string]interface{}. The table is registered as a context under the
// current key while its pairs are read; context and current key are restored
// before returning. parentIsArray is forwarded to addContext as its `array`
// argument.
func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) {
	var (
		hash         = make(map[string]interface{})
		outerContext = p.context
		outerKey     = p.currentKey
	)

	// The inline table's own key becomes part of the context for its pairs.
	p.context = append(p.context, p.currentKey)
	prevContext := p.context
	p.currentKey = ""

	p.addImplicit(p.context)
	p.addContext(p.context, parentIsArray)

	/// Loop over all table key/value pairs.
	// Note: this `it` shadows the parameter; it is the item that starts each
	// pair (or a comment).
	for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
		if it.typ == itemCommentStart {
			p.expect(itemText)
			continue
		}

		/// Read all key parts.
		k := p.nextPos()
		var key Key
		for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
			key = append(key, p.keyString(k))
		}
		p.assertEqual(itemKeyEnd, k.typ)

		/// The current key is the last part.
		p.currentKey = key[len(key)-1]

		/// All the other parts (if any) are the context; need to set each part
		/// as implicit.
		context := key[:len(key)-1]
		for i := range context {
			p.addImplicitContext(append(p.context, context[i:i+1]...))
		}

		/// Set the value.
		// The recorded position is that of the pair's starting item (it.pos),
		// not of the value item.
		val, typ := p.value(p.next(), false)
		p.set(p.currentKey, val, typ, it.pos)
		p.ordered = append(p.ordered, p.context.add(p.currentKey))
		hash[p.currentKey] = val

		/// Restore context.
		p.context = prevContext
	}
	p.context = outerContext
	p.currentKey = outerKey
	return hash, tomlHash
}
   443  
   444  // numHasLeadingZero checks if this number has leading zeroes, allowing for '0',
   445  // +/- signs, and base prefixes.
   446  func numHasLeadingZero(s string) bool {
   447  	if len(s) > 1 && s[0] == '0' && !(s[1] == 'b' || s[1] == 'o' || s[1] == 'x') { // Allow 0b, 0o, 0x
   448  		return true
   449  	}
   450  	if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' {
   451  		return true
   452  	}
   453  	return false
   454  }
   455  
   456  // numUnderscoresOK checks whether each underscore in s is surrounded by
   457  // characters that are not underscores.
   458  func numUnderscoresOK(s string) bool {
   459  	switch s {
   460  	case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
   461  		return true
   462  	}
   463  	accept := false
   464  	for _, r := range s {
   465  		if r == '_' {
   466  			if !accept {
   467  				return false
   468  			}
   469  		}
   470  
   471  		// isHexadecimal is a superset of all the permissable characters
   472  		// surrounding an underscore.
   473  		accept = isHexadecimal(r)
   474  	}
   475  	return accept
   476  }
   477  
   478  // numPeriodsOK checks whether every period in s is followed by a digit.
   479  func numPeriodsOK(s string) bool {
   480  	period := false
   481  	for _, r := range s {
   482  		if period && !isDigit(r) {
   483  			return false
   484  		}
   485  		period = r == '.'
   486  	}
   487  	return !period
   488  }
   489  
// addContext sets the current context of the parser, where the context is
// either a hash or an array of hashes, depending on the value of the `array`
// parameter.
//
// Establishing the context also makes sure that the key isn't a duplicate, and
// will create implicit hashes automatically.
//
// key must be non-empty: both key[0 : len(key)-1] and key[len(key)-1] are
// evaluated unconditionally.
func (p *parser) addContext(key Key, array bool) {
	var ok bool

	// Always start at the top level and drill down for our context.
	hashContext := p.mapping
	keyContext := make(Key, 0)

	// We only need implicit hashes for key[0:-1]
	for _, k := range key[0 : len(key)-1] {
		_, ok = hashContext[k]
		keyContext = append(keyContext, k)

		// No key? Make an implicit hash and move on.
		if !ok {
			p.addImplicit(keyContext)
			hashContext[k] = make(map[string]interface{})
		}

		// If the hash context is actually an array of tables, then set
		// the hash context to the last element in that array.
		//
		// Otherwise, it better be a table, since this MUST be a key group (by
		// virtue of it not being the last element in a key).
		switch t := hashContext[k].(type) {
		case []map[string]interface{}:
			hashContext = t[len(t)-1]
		case map[string]interface{}:
			hashContext = t
		default:
			// Reached when the existing value is neither a table nor an array
			// of tables.
			p.panicf("Key '%s' was already created as a hash.", keyContext)
		}
	}

	// The context is the parent path for now; setValue below resolves the
	// last key part relative to it.
	p.context = keyContext
	if array {
		// If this is the first element for this array, then allocate a new
		// list of tables for it.
		k := key[len(key)-1]
		if _, ok := hashContext[k]; !ok {
			hashContext[k] = make([]map[string]interface{}, 0, 4)
		}

		// Add a new table. But make sure the key hasn't already been used
		// for something else.
		if hash, ok := hashContext[k].([]map[string]interface{}); ok {
			hashContext[k] = append(hash, make(map[string]interface{}))
		} else {
			p.panicf("Key '%s' was already created and cannot be used as an array.", key)
		}
	} else {
		p.setValue(key[len(key)-1], make(map[string]interface{}))
	}
	// Finally extend the context with the last key part, so it names the
	// table itself.
	p.context = append(p.context, key[len(key)-1])
}
   549  
   550  // set calls setValue and setType.
   551  func (p *parser) set(key string, val interface{}, typ tomlType, pos Position) {
   552  	p.setValue(key, val)
   553  	p.setType(key, typ, pos)
   554  
   555  }
   556  
// setValue sets the given key to the given value in the current context.
// It makes sure that the key hasn't already been defined, accounting for
// implicit key groups.
//
// The target table is found by walking p.mapping along p.context; a missing
// context entry is a bug, and a context entry that is neither a table nor an
// array of tables is a user error.
func (p *parser) setValue(key string, value interface{}) {
	var (
		tmpHash    interface{}
		ok         bool
		hash       = p.mapping
		keyContext Key
	)
	for _, k := range p.context {
		keyContext = append(keyContext, k)
		if tmpHash, ok = hash[k]; !ok {
			p.bug("Context for key '%s' has not been established.", keyContext)
		}
		switch t := tmpHash.(type) {
		case []map[string]interface{}:
			// The context is a table of hashes. Pick the most recent table
			// defined as the current hash.
			hash = t[len(t)-1]
		case map[string]interface{}:
			hash = t
		default:
			p.panicf("Key '%s' has already been defined.", keyContext)
		}
	}
	// keyContext is now the full key, used for the isArray/isImplicit lookups
	// and in error messages.
	keyContext = append(keyContext, key)

	if _, ok := hash[key]; ok {
		// Normally redefining keys isn't allowed, but the key could have been
		// defined implicitly and it's allowed to be redefined concretely. (See
		// the `valid/implicit-and-explicit-after.toml` in toml-test)
		//
		// But we have to make sure to stop marking it as an implicit. (So that
		// another redefinition provokes an error.)
		//
		// Note that since it has already been defined (as a hash), we don't
		// want to overwrite it. So our business is done.
		if p.isArray(keyContext) {
			// A key whose recorded type is tomlArray is overwritten with the
			// new value.
			p.removeImplicit(keyContext)
			hash[key] = value
			return
		}
		if p.isImplicit(keyContext) {
			p.removeImplicit(keyContext)
			return
		}

		// Otherwise, we have a concrete key trying to override a previous
		// key, which is *always* wrong.
		p.panicf("Key '%s' has already been defined.", keyContext)
	}

	hash[key] = value
}
   612  
   613  // setType sets the type of a particular value at a given key. It should be
   614  // called immediately AFTER setValue.
   615  //
   616  // Note that if `key` is empty, then the type given will be applied to the
   617  // current context (which is either a table or an array of tables).
   618  func (p *parser) setType(key string, typ tomlType, pos Position) {
   619  	keyContext := make(Key, 0, len(p.context)+1)
   620  	keyContext = append(keyContext, p.context...)
   621  	if len(key) > 0 { // allow type setting for hashes
   622  		keyContext = append(keyContext, key)
   623  	}
   624  	// Special case to make empty keys ("" = 1) work.
   625  	// Without it it will set "" rather than `""`.
   626  	// TODO: why is this needed? And why is this only needed here?
   627  	if len(keyContext) == 0 {
   628  		keyContext = Key{""}
   629  	}
   630  	p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos}
   631  }
   632  
// Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and
// "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly).
//
// addImplicit, removeImplicit and isImplicit add to, delete from and query
// the set of implicit keys, indexed by key.String(). isArray reports whether
// the type recorded for key in keyInfo is tomlArray.
func (p *parser) addImplicit(key Key)     { p.implicits[key.String()] = struct{}{} }
func (p *parser) removeImplicit(key Key)  { delete(p.implicits, key.String()) }
func (p *parser) isImplicit(key Key) bool { _, ok := p.implicits[key.String()]; return ok }
func (p *parser) isArray(key Key) bool    { return p.keyInfo[key.String()].tomlType == tomlArray }

// addImplicitContext marks key as implicit and then establishes it as the
// current (non-array) context.
func (p *parser) addImplicitContext(key Key) {
	p.addImplicit(key)
	p.addContext(key, false)
}
   643  
   644  // current returns the full key name of the current context.
   645  func (p *parser) current() string {
   646  	if len(p.currentKey) == 0 {
   647  		return p.context.String()
   648  	}
   649  	if len(p.context) == 0 {
   650  		return p.currentKey
   651  	}
   652  	return fmt.Sprintf("%s.%s", p.context, p.currentKey)
   653  }
   654  
   655  func stripFirstNewline(s string) string {
   656  	if len(s) > 0 && s[0] == '\n' {
   657  		return s[1:]
   658  	}
   659  	if len(s) > 1 && s[0] == '\r' && s[1] == '\n' {
   660  		return s[2:]
   661  	}
   662  	return s
   663  }
   664  
// stripEscapedNewlines removes newlines inside triple-quoted strings if a
// line ends with "\" (a line-ending backslash). Whitespace at the start of
// the following line is removed as well. It panics with a parse error when
// the last line ends in an escaping backslash.
func (p *parser) stripEscapedNewlines(s string) string {
	split := strings.Split(s, "\n")
	// strings.Split with a non-empty separator always returns at least one
	// element, so this guard is never taken.
	if len(split) < 1 {
		return s
	}

	// escNL records whether the most recent backslash-terminated line had its
	// newline escaped; lines without a trailing backslash leave it unchanged.
	escNL := false // Keep track of the last non-blank line was escaped.
	for i, line := range split {
		line = strings.TrimRight(line, " \t\r")

		// Line does not end in a backslash: keep it, re-adding the newline
		// removed by Split unless an escaped newline is still in effect or
		// this is the final piece.
		if len(line) == 0 || line[len(line)-1] != '\\' {
			split[i] = strings.TrimRight(split[i], "\r")
			if !escNL && i != len(split)-1 {
				split[i] += "\n"
			}
			continue
		}

		// Count the trailing backslashes: escBS ends up true for an even
		// count (they escape each other) and false for an odd count (the
		// last one escapes the newline).
		escBS := true
		for j := len(line) - 1; j >= 0 && line[j] == '\\'; j-- {
			escBS = !escBS
		}
		if escNL {
			line = strings.TrimLeft(line, " \t\r")
		}
		escNL = !escBS

		if escBS {
			split[i] += "\n"
			continue
		}

		if i == len(split)-1 {
			p.panicf("invalid escape: '\\ '")
		}

		split[i] = line[:len(line)-1] // Remove \
		if len(split)-1 > i {
			split[i+1] = strings.TrimLeft(split[i+1], " \t\r")
		}
	}
	return strings.Join(split, "")
}
   709  
// replaceEscapes returns str with its backslash escape sequences replaced by
// the characters they denote. it is only used to locate errors.
//
// A backslash followed by a space or tab is a user error; any other unknown
// escape, or a backslash at the very end, is reported as a bug.
func (p *parser) replaceEscapes(it item, str string) string {
	replaced := make([]rune, 0, len(str))
	s := []byte(str)
	r := 0 // byte offset into s
	for r < len(s) {
		// Ordinary character: decode one rune and copy it over.
		if s[r] != '\\' {
			c, size := utf8.DecodeRune(s[r:])
			r += size
			replaced = append(replaced, c)
			continue
		}
		// Skip the backslash; s[r] is now the escape code.
		r += 1
		if r >= len(s) {
			p.bug("Escape sequence at end of string.")
			return ""
		}
		switch s[r] {
		default:
			p.bug("Expected valid escape code after \\, but got %q.", s[r])
		case ' ', '\t':
			p.panicItemf(it, "invalid escape: '\\%c'", s[r])
		case 'b':
			replaced = append(replaced, rune(0x0008))
			r += 1
		case 't':
			replaced = append(replaced, rune(0x0009))
			r += 1
		case 'n':
			replaced = append(replaced, rune(0x000A))
			r += 1
		case 'f':
			replaced = append(replaced, rune(0x000C))
			r += 1
		case 'r':
			replaced = append(replaced, rune(0x000D))
			r += 1
		case '"':
			replaced = append(replaced, rune(0x0022))
			r += 1
		case '\\':
			replaced = append(replaced, rune(0x005C))
			r += 1
		case 'u':
			// At this point, we know we have a Unicode escape of the form
			// `uXXXX` at [r, r+5). (Because the lexer guarantees this
			// for us.)
			escaped := p.asciiEscapeToUnicode(it, s[r+1:r+5])
			replaced = append(replaced, escaped)
			r += 5
		case 'U':
			// At this point, we know we have a Unicode escape of the form
			// `UXXXXXXXX` at [r, r+9). (Because the lexer guarantees this
			// for us.)
			escaped := p.asciiEscapeToUnicode(it, s[r+1:r+9])
			replaced = append(replaced, escaped)
			r += 9
		}
	}
	return string(replaced)
}
   770  
   771  func (p *parser) asciiEscapeToUnicode(it item, bs []byte) rune {
   772  	s := string(bs)
   773  	hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
   774  	if err != nil {
   775  		p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err)
   776  	}
   777  	if !utf8.ValidRune(rune(hex)) {
   778  		p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s)
   779  	}
   780  	return rune(hex)
   781  }