git.sr.ht/~pingoo/stdx@v0.0.0-20240218134121-094174641f6e/toml/parse.go (about) 1 package toml 2 3 import ( 4 "fmt" 5 "strconv" 6 "strings" 7 "time" 8 "unicode/utf8" 9 10 "git.sr.ht/~pingoo/stdx/toml/internal" 11 ) 12 13 type parser struct { 14 lx *lexer 15 context Key // Full key for the current hash in scope. 16 currentKey string // Base key name for everything except hashes. 17 pos Position // Current position in the TOML file. 18 19 ordered []Key // List of keys in the order that they appear in the TOML data. 20 21 keyInfo map[string]keyInfo // Map keyname → info about the TOML key. 22 mapping map[string]interface{} // Map keyname → key value. 23 implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names"). 24 } 25 26 type keyInfo struct { 27 pos Position 28 tomlType tomlType 29 } 30 31 func parse(data string) (p *parser, err error) { 32 defer func() { 33 if r := recover(); r != nil { 34 if pErr, ok := r.(ParseError); ok { 35 pErr.input = data 36 err = pErr 37 return 38 } 39 panic(r) 40 } 41 }() 42 43 // Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString() 44 // which mangles stuff. 45 if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { 46 data = data[2:] 47 } 48 49 // Examine first few bytes for NULL bytes; this probably means it's a UTF-16 50 // file (second byte in surrogate pair being NULL). Again, do this here to 51 // avoid having to deal with UTF-8/16 stuff in the lexer. 52 ex := 6 53 if len(data) < 6 { 54 ex = len(data) 55 } 56 if i := strings.IndexRune(data[:ex], 0); i > -1 { 57 return nil, ParseError{ 58 Message: "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8", 59 Position: Position{Line: 1, Start: i, Len: 1}, 60 Line: 1, 61 input: data, 62 } 63 } 64 65 p = &parser{ 66 keyInfo: make(map[string]keyInfo), 67 mapping: make(map[string]interface{}), 68 lx: lex(data), 69 ordered: make([]Key, 0), 70 implicits: make(map[string]struct{}), 71 } 72 for { 73 item := p.next() 74 if item.typ == itemEOF { 75 break 76 } 77 p.topLevel(item) 78 } 79 80 return p, nil 81 } 82 83 func (p *parser) panicErr(it item, err error) { 84 panic(ParseError{ 85 err: err, 86 Position: it.pos, 87 Line: it.pos.Len, 88 LastKey: p.current(), 89 }) 90 } 91 92 func (p *parser) panicItemf(it item, format string, v ...interface{}) { 93 panic(ParseError{ 94 Message: fmt.Sprintf(format, v...), 95 Position: it.pos, 96 Line: it.pos.Len, 97 LastKey: p.current(), 98 }) 99 } 100 101 func (p *parser) panicf(format string, v ...interface{}) { 102 panic(ParseError{ 103 Message: fmt.Sprintf(format, v...), 104 Position: p.pos, 105 Line: p.pos.Line, 106 LastKey: p.current(), 107 }) 108 } 109 110 func (p *parser) next() item { 111 it := p.lx.nextItem() 112 //fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val) 113 if it.typ == itemError { 114 if it.err != nil { 115 panic(ParseError{ 116 Position: it.pos, 117 Line: it.pos.Line, 118 LastKey: p.current(), 119 err: it.err, 120 }) 121 } 122 123 p.panicItemf(it, "%s", it.val) 124 } 125 return it 126 } 127 128 func (p *parser) nextPos() item { 129 it := p.next() 130 p.pos = it.pos 131 return it 132 } 133 134 func (p *parser) bug(format string, v ...interface{}) { 135 panic(fmt.Sprintf("BUG: "+format+"\n\n", v...)) 136 } 137 138 func (p *parser) expect(typ itemType) item { 139 it := p.next() 140 p.assertEqual(typ, it.typ) 141 return it 142 } 143 144 func (p *parser) assertEqual(expected, got itemType) { 145 if expected != got { 146 p.bug("Expected '%s' but got '%s'.", expected, got) 147 } 148 } 149 150 func (p *parser) topLevel(item item) { 151 switch item.typ { 152 case itemCommentStart: // # .. 153 p.expect(itemText) 154 case itemTableStart: // [ .. ] 155 name := p.nextPos() 156 157 var key Key 158 for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() { 159 key = append(key, p.keyString(name)) 160 } 161 p.assertEqual(itemTableEnd, name.typ) 162 163 p.addContext(key, false) 164 p.setType("", tomlHash, item.pos) 165 p.ordered = append(p.ordered, key) 166 case itemArrayTableStart: // [[ .. ]] 167 name := p.nextPos() 168 169 var key Key 170 for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() { 171 key = append(key, p.keyString(name)) 172 } 173 p.assertEqual(itemArrayTableEnd, name.typ) 174 175 p.addContext(key, true) 176 p.setType("", tomlArrayHash, item.pos) 177 p.ordered = append(p.ordered, key) 178 case itemKeyStart: // key = .. 179 outerContext := p.context 180 /// Read all the key parts (e.g. 'a' and 'b' in 'a.b') 181 k := p.nextPos() 182 var key Key 183 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { 184 key = append(key, p.keyString(k)) 185 } 186 p.assertEqual(itemKeyEnd, k.typ) 187 188 /// The current key is the last part. 189 p.currentKey = key[len(key)-1] 190 191 /// All the other parts (if any) are the context; need to set each part 192 /// as implicit. 193 context := key[:len(key)-1] 194 for i := range context { 195 p.addImplicitContext(append(p.context, context[i:i+1]...)) 196 } 197 198 /// Set value. 199 vItem := p.next() 200 val, typ := p.value(vItem, false) 201 p.set(p.currentKey, val, typ, vItem.pos) 202 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 203 204 /// Remove the context we added (preserving any context from [tbl] lines). 205 p.context = outerContext 206 p.currentKey = "" 207 default: 208 p.bug("Unexpected type at top level: %s", item.typ) 209 } 210 } 211 212 // Gets a string for a key (or part of a key in a table name). 213 func (p *parser) keyString(it item) string { 214 switch it.typ { 215 case itemText: 216 return it.val 217 case itemString, itemMultilineString, 218 itemRawString, itemRawMultilineString: 219 s, _ := p.value(it, false) 220 return s.(string) 221 default: 222 p.bug("Unexpected key type: %s", it.typ) 223 } 224 panic("unreachable") 225 } 226 227 var datetimeRepl = strings.NewReplacer( 228 "z", "Z", 229 "t", "T", 230 " ", "T") 231 232 // value translates an expected value from the lexer into a Go value wrapped 233 // as an empty interface. 234 func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) { 235 switch it.typ { 236 case itemString: 237 return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it) 238 case itemMultilineString: 239 return p.replaceEscapes(it, stripFirstNewline(p.stripEscapedNewlines(it.val))), p.typeOfPrimitive(it) 240 case itemRawString: 241 return it.val, p.typeOfPrimitive(it) 242 case itemRawMultilineString: 243 return stripFirstNewline(it.val), p.typeOfPrimitive(it) 244 case itemInteger: 245 return p.valueInteger(it) 246 case itemFloat: 247 return p.valueFloat(it) 248 case itemBool: 249 switch it.val { 250 case "true": 251 return true, p.typeOfPrimitive(it) 252 case "false": 253 return false, p.typeOfPrimitive(it) 254 default: 255 p.bug("Expected boolean value, but got '%s'.", it.val) 256 } 257 case itemDatetime: 258 return p.valueDatetime(it) 259 case itemArray: 260 return p.valueArray(it) 261 case itemInlineTableStart: 262 return p.valueInlineTable(it, parentIsArray) 263 default: 264 p.bug("Unexpected value type: %s", it.typ) 265 } 266 panic("unreachable") 267 } 268 269 func (p *parser) valueInteger(it item) (interface{}, tomlType) { 270 if !numUnderscoresOK(it.val) { 271 p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val) 272 } 273 if numHasLeadingZero(it.val) { 274 p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val) 275 } 276 277 num, err := strconv.ParseInt(it.val, 0, 64) 278 if err != nil { 279 // Distinguish integer values. Normally, it'd be a bug if the lexer 280 // provides an invalid integer, but it's possible that the number is 281 // out of range of valid values (which the lexer cannot determine). 282 // So mark the former as a bug but the latter as a legitimate user 283 // error. 284 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { 285 p.panicErr(it, errParseRange{i: it.val, size: "int64"}) 286 } else { 287 p.bug("Expected integer value, but got '%s'.", it.val) 288 } 289 } 290 return num, p.typeOfPrimitive(it) 291 } 292 293 func (p *parser) valueFloat(it item) (interface{}, tomlType) { 294 parts := strings.FieldsFunc(it.val, func(r rune) bool { 295 switch r { 296 case '.', 'e', 'E': 297 return true 298 } 299 return false 300 }) 301 for _, part := range parts { 302 if !numUnderscoresOK(part) { 303 p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val) 304 } 305 } 306 if len(parts) > 0 && numHasLeadingZero(parts[0]) { 307 p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val) 308 } 309 if !numPeriodsOK(it.val) { 310 // As a special case, numbers like '123.' or '1.e2', 311 // which are valid as far as Go/strconv are concerned, 312 // must be rejected because TOML says that a fractional 313 // part consists of '.' followed by 1+ digits. 314 p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val) 315 } 316 val := strings.Replace(it.val, "_", "", -1) 317 if val == "+nan" || val == "-nan" { // Go doesn't support this, but TOML spec does. 318 val = "nan" 319 } 320 num, err := strconv.ParseFloat(val, 64) 321 if err != nil { 322 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange { 323 p.panicErr(it, errParseRange{i: it.val, size: "float64"}) 324 } else { 325 p.panicItemf(it, "Invalid float value: %q", it.val) 326 } 327 } 328 return num, p.typeOfPrimitive(it) 329 } 330 331 var dtTypes = []struct { 332 fmt string 333 zone *time.Location 334 }{ 335 {time.RFC3339Nano, time.Local}, 336 {"2006-01-02T15:04:05.999999999", internal.LocalDatetime}, 337 {"2006-01-02", internal.LocalDate}, 338 {"15:04:05.999999999", internal.LocalTime}, 339 } 340 341 func (p *parser) valueDatetime(it item) (interface{}, tomlType) { 342 it.val = datetimeRepl.Replace(it.val) 343 var ( 344 t time.Time 345 ok bool 346 err error 347 ) 348 for _, dt := range dtTypes { 349 t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone) 350 if err == nil { 351 ok = true 352 break 353 } 354 } 355 if !ok { 356 p.panicItemf(it, "Invalid TOML Datetime: %q.", it.val) 357 } 358 return t, p.typeOfPrimitive(it) 359 } 360 361 func (p *parser) valueArray(it item) (interface{}, tomlType) { 362 p.setType(p.currentKey, tomlArray, it.pos) 363 364 var ( 365 types []tomlType 366 367 // Initialize to a non-nil empty slice. This makes it consistent with 368 // how S = [] decodes into a non-nil slice inside something like struct 369 // { S []string }. See #338 370 array = []interface{}{} 371 ) 372 for it = p.next(); it.typ != itemArrayEnd; it = p.next() { 373 if it.typ == itemCommentStart { 374 p.expect(itemText) 375 continue 376 } 377 378 val, typ := p.value(it, true) 379 array = append(array, val) 380 types = append(types, typ) 381 382 // XXX: types isn't used here, we need it to record the accurate type 383 // information. 384 // 385 // Not entirely sure how to best store this; could use "key[0]", 386 // "key[1]" notation, or maybe store it on the Array type? 387 } 388 return array, tomlArray 389 } 390 391 func (p *parser) valueInlineTable(it item, parentIsArray bool) (interface{}, tomlType) { 392 var ( 393 hash = make(map[string]interface{}) 394 outerContext = p.context 395 outerKey = p.currentKey 396 ) 397 398 p.context = append(p.context, p.currentKey) 399 prevContext := p.context 400 p.currentKey = "" 401 402 p.addImplicit(p.context) 403 p.addContext(p.context, parentIsArray) 404 405 /// Loop over all table key/value pairs. 406 for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() { 407 if it.typ == itemCommentStart { 408 p.expect(itemText) 409 continue 410 } 411 412 /// Read all key parts. 413 k := p.nextPos() 414 var key Key 415 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() { 416 key = append(key, p.keyString(k)) 417 } 418 p.assertEqual(itemKeyEnd, k.typ) 419 420 /// The current key is the last part. 421 p.currentKey = key[len(key)-1] 422 423 /// All the other parts (if any) are the context; need to set each part 424 /// as implicit. 425 context := key[:len(key)-1] 426 for i := range context { 427 p.addImplicitContext(append(p.context, context[i:i+1]...)) 428 } 429 430 /// Set the value. 431 val, typ := p.value(p.next(), false) 432 p.set(p.currentKey, val, typ, it.pos) 433 p.ordered = append(p.ordered, p.context.add(p.currentKey)) 434 hash[p.currentKey] = val 435 436 /// Restore context. 437 p.context = prevContext 438 } 439 p.context = outerContext 440 p.currentKey = outerKey 441 return hash, tomlHash 442 } 443 444 // numHasLeadingZero checks if this number has leading zeroes, allowing for '0', 445 // +/- signs, and base prefixes. 446 func numHasLeadingZero(s string) bool { 447 if len(s) > 1 && s[0] == '0' && !(s[1] == 'b' || s[1] == 'o' || s[1] == 'x') { // Allow 0b, 0o, 0x 448 return true 449 } 450 if len(s) > 2 && (s[0] == '-' || s[0] == '+') && s[1] == '0' { 451 return true 452 } 453 return false 454 } 455 456 // numUnderscoresOK checks whether each underscore in s is surrounded by 457 // characters that are not underscores. 458 func numUnderscoresOK(s string) bool { 459 switch s { 460 case "nan", "+nan", "-nan", "inf", "-inf", "+inf": 461 return true 462 } 463 accept := false 464 for _, r := range s { 465 if r == '_' { 466 if !accept { 467 return false 468 } 469 } 470 471 // isHexadecimal is a superset of all the permissable characters 472 // surrounding an underscore. 473 accept = isHexadecimal(r) 474 } 475 return accept 476 } 477 478 // numPeriodsOK checks whether every period in s is followed by a digit. 479 func numPeriodsOK(s string) bool { 480 period := false 481 for _, r := range s { 482 if period && !isDigit(r) { 483 return false 484 } 485 period = r == '.' 486 } 487 return !period 488 } 489 490 // Set the current context of the parser, where the context is either a hash or 491 // an array of hashes, depending on the value of the `array` parameter. 492 // 493 // Establishing the context also makes sure that the key isn't a duplicate, and 494 // will create implicit hashes automatically. 495 func (p *parser) addContext(key Key, array bool) { 496 var ok bool 497 498 // Always start at the top level and drill down for our context. 499 hashContext := p.mapping 500 keyContext := make(Key, 0) 501 502 // We only need implicit hashes for key[0:-1] 503 for _, k := range key[0 : len(key)-1] { 504 _, ok = hashContext[k] 505 keyContext = append(keyContext, k) 506 507 // No key? Make an implicit hash and move on. 508 if !ok { 509 p.addImplicit(keyContext) 510 hashContext[k] = make(map[string]interface{}) 511 } 512 513 // If the hash context is actually an array of tables, then set 514 // the hash context to the last element in that array. 515 // 516 // Otherwise, it better be a table, since this MUST be a key group (by 517 // virtue of it not being the last element in a key). 518 switch t := hashContext[k].(type) { 519 case []map[string]interface{}: 520 hashContext = t[len(t)-1] 521 case map[string]interface{}: 522 hashContext = t 523 default: 524 p.panicf("Key '%s' was already created as a hash.", keyContext) 525 } 526 } 527 528 p.context = keyContext 529 if array { 530 // If this is the first element for this array, then allocate a new 531 // list of tables for it. 532 k := key[len(key)-1] 533 if _, ok := hashContext[k]; !ok { 534 hashContext[k] = make([]map[string]interface{}, 0, 4) 535 } 536 537 // Add a new table. But make sure the key hasn't already been used 538 // for something else. 539 if hash, ok := hashContext[k].([]map[string]interface{}); ok { 540 hashContext[k] = append(hash, make(map[string]interface{})) 541 } else { 542 p.panicf("Key '%s' was already created and cannot be used as an array.", key) 543 } 544 } else { 545 p.setValue(key[len(key)-1], make(map[string]interface{})) 546 } 547 p.context = append(p.context, key[len(key)-1]) 548 } 549 550 // set calls setValue and setType. 551 func (p *parser) set(key string, val interface{}, typ tomlType, pos Position) { 552 p.setValue(key, val) 553 p.setType(key, typ, pos) 554 555 } 556 557 // setValue sets the given key to the given value in the current context. 558 // It will make sure that the key hasn't already been defined, account for 559 // implicit key groups. 560 func (p *parser) setValue(key string, value interface{}) { 561 var ( 562 tmpHash interface{} 563 ok bool 564 hash = p.mapping 565 keyContext Key 566 ) 567 for _, k := range p.context { 568 keyContext = append(keyContext, k) 569 if tmpHash, ok = hash[k]; !ok { 570 p.bug("Context for key '%s' has not been established.", keyContext) 571 } 572 switch t := tmpHash.(type) { 573 case []map[string]interface{}: 574 // The context is a table of hashes. Pick the most recent table 575 // defined as the current hash. 576 hash = t[len(t)-1] 577 case map[string]interface{}: 578 hash = t 579 default: 580 p.panicf("Key '%s' has already been defined.", keyContext) 581 } 582 } 583 keyContext = append(keyContext, key) 584 585 if _, ok := hash[key]; ok { 586 // Normally redefining keys isn't allowed, but the key could have been 587 // defined implicitly and it's allowed to be redefined concretely. (See 588 // the `valid/implicit-and-explicit-after.toml` in toml-test) 589 // 590 // But we have to make sure to stop marking it as an implicit. (So that 591 // another redefinition provokes an error.) 592 // 593 // Note that since it has already been defined (as a hash), we don't 594 // want to overwrite it. So our business is done. 595 if p.isArray(keyContext) { 596 p.removeImplicit(keyContext) 597 hash[key] = value 598 return 599 } 600 if p.isImplicit(keyContext) { 601 p.removeImplicit(keyContext) 602 return 603 } 604 605 // Otherwise, we have a concrete key trying to override a previous 606 // key, which is *always* wrong. 607 p.panicf("Key '%s' has already been defined.", keyContext) 608 } 609 610 hash[key] = value 611 } 612 613 // setType sets the type of a particular value at a given key. It should be 614 // called immediately AFTER setValue. 615 // 616 // Note that if `key` is empty, then the type given will be applied to the 617 // current context (which is either a table or an array of tables). 618 func (p *parser) setType(key string, typ tomlType, pos Position) { 619 keyContext := make(Key, 0, len(p.context)+1) 620 keyContext = append(keyContext, p.context...) 621 if len(key) > 0 { // allow type setting for hashes 622 keyContext = append(keyContext, key) 623 } 624 // Special case to make empty keys ("" = 1) work. 625 // Without it it will set "" rather than `""`. 626 // TODO: why is this needed? And why is this only needed here? 627 if len(keyContext) == 0 { 628 keyContext = Key{""} 629 } 630 p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos} 631 } 632 633 // Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and 634 // "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly). 635 func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = struct{}{} } 636 func (p *parser) removeImplicit(key Key) { delete(p.implicits, key.String()) } 637 func (p *parser) isImplicit(key Key) bool { _, ok := p.implicits[key.String()]; return ok } 638 func (p *parser) isArray(key Key) bool { return p.keyInfo[key.String()].tomlType == tomlArray } 639 func (p *parser) addImplicitContext(key Key) { 640 p.addImplicit(key) 641 p.addContext(key, false) 642 } 643 644 // current returns the full key name of the current context. 645 func (p *parser) current() string { 646 if len(p.currentKey) == 0 { 647 return p.context.String() 648 } 649 if len(p.context) == 0 { 650 return p.currentKey 651 } 652 return fmt.Sprintf("%s.%s", p.context, p.currentKey) 653 } 654 655 func stripFirstNewline(s string) string { 656 if len(s) > 0 && s[0] == '\n' { 657 return s[1:] 658 } 659 if len(s) > 1 && s[0] == '\r' && s[1] == '\n' { 660 return s[2:] 661 } 662 return s 663 } 664 665 // Remove newlines inside triple-quoted strings if a line ends with "\". 666 func (p *parser) stripEscapedNewlines(s string) string { 667 split := strings.Split(s, "\n") 668 if len(split) < 1 { 669 return s 670 } 671 672 escNL := false // Keep track of the last non-blank line was escaped. 673 for i, line := range split { 674 line = strings.TrimRight(line, " \t\r") 675 676 if len(line) == 0 || line[len(line)-1] != '\\' { 677 split[i] = strings.TrimRight(split[i], "\r") 678 if !escNL && i != len(split)-1 { 679 split[i] += "\n" 680 } 681 continue 682 } 683 684 escBS := true 685 for j := len(line) - 1; j >= 0 && line[j] == '\\'; j-- { 686 escBS = !escBS 687 } 688 if escNL { 689 line = strings.TrimLeft(line, " \t\r") 690 } 691 escNL = !escBS 692 693 if escBS { 694 split[i] += "\n" 695 continue 696 } 697 698 if i == len(split)-1 { 699 p.panicf("invalid escape: '\\ '") 700 } 701 702 split[i] = line[:len(line)-1] // Remove \ 703 if len(split)-1 > i { 704 split[i+1] = strings.TrimLeft(split[i+1], " \t\r") 705 } 706 } 707 return strings.Join(split, "") 708 } 709 710 func (p *parser) replaceEscapes(it item, str string) string { 711 replaced := make([]rune, 0, len(str)) 712 s := []byte(str) 713 r := 0 714 for r < len(s) { 715 if s[r] != '\\' { 716 c, size := utf8.DecodeRune(s[r:]) 717 r += size 718 replaced = append(replaced, c) 719 continue 720 } 721 r += 1 722 if r >= len(s) { 723 p.bug("Escape sequence at end of string.") 724 return "" 725 } 726 switch s[r] { 727 default: 728 p.bug("Expected valid escape code after \\, but got %q.", s[r]) 729 case ' ', '\t': 730 p.panicItemf(it, "invalid escape: '\\%c'", s[r]) 731 case 'b': 732 replaced = append(replaced, rune(0x0008)) 733 r += 1 734 case 't': 735 replaced = append(replaced, rune(0x0009)) 736 r += 1 737 case 'n': 738 replaced = append(replaced, rune(0x000A)) 739 r += 1 740 case 'f': 741 replaced = append(replaced, rune(0x000C)) 742 r += 1 743 case 'r': 744 replaced = append(replaced, rune(0x000D)) 745 r += 1 746 case '"': 747 replaced = append(replaced, rune(0x0022)) 748 r += 1 749 case '\\': 750 replaced = append(replaced, rune(0x005C)) 751 r += 1 752 case 'u': 753 // At this point, we know we have a Unicode escape of the form 754 // `uXXXX` at [r, r+5). (Because the lexer guarantees this 755 // for us.) 756 escaped := p.asciiEscapeToUnicode(it, s[r+1:r+5]) 757 replaced = append(replaced, escaped) 758 r += 5 759 case 'U': 760 // At this point, we know we have a Unicode escape of the form 761 // `uXXXX` at [r, r+9). (Because the lexer guarantees this 762 // for us.) 763 escaped := p.asciiEscapeToUnicode(it, s[r+1:r+9]) 764 replaced = append(replaced, escaped) 765 r += 9 766 } 767 } 768 return string(replaced) 769 } 770 771 func (p *parser) asciiEscapeToUnicode(it item, bs []byte) rune { 772 s := string(bs) 773 hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32) 774 if err != nil { 775 p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err) 776 } 777 if !utf8.ValidRune(rune(hex)) { 778 p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s) 779 } 780 return rune(hex) 781 }