cuelang.org/go@v0.10.1/internal/third_party/yaml/decode.go (about) 1 package yaml 2 3 import ( 4 "encoding/base64" 5 "fmt" 6 "math" 7 "strconv" 8 "strings" 9 10 "cuelang.org/go/cue/ast" 11 "cuelang.org/go/cue/literal" 12 "cuelang.org/go/cue/token" 13 "cuelang.org/go/internal" 14 "cuelang.org/go/internal/source" 15 ) 16 17 const ( 18 documentNode = 1 << iota 19 mappingNode 20 sequenceNode 21 scalarNode 22 aliasNode 23 ) 24 25 type node struct { 26 kind int 27 startPos yaml_mark_t 28 endPos yaml_mark_t 29 tag string 30 // For an alias node, alias holds the resolved alias. 31 alias *node 32 value string 33 implicit bool 34 children []*node 35 anchors map[string]*node 36 } 37 38 // ---------------------------------------------------------------------------- 39 // Parser, produces a node tree out of a libyaml event stream. 40 41 type parser struct { 42 parser yaml_parser_t 43 event yaml_event_t 44 doc *node 45 info *token.File 46 doneInit bool 47 } 48 49 func newParser(filename string, src interface{}) (*parser, error) { 50 b, err := source.ReadAll(filename, src) 51 if err != nil { 52 return nil, err 53 } 54 info := token.NewFile(filename, -1, len(b)+2) 55 info.SetLinesForContent(b) 56 p := parser{info: info} 57 if !yaml_parser_initialize(&p.parser, filename) { 58 panic("failed to initialize YAML emitter") 59 } 60 if len(b) == 0 { 61 b = []byte{'\n'} 62 } 63 yaml_parser_set_input_string(&p.parser, b) 64 return &p, nil 65 } 66 67 func (p *parser) init() { 68 if p.doneInit { 69 return 70 } 71 p.expect(yaml_STREAM_START_EVENT) 72 p.doneInit = true 73 } 74 75 func (p *parser) destroy() { 76 if p.event.typ != yaml_NO_EVENT { 77 yaml_event_delete(&p.event) 78 } 79 yaml_parser_delete(&p.parser) 80 } 81 82 // expect consumes an event from the event stream and 83 // checks that it's of the expected type. 84 func (p *parser) expect(e yaml_event_type_t) { 85 if p.event.typ == yaml_NO_EVENT { 86 if !yaml_parser_parse(&p.parser, &p.event) { 87 p.fail() 88 } 89 } 90 if p.event.typ == yaml_STREAM_END_EVENT { 91 p.failf(p.event.end_mark.line, "attempted to go past the end of stream; corrupted value?") 92 } 93 if p.event.typ != e { 94 p.parser.problem = fmt.Sprintf("expected %s event but got %s", e, p.event.typ) 95 p.fail() 96 } 97 yaml_event_delete(&p.event) 98 p.event.typ = yaml_NO_EVENT 99 } 100 101 // peek peeks at the next event in the event stream, 102 // puts the results into p.event and returns the event type. 103 func (p *parser) peek() yaml_event_type_t { 104 if p.event.typ != yaml_NO_EVENT { 105 return p.event.typ 106 } 107 if !yaml_parser_parse(&p.parser, &p.event) { 108 p.fail() 109 } 110 return p.event.typ 111 } 112 113 func (p *parser) fail() { 114 var line int 115 if p.parser.problem_mark.line != 0 { 116 line = p.parser.problem_mark.line 117 // Scanner errors don't iterate line before returning error 118 if p.parser.error != yaml_SCANNER_ERROR { 119 line-- 120 } 121 } else if p.parser.context_mark.line != 0 { 122 line = p.parser.context_mark.line - 1 123 } 124 var msg string 125 if len(p.parser.problem) > 0 { 126 msg = p.parser.problem 127 } else { 128 msg = "unknown problem parsing YAML content" 129 } 130 p.failf(line, msg) 131 } 132 133 func (p *parser) anchor(n *node, anchor []byte) { 134 if anchor != nil { 135 p.doc.anchors[string(anchor)] = n 136 } 137 } 138 139 func (p *parser) parse() *node { 140 p.init() 141 switch p.peek() { 142 case yaml_SCALAR_EVENT: 143 return p.scalar() 144 case yaml_ALIAS_EVENT: 145 return p.alias() 146 case yaml_MAPPING_START_EVENT: 147 return p.mapping() 148 case yaml_SEQUENCE_START_EVENT: 149 return p.sequence() 150 case yaml_DOCUMENT_START_EVENT: 151 return p.document() 152 case yaml_STREAM_END_EVENT: 153 // Happens when attempting to decode an empty buffer. 154 return nil 155 default: 156 panic("attempted to parse unknown event: " + p.event.typ.String()) 157 } 158 } 159 160 func (p *parser) node(kind int) *node { 161 n := &node{ 162 kind: kind, 163 startPos: p.event.start_mark, 164 endPos: p.event.end_mark, 165 } 166 return n 167 } 168 169 func (p *parser) document() *node { 170 n := p.node(documentNode) 171 n.anchors = make(map[string]*node) 172 p.doc = n 173 p.expect(yaml_DOCUMENT_START_EVENT) 174 n.children = append(n.children, p.parse()) 175 p.expect(yaml_DOCUMENT_END_EVENT) 176 return n 177 } 178 179 func (p *parser) alias() *node { 180 n := p.node(aliasNode) 181 n.value = string(p.event.anchor) 182 n.alias = p.doc.anchors[n.value] 183 if n.alias == nil { 184 p.failf(n.startPos.line, "unknown anchor '%s' referenced", n.value) 185 } 186 p.expect(yaml_ALIAS_EVENT) 187 return n 188 } 189 190 func (p *parser) scalar() *node { 191 n := p.node(scalarNode) 192 n.value = string(p.event.value) 193 n.tag = string(p.event.tag) 194 n.implicit = p.event.implicit 195 p.anchor(n, p.event.anchor) 196 p.expect(yaml_SCALAR_EVENT) 197 return n 198 } 199 200 func (p *parser) sequence() *node { 201 n := p.node(sequenceNode) 202 p.anchor(n, p.event.anchor) 203 p.expect(yaml_SEQUENCE_START_EVENT) 204 for p.peek() != yaml_SEQUENCE_END_EVENT { 205 n.children = append(n.children, p.parse()) 206 } 207 if len(n.children) > 0 { 208 n.endPos = n.children[len(n.children)-1].endPos 209 } else { 210 n.endPos = p.event.start_mark 211 } 212 p.expect(yaml_SEQUENCE_END_EVENT) 213 return n 214 } 215 216 func (p *parser) mapping() *node { 217 n := p.node(mappingNode) 218 p.anchor(n, p.event.anchor) 219 p.expect(yaml_MAPPING_START_EVENT) 220 for p.peek() != yaml_MAPPING_END_EVENT { 221 n.children = append(n.children, p.parse(), p.parse()) 222 } 223 if len(n.children) > 0 { 224 n.endPos = n.children[len(n.children)-1].endPos 225 } 226 p.expect(yaml_MAPPING_END_EVENT) 227 return n 228 } 229 230 // ---------------------------------------------------------------------------- 231 // Decoder, unmarshals a node into a provided value. 232 233 type decoder struct { 234 p *parser 235 doc *node 236 aliases map[*node]bool 237 terrors []string 238 prev token.Pos 239 forceNewline bool 240 } 241 242 func newDecoder(p *parser) *decoder { 243 d := &decoder{p: p} 244 d.aliases = make(map[*node]bool) 245 return d 246 } 247 248 func (d *decoder) terror(n *node, tag string) string { 249 if n.tag != "" { 250 tag = n.tag 251 } 252 value := n.value 253 if tag != yaml_SEQ_TAG && tag != yaml_MAP_TAG { 254 if len(value) > 10 { 255 value = " `" + value[:7] + "...`" 256 } else { 257 value = " `" + value + "`" 258 } 259 } 260 msg := fmt.Sprintf("line %d: cannot unmarshal %s%s", n.startPos.line+1, shortTag(tag), value) 261 d.terrors = append(d.terrors, msg) 262 return msg 263 } 264 265 func (d *decoder) unmarshal(n *node) (node ast.Expr) { 266 switch n.kind { 267 case documentNode: 268 node = d.document(n) 269 case aliasNode: 270 node = d.alias(n) 271 default: 272 switch n.kind { 273 case scalarNode: 274 node = d.scalar(n) 275 case mappingNode: 276 node = d.mapping(n) 277 case sequenceNode: 278 node = d.sequence(n) 279 default: 280 panic("internal error: unknown node kind: " + strconv.Itoa(n.kind)) 281 } 282 } 283 return node 284 } 285 286 func (d *decoder) attachDocComments(m yaml_mark_t, pos int8, expr ast.Node) { 287 comments := []*ast.Comment{} 288 line := 0 289 for len(d.p.parser.comments) > 0 { 290 c := d.p.parser.comments[0] 291 if c.mark.index >= m.index { 292 break 293 } 294 comments = append(comments, &ast.Comment{ 295 Slash: d.pos(c.mark), 296 Text: "//" + c.text[1:], 297 }) 298 d.p.parser.comments = d.p.parser.comments[1:] 299 line = c.mark.line 300 } 301 if len(comments) > 0 { 302 ast.AddComment(expr, &ast.CommentGroup{ 303 Doc: pos == 0 && line+1 == m.line, 304 Position: pos, 305 List: comments, 306 }) 307 } 308 } 309 310 func (d *decoder) attachLineComment(m yaml_mark_t, pos int8, expr ast.Node) { 311 if len(d.p.parser.comments) == 0 { 312 return 313 } 314 c := d.p.parser.comments[0] 315 if c.mark.index == m.index { 316 comment := &ast.Comment{ 317 Slash: d.pos(c.mark), 318 Text: "//" + c.text[1:], 319 } 320 ast.AddComment(expr, &ast.CommentGroup{ 321 Line: true, 322 Position: pos, 323 List: []*ast.Comment{comment}, 324 }) 325 } 326 } 327 328 func (d *decoder) pos(m yaml_mark_t) token.Pos { 329 pos := d.absPos(m) 330 331 if d.forceNewline { 332 d.forceNewline = false 333 pos = pos.WithRel(token.Newline) 334 } else if d.prev.IsValid() { 335 c := pos.Position() 336 p := d.prev.Position() 337 switch { 338 case c.Line-p.Line >= 2: 339 pos = pos.WithRel(token.NewSection) 340 case c.Line-p.Line == 1: 341 pos = pos.WithRel(token.Newline) 342 case c.Column-p.Column > 0: 343 pos = pos.WithRel(token.Blank) 344 default: 345 pos = pos.WithRel(token.NoSpace) 346 } 347 if pos.Before(d.prev) { 348 return token.NoPos 349 } 350 } 351 352 d.prev = pos 353 return pos 354 } 355 356 func (d *decoder) absPos(m yaml_mark_t) token.Pos { 357 return d.p.info.Pos(m.index, token.NoRelPos) 358 } 359 360 func (d *decoder) start(n *node) token.Pos { 361 if n.startPos == n.endPos { 362 return token.NoPos 363 } 364 return d.pos(n.startPos) 365 } 366 367 func (d *decoder) ident(n *node, name string) *ast.Ident { 368 return &ast.Ident{ 369 NamePos: d.pos(n.startPos), 370 Name: name, 371 } 372 } 373 374 func (d *decoder) document(n *node) ast.Expr { 375 if len(n.children) == 1 { 376 d.doc = n 377 return d.unmarshal(n.children[0]) 378 } 379 return &ast.BottomLit{} // TODO: more informatives 380 } 381 382 func (d *decoder) alias(n *node) ast.Expr { 383 if d.aliases[n] { 384 // TODO this could actually be allowed in some circumstances. 385 d.p.failf(n.startPos.line, "anchor '%s' value contains itself", n.value) 386 } 387 d.aliases[n] = true 388 node := d.unmarshal(n.alias) 389 delete(d.aliases, n) 390 return node 391 } 392 393 func (d *decoder) scalar(n *node) ast.Expr { 394 var tag string 395 var resolved interface{} 396 if n.tag == "" && !n.implicit { 397 tag = yaml_STR_TAG 398 resolved = n.value 399 } else { 400 tag, resolved = d.resolve(n) 401 if tag == yaml_BINARY_TAG { 402 data, err := base64.StdEncoding.DecodeString(resolved.(string)) 403 if err != nil { 404 d.p.failf(n.startPos.line, "!!binary value contains invalid base64 data") 405 } 406 resolved = string(data) 407 } 408 } 409 if resolved == nil { 410 return &ast.BasicLit{ 411 ValuePos: d.start(n).WithRel(token.Blank), 412 Kind: token.NULL, 413 Value: "null", 414 } 415 } 416 switch tag { 417 // TODO: use parse literal or parse expression instead. 418 case yaml_TIMESTAMP_TAG: 419 return &ast.BasicLit{ 420 ValuePos: d.start(n), 421 Kind: token.STRING, 422 Value: literal.String.Quote(n.value), 423 } 424 425 case yaml_STR_TAG: 426 return &ast.BasicLit{ 427 ValuePos: d.start(n), 428 Kind: token.STRING, 429 Value: quoteString(n.value), 430 } 431 432 case yaml_BINARY_TAG: 433 return &ast.BasicLit{ 434 ValuePos: d.start(n), 435 Kind: token.STRING, 436 Value: literal.Bytes.Quote(resolved.(string)), 437 } 438 439 case yaml_BOOL_TAG: 440 tok := token.FALSE 441 str := "false" 442 if b, _ := resolved.(bool); b { 443 tok = token.TRUE 444 str = "true" 445 } 446 return &ast.BasicLit{ 447 ValuePos: d.start(n), 448 Kind: tok, 449 Value: str, 450 } 451 452 case yaml_INT_TAG: 453 // Convert YAML octal to CUE octal. If YAML accepted an invalid 454 // integer, just convert it as well to ensure CUE will fail. 455 s := n.value 456 if len(s) > 1 && s[0] == '0' && s[1] <= '9' { 457 s = "0o" + s[1:] 458 } 459 return d.makeNum(n, s, token.INT) 460 461 case yaml_FLOAT_TAG: 462 value := n.value 463 if f, ok := resolved.(float64); ok { 464 switch { 465 case math.IsInf(f, -1), 466 math.IsInf(f, 1), 467 math.IsNaN(f): 468 value = fmt.Sprint(f) 469 } 470 } 471 if n.tag != "" { 472 if p := strings.IndexAny(value, ".eEiInN"); p == -1 { 473 // TODO: float(v) when we have conversions 474 value = fmt.Sprintf("float & %s", value) 475 } 476 } 477 return d.makeNum(n, value, token.FLOAT) 478 479 case yaml_NULL_TAG: 480 return &ast.BasicLit{ 481 ValuePos: d.start(n).WithRel(token.Blank), 482 Kind: token.NULL, 483 Value: "null", 484 } 485 } 486 d.terror(n, tag) 487 return &ast.BottomLit{} 488 } 489 490 func (d *decoder) label(n *node) ast.Label { 491 pos := d.pos(n.startPos) 492 493 switch x := d.scalar(n).(type) { 494 case *ast.BasicLit: 495 if x.Kind == token.STRING { 496 if ast.IsValidIdent(n.value) && !internal.IsDefOrHidden(n.value) { 497 return &ast.Ident{ 498 NamePos: pos, 499 Name: n.value, 500 } 501 } 502 ast.SetPos(x, pos) 503 return x 504 } 505 506 return &ast.BasicLit{ 507 ValuePos: pos, 508 Kind: token.STRING, 509 Value: literal.Label.Quote(x.Value), 510 } 511 512 default: 513 d.p.failf(n.startPos.line, "invalid label: %q", n.value) 514 } 515 516 return &ast.BasicLit{ 517 ValuePos: pos, 518 Kind: token.STRING, 519 Value: "<invalid>", 520 } 521 } 522 523 func (d *decoder) makeNum(n *node, val string, kind token.Token) (expr ast.Expr) { 524 minuses := 0 525 for ; val[0] == '-'; val = val[1:] { 526 minuses++ 527 } 528 expr = &ast.BasicLit{ 529 ValuePos: d.start(n), // + minuses.Pos(), 530 Kind: kind, 531 Value: val, 532 } 533 if minuses > 0 { 534 expr = &ast.UnaryExpr{ 535 OpPos: d.start(n), 536 Op: token.SUB, 537 X: expr, 538 } 539 } 540 return expr 541 } 542 543 // quoteString converts a string to a CUE multiline string if needed. 544 func quoteString(s string) string { 545 lines := []string{} 546 last := 0 547 for i, c := range s { 548 if c == '\n' { 549 lines = append(lines, s[last:i]) 550 last = i + 1 551 } 552 if c == '\r' { 553 goto quoted 554 } 555 } 556 lines = append(lines, s[last:]) 557 if len(lines) >= 2 { 558 buf := []byte{} 559 buf = append(buf, `"""`+"\n"...) 560 for _, l := range lines { 561 if l == "" { 562 // no indentation for empty lines 563 buf = append(buf, '\n') 564 continue 565 } 566 buf = append(buf, '\t') 567 p := len(buf) 568 buf = strconv.AppendQuote(buf, l) 569 // remove quotes 570 buf[p] = '\t' 571 buf[len(buf)-1] = '\n' 572 } 573 buf = append(buf, "\t\t"+`"""`...) 574 return string(buf) 575 } 576 quoted: 577 return literal.String.Quote(s) 578 } 579 580 func (d *decoder) sequence(n *node) ast.Expr { 581 list := &ast.ListLit{} 582 list.Lbrack = d.pos(n.startPos).WithRel(token.Blank) 583 switch ln := len(n.children); ln { 584 case 0: 585 d.prev = list.Lbrack 586 default: 587 d.prev = d.pos(n.children[ln-1].endPos) 588 } 589 list.Rbrack = d.pos(n.endPos) 590 591 noNewline := true 592 single := d.isOneLiner(n.startPos, n.endPos) 593 for _, c := range n.children { 594 d.forceNewline = !single 595 elem := d.unmarshal(c) 596 list.Elts = append(list.Elts, elem) 597 _, noNewline = elem.(*ast.StructLit) 598 } 599 if !single && !noNewline { 600 list.Rbrack = list.Rbrack.WithRel(token.Newline) 601 } 602 return list 603 } 604 605 func (d *decoder) isOneLiner(start, end yaml_mark_t) bool { 606 s := d.absPos(start).Position() 607 e := d.absPos(end).Position() 608 return s.Line == e.Line 609 } 610 611 func (d *decoder) mapping(n *node) ast.Expr { 612 newline := d.forceNewline 613 614 structure := &ast.StructLit{} 615 d.insertMap(n, structure, false) 616 617 // NOTE: we currently translate YAML without curly braces to CUE with 618 // curly braces, even for single elements. Removing the following line 619 // would generate the folded form. 620 structure.Lbrace = d.absPos(n.startPos).WithRel(token.NoSpace) 621 structure.Rbrace = d.absPos(n.endPos).WithRel(token.Newline) 622 if d.isOneLiner(n.startPos, n.endPos) && !newline { 623 if len(structure.Elts) != 1 { 624 structure.Lbrace = d.absPos(n.startPos).WithRel(token.Blank) 625 } 626 if len(structure.Elts) != 1 || structure.Elts[0].Pos().RelPos() < token.Newline { 627 structure.Rbrace = structure.Rbrace.WithRel(token.Blank) 628 } 629 } 630 return structure 631 } 632 633 func (d *decoder) insertMap(n *node, m *ast.StructLit, merge bool) { 634 l := len(n.children) 635 outer: 636 for i := 0; i < l; i += 2 { 637 if isMerge(n.children[i]) { 638 merge = true 639 d.merge(n.children[i+1], m) 640 continue 641 } 642 switch n.children[i].kind { 643 case mappingNode: 644 d.p.failf(n.startPos.line, "invalid map key: map") 645 case sequenceNode: 646 d.p.failf(n.startPos.line, "invalid map key: sequence") 647 } 648 649 field := &ast.Field{} 650 d.attachDocComments(n.children[i].startPos, 0, field) 651 652 label := d.label(n.children[i]) 653 field.Label = label 654 d.attachLineComment(n.children[i].endPos, 1, label) 655 656 if merge { 657 key := labelStr(label) 658 for _, decl := range m.Elts { 659 f := decl.(*ast.Field) 660 name, _, err := ast.LabelName(f.Label) 661 if err == nil && name == key { 662 f.Value = d.unmarshal(n.children[i+1]) 663 continue outer 664 } 665 } 666 } 667 668 value := d.unmarshal(n.children[i+1]) 669 field.Value = value 670 d.attachDocComments(n.children[i+1].startPos, 0, value) 671 d.attachLineComment(n.children[i+1].endPos, 10, value) 672 673 m.Elts = append(m.Elts, field) 674 } 675 } 676 677 func labelStr(l ast.Label) string { 678 switch x := l.(type) { 679 case *ast.Ident: 680 return x.Name 681 case *ast.BasicLit: 682 s, _ := strconv.Unquote(x.Value) 683 return s 684 } 685 return "" 686 } 687 688 func (d *decoder) failWantMap(n *node) { 689 d.p.failf(n.startPos.line, "map merge requires map or sequence of maps as the value") 690 } 691 692 func (d *decoder) merge(n *node, m *ast.StructLit) { 693 switch n.kind { 694 case mappingNode: 695 d.insertMap(n, m, true) 696 case aliasNode: 697 an, ok := d.doc.anchors[n.value] 698 if ok && an.kind != mappingNode { 699 d.failWantMap(n) 700 } 701 d.insertMap(an, m, true) 702 case sequenceNode: 703 // Step backwards as earlier nodes take precedence. 704 for i := len(n.children) - 1; i >= 0; i-- { 705 ni := n.children[i] 706 if ni.kind == aliasNode { 707 an, ok := d.doc.anchors[ni.value] 708 if ok && an.kind != mappingNode { 709 d.failWantMap(n) 710 } 711 d.insertMap(an, m, true) 712 continue 713 } else if ni.kind != mappingNode { 714 d.failWantMap(n) 715 } 716 d.insertMap(ni, m, true) 717 } 718 default: 719 d.failWantMap(n) 720 } 721 } 722 723 func isMerge(n *node) bool { 724 return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG) 725 }