github.com/solo-io/cue@v0.4.7/internal/third_party/yaml/decode.go (about) 1 package yaml 2 3 import ( 4 "bytes" 5 "encoding/base64" 6 "errors" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "math" 11 "reflect" 12 "strconv" 13 "strings" 14 "time" 15 16 "github.com/solo-io/cue/cue/ast" 17 "github.com/solo-io/cue/cue/literal" 18 "github.com/solo-io/cue/cue/token" 19 "github.com/solo-io/cue/internal" 20 ) 21 22 const ( 23 documentNode = 1 << iota 24 mappingNode 25 sequenceNode 26 scalarNode 27 aliasNode 28 ) 29 30 type node struct { 31 kind int 32 startPos yaml_mark_t 33 endPos yaml_mark_t 34 tag string 35 // For an alias node, alias holds the resolved alias. 36 alias *node 37 value string 38 implicit bool 39 children []*node 40 anchors map[string]*node 41 } 42 43 // ---------------------------------------------------------------------------- 44 // Parser, produces a node tree out of a libyaml event stream. 45 46 type parser struct { 47 parser yaml_parser_t 48 event yaml_event_t 49 doc *node 50 info *token.File 51 last *node 52 doneInit bool 53 } 54 55 func readSource(filename string, src interface{}) ([]byte, error) { 56 if src != nil { 57 switch s := src.(type) { 58 case string: 59 return []byte(s), nil 60 case []byte: 61 return s, nil 62 case *bytes.Buffer: 63 // is io.Reader, but src is already available in []byte form 64 if s != nil { 65 return s.Bytes(), nil 66 } 67 case io.Reader: 68 var buf bytes.Buffer 69 if _, err := io.Copy(&buf, s); err != nil { 70 return nil, err 71 } 72 return buf.Bytes(), nil 73 } 74 return nil, errors.New("invalid source") 75 } 76 return ioutil.ReadFile(filename) 77 } 78 79 func newParser(filename string, src interface{}) (*parser, error) { 80 b, err := readSource(filename, src) 81 if err != nil { 82 return nil, err 83 } 84 info := token.NewFile(filename, -1, len(b)+2) 85 info.SetLinesForContent(b) 86 p := parser{info: info} 87 if !yaml_parser_initialize(&p.parser, filename) { 88 panic("failed to initialize YAML emitter") 89 } 90 if len(b) == 0 { 91 b = []byte{'\n'} 92 } 93 yaml_parser_set_input_string(&p.parser, b) 94 return &p, nil 95 } 96 97 func (p *parser) init() { 98 if p.doneInit { 99 return 100 } 101 p.expect(yaml_STREAM_START_EVENT) 102 p.doneInit = true 103 } 104 105 func (p *parser) destroy() { 106 if p.event.typ != yaml_NO_EVENT { 107 yaml_event_delete(&p.event) 108 } 109 yaml_parser_delete(&p.parser) 110 } 111 112 // expect consumes an event from the event stream and 113 // checks that it's of the expected type. 114 func (p *parser) expect(e yaml_event_type_t) { 115 if p.event.typ == yaml_NO_EVENT { 116 if !yaml_parser_parse(&p.parser, &p.event) { 117 p.fail() 118 } 119 } 120 if p.event.typ == yaml_STREAM_END_EVENT { 121 p.failf(p.event.end_mark.line, "attempted to go past the end of stream; corrupted value?") 122 } 123 if p.event.typ != e { 124 p.parser.problem = fmt.Sprintf("expected %s event but got %s", e, p.event.typ) 125 p.fail() 126 } 127 yaml_event_delete(&p.event) 128 p.event.typ = yaml_NO_EVENT 129 } 130 131 // peek peeks at the next event in the event stream, 132 // puts the results into p.event and returns the event type. 133 func (p *parser) peek() yaml_event_type_t { 134 if p.event.typ != yaml_NO_EVENT { 135 return p.event.typ 136 } 137 if !yaml_parser_parse(&p.parser, &p.event) { 138 p.fail() 139 } 140 return p.event.typ 141 } 142 143 func (p *parser) fail() { 144 var line int 145 if p.parser.problem_mark.line != 0 { 146 line = p.parser.problem_mark.line 147 // Scanner errors don't iterate line before returning error 148 if p.parser.error != yaml_SCANNER_ERROR { 149 line-- 150 } 151 } else if p.parser.context_mark.line != 0 { 152 line = p.parser.context_mark.line - 1 153 } 154 var msg string 155 if len(p.parser.problem) > 0 { 156 msg = p.parser.problem 157 } else { 158 msg = "unknown problem parsing YAML content" 159 } 160 p.failf(line, msg) 161 } 162 163 func (p *parser) anchor(n *node, anchor []byte) { 164 if anchor != nil { 165 p.doc.anchors[string(anchor)] = n 166 } 167 } 168 169 func (p *parser) parse() *node { 170 p.init() 171 switch p.peek() { 172 case yaml_SCALAR_EVENT: 173 return p.scalar() 174 case yaml_ALIAS_EVENT: 175 return p.alias() 176 case yaml_MAPPING_START_EVENT: 177 return p.mapping() 178 case yaml_SEQUENCE_START_EVENT: 179 return p.sequence() 180 case yaml_DOCUMENT_START_EVENT: 181 return p.document() 182 case yaml_STREAM_END_EVENT: 183 // Happens when attempting to decode an empty buffer. 184 return nil 185 default: 186 panic("attempted to parse unknown event: " + p.event.typ.String()) 187 } 188 } 189 190 func (p *parser) node(kind int) *node { 191 n := &node{ 192 kind: kind, 193 startPos: p.event.start_mark, 194 endPos: p.event.end_mark, 195 } 196 return n 197 } 198 199 func (p *parser) document() *node { 200 n := p.node(documentNode) 201 n.anchors = make(map[string]*node) 202 p.doc = n 203 p.expect(yaml_DOCUMENT_START_EVENT) 204 n.children = append(n.children, p.parse()) 205 p.expect(yaml_DOCUMENT_END_EVENT) 206 return n 207 } 208 209 func (p *parser) alias() *node { 210 n := p.node(aliasNode) 211 n.value = string(p.event.anchor) 212 n.alias = p.doc.anchors[n.value] 213 if n.alias == nil { 214 p.failf(n.startPos.line, "unknown anchor '%s' referenced", n.value) 215 } 216 p.expect(yaml_ALIAS_EVENT) 217 return n 218 } 219 220 func (p *parser) scalar() *node { 221 n := p.node(scalarNode) 222 n.value = string(p.event.value) 223 n.tag = string(p.event.tag) 224 n.implicit = p.event.implicit 225 p.anchor(n, p.event.anchor) 226 p.expect(yaml_SCALAR_EVENT) 227 return n 228 } 229 230 func (p *parser) sequence() *node { 231 n := p.node(sequenceNode) 232 p.anchor(n, p.event.anchor) 233 p.expect(yaml_SEQUENCE_START_EVENT) 234 for p.peek() != yaml_SEQUENCE_END_EVENT { 235 n.children = append(n.children, p.parse()) 236 } 237 if len(n.children) > 0 { 238 n.endPos = n.children[len(n.children)-1].endPos 239 } 240 p.expect(yaml_SEQUENCE_END_EVENT) 241 return n 242 } 243 244 func (p *parser) mapping() *node { 245 n := p.node(mappingNode) 246 p.anchor(n, p.event.anchor) 247 p.expect(yaml_MAPPING_START_EVENT) 248 for p.peek() != yaml_MAPPING_END_EVENT { 249 n.children = append(n.children, p.parse(), p.parse()) 250 } 251 if len(n.children) > 0 { 252 n.endPos = n.children[len(n.children)-1].endPos 253 } 254 p.expect(yaml_MAPPING_END_EVENT) 255 return n 256 } 257 258 // ---------------------------------------------------------------------------- 259 // Decoder, unmarshals a node into a provided value. 260 261 type decoder struct { 262 p *parser 263 doc *node 264 aliases map[*node]bool 265 mapType reflect.Type 266 terrors []string 267 prev token.Pos 268 lastNode ast.Node 269 forceNewline bool 270 } 271 272 var ( 273 mapItemType = reflect.TypeOf(MapItem{}) 274 durationType = reflect.TypeOf(time.Duration(0)) 275 defaultMapType = reflect.TypeOf(map[interface{}]interface{}{}) 276 timeType = reflect.TypeOf(time.Time{}) 277 ptrTimeType = reflect.TypeOf(&time.Time{}) 278 ) 279 280 func newDecoder(p *parser) *decoder { 281 d := &decoder{p: p, mapType: defaultMapType} 282 d.aliases = make(map[*node]bool) 283 return d 284 } 285 286 func (d *decoder) terror(n *node, tag string) string { 287 if n.tag != "" { 288 tag = n.tag 289 } 290 value := n.value 291 if tag != yaml_SEQ_TAG && tag != yaml_MAP_TAG { 292 if len(value) > 10 { 293 value = " `" + value[:7] + "...`" 294 } else { 295 value = " `" + value + "`" 296 } 297 } 298 msg := fmt.Sprintf("line %d: cannot unmarshal %s%s", n.startPos.line+1, shortTag(tag), value) 299 d.terrors = append(d.terrors, msg) 300 return msg 301 } 302 303 func (d *decoder) unmarshal(n *node) (node ast.Expr) { 304 switch n.kind { 305 case documentNode: 306 node = d.document(n) 307 case aliasNode: 308 node = d.alias(n) 309 default: 310 switch n.kind { 311 case scalarNode: 312 node = d.scalar(n) 313 case mappingNode: 314 node = d.mapping(n) 315 case sequenceNode: 316 node = d.sequence(n) 317 default: 318 panic("internal error: unknown node kind: " + strconv.Itoa(n.kind)) 319 } 320 } 321 return node 322 } 323 324 func (d *decoder) attachDocComments(m yaml_mark_t, pos int8, expr ast.Node) { 325 comments := []*ast.Comment{} 326 line := 0 327 for len(d.p.parser.comments) > 0 { 328 c := d.p.parser.comments[0] 329 if c.mark.index >= m.index { 330 break 331 } 332 comments = append(comments, &ast.Comment{ 333 Slash: d.pos(c.mark), 334 Text: "//" + c.text[1:], 335 }) 336 d.p.parser.comments = d.p.parser.comments[1:] 337 line = c.mark.line 338 } 339 if len(comments) > 0 { 340 expr.AddComment(&ast.CommentGroup{ 341 Doc: pos == 0 && line+1 == m.line, 342 Position: pos, 343 List: comments, 344 }) 345 } 346 } 347 348 func (d *decoder) attachLineComment(m yaml_mark_t, pos int8, expr ast.Node) { 349 if len(d.p.parser.comments) == 0 { 350 return 351 } 352 c := d.p.parser.comments[0] 353 if c.mark.index == m.index { 354 comment := &ast.Comment{ 355 Slash: d.pos(c.mark), 356 Text: "//" + c.text[1:], 357 } 358 expr.AddComment(&ast.CommentGroup{ 359 Line: true, 360 Position: pos, 361 List: []*ast.Comment{comment}, 362 }) 363 } 364 } 365 366 func (d *decoder) pos(m yaml_mark_t) token.Pos { 367 pos := d.p.info.Pos(m.index+1, token.NoRelPos) 368 369 if d.forceNewline { 370 d.forceNewline = false 371 pos = pos.WithRel(token.Newline) 372 } else if d.prev.IsValid() { 373 c := pos.Position() 374 p := d.prev.Position() 375 switch { 376 case c.Line-p.Line >= 2: 377 pos = pos.WithRel(token.NewSection) 378 case c.Line-p.Line == 1: 379 pos = pos.WithRel(token.Newline) 380 case c.Column-p.Column > 0: 381 pos = pos.WithRel(token.Blank) 382 default: 383 pos = pos.WithRel(token.NoSpace) 384 } 385 if pos.Before(d.prev) { 386 return token.NoPos 387 } 388 } 389 390 d.prev = pos 391 return pos 392 } 393 394 func (d *decoder) absPos(m yaml_mark_t) token.Pos { 395 return d.p.info.Pos(m.index+1, token.NoRelPos) 396 } 397 398 func (d *decoder) start(n *node) token.Pos { 399 if n.startPos == n.endPos { 400 return token.NoPos 401 } 402 return d.pos(n.startPos) 403 } 404 405 func (d *decoder) ident(n *node, name string) *ast.Ident { 406 return &ast.Ident{ 407 NamePos: d.pos(n.startPos), 408 Name: name, 409 } 410 } 411 412 func (d *decoder) document(n *node) ast.Expr { 413 if len(n.children) == 1 { 414 d.doc = n 415 return d.unmarshal(n.children[0]) 416 } 417 return &ast.BottomLit{} // TODO: more informatives 418 } 419 420 func (d *decoder) alias(n *node) ast.Expr { 421 if d.aliases[n] { 422 // TODO this could actually be allowed in some circumstances. 423 d.p.failf(n.startPos.line, "anchor '%s' value contains itself", n.value) 424 } 425 d.aliases[n] = true 426 node := d.unmarshal(n.alias) 427 delete(d.aliases, n) 428 return node 429 } 430 431 var zeroValue reflect.Value 432 433 func (d *decoder) scalar(n *node) ast.Expr { 434 var tag string 435 var resolved interface{} 436 if n.tag == "" && !n.implicit { 437 tag = yaml_STR_TAG 438 resolved = n.value 439 } else { 440 tag, resolved = d.resolve(n) 441 if tag == yaml_BINARY_TAG { 442 data, err := base64.StdEncoding.DecodeString(resolved.(string)) 443 if err != nil { 444 d.p.failf(n.startPos.line, "!!binary value contains invalid base64 data") 445 } 446 resolved = string(data) 447 } 448 } 449 if resolved == nil { 450 return &ast.BasicLit{ 451 ValuePos: d.start(n).WithRel(token.Blank), 452 Kind: token.NULL, 453 Value: "null", 454 } 455 } 456 switch tag { 457 // TODO: use parse literal or parse expression instead. 458 case yaml_TIMESTAMP_TAG: 459 return &ast.BasicLit{ 460 ValuePos: d.start(n), 461 Kind: token.STRING, 462 Value: literal.String.Quote(n.value), 463 } 464 465 case yaml_STR_TAG: 466 return &ast.BasicLit{ 467 ValuePos: d.start(n), 468 Kind: token.STRING, 469 Value: quoteString(n.value), 470 } 471 472 case yaml_BINARY_TAG: 473 return &ast.BasicLit{ 474 ValuePos: d.start(n), 475 Kind: token.STRING, 476 Value: literal.Bytes.Quote(resolved.(string)), 477 } 478 479 case yaml_BOOL_TAG: 480 tok := token.FALSE 481 str := "false" 482 if b, _ := resolved.(bool); b { 483 tok = token.TRUE 484 str = "true" 485 } 486 return &ast.BasicLit{ 487 ValuePos: d.start(n), 488 Kind: tok, 489 Value: str, 490 } 491 492 case yaml_INT_TAG: 493 // Convert YAML octal to CUE octal. If YAML accepted an invalid 494 // integer, just convert it as well to ensure CUE will fail. 495 s := n.value 496 if len(s) > 1 && s[0] == '0' && s[1] <= '9' { 497 s = "0o" + s[1:] 498 } 499 return d.makeNum(n, s, token.INT) 500 501 case yaml_FLOAT_TAG: 502 value := n.value 503 if f, ok := resolved.(float64); ok { 504 switch { 505 case math.IsInf(f, -1), 506 math.IsInf(f, 1), 507 math.IsNaN(f): 508 value = fmt.Sprint(f) 509 } 510 } 511 if n.tag != "" { 512 if p := strings.IndexAny(value, ".eEiInN"); p == -1 { 513 // TODO: float(v) when we have conversions 514 value = fmt.Sprintf("float & %s", value) 515 } 516 } 517 return d.makeNum(n, value, token.FLOAT) 518 519 case yaml_NULL_TAG: 520 return &ast.BasicLit{ 521 ValuePos: d.start(n).WithRel(token.Blank), 522 Kind: token.NULL, 523 Value: "null", 524 } 525 } 526 err := &ast.BottomLit{ 527 Bottom: d.pos(n.startPos), 528 } 529 comment := &ast.Comment{ 530 Slash: d.start(n), 531 Text: "// " + d.terror(n, tag), 532 } 533 err.AddComment(&ast.CommentGroup{ 534 Line: true, 535 Position: 1, 536 List: []*ast.Comment{comment}, 537 }) 538 return err 539 } 540 541 func (d *decoder) label(n *node) ast.Label { 542 if ast.IsValidIdent(n.value) && !internal.IsDefOrHidden(n.value) { 543 return d.ident(n, n.value) 544 } 545 return &ast.BasicLit{ 546 ValuePos: d.start(n), 547 Kind: token.STRING, 548 Value: literal.Label.Quote(n.value), 549 } 550 } 551 552 func (d *decoder) makeNum(n *node, val string, kind token.Token) (expr ast.Expr) { 553 minuses := 0 554 for ; val[0] == '-'; val = val[1:] { 555 minuses++ 556 } 557 expr = &ast.BasicLit{ 558 ValuePos: d.start(n), // + minuses.Pos(), 559 Kind: kind, 560 Value: val, 561 } 562 if minuses > 0 { 563 expr = &ast.UnaryExpr{ 564 OpPos: d.start(n), 565 Op: token.SUB, 566 X: expr, 567 } 568 } 569 return expr 570 } 571 572 // quoteString converts a string to a CUE multiline string if needed. 573 func quoteString(s string) string { 574 lines := []string{} 575 last := 0 576 for i, c := range s { 577 if c == '\n' { 578 lines = append(lines, s[last:i]) 579 last = i + 1 580 } 581 if c == '\r' { 582 goto quoted 583 } 584 } 585 lines = append(lines, s[last:]) 586 if len(lines) >= 2 { 587 buf := []byte{} 588 buf = append(buf, `"""`+"\n"...) 589 for _, l := range lines { 590 if l == "" { 591 // no indentation for empty lines 592 buf = append(buf, '\n') 593 continue 594 } 595 buf = append(buf, '\t') 596 p := len(buf) 597 buf = strconv.AppendQuote(buf, l) 598 // remove quotes 599 buf[p] = '\t' 600 buf[len(buf)-1] = '\n' 601 } 602 buf = append(buf, "\t\t"+`"""`...) 603 return string(buf) 604 } 605 quoted: 606 return literal.String.Quote(s) 607 } 608 609 func (d *decoder) sequence(n *node) ast.Expr { 610 list := &ast.ListLit{} 611 list.Lbrack = d.pos(n.startPos).WithRel(token.Blank) 612 switch ln := len(n.children); ln { 613 case 0: 614 d.prev = list.Lbrack 615 default: 616 d.prev = d.pos(n.children[ln-1].endPos) 617 } 618 list.Rbrack = d.pos(n.endPos) 619 620 noNewline := true 621 single := d.isOneLiner(n.startPos, n.endPos) 622 for _, c := range n.children { 623 d.forceNewline = !single 624 elem := d.unmarshal(c) 625 list.Elts = append(list.Elts, elem) 626 _, noNewline = elem.(*ast.StructLit) 627 } 628 if !single && !noNewline { 629 list.Rbrack = list.Rbrack.WithRel(token.Newline) 630 } 631 return list 632 } 633 634 func (d *decoder) isOneLiner(start, end yaml_mark_t) bool { 635 s := d.absPos(start).Position() 636 e := d.absPos(end).Position() 637 return s.Line == e.Line 638 } 639 640 func (d *decoder) mapping(n *node) ast.Expr { 641 newline := d.forceNewline 642 643 structure := &ast.StructLit{} 644 d.insertMap(n, structure, false) 645 646 // NOTE: we currently translate YAML without curly braces to CUE with 647 // curly braces, even for single elements. Removing the following line 648 // would generate the folded form. 649 structure.Lbrace = d.absPos(n.startPos).WithRel(token.NoSpace) 650 structure.Rbrace = d.absPos(n.endPos).WithRel(token.Newline) 651 if d.isOneLiner(n.startPos, n.endPos) && !newline { 652 if len(structure.Elts) != 1 { 653 structure.Lbrace = d.absPos(n.startPos).WithRel(token.Blank) 654 } 655 if len(structure.Elts) != 1 || structure.Elts[0].Pos().RelPos() < token.Newline { 656 structure.Rbrace = structure.Rbrace.WithRel(token.Blank) 657 } 658 } 659 return structure 660 } 661 662 func (d *decoder) insertMap(n *node, m *ast.StructLit, merge bool) { 663 l := len(n.children) 664 outer: 665 for i := 0; i < l; i += 2 { 666 if isMerge(n.children[i]) { 667 merge = true 668 d.merge(n.children[i+1], m) 669 continue 670 } 671 switch n.children[i].kind { 672 case mappingNode: 673 d.p.failf(n.startPos.line, "invalid map key: map") 674 case sequenceNode: 675 d.p.failf(n.startPos.line, "invalid map key: sequence") 676 } 677 678 field := &ast.Field{} 679 d.attachDocComments(n.children[i].startPos, 0, field) 680 681 label := d.label(n.children[i]) 682 field.Label = label 683 d.attachLineComment(n.children[i].endPos, 1, label) 684 685 if merge { 686 key := labelStr(label) 687 for _, decl := range m.Elts { 688 f := decl.(*ast.Field) 689 name, _, err := ast.LabelName(f.Label) 690 if err == nil && name == key { 691 f.Value = d.unmarshal(n.children[i+1]) 692 continue outer 693 } 694 } 695 } 696 697 value := d.unmarshal(n.children[i+1]) 698 field.Value = value 699 d.attachDocComments(n.children[i+1].startPos, 0, value) 700 d.attachLineComment(n.children[i+1].endPos, 10, value) 701 702 m.Elts = append(m.Elts, field) 703 } 704 } 705 706 func labelStr(l ast.Label) string { 707 switch x := l.(type) { 708 case *ast.Ident: 709 return x.Name 710 case *ast.BasicLit: 711 s, _ := strconv.Unquote(x.Value) 712 return s 713 } 714 return "" 715 } 716 717 func (d *decoder) failWantMap(n *node) { 718 d.p.failf(n.startPos.line, "map merge requires map or sequence of maps as the value") 719 } 720 721 func (d *decoder) merge(n *node, m *ast.StructLit) { 722 switch n.kind { 723 case mappingNode: 724 d.insertMap(n, m, true) 725 case aliasNode: 726 an, ok := d.doc.anchors[n.value] 727 if ok && an.kind != mappingNode { 728 d.failWantMap(n) 729 } 730 d.insertMap(an, m, true) 731 case sequenceNode: 732 // Step backwards as earlier nodes take precedence. 733 for i := len(n.children) - 1; i >= 0; i-- { 734 ni := n.children[i] 735 if ni.kind == aliasNode { 736 an, ok := d.doc.anchors[ni.value] 737 if ok && an.kind != mappingNode { 738 d.failWantMap(n) 739 } 740 d.insertMap(an, m, true) 741 continue 742 } else if ni.kind != mappingNode { 743 d.failWantMap(n) 744 } 745 d.insertMap(ni, m, true) 746 } 747 default: 748 d.failWantMap(n) 749 } 750 } 751 752 func isMerge(n *node) bool { 753 return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG) 754 }