github.com/jxskiss/gopkg/v2@v2.14.9-0.20240514120614-899f3e7952b4/easy/yamlx/parser.go (about) 1 package yamlx 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "reflect" 8 "strconv" 9 "strings" 10 11 "github.com/tidwall/gjson" 12 "gopkg.in/yaml.v3" 13 14 "github.com/jxskiss/gopkg/v2/internal/unsafeheader" 15 "github.com/jxskiss/gopkg/v2/perf/json" 16 "github.com/jxskiss/gopkg/v2/utils/strutil" 17 ) 18 19 const strTag = "!!str" 20 21 type nodeStack[T any] []T 22 23 func (p *nodeStack[T]) push(nodes ...T) { 24 *p = append(*p, nodes...) 25 } 26 27 func (p *nodeStack[T]) pop() (top T) { 28 if len(*p) == 0 { 29 return top 30 } 31 top = (*p)[len(*p)-1] 32 *p = (*p)[:len(*p)-1] 33 return 34 } 35 36 type pathTuple struct{ path, origPath string } 37 38 func (p pathTuple) String() string { 39 if p.path == p.origPath { 40 return p.path 41 } 42 return fmt.Sprintf("%s (%s)", p.origPath, p.path) 43 } 44 45 type parser struct { 46 parsed bool 47 parseErr error 48 49 filename string 50 opts *extOptions 51 buf []byte 52 doc *yaml.Node 53 54 // directive inc 55 incStack []string 56 57 // directive ref 58 refMark string 59 refCounter int 60 refTable map[string]int 61 refRevTable map[int]pathTuple 62 refDag dag 63 64 // directive var 65 varMap map[string]*yaml.Node 66 varNodeMap map[*yaml.Node]string 67 68 // directive fn 69 funcValMap map[string]reflect.Value 70 } 71 72 func newParser(buf []byte, options ...Option) *parser { 73 opts := new(extOptions).apply(options...) 74 p := &parser{opts: opts, buf: buf} 75 p.addFuncs(opts.FuncMap) 76 return p 77 } 78 79 func newParserWithOpts(buf []byte, opts *extOptions) *parser { 80 p := &parser{opts: opts, buf: buf} 81 p.addFuncs(opts.FuncMap) 82 return p 83 } 84 85 func (p *parser) Unmarshal(v any) error { 86 err := p.parse() 87 if err != nil { 88 return err 89 } 90 91 // Unescape string values before unmarshalling. 92 p.unescapeStrings() 93 94 return p.doc.Decode(v) 95 } 96 97 func (p *parser) parse() (err error) { 98 if p.parsed { 99 return p.parseErr 100 } 101 defer func() { 102 p.parseErr = err 103 p.parsed = true 104 }() 105 106 if len(p.buf) == 0 && p.filename != "" { 107 buf, err := os.ReadFile(p.filename) 108 if err != nil { 109 return fmt.Errorf("cannot read file: %w", err) 110 } 111 p.buf = buf 112 } 113 114 p.doc = &yaml.Node{} 115 err = yaml.Unmarshal(p.buf, p.doc) 116 if err != nil { 117 return err 118 } 119 120 // The env, fn and var directives are scoped within a single file, 121 // they should be resolved before the include directives. 122 if err = p.resolveEnvAndFunctions(); err != nil { 123 return err 124 } 125 if err = p.resolveVariables(); err != nil { 126 return err 127 } 128 129 // resolve includes 130 if p.opts.EnableInclude { 131 if err = p.resolveIncludes(); err != nil { 132 return err 133 } 134 } 135 136 // The ref directives are allowed to reference data from included 137 // files, they should be resolved after the include directives. 138 if err = p.resolveReferences(); err != nil { 139 return err 140 } 141 142 return nil 143 } 144 145 func (p *parser) resolveEnvAndFunctions() error { 146 if p.doc == nil { 147 return nil 148 } 149 150 // depth-first traversal 151 stack := make(nodeStack[*yaml.Node], 0, 64) 152 stack.push(p.doc) 153 for len(stack) > 0 { 154 node := stack.pop() 155 if node == nil || node.IsZero() { 156 continue 157 } 158 switch node.Kind { 159 case yaml.DocumentNode: 160 if len(node.Content) == 0 || node.Content[0].IsZero() { 161 continue 162 } 163 stack.push(node.Content[0]) 164 case yaml.SequenceNode: 165 stack.push(node.Content...) 166 case yaml.MappingNode: 167 for i, j := 0, 1; i < len(node.Content); i, j = i+2, j+2 { 168 stack.push(node.Content[j]) 169 } 170 case yaml.AliasNode: 171 continue 172 case yaml.ScalarNode: 173 if node.Tag != strTag { 174 continue 175 } 176 directive, ok, err := parseDirective(node.Value) 177 if err != nil { 178 return err 179 } 180 if !ok { 181 continue 182 } 183 if p.opts.EnableEnv && directive.name == directiveEnv { 184 envNames := directive.args["envNames"].([]string) 185 readEnv(node, envNames) 186 continue 187 } 188 if directive.name == directiveFunction { 189 fnRet, err := p.callFunction(directive.args["expr"].(string)) 190 if err != nil { 191 return err 192 } 193 newNode, err := convToNode(fnRet) 194 if err != nil { 195 return err 196 } 197 newNode.LineComment = node.LineComment 198 *node = *newNode 199 continue 200 } 201 } 202 } 203 return nil 204 } 205 206 func readEnv(node *yaml.Node, envNames []string) { 207 found := false 208 for _, name := range envNames { 209 val := os.Getenv(name) 210 if val != "" { 211 found = true 212 node.Value = val 213 break 214 } 215 } 216 if !found { 217 node.Value = "" 218 } 219 } 220 221 func convToNode(value any) (*yaml.Node, error) { 222 // Use marshal and unmarshal to avoid string escaping issues. 223 var node = &yaml.Node{} 224 buf, err := yaml.Marshal(value) 225 if err != nil { 226 return nil, fmt.Errorf("cannot marshal function result: %w", err) 227 } 228 err = yaml.Unmarshal(buf, node) 229 if err != nil { 230 return nil, fmt.Errorf("cannot unmarshal function result to node: %w", err) 231 } 232 if node.Kind == yaml.DocumentNode { 233 node = node.Content[0] 234 } 235 return node, nil 236 } 237 238 func (p *parser) resolveVariables() error { 239 if p.doc == nil { 240 return nil 241 } 242 243 p.varMap = make(map[string]*yaml.Node) 244 p.varNodeMap = make(map[*yaml.Node]string) 245 246 // depth-first traversal 247 stack := make(nodeStack[*yaml.Node], 0, 64) 248 stack.push(p.doc) 249 for len(stack) > 0 { 250 node := stack.pop() 251 if node == nil || node.IsZero() { 252 continue 253 } 254 switch node.Kind { 255 case yaml.DocumentNode: 256 if len(node.Content) == 0 || node.Content[0].IsZero() { 257 continue 258 } 259 stack.push(node.Content[0]) 260 case yaml.SequenceNode: 261 for i, n := range node.Content { 262 // 当 array 有 anchor 时,lineComment 会被算给第一个列表元素, 263 // 如果第一个列表元素也有 lineComment,则无法正确区分到底是哪一样的注释, 264 // 因此不支持带有 anchor 的 array 作为变量目标对象。 265 if i == 0 && node.Anchor != "" && n.LineComment != "" { 266 if directive, _ := parseVariableDirective(n.LineComment); directive.name == directiveVariable { 267 varName := directive.args["varName"].(string) 268 return fmt.Errorf("mix using anchor and @@var directive does not work correctly and is not supported: %v", varName) 269 } 270 } 271 } 272 273 stack.push(node.Content...) 274 if err := p.checkAndAddVariable(node.LineComment, node); err != nil { 275 return err 276 } 277 case yaml.MappingNode: 278 if err := p.checkAndAddVariable(node.LineComment, node); err != nil { 279 return err 280 } 281 for i, j := 0, 1; j < len(node.Content); i, j = i+2, j+2 { 282 kNode := node.Content[i] 283 vNode := node.Content[j] 284 285 // 当 map 有 anchor 时,lineComment 会被算给第一个 kv, 286 // 然而若第一个 kv 是分行书写的且 key 后面又跟了注释,则无法正确区分到底是哪一行的注释, 287 // 因此不支持带有 anchor 的 map 作为变量目标对象。 288 if i == 0 && node.Anchor != "" && kNode.LineComment != "" { 289 if directive, _ := parseVariableDirective(kNode.LineComment); directive.name == directiveVariable { 290 varName := directive.args["varName"].(string) 291 return fmt.Errorf("mix using anchor and @@var directive does not work correctly and is not supported: %v", varName) 292 } 293 } 294 295 stack.push(vNode) 296 if err := p.checkAndAddVariable(kNode.LineComment, vNode); err != nil { 297 return err 298 } 299 } 300 case yaml.AliasNode: 301 if err := p.checkAndAddVariable(node.LineComment, node); err != nil { 302 return err 303 } 304 continue 305 case yaml.ScalarNode: 306 if err := p.checkAndAddVariable(node.LineComment, node); err != nil { 307 return err 308 } 309 if node.Tag == strTag { 310 directive, ok, err := parseDirective(node.Value) 311 if err != nil { 312 return err 313 } 314 if ok && directive.name == directiveVariable { 315 p.varNodeMap[node] = directive.args["varName"].(string) 316 } 317 } 318 } 319 } 320 if len(p.varNodeMap) == 0 { 321 return nil 322 } 323 324 for node, varName := range p.varNodeMap { 325 dstNode := p.varMap[varName] 326 if dstNode == nil { 327 return fmt.Errorf("undefined variable: %s", varName) 328 } 329 *node = *dstNode 330 } 331 332 cyclicVarName, isCyclic := p.detectVarCircle(p.doc, nil) 333 if isCyclic { 334 return fmt.Errorf("circular variable reference detected: %s", cyclicVarName) 335 } 336 return nil 337 } 338 339 func (p *parser) checkAndAddVariable(lineComment string, node *yaml.Node) error { 340 lineComment = strings.TrimLeft(lineComment, "#") 341 lineComment = strings.TrimSpace(lineComment) 342 if !strings.HasPrefix(lineComment, directiveVariable) { 343 return nil 344 } 345 directive, err := parseVariableDirective(lineComment) 346 if err != nil { 347 return err 348 } 349 varName := directive.args["varName"].(string) 350 p.varMap[varName] = node 351 return nil 352 } 353 354 func (p *parser) detectVarCircle(node *yaml.Node, stack nodeStack[*yaml.Node]) (string, bool) { 355 if node == nil || node.IsZero() { 356 return "", false 357 } 358 359 for _, n := range stack { 360 if n == node { 361 var varName string 362 revVarMap := make(map[*yaml.Node]string) 363 for name, varNode := range p.varMap { 364 revVarMap[varNode] = name 365 } 366 for _, seenNode := range stack { 367 if name := p.varNodeMap[seenNode]; name != "" { 368 varName = name 369 } else if name = revVarMap[seenNode]; name != "" { 370 varName = name 371 } 372 } 373 return varName, true 374 } 375 } 376 377 stack.push(node) 378 switch node.Kind { 379 case yaml.DocumentNode: 380 if len(node.Content) == 0 || node.Content[0].IsZero() { 381 return "", false 382 } 383 return p.detectVarCircle(node.Content[0], stack) 384 case yaml.SequenceNode: 385 for _, elemNode := range node.Content { 386 if varName, isCyclic := p.detectVarCircle(elemNode, stack); isCyclic { 387 return varName, true 388 } 389 } 390 case yaml.MappingNode: 391 for i, j := 0, 1; j < len(node.Content); i, j = i+2, j+2 { 392 elemNode := node.Content[j] 393 if varName, isCyclic := p.detectVarCircle(elemNode, stack); isCyclic { 394 return varName, true 395 } 396 } 397 case yaml.AliasNode, yaml.ScalarNode: 398 return "", false 399 } 400 return "", false 401 } 402 403 func (p *parser) resolveIncludes() error { 404 if p.doc == nil { 405 return nil 406 } 407 408 // depth-first traversal 409 stack := make(nodeStack[*yaml.Node], 0, 64) 410 stack.push(p.doc) 411 for len(stack) > 0 { 412 node := stack.pop() 413 if node == nil || node.IsZero() { 414 continue 415 } 416 switch node.Kind { 417 case yaml.DocumentNode: 418 if len(node.Content) == 0 || node.Content[0].IsZero() { 419 continue 420 } 421 stack.push(node.Content[0]) 422 case yaml.SequenceNode: 423 stack.push(node.Content...) 424 case yaml.MappingNode: 425 for i, j := 0, 1; j < len(node.Content); i, j = i+2, j+2 { 426 stack.push(node.Content[j]) 427 } 428 case yaml.AliasNode: 429 continue 430 case yaml.ScalarNode: 431 if node.Tag != strTag { 432 continue 433 } 434 directive, ok, err := parseDirective(node.Value) 435 if err != nil { 436 return err 437 } 438 if !ok || directive.name != directiveInclude { 439 continue 440 } 441 442 // Execute the include directive. 443 incFilePath, err := p.getIncludeAbsFilename(directive.args["filename"].(string)) 444 if err != nil { 445 return err 446 } 447 for _, fn := range p.incStack { 448 if fn == incFilePath { 449 return fmt.Errorf("circular include detected: %s", incFilePath) 450 } 451 } 452 incBuf, err := os.ReadFile(incFilePath) 453 if err != nil { 454 return fmt.Errorf("cannot read include file: %w", err) 455 } 456 incParser := newParserWithOpts(incBuf, p.opts) 457 incParser.filename = incFilePath 458 incParser.incStack = append(clip(p.incStack), incFilePath) 459 err = incParser.parse() 460 if err != nil { 461 return fmt.Errorf("cannot parse include file: %w", err) 462 } 463 *node = *(incParser.getDocValueNode()) 464 } 465 } 466 467 return nil 468 } 469 470 func (p *parser) getIncludeAbsFilename(s string) (string, error) { 471 if filepath.IsAbs(s) { 472 return filepath.Clean(s), nil 473 } 474 475 var includeDirs []string 476 if p.filename != "" { 477 isRelative := strings.HasPrefix(s, "./") || strings.HasPrefix(s, "../") 478 dir := filepath.Dir(p.filename) 479 if isRelative { 480 includeDirs = append([]string{dir}, p.opts.IncludeDirs...) 481 } else { 482 includeDirs = append(p.opts.IncludeDirs, dir) 483 } 484 } else { 485 includeDirs = p.opts.IncludeDirs 486 } 487 488 var filename string 489 for _, dir := range includeDirs { 490 fName := filepath.Join(dir, s) 491 info, err := os.Stat(fName) 492 if err != nil { 493 if os.IsNotExist(err) { 494 continue 495 } 496 return "", fmt.Errorf("error checking include file: %w", err) 497 } 498 if info.IsDir() { 499 return "", fmt.Errorf("include file is a directory: %v", fName) 500 } 501 filename = fName 502 break 503 } 504 if filename != "" { 505 return filename, nil 506 } 507 508 wd, err := os.Getwd() 509 if err != nil { 510 return "", fmt.Errorf("cannot get working directory: %w", err) 511 } 512 return filepath.Abs(filepath.Join(wd, s)) 513 } 514 515 func (p *parser) resolveReferences() error { 516 if p.doc == nil { 517 return nil 518 } 519 520 toStrRefs := make(map[int]string) // seq -> modifier 521 522 // depth-first traversal 523 type NodePath struct { 524 N *yaml.Node // node 525 P []string // path prefix 526 } 527 stack := make(nodeStack[NodePath], 0, 64) 528 stack.push(NodePath{p.doc, nil}) 529 for len(stack) > 0 { 530 node := stack.pop() 531 if node.N == nil || node.N.IsZero() { 532 continue 533 } 534 switch node.N.Kind { 535 case yaml.DocumentNode: 536 if len(node.N.Content) == 0 || node.N.Content[0].IsZero() { 537 continue 538 } 539 stack.push(NodePath{node.N.Content[0], nil}) 540 case yaml.SequenceNode: 541 for i := 0; i < len(node.N.Content); i++ { 542 _n := node.N.Content[i] 543 _p := append(clip(node.P), strconv.Itoa(i)) 544 stack.push(NodePath{_n, _p}) 545 } 546 case yaml.MappingNode: 547 for i, j := 0, 1; j < len(node.N.Content); i, j = i+2, j+2 { 548 _n := node.N.Content[j] 549 _p := append(clip(node.P), gjson.Escape(node.N.Content[i].Value)) 550 stack.push(NodePath{_n, _p}) 551 } 552 case yaml.AliasNode: 553 continue 554 case yaml.ScalarNode: 555 if node.N.Tag != strTag { 556 continue 557 } 558 directive, ok, err := parseDirective(node.N.Value) 559 if err != nil { 560 return err 561 } 562 if !ok || directive.name != directiveRefer { 563 continue 564 } 565 566 // Note we need special processing for modifier "@tostr". 567 jsonPath, origPath, isTostr, modifier := directive.getRefPath(node.P) 568 seq, placeholder := p.getReferID(jsonPath, origPath) 569 node.N.Value = placeholder 570 p.refDag.addVertex(seq) 571 if isTostr { 572 toStrRefs[seq] = modifier 573 } 574 } 575 } 576 if p.refMark == "" { 577 return nil 578 } 579 580 var intermediateValue any 581 err := p.doc.Decode(&intermediateValue) 582 if err != nil { 583 return fmt.Errorf("cannot decode intermediate data: %w", err) 584 } 585 intermediateBuf, err := json.Marshal(intermediateValue) 586 if err != nil { 587 return fmt.Errorf("cannot marshal intermediat data: %w", err) 588 } 589 590 // Resolve dependency and do replacing. 591 mark := p.refMark 592 resolved := make(map[int]string, len(p.refTable)) 593 for seq := 1; seq <= len(p.refTable); seq++ { 594 refPath := p.refRevTable[seq] 595 r := gjson.GetBytes(intermediateBuf, refPath.path) 596 if !r.Exists() { 597 return fmt.Errorf("cannot find referenced data: %v", refPath) 598 } 599 resolved[seq] = r.Raw 600 pos := 0 601 for pos < len(r.Raw) { 602 raw := r.Raw[pos:] 603 idx := strings.Index(raw, mark) 604 if idx < 0 { 605 break 606 } 607 end := idx + len(mark) + 2 608 for end < len(raw) { 609 if raw[end] >= '0' && raw[end] <= '9' { 610 end++ 611 continue 612 } 613 break 614 } 615 refSeqStr := raw[idx+len(mark)+1 : end] 616 refSeq, err := strconv.Atoi(refSeqStr) 617 if err != nil { 618 return fmt.Errorf("invalid refer id: %w", err) 619 } 620 if refSeq == seq { 621 return fmt.Errorf("circular reference detected: %s", refPath) 622 } 623 isCyclic := p.refDag.addEdge(refSeq, seq) 624 if isCyclic { 625 return fmt.Errorf("circular reference detected: %s", refPath) 626 } 627 pos += end 628 } 629 } 630 631 order := p.refDag.topoSort() 632 for _, seq := range order { 633 if modifier := toStrRefs[seq]; modifier != "" { 634 resolved[seq] = convToStr(resolved[seq], modifier) 635 } 636 final := resolved[seq] 637 placeholder := `"` + p.referPlaceholder(seq) + `"` 638 p.refDag.visitNeighbors(seq, func(to int) { 639 resolved[to] = strings.Replace(resolved[to], placeholder, final, -1) 640 }) 641 } 642 oldnew := make([]string, 0, 2*len(resolved)) 643 for seq, text := range resolved { 644 placeholder := `"` + p.referPlaceholder(seq) + `"` 645 oldnew = append(oldnew, placeholder, text) 646 } 647 replacer := strings.NewReplacer(oldnew...) 648 finalBuf := replacer.Replace(unsafeheader.BytesToString(intermediateBuf)) 649 p.doc = &yaml.Node{} 650 return yaml.Unmarshal(unsafeheader.StringToBytes(finalBuf), p.doc) 651 } 652 653 func convToStr(value, modifier string) string { 654 tmp := fmt.Sprintf(`{"a":%s}`, value) 655 return gjson.Get(tmp, "a"+modifier).Raw 656 } 657 658 func (p *parser) getReferID(path, origPath string) (int, string) { 659 if p.refMark == "" { 660 p.refMark = strutil.RandomHex(40) 661 p.refTable = make(map[string]int) 662 p.refRevTable = make(map[int]pathTuple) 663 } 664 seq := p.refTable[path] 665 if seq == 0 { 666 p.refCounter++ 667 seq = p.refCounter 668 p.refTable[path] = seq 669 p.refRevTable[seq] = pathTuple{path, origPath} 670 } 671 placeholder := p.referPlaceholder(seq) 672 return seq, placeholder 673 } 674 675 func (p *parser) referPlaceholder(n int) string { 676 return fmt.Sprintf("%s_%d", p.refMark, n) 677 } 678 679 func (p *parser) getDocValueNode() *yaml.Node { 680 if p.doc == nil { 681 return &yaml.Node{} 682 } 683 switch p.doc.Kind { 684 case yaml.DocumentNode: 685 if len(p.doc.Content) > 0 { 686 return p.doc.Content[0] 687 } 688 return nil 689 default: 690 return p.doc 691 } 692 } 693 694 func (p *parser) unescapeStrings() { 695 if p.doc == nil { 696 return 697 } 698 699 // depth-first traversal 700 stack := make(nodeStack[*yaml.Node], 0, 64) 701 stack.push(p.doc) 702 for len(stack) > 0 { 703 node := stack.pop() 704 if node == nil || node.IsZero() { 705 continue 706 } 707 switch node.Kind { 708 case yaml.DocumentNode: 709 if len(node.Content) == 0 || node.Content[0].IsZero() { 710 continue 711 } 712 stack.push(node.Content[0]) 713 case yaml.SequenceNode: 714 stack.push(node.Content...) 715 case yaml.MappingNode: 716 stack.push(node.Content...) 717 case yaml.AliasNode: 718 continue 719 case yaml.ScalarNode: 720 if node.Tag != strTag { 721 continue 722 } 723 node.Value = unescapeStrValue(node.Value) 724 } 725 } 726 } 727 728 func clip[T any](s []T) []T { 729 return s[:len(s):len(s)] 730 } 731 732 func max(a, b int) int { 733 if a < b { 734 return b 735 } 736 return a 737 }