github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/yaml/parser/parser.go (about) 1 package parser 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "strings" 7 8 "github.com/bingoohuang/gg/pkg/yaml/ast" 9 "github.com/bingoohuang/gg/pkg/yaml/internal/errors" 10 "github.com/bingoohuang/gg/pkg/yaml/lexer" 11 "github.com/bingoohuang/gg/pkg/yaml/token" 12 "golang.org/x/xerrors" 13 ) 14 15 type parser struct{} 16 17 func (p *parser) parseMapping(ctx *context) (ast.Node, error) { 18 node := ast.Mapping(ctx.currentToken(), true) 19 ctx.progress(1) // skip MappingStart token 20 for ctx.next() { 21 tk := ctx.currentToken() 22 if tk.Type == token.MappingEndType { 23 node.End = tk 24 return node, nil 25 } else if tk.Type == token.CollectEntryType { 26 ctx.progress(1) 27 continue 28 } 29 30 value, err := p.parseMappingValue(ctx) 31 if err != nil { 32 return nil, errors.Wrapf(err, "failed to parse mapping value in mapping node") 33 } 34 mvnode, ok := value.(*ast.MappingValueNode) 35 if !ok { 36 return nil, errors.ErrSyntax("failed to parse flow mapping node", value.GetToken()) 37 } 38 node.Values = append(node.Values, mvnode) 39 ctx.progress(1) 40 } 41 return nil, errors.ErrSyntax("unterminated flow mapping", node.GetToken()) 42 } 43 44 func (p *parser) parseSequence(ctx *context) (ast.Node, error) { 45 node := ast.Sequence(ctx.currentToken(), true) 46 ctx.progress(1) // skip SequenceStart token 47 for ctx.next() { 48 tk := ctx.currentToken() 49 if tk.Type == token.SequenceEndType { 50 node.End = tk 51 break 52 } else if tk.Type == token.CollectEntryType { 53 ctx.progress(1) 54 continue 55 } 56 57 value, err := p.parseToken(ctx, tk) 58 if err != nil { 59 return nil, errors.Wrapf(err, "failed to parse sequence value in flow sequence node") 60 } 61 node.Values = append(node.Values, value) 62 ctx.progress(1) 63 } 64 return node, nil 65 } 66 67 func (p *parser) parseTag(ctx *context) (ast.Node, error) { 68 tagToken := ctx.currentToken() 69 node := ast.Tag(tagToken) 70 ctx.progress(1) // skip tag token 71 var ( 72 value ast.Node 73 err error 74 ) 75 switch token.ReservedTagKeyword(tagToken.Value) { 76 case token.MappingTag, token.OrderedMapTag: 77 value, err = p.parseMapping(ctx) 78 case token.IntegerTag, 79 token.FloatTag, 80 token.StringTag, 81 token.BinaryTag, 82 token.TimestampTag, 83 token.NullTag: 84 typ := ctx.currentToken().Type 85 if typ == token.LiteralType || typ == token.FoldedType { 86 value, err = p.parseLiteral(ctx) 87 } else { 88 value = p.parseScalarValue(ctx.currentToken()) 89 } 90 case token.SequenceTag, token.SetTag: 91 err = errors.ErrSyntax(fmt.Sprintf("sorry, currently not supported %s tag", tagToken.Value), tagToken) 92 default: 93 // custom tag 94 value, err = p.parseToken(ctx, ctx.currentToken()) 95 } 96 if err != nil { 97 return nil, errors.Wrapf(err, "failed to parse tag value") 98 } 99 node.Value = value 100 return node, nil 101 } 102 103 func (p *parser) removeLeftSideNewLineCharacter(src string) string { 104 // CR or LF or CRLF 105 return strings.TrimLeft(strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n"), "\r\n") 106 } 107 108 func (p *parser) existsNewLineCharacter(src string) bool { 109 if strings.Index(src, "\n") > 0 { 110 return true 111 } 112 if strings.Index(src, "\r") > 0 { 113 return true 114 } 115 return false 116 } 117 118 func (p *parser) validateMapKey(tk *token.Token) error { 119 if tk.Type != token.StringType { 120 return nil 121 } 122 origin := p.removeLeftSideNewLineCharacter(tk.Origin) 123 if p.existsNewLineCharacter(origin) { 124 return errors.ErrSyntax("unexpected key name", tk) 125 } 126 return nil 127 } 128 129 func (p *parser) createNullToken(base *token.Token) *token.Token { 130 pos := *(base.Position) 131 pos.Column++ 132 return token.New("null", "null", &pos) 133 } 134 135 func (p *parser) parseMapValue(ctx *context, key ast.Node, colonToken *token.Token) (ast.Node, error) { 136 tk := ctx.currentToken() 137 if tk == nil { 138 nullToken := p.createNullToken(colonToken) 139 ctx.insertToken(ctx.idx, nullToken) 140 return ast.Null(nullToken), nil 141 } 142 143 if tk.Position.Column == key.GetToken().Position.Column && tk.Type == token.StringType { 144 // in this case, 145 // ---- 146 // key: <value does not defined> 147 // next 148 nullToken := p.createNullToken(colonToken) 149 ctx.insertToken(ctx.idx, nullToken) 150 return ast.Null(nullToken), nil 151 } 152 153 if tk.Position.Column < key.GetToken().Position.Column { 154 // in this case, 155 // ---- 156 // key: <value does not defined> 157 // next 158 nullToken := p.createNullToken(colonToken) 159 ctx.insertToken(ctx.idx, nullToken) 160 return ast.Null(nullToken), nil 161 } 162 163 value, err := p.parseToken(ctx, ctx.currentToken()) 164 if err != nil { 165 return nil, errors.Wrapf(err, "failed to parse mapping 'value' node") 166 } 167 return value, nil 168 } 169 170 func (p *parser) validateMapValue(ctx *context, key, value ast.Node) error { 171 keyColumn := key.GetToken().Position.Column 172 valueColumn := value.GetToken().Position.Column 173 if keyColumn != valueColumn { 174 return nil 175 } 176 if value.Type() != ast.StringType { 177 return nil 178 } 179 ntk := ctx.nextToken() 180 if ntk == nil || (ntk.Type != token.MappingValueType && ntk.Type != token.SequenceEntryType) { 181 return errors.ErrSyntax("could not found expected ':' token", value.GetToken()) 182 } 183 return nil 184 } 185 186 func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) { 187 key, err := p.parseMapKey(ctx) 188 if err != nil { 189 return nil, errors.Wrapf(err, "failed to parse map key") 190 } 191 if err := p.validateMapKey(key.GetToken()); err != nil { 192 return nil, errors.Wrapf(err, "validate mapping key error") 193 } 194 ctx.progress(1) // progress to mapping value token 195 tk := ctx.currentToken() // get mapping value token 196 if tk == nil { 197 return nil, errors.ErrSyntax("unexpected map", key.GetToken()) 198 } 199 ctx.progress(1) // progress to value token 200 if err := p.setSameLineCommentIfExists(ctx, key); err != nil { 201 return nil, errors.Wrapf(err, "failed to set same line comment to node") 202 } 203 if key.GetComment() != nil { 204 // if current token is comment, GetComment() is not nil. 205 // then progress to value token 206 ctx.progressIgnoreComment(1) 207 } 208 209 value, err := p.parseMapValue(ctx, key, tk) 210 if err != nil { 211 return nil, errors.Wrapf(err, "failed to parse map value") 212 } 213 if err := p.validateMapValue(ctx, key, value); err != nil { 214 return nil, errors.Wrapf(err, "failed to validate map value") 215 } 216 217 mvnode := ast.MappingValue(tk, key, value) 218 node := ast.Mapping(tk, false, mvnode) 219 220 ntk := ctx.nextNotCommentToken() 221 antk := ctx.afterNextNotCommentToken() 222 for antk != nil && antk.Type == token.MappingValueType && 223 ntk.Position.Column == key.GetToken().Position.Column { 224 ctx.progressIgnoreComment(1) 225 value, err := p.parseToken(ctx, ctx.currentToken()) 226 if err != nil { 227 return nil, errors.Wrapf(err, "failed to parse mapping node") 228 } 229 switch value.Type() { 230 case ast.MappingType: 231 c := value.(*ast.MappingNode) 232 comment := c.GetComment() 233 for idx, v := range c.Values { 234 if idx == 0 && comment != nil { 235 if err := v.SetComment(comment); err != nil { 236 return nil, errors.Wrapf(err, "failed to set comment token to node") 237 } 238 } 239 node.Values = append(node.Values, v) 240 } 241 case ast.MappingValueType: 242 node.Values = append(node.Values, value.(*ast.MappingValueNode)) 243 default: 244 return nil, xerrors.Errorf("failed to parse mapping value node node is %s", value.Type()) 245 } 246 ntk = ctx.nextNotCommentToken() 247 antk = ctx.afterNextNotCommentToken() 248 } 249 if len(node.Values) == 1 { 250 return mvnode, nil 251 } 252 return node, nil 253 } 254 255 func (p *parser) parseSequenceEntry(ctx *context) (ast.Node, error) { 256 tk := ctx.currentToken() 257 sequenceNode := ast.Sequence(tk, false) 258 curColumn := tk.Position.Column 259 for tk.Type == token.SequenceEntryType { 260 ctx.progress(1) // skip sequence token 261 tk = ctx.currentToken() 262 var comment *ast.CommentGroupNode 263 if tk.Type == token.CommentType { 264 comment = p.parseCommentOnly(ctx) 265 tk = ctx.currentToken() 266 if tk.Type != token.SequenceEntryType { 267 break 268 } 269 ctx.progress(1) // skip sequence token 270 } 271 value, err := p.parseToken(ctx, ctx.currentToken()) 272 if err != nil { 273 return nil, errors.Wrapf(err, "failed to parse sequence") 274 } 275 if comment != nil { 276 sequenceNode.ValueComments = append(sequenceNode.ValueComments, comment) 277 } else { 278 sequenceNode.ValueComments = append(sequenceNode.ValueComments, nil) 279 } 280 sequenceNode.Values = append(sequenceNode.Values, value) 281 tk = ctx.nextNotCommentToken() 282 if tk == nil { 283 break 284 } 285 if tk.Type != token.SequenceEntryType { 286 break 287 } 288 if tk.Position.Column != curColumn { 289 break 290 } 291 ctx.progressIgnoreComment(1) 292 } 293 return sequenceNode, nil 294 } 295 296 func (p *parser) parseAnchor(ctx *context) (ast.Node, error) { 297 tk := ctx.currentToken() 298 anchor := ast.Anchor(tk) 299 ntk := ctx.nextToken() 300 if ntk == nil { 301 return nil, errors.ErrSyntax("unexpected anchor. anchor name is undefined", tk) 302 } 303 ctx.progress(1) // skip anchor token 304 name, err := p.parseToken(ctx, ctx.currentToken()) 305 if err != nil { 306 return nil, errors.Wrapf(err, "failed to parser anchor name node") 307 } 308 anchor.Name = name 309 ntk = ctx.nextToken() 310 if ntk == nil { 311 return nil, errors.ErrSyntax("unexpected anchor. anchor value is undefined", ctx.currentToken()) 312 } 313 ctx.progress(1) 314 value, err := p.parseToken(ctx, ctx.currentToken()) 315 if err != nil { 316 return nil, errors.Wrapf(err, "failed to parser anchor name node") 317 } 318 anchor.Value = value 319 return anchor, nil 320 } 321 322 func (p *parser) parseAlias(ctx *context) (ast.Node, error) { 323 tk := ctx.currentToken() 324 alias := ast.Alias(tk) 325 ntk := ctx.nextToken() 326 if ntk == nil { 327 return nil, errors.ErrSyntax("unexpected alias. alias name is undefined", tk) 328 } 329 ctx.progress(1) // skip alias token 330 name, err := p.parseToken(ctx, ctx.currentToken()) 331 if err != nil { 332 return nil, errors.Wrapf(err, "failed to parser alias name node") 333 } 334 alias.Value = name 335 return alias, nil 336 } 337 338 func (p *parser) parseMapKey(ctx *context) (ast.Node, error) { 339 tk := ctx.currentToken() 340 if value := p.parseScalarValue(tk); value != nil { 341 return value, nil 342 } 343 switch tk.Type { 344 case token.MergeKeyType: 345 return ast.MergeKey(tk), nil 346 case token.MappingKeyType: 347 return p.parseMappingKey(ctx) 348 } 349 return nil, errors.ErrSyntax("unexpected mapping key", tk) 350 } 351 352 func (p *parser) parseStringValue(tk *token.Token) ast.Node { 353 switch tk.Type { 354 case token.StringType, 355 token.SingleQuoteType, 356 token.DoubleQuoteType: 357 return ast.String(tk) 358 } 359 return nil 360 } 361 362 func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast.Node, error) { 363 node := p.parseScalarValue(tk) 364 if node == nil { 365 return nil, nil 366 } 367 if p.isSameLineComment(ctx.nextToken(), node) { 368 ctx.progress(1) 369 if err := p.setSameLineCommentIfExists(ctx, node); err != nil { 370 return nil, errors.Wrapf(err, "failed to set same line comment to node") 371 } 372 } 373 return node, nil 374 } 375 376 func (p *parser) parseScalarValue(tk *token.Token) ast.Node { 377 if node := p.parseStringValue(tk); node != nil { 378 return node 379 } 380 switch tk.Type { 381 case token.NullType: 382 return ast.Null(tk) 383 case token.BoolType: 384 return ast.Bool(tk) 385 case token.IntegerType, 386 token.BinaryIntegerType, 387 token.OctetIntegerType, 388 token.HexIntegerType: 389 return ast.Integer(tk) 390 case token.FloatType: 391 return ast.Float(tk) 392 case token.InfinityType: 393 return ast.Infinity(tk) 394 case token.NanType: 395 return ast.Nan(tk) 396 } 397 return nil 398 } 399 400 func (p *parser) parseDirective(ctx *context) (ast.Node, error) { 401 node := ast.Directive(ctx.currentToken()) 402 ctx.progress(1) // skip directive token 403 value, err := p.parseToken(ctx, ctx.currentToken()) 404 if err != nil { 405 return nil, errors.Wrapf(err, "failed to parse directive value") 406 } 407 node.Value = value 408 ctx.progress(1) 409 tk := ctx.currentToken() 410 if tk == nil { 411 // Since current token is nil, use the previous token to specify 412 // the syntax error location. 413 return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.previousToken()) 414 } 415 if tk.Type != token.DocumentHeaderType { 416 return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.currentToken()) 417 } 418 return node, nil 419 } 420 421 func (p *parser) parseLiteral(ctx *context) (ast.Node, error) { 422 node := ast.Literal(ctx.currentToken()) 423 ctx.progress(1) // skip literal/folded token 424 425 tk := ctx.currentToken() 426 var comment *ast.CommentGroupNode 427 if tk.Type == token.CommentType { 428 comment = p.parseCommentOnly(ctx) 429 if err := node.SetComment(comment); err != nil { 430 return nil, errors.Wrapf(err, "failed to set comment to literal") 431 } 432 tk = ctx.currentToken() 433 } 434 value, err := p.parseToken(ctx, tk) 435 if err != nil { 436 return nil, errors.Wrapf(err, "failed to parse literal/folded value") 437 } 438 snode, ok := value.(*ast.StringNode) 439 if !ok { 440 return nil, errors.ErrSyntax("unexpected token. required string token", value.GetToken()) 441 } 442 node.Value = snode 443 return node, nil 444 } 445 446 func (p *parser) isSameLineComment(tk *token.Token, node ast.Node) bool { 447 if tk == nil { 448 return false 449 } 450 if tk.Type != token.CommentType { 451 return false 452 } 453 return tk.Position.Line == node.GetToken().Position.Line 454 } 455 456 func (p *parser) setSameLineCommentIfExists(ctx *context, node ast.Node) error { 457 tk := ctx.currentToken() 458 if !p.isSameLineComment(tk, node) { 459 return nil 460 } 461 if err := node.SetComment(ast.CommentGroup([]*token.Token{tk})); err != nil { 462 return errors.Wrapf(err, "failed to set comment token to ast.Node") 463 } 464 return nil 465 } 466 467 func (p *parser) parseDocument(ctx *context) (*ast.DocumentNode, error) { 468 startTk := ctx.currentToken() 469 ctx.progress(1) // skip document header token 470 body, err := p.parseToken(ctx, ctx.currentToken()) 471 if err != nil { 472 return nil, errors.Wrapf(err, "failed to parse document body") 473 } 474 node := ast.Document(startTk, body) 475 if ntk := ctx.nextToken(); ntk != nil && ntk.Type == token.DocumentEndType { 476 node.End = ntk 477 ctx.progress(1) 478 } 479 return node, nil 480 } 481 482 func (p *parser) parseCommentOnly(ctx *context) *ast.CommentGroupNode { 483 commentTokens := []*token.Token{} 484 for { 485 tk := ctx.currentToken() 486 if tk == nil { 487 break 488 } 489 if tk.Type != token.CommentType { 490 break 491 } 492 commentTokens = append(commentTokens, tk) 493 ctx.progressIgnoreComment(1) // skip comment token 494 } 495 return ast.CommentGroup(commentTokens) 496 } 497 498 func (p *parser) parseComment(ctx *context) (ast.Node, error) { 499 group := p.parseCommentOnly(ctx) 500 node, err := p.parseToken(ctx, ctx.currentToken()) 501 if err != nil { 502 return nil, errors.Wrapf(err, "failed to parse node after comment") 503 } 504 if node == nil { 505 return group, nil 506 } 507 if err := node.SetComment(group); err != nil { 508 return nil, errors.Wrapf(err, "failed to set comment token to node") 509 } 510 return node, nil 511 } 512 513 func (p *parser) parseMappingKey(ctx *context) (ast.Node, error) { 514 node := ast.MappingKey(ctx.currentToken()) 515 ctx.progress(1) // skip mapping key token 516 value, err := p.parseToken(ctx, ctx.currentToken()) 517 if err != nil { 518 return nil, errors.Wrapf(err, "failed to parse map key") 519 } 520 node.Value = value 521 return node, nil 522 } 523 524 func (p *parser) parseToken(ctx *context, tk *token.Token) (ast.Node, error) { 525 if tk == nil { 526 return nil, nil 527 } 528 if tk.NextType() == token.MappingValueType { 529 node, err := p.parseMappingValue(ctx) 530 return node, err 531 } 532 node, err := p.parseScalarValueWithComment(ctx, tk) 533 if err != nil { 534 return nil, errors.Wrapf(err, "failed to parse scalar value") 535 } 536 if node != nil { 537 return node, nil 538 } 539 switch tk.Type { 540 case token.CommentType: 541 return p.parseComment(ctx) 542 case token.MappingKeyType: 543 return p.parseMappingKey(ctx) 544 case token.DocumentHeaderType: 545 return p.parseDocument(ctx) 546 case token.MappingStartType: 547 return p.parseMapping(ctx) 548 case token.SequenceStartType: 549 return p.parseSequence(ctx) 550 case token.SequenceEntryType: 551 return p.parseSequenceEntry(ctx) 552 case token.AnchorType: 553 return p.parseAnchor(ctx) 554 case token.AliasType: 555 return p.parseAlias(ctx) 556 case token.DirectiveType: 557 return p.parseDirective(ctx) 558 case token.TagType: 559 return p.parseTag(ctx) 560 case token.LiteralType, token.FoldedType: 561 return p.parseLiteral(ctx) 562 } 563 return nil, nil 564 } 565 566 func (p *parser) parse(tokens token.Tokens, mode Mode) (*ast.File, error) { 567 ctx := newContext(tokens, mode) 568 file := &ast.File{Docs: []*ast.DocumentNode{}} 569 for ctx.next() { 570 node, err := p.parseToken(ctx, ctx.currentToken()) 571 if err != nil { 572 return nil, errors.Wrapf(err, "failed to parse") 573 } 574 ctx.progressIgnoreComment(1) 575 if node == nil { 576 continue 577 } 578 if doc, ok := node.(*ast.DocumentNode); ok { 579 file.Docs = append(file.Docs, doc) 580 } else { 581 file.Docs = append(file.Docs, ast.Document(nil, node)) 582 } 583 } 584 return file, nil 585 } 586 587 type Mode uint 588 589 const ( 590 ParseComments Mode = 1 << iota // parse comments and add them to AST 591 ) 592 593 // ParseBytes parse from byte slice, and returns ast.File 594 func ParseBytes(bytes []byte, mode Mode) (*ast.File, error) { 595 tokens := lexer.Tokenize(string(bytes)) 596 f, err := Parse(tokens, mode) 597 if err != nil { 598 return nil, errors.Wrapf(err, "failed to parse") 599 } 600 return f, nil 601 } 602 603 // Parse parse from token instances, and returns ast.File 604 func Parse(tokens token.Tokens, mode Mode) (*ast.File, error) { 605 var p parser 606 f, err := p.parse(tokens, mode) 607 if err != nil { 608 return nil, errors.Wrapf(err, "failed to parse") 609 } 610 return f, nil 611 } 612 613 // Parse parse from filename, and returns ast.File 614 func ParseFile(filename string, mode Mode) (*ast.File, error) { 615 file, err := ioutil.ReadFile(filename) 616 if err != nil { 617 return nil, errors.Wrapf(err, "failed to read file: %s", filename) 618 } 619 f, err := ParseBytes(file, mode) 620 if err != nil { 621 return nil, errors.Wrapf(err, "failed to parse") 622 } 623 f.Name = filename 624 return f, nil 625 }