github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/yaml/parser/parser.go (about)

     1  package parser
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"strings"
     7  
     8  	"github.com/bingoohuang/gg/pkg/yaml/ast"
     9  	"github.com/bingoohuang/gg/pkg/yaml/internal/errors"
    10  	"github.com/bingoohuang/gg/pkg/yaml/lexer"
    11  	"github.com/bingoohuang/gg/pkg/yaml/token"
    12  	"golang.org/x/xerrors"
    13  )
    14  
// parser groups the parsing methods of this package. The struct itself has no
// fields; the position in the token stream is tracked by the *context value
// that is passed to every method.
type parser struct{}
    16  
    17  func (p *parser) parseMapping(ctx *context) (ast.Node, error) {
    18  	node := ast.Mapping(ctx.currentToken(), true)
    19  	ctx.progress(1) // skip MappingStart token
    20  	for ctx.next() {
    21  		tk := ctx.currentToken()
    22  		if tk.Type == token.MappingEndType {
    23  			node.End = tk
    24  			return node, nil
    25  		} else if tk.Type == token.CollectEntryType {
    26  			ctx.progress(1)
    27  			continue
    28  		}
    29  
    30  		value, err := p.parseMappingValue(ctx)
    31  		if err != nil {
    32  			return nil, errors.Wrapf(err, "failed to parse mapping value in mapping node")
    33  		}
    34  		mvnode, ok := value.(*ast.MappingValueNode)
    35  		if !ok {
    36  			return nil, errors.ErrSyntax("failed to parse flow mapping node", value.GetToken())
    37  		}
    38  		node.Values = append(node.Values, mvnode)
    39  		ctx.progress(1)
    40  	}
    41  	return nil, errors.ErrSyntax("unterminated flow mapping", node.GetToken())
    42  }
    43  
    44  func (p *parser) parseSequence(ctx *context) (ast.Node, error) {
    45  	node := ast.Sequence(ctx.currentToken(), true)
    46  	ctx.progress(1) // skip SequenceStart token
    47  	for ctx.next() {
    48  		tk := ctx.currentToken()
    49  		if tk.Type == token.SequenceEndType {
    50  			node.End = tk
    51  			break
    52  		} else if tk.Type == token.CollectEntryType {
    53  			ctx.progress(1)
    54  			continue
    55  		}
    56  
    57  		value, err := p.parseToken(ctx, tk)
    58  		if err != nil {
    59  			return nil, errors.Wrapf(err, "failed to parse sequence value in flow sequence node")
    60  		}
    61  		node.Values = append(node.Values, value)
    62  		ctx.progress(1)
    63  	}
    64  	return node, nil
    65  }
    66  
    67  func (p *parser) parseTag(ctx *context) (ast.Node, error) {
    68  	tagToken := ctx.currentToken()
    69  	node := ast.Tag(tagToken)
    70  	ctx.progress(1) // skip tag token
    71  	var (
    72  		value ast.Node
    73  		err   error
    74  	)
    75  	switch token.ReservedTagKeyword(tagToken.Value) {
    76  	case token.MappingTag, token.OrderedMapTag:
    77  		value, err = p.parseMapping(ctx)
    78  	case token.IntegerTag,
    79  		token.FloatTag,
    80  		token.StringTag,
    81  		token.BinaryTag,
    82  		token.TimestampTag,
    83  		token.NullTag:
    84  		typ := ctx.currentToken().Type
    85  		if typ == token.LiteralType || typ == token.FoldedType {
    86  			value, err = p.parseLiteral(ctx)
    87  		} else {
    88  			value = p.parseScalarValue(ctx.currentToken())
    89  		}
    90  	case token.SequenceTag, token.SetTag:
    91  		err = errors.ErrSyntax(fmt.Sprintf("sorry, currently not supported %s tag", tagToken.Value), tagToken)
    92  	default:
    93  		// custom tag
    94  		value, err = p.parseToken(ctx, ctx.currentToken())
    95  	}
    96  	if err != nil {
    97  		return nil, errors.Wrapf(err, "failed to parse tag value")
    98  	}
    99  	node.Value = value
   100  	return node, nil
   101  }
   102  
   103  func (p *parser) removeLeftSideNewLineCharacter(src string) string {
   104  	// CR or LF or CRLF
   105  	return strings.TrimLeft(strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n"), "\r\n")
   106  }
   107  
   108  func (p *parser) existsNewLineCharacter(src string) bool {
   109  	if strings.Index(src, "\n") > 0 {
   110  		return true
   111  	}
   112  	if strings.Index(src, "\r") > 0 {
   113  		return true
   114  	}
   115  	return false
   116  }
   117  
   118  func (p *parser) validateMapKey(tk *token.Token) error {
   119  	if tk.Type != token.StringType {
   120  		return nil
   121  	}
   122  	origin := p.removeLeftSideNewLineCharacter(tk.Origin)
   123  	if p.existsNewLineCharacter(origin) {
   124  		return errors.ErrSyntax("unexpected key name", tk)
   125  	}
   126  	return nil
   127  }
   128  
   129  func (p *parser) createNullToken(base *token.Token) *token.Token {
   130  	pos := *(base.Position)
   131  	pos.Column++
   132  	return token.New("null", "null", &pos)
   133  }
   134  
   135  func (p *parser) parseMapValue(ctx *context, key ast.Node, colonToken *token.Token) (ast.Node, error) {
   136  	tk := ctx.currentToken()
   137  	if tk == nil {
   138  		nullToken := p.createNullToken(colonToken)
   139  		ctx.insertToken(ctx.idx, nullToken)
   140  		return ast.Null(nullToken), nil
   141  	}
   142  
   143  	if tk.Position.Column == key.GetToken().Position.Column && tk.Type == token.StringType {
   144  		// in this case,
   145  		// ----
   146  		// key: <value does not defined>
   147  		// next
   148  		nullToken := p.createNullToken(colonToken)
   149  		ctx.insertToken(ctx.idx, nullToken)
   150  		return ast.Null(nullToken), nil
   151  	}
   152  
   153  	if tk.Position.Column < key.GetToken().Position.Column {
   154  		// in this case,
   155  		// ----
   156  		//   key: <value does not defined>
   157  		// next
   158  		nullToken := p.createNullToken(colonToken)
   159  		ctx.insertToken(ctx.idx, nullToken)
   160  		return ast.Null(nullToken), nil
   161  	}
   162  
   163  	value, err := p.parseToken(ctx, ctx.currentToken())
   164  	if err != nil {
   165  		return nil, errors.Wrapf(err, "failed to parse mapping 'value' node")
   166  	}
   167  	return value, nil
   168  }
   169  
   170  func (p *parser) validateMapValue(ctx *context, key, value ast.Node) error {
   171  	keyColumn := key.GetToken().Position.Column
   172  	valueColumn := value.GetToken().Position.Column
   173  	if keyColumn != valueColumn {
   174  		return nil
   175  	}
   176  	if value.Type() != ast.StringType {
   177  		return nil
   178  	}
   179  	ntk := ctx.nextToken()
   180  	if ntk == nil || (ntk.Type != token.MappingValueType && ntk.Type != token.SequenceEntryType) {
   181  		return errors.ErrSyntax("could not found expected ':' token", value.GetToken())
   182  	}
   183  	return nil
   184  }
   185  
   186  func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) {
   187  	key, err := p.parseMapKey(ctx)
   188  	if err != nil {
   189  		return nil, errors.Wrapf(err, "failed to parse map key")
   190  	}
   191  	if err := p.validateMapKey(key.GetToken()); err != nil {
   192  		return nil, errors.Wrapf(err, "validate mapping key error")
   193  	}
   194  	ctx.progress(1)          // progress to mapping value token
   195  	tk := ctx.currentToken() // get mapping value token
   196  	if tk == nil {
   197  		return nil, errors.ErrSyntax("unexpected map", key.GetToken())
   198  	}
   199  	ctx.progress(1) // progress to value token
   200  	if err := p.setSameLineCommentIfExists(ctx, key); err != nil {
   201  		return nil, errors.Wrapf(err, "failed to set same line comment to node")
   202  	}
   203  	if key.GetComment() != nil {
   204  		// if current token is comment, GetComment() is not nil.
   205  		// then progress to value token
   206  		ctx.progressIgnoreComment(1)
   207  	}
   208  
   209  	value, err := p.parseMapValue(ctx, key, tk)
   210  	if err != nil {
   211  		return nil, errors.Wrapf(err, "failed to parse map value")
   212  	}
   213  	if err := p.validateMapValue(ctx, key, value); err != nil {
   214  		return nil, errors.Wrapf(err, "failed to validate map value")
   215  	}
   216  
   217  	mvnode := ast.MappingValue(tk, key, value)
   218  	node := ast.Mapping(tk, false, mvnode)
   219  
   220  	ntk := ctx.nextNotCommentToken()
   221  	antk := ctx.afterNextNotCommentToken()
   222  	for antk != nil && antk.Type == token.MappingValueType &&
   223  		ntk.Position.Column == key.GetToken().Position.Column {
   224  		ctx.progressIgnoreComment(1)
   225  		value, err := p.parseToken(ctx, ctx.currentToken())
   226  		if err != nil {
   227  			return nil, errors.Wrapf(err, "failed to parse mapping node")
   228  		}
   229  		switch value.Type() {
   230  		case ast.MappingType:
   231  			c := value.(*ast.MappingNode)
   232  			comment := c.GetComment()
   233  			for idx, v := range c.Values {
   234  				if idx == 0 && comment != nil {
   235  					if err := v.SetComment(comment); err != nil {
   236  						return nil, errors.Wrapf(err, "failed to set comment token to node")
   237  					}
   238  				}
   239  				node.Values = append(node.Values, v)
   240  			}
   241  		case ast.MappingValueType:
   242  			node.Values = append(node.Values, value.(*ast.MappingValueNode))
   243  		default:
   244  			return nil, xerrors.Errorf("failed to parse mapping value node node is %s", value.Type())
   245  		}
   246  		ntk = ctx.nextNotCommentToken()
   247  		antk = ctx.afterNextNotCommentToken()
   248  	}
   249  	if len(node.Values) == 1 {
   250  		return mvnode, nil
   251  	}
   252  	return node, nil
   253  }
   254  
   255  func (p *parser) parseSequenceEntry(ctx *context) (ast.Node, error) {
   256  	tk := ctx.currentToken()
   257  	sequenceNode := ast.Sequence(tk, false)
   258  	curColumn := tk.Position.Column
   259  	for tk.Type == token.SequenceEntryType {
   260  		ctx.progress(1) // skip sequence token
   261  		tk = ctx.currentToken()
   262  		var comment *ast.CommentGroupNode
   263  		if tk.Type == token.CommentType {
   264  			comment = p.parseCommentOnly(ctx)
   265  			tk = ctx.currentToken()
   266  			if tk.Type != token.SequenceEntryType {
   267  				break
   268  			}
   269  			ctx.progress(1) // skip sequence token
   270  		}
   271  		value, err := p.parseToken(ctx, ctx.currentToken())
   272  		if err != nil {
   273  			return nil, errors.Wrapf(err, "failed to parse sequence")
   274  		}
   275  		if comment != nil {
   276  			sequenceNode.ValueComments = append(sequenceNode.ValueComments, comment)
   277  		} else {
   278  			sequenceNode.ValueComments = append(sequenceNode.ValueComments, nil)
   279  		}
   280  		sequenceNode.Values = append(sequenceNode.Values, value)
   281  		tk = ctx.nextNotCommentToken()
   282  		if tk == nil {
   283  			break
   284  		}
   285  		if tk.Type != token.SequenceEntryType {
   286  			break
   287  		}
   288  		if tk.Position.Column != curColumn {
   289  			break
   290  		}
   291  		ctx.progressIgnoreComment(1)
   292  	}
   293  	return sequenceNode, nil
   294  }
   295  
   296  func (p *parser) parseAnchor(ctx *context) (ast.Node, error) {
   297  	tk := ctx.currentToken()
   298  	anchor := ast.Anchor(tk)
   299  	ntk := ctx.nextToken()
   300  	if ntk == nil {
   301  		return nil, errors.ErrSyntax("unexpected anchor. anchor name is undefined", tk)
   302  	}
   303  	ctx.progress(1) // skip anchor token
   304  	name, err := p.parseToken(ctx, ctx.currentToken())
   305  	if err != nil {
   306  		return nil, errors.Wrapf(err, "failed to parser anchor name node")
   307  	}
   308  	anchor.Name = name
   309  	ntk = ctx.nextToken()
   310  	if ntk == nil {
   311  		return nil, errors.ErrSyntax("unexpected anchor. anchor value is undefined", ctx.currentToken())
   312  	}
   313  	ctx.progress(1)
   314  	value, err := p.parseToken(ctx, ctx.currentToken())
   315  	if err != nil {
   316  		return nil, errors.Wrapf(err, "failed to parser anchor name node")
   317  	}
   318  	anchor.Value = value
   319  	return anchor, nil
   320  }
   321  
   322  func (p *parser) parseAlias(ctx *context) (ast.Node, error) {
   323  	tk := ctx.currentToken()
   324  	alias := ast.Alias(tk)
   325  	ntk := ctx.nextToken()
   326  	if ntk == nil {
   327  		return nil, errors.ErrSyntax("unexpected alias. alias name is undefined", tk)
   328  	}
   329  	ctx.progress(1) // skip alias token
   330  	name, err := p.parseToken(ctx, ctx.currentToken())
   331  	if err != nil {
   332  		return nil, errors.Wrapf(err, "failed to parser alias name node")
   333  	}
   334  	alias.Value = name
   335  	return alias, nil
   336  }
   337  
   338  func (p *parser) parseMapKey(ctx *context) (ast.Node, error) {
   339  	tk := ctx.currentToken()
   340  	if value := p.parseScalarValue(tk); value != nil {
   341  		return value, nil
   342  	}
   343  	switch tk.Type {
   344  	case token.MergeKeyType:
   345  		return ast.MergeKey(tk), nil
   346  	case token.MappingKeyType:
   347  		return p.parseMappingKey(ctx)
   348  	}
   349  	return nil, errors.ErrSyntax("unexpected mapping key", tk)
   350  }
   351  
   352  func (p *parser) parseStringValue(tk *token.Token) ast.Node {
   353  	switch tk.Type {
   354  	case token.StringType,
   355  		token.SingleQuoteType,
   356  		token.DoubleQuoteType:
   357  		return ast.String(tk)
   358  	}
   359  	return nil
   360  }
   361  
   362  func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast.Node, error) {
   363  	node := p.parseScalarValue(tk)
   364  	if node == nil {
   365  		return nil, nil
   366  	}
   367  	if p.isSameLineComment(ctx.nextToken(), node) {
   368  		ctx.progress(1)
   369  		if err := p.setSameLineCommentIfExists(ctx, node); err != nil {
   370  			return nil, errors.Wrapf(err, "failed to set same line comment to node")
   371  		}
   372  	}
   373  	return node, nil
   374  }
   375  
   376  func (p *parser) parseScalarValue(tk *token.Token) ast.Node {
   377  	if node := p.parseStringValue(tk); node != nil {
   378  		return node
   379  	}
   380  	switch tk.Type {
   381  	case token.NullType:
   382  		return ast.Null(tk)
   383  	case token.BoolType:
   384  		return ast.Bool(tk)
   385  	case token.IntegerType,
   386  		token.BinaryIntegerType,
   387  		token.OctetIntegerType,
   388  		token.HexIntegerType:
   389  		return ast.Integer(tk)
   390  	case token.FloatType:
   391  		return ast.Float(tk)
   392  	case token.InfinityType:
   393  		return ast.Infinity(tk)
   394  	case token.NanType:
   395  		return ast.Nan(tk)
   396  	}
   397  	return nil
   398  }
   399  
   400  func (p *parser) parseDirective(ctx *context) (ast.Node, error) {
   401  	node := ast.Directive(ctx.currentToken())
   402  	ctx.progress(1) // skip directive token
   403  	value, err := p.parseToken(ctx, ctx.currentToken())
   404  	if err != nil {
   405  		return nil, errors.Wrapf(err, "failed to parse directive value")
   406  	}
   407  	node.Value = value
   408  	ctx.progress(1)
   409  	tk := ctx.currentToken()
   410  	if tk == nil {
   411  		// Since current token is nil, use the previous token to specify
   412  		// the syntax error location.
   413  		return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.previousToken())
   414  	}
   415  	if tk.Type != token.DocumentHeaderType {
   416  		return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.currentToken())
   417  	}
   418  	return node, nil
   419  }
   420  
   421  func (p *parser) parseLiteral(ctx *context) (ast.Node, error) {
   422  	node := ast.Literal(ctx.currentToken())
   423  	ctx.progress(1) // skip literal/folded token
   424  
   425  	tk := ctx.currentToken()
   426  	var comment *ast.CommentGroupNode
   427  	if tk.Type == token.CommentType {
   428  		comment = p.parseCommentOnly(ctx)
   429  		if err := node.SetComment(comment); err != nil {
   430  			return nil, errors.Wrapf(err, "failed to set comment to literal")
   431  		}
   432  		tk = ctx.currentToken()
   433  	}
   434  	value, err := p.parseToken(ctx, tk)
   435  	if err != nil {
   436  		return nil, errors.Wrapf(err, "failed to parse literal/folded value")
   437  	}
   438  	snode, ok := value.(*ast.StringNode)
   439  	if !ok {
   440  		return nil, errors.ErrSyntax("unexpected token. required string token", value.GetToken())
   441  	}
   442  	node.Value = snode
   443  	return node, nil
   444  }
   445  
   446  func (p *parser) isSameLineComment(tk *token.Token, node ast.Node) bool {
   447  	if tk == nil {
   448  		return false
   449  	}
   450  	if tk.Type != token.CommentType {
   451  		return false
   452  	}
   453  	return tk.Position.Line == node.GetToken().Position.Line
   454  }
   455  
   456  func (p *parser) setSameLineCommentIfExists(ctx *context, node ast.Node) error {
   457  	tk := ctx.currentToken()
   458  	if !p.isSameLineComment(tk, node) {
   459  		return nil
   460  	}
   461  	if err := node.SetComment(ast.CommentGroup([]*token.Token{tk})); err != nil {
   462  		return errors.Wrapf(err, "failed to set comment token to ast.Node")
   463  	}
   464  	return nil
   465  }
   466  
   467  func (p *parser) parseDocument(ctx *context) (*ast.DocumentNode, error) {
   468  	startTk := ctx.currentToken()
   469  	ctx.progress(1) // skip document header token
   470  	body, err := p.parseToken(ctx, ctx.currentToken())
   471  	if err != nil {
   472  		return nil, errors.Wrapf(err, "failed to parse document body")
   473  	}
   474  	node := ast.Document(startTk, body)
   475  	if ntk := ctx.nextToken(); ntk != nil && ntk.Type == token.DocumentEndType {
   476  		node.End = ntk
   477  		ctx.progress(1)
   478  	}
   479  	return node, nil
   480  }
   481  
   482  func (p *parser) parseCommentOnly(ctx *context) *ast.CommentGroupNode {
   483  	commentTokens := []*token.Token{}
   484  	for {
   485  		tk := ctx.currentToken()
   486  		if tk == nil {
   487  			break
   488  		}
   489  		if tk.Type != token.CommentType {
   490  			break
   491  		}
   492  		commentTokens = append(commentTokens, tk)
   493  		ctx.progressIgnoreComment(1) // skip comment token
   494  	}
   495  	return ast.CommentGroup(commentTokens)
   496  }
   497  
   498  func (p *parser) parseComment(ctx *context) (ast.Node, error) {
   499  	group := p.parseCommentOnly(ctx)
   500  	node, err := p.parseToken(ctx, ctx.currentToken())
   501  	if err != nil {
   502  		return nil, errors.Wrapf(err, "failed to parse node after comment")
   503  	}
   504  	if node == nil {
   505  		return group, nil
   506  	}
   507  	if err := node.SetComment(group); err != nil {
   508  		return nil, errors.Wrapf(err, "failed to set comment token to node")
   509  	}
   510  	return node, nil
   511  }
   512  
   513  func (p *parser) parseMappingKey(ctx *context) (ast.Node, error) {
   514  	node := ast.MappingKey(ctx.currentToken())
   515  	ctx.progress(1) // skip mapping key token
   516  	value, err := p.parseToken(ctx, ctx.currentToken())
   517  	if err != nil {
   518  		return nil, errors.Wrapf(err, "failed to parse map key")
   519  	}
   520  	node.Value = value
   521  	return node, nil
   522  }
   523  
   524  func (p *parser) parseToken(ctx *context, tk *token.Token) (ast.Node, error) {
   525  	if tk == nil {
   526  		return nil, nil
   527  	}
   528  	if tk.NextType() == token.MappingValueType {
   529  		node, err := p.parseMappingValue(ctx)
   530  		return node, err
   531  	}
   532  	node, err := p.parseScalarValueWithComment(ctx, tk)
   533  	if err != nil {
   534  		return nil, errors.Wrapf(err, "failed to parse scalar value")
   535  	}
   536  	if node != nil {
   537  		return node, nil
   538  	}
   539  	switch tk.Type {
   540  	case token.CommentType:
   541  		return p.parseComment(ctx)
   542  	case token.MappingKeyType:
   543  		return p.parseMappingKey(ctx)
   544  	case token.DocumentHeaderType:
   545  		return p.parseDocument(ctx)
   546  	case token.MappingStartType:
   547  		return p.parseMapping(ctx)
   548  	case token.SequenceStartType:
   549  		return p.parseSequence(ctx)
   550  	case token.SequenceEntryType:
   551  		return p.parseSequenceEntry(ctx)
   552  	case token.AnchorType:
   553  		return p.parseAnchor(ctx)
   554  	case token.AliasType:
   555  		return p.parseAlias(ctx)
   556  	case token.DirectiveType:
   557  		return p.parseDirective(ctx)
   558  	case token.TagType:
   559  		return p.parseTag(ctx)
   560  	case token.LiteralType, token.FoldedType:
   561  		return p.parseLiteral(ctx)
   562  	}
   563  	return nil, nil
   564  }
   565  
   566  func (p *parser) parse(tokens token.Tokens, mode Mode) (*ast.File, error) {
   567  	ctx := newContext(tokens, mode)
   568  	file := &ast.File{Docs: []*ast.DocumentNode{}}
   569  	for ctx.next() {
   570  		node, err := p.parseToken(ctx, ctx.currentToken())
   571  		if err != nil {
   572  			return nil, errors.Wrapf(err, "failed to parse")
   573  		}
   574  		ctx.progressIgnoreComment(1)
   575  		if node == nil {
   576  			continue
   577  		}
   578  		if doc, ok := node.(*ast.DocumentNode); ok {
   579  			file.Docs = append(file.Docs, doc)
   580  		} else {
   581  			file.Docs = append(file.Docs, ast.Document(nil, node))
   582  		}
   583  	}
   584  	return file, nil
   585  }
   586  
// Mode is an unsigned flag value that is handed to the parsing context and
// selects optional parser behavior.
type Mode uint

const (
	// ParseComments has the value 1 (1 << iota with iota == 0).
	ParseComments Mode = 1 << iota // parse comments and add them to AST
)
   592  
   593  // ParseBytes parse from byte slice, and returns ast.File
   594  func ParseBytes(bytes []byte, mode Mode) (*ast.File, error) {
   595  	tokens := lexer.Tokenize(string(bytes))
   596  	f, err := Parse(tokens, mode)
   597  	if err != nil {
   598  		return nil, errors.Wrapf(err, "failed to parse")
   599  	}
   600  	return f, nil
   601  }
   602  
   603  // Parse parse from token instances, and returns ast.File
   604  func Parse(tokens token.Tokens, mode Mode) (*ast.File, error) {
   605  	var p parser
   606  	f, err := p.parse(tokens, mode)
   607  	if err != nil {
   608  		return nil, errors.Wrapf(err, "failed to parse")
   609  	}
   610  	return f, nil
   611  }
   612  
   613  // Parse parse from filename, and returns ast.File
   614  func ParseFile(filename string, mode Mode) (*ast.File, error) {
   615  	file, err := ioutil.ReadFile(filename)
   616  	if err != nil {
   617  		return nil, errors.Wrapf(err, "failed to read file: %s", filename)
   618  	}
   619  	f, err := ParseBytes(file, mode)
   620  	if err != nil {
   621  		return nil, errors.Wrapf(err, "failed to parse")
   622  	}
   623  	f.Name = filename
   624  	return f, nil
   625  }