github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nomdl/parser.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2017 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nomdl
    23  
    24  import (
    25  	"bytes"
    26  	"context"
    27  	"fmt"
    28  	"io"
    29  	"strconv"
    30  	"strings"
    31  	"text/scanner"
    32  
    33  	"github.com/dolthub/dolt/go/store/d"
    34  	"github.com/dolthub/dolt/go/store/types"
    35  )
    36  
    37  // Parser provides ways to parse Noms types.
    38  type Parser struct {
    39  	lex *lexer
    40  	vrw types.ValueReadWriter
    41  }
    42  
    43  // ParserOptions allows passing options into New.
    44  type ParserOptions struct {
    45  	// Filename is the name of the file we are currently parsing.
    46  	Filename string
    47  }
    48  
    49  // New creates a new Parser.
    50  func New(vrw types.ValueReadWriter, r io.Reader, options ParserOptions) *Parser {
    51  	s := scanner.Scanner{}
    52  	s.Init(r)
    53  	s.Filename = options.Filename
    54  	s.Mode = scanner.ScanIdents | scanner.ScanComments | scanner.SkipComments | scanner.ScanFloats | scanner.ScanStrings // | scanner.ScanRawStrings
    55  	s.Error = func(s *scanner.Scanner, msg string) {}
    56  	lex := lexer{scanner: &s}
    57  	return &Parser{&lex, vrw}
    58  }
    59  
    60  // ParseType parses a string describing a Noms type.
    61  func ParseType(code string) (typ *types.Type, err error) {
    62  	p := New(nil, strings.NewReader(code), ParserOptions{})
    63  	var typeErr error
    64  	err = catchSyntaxError(func() {
    65  		typ, typeErr = p.parseType()
    66  		p.ensureAtEnd()
    67  	})
    68  
    69  	if err == nil && typeErr != nil {
    70  		return nil, typeErr
    71  	}
    72  
    73  	return typ, err
    74  }
    75  
    76  // MustParseType parses a string describing a Noms type and panics if there
    77  // is an error.
    78  func MustParseType(code string) *types.Type {
    79  	typ, err := ParseType(code)
    80  	d.PanicIfError(err)
    81  	return typ
    82  }
    83  
    84  // Parse parses a string describing a Noms value.
    85  func Parse(ctx context.Context, vrw types.ValueReadWriter, code string) (v types.Value, err error) {
    86  	p := New(vrw, strings.NewReader(code), ParserOptions{})
    87  	var parseErr error
    88  	err = catchSyntaxError(func() {
    89  		v, parseErr = p.parseValue(ctx)
    90  		p.ensureAtEnd()
    91  	})
    92  
    93  	if err == nil && parseErr != nil {
    94  		return nil, parseErr
    95  	}
    96  
    97  	return v, err
    98  }
    99  
   100  // MustParse parses a string describing a Noms value and panics if there
   101  // is an error.
   102  func MustParse(ctx context.Context, vrw types.ValueReadWriter, code string) types.Value {
   103  	v, err := Parse(ctx, vrw, code)
   104  	d.PanicIfError(err)
   105  	return v
   106  }
   107  
   108  func (p *Parser) ensureAtEnd() {
   109  	p.lex.eat(scanner.EOF)
   110  }
   111  
   112  // Type :
   113  //   TypeWithoutUnion (`|` TypeWithoutUnion)*
   114  //
   115  // TypeWithoutUnion :
   116  //   `Blob`
   117  //   `Bool`
   118  //   `Float`
   119  //   `String`
   120  //   `Type`
   121  //   `Value`
   122  //   CycleType
   123  //   ListType
   124  //   MapType
   125  //   RefType
   126  //   SetType
   127  //   StructType
   128  //
   129  // CycleType :
   130  //   `Cycle` `<` StructName `>`
   131  //
   132  // ListType :
   133  //   `List` `<` Type? `>`
   134  //
   135  // MapType :
   136  //   `Map` `<` (Type `,` Type)? `>`
   137  //
   138  // RefType :
   139  //   `Ref` `<` Type `>`
   140  //
   141  // SetType :
   142  //   `Set` `<` Type? `>`
   143  //
   144  // StructType :
   145  //   `Struct` StructName? `{` StructTypeFields? `}`
   146  //
   147  // StructTypeFields :
   148  //   StructTypeField
   149  //   StructTypeField `,` StructTypeFields?
   150  //
   151  // StructName :
   152  //   Ident
   153  //
   154  // StructTypeField :
   155  //   StructFieldName `?`? `:` Type
   156  //
   157  // StructFieldName :
   158  //   Ident
   159  
   160  func (p *Parser) parseType() (*types.Type, error) {
   161  	tok := p.lex.eat(scanner.Ident)
   162  	return p.parseTypeWithToken(tok, p.lex.tokenText())
   163  }
   164  
   165  func (p *Parser) parseTypeWithToken(tok rune, tokenText string) (*types.Type, error) {
   166  	t, err := p.parseSingleTypeWithToken(tok, tokenText)
   167  
   168  	if err != nil {
   169  		return nil, err
   170  	}
   171  
   172  	tok = p.lex.peek()
   173  	if tok != '|' {
   174  		return t, nil
   175  	}
   176  	unionTypes := []*types.Type{t}
   177  
   178  	for {
   179  		tok = p.lex.peek()
   180  		if tok == '|' {
   181  			p.lex.next()
   182  		} else {
   183  			break
   184  		}
   185  		st, err := p.parseSingleType()
   186  
   187  		if err != nil {
   188  			return nil, err
   189  		}
   190  
   191  		unionTypes = append(unionTypes, st)
   192  	}
   193  	return types.MakeUnionType(unionTypes...)
   194  }
   195  
   196  func (p *Parser) parseSingleType() (*types.Type, error) {
   197  	tok := p.lex.eat(scanner.Ident)
   198  	return p.parseSingleTypeWithToken(tok, p.lex.tokenText())
   199  }
   200  
   201  func (p *Parser) parseSingleTypeWithToken(tok rune, tokenText string) (*types.Type, error) {
   202  	switch tokenText {
   203  	case "Bool":
   204  		return types.PrimitiveTypeMap[types.BoolKind], nil
   205  	case "Blob":
   206  		return types.PrimitiveTypeMap[types.BlobKind], nil
   207  	case "Float":
   208  		return types.PrimitiveTypeMap[types.FloatKind], nil
   209  	case "String":
   210  		return types.PrimitiveTypeMap[types.StringKind], nil
   211  	case "Type":
   212  		return types.PrimitiveTypeMap[types.TypeKind], nil
   213  	case "Value":
   214  		return types.PrimitiveTypeMap[types.ValueKind], nil
   215  	case "Struct":
   216  		return p.parseStructType()
   217  	case "Map":
   218  		return p.parseMapType()
   219  	case "List":
   220  		elemType, err := p.parseSingleElemType(true)
   221  
   222  		if err != nil {
   223  			return nil, err
   224  		}
   225  
   226  		return types.MakeListType(elemType)
   227  	case "Set":
   228  		elemType, err := p.parseSingleElemType(true)
   229  
   230  		if err != nil {
   231  			return nil, err
   232  		}
   233  
   234  		return types.MakeSetType(elemType)
   235  	case "Ref":
   236  		elemType, err := p.parseSingleElemType(false)
   237  
   238  		if err != nil {
   239  			return nil, err
   240  		}
   241  
   242  		return types.MakeRefType(elemType)
   243  	case "Cycle":
   244  		return p.parseCycleType(), nil
   245  	}
   246  
   247  	p.lex.unexpectedToken(tok)
   248  	return nil, types.ErrUnknownType
   249  }
   250  
   251  func (p *Parser) parseStructType() (*types.Type, error) {
   252  	tok := p.lex.next()
   253  	name := ""
   254  	if tok == scanner.Ident {
   255  		name = p.lex.tokenText()
   256  		p.lex.eat('{')
   257  	} else {
   258  		p.lex.check('{', tok)
   259  	}
   260  	fields := []types.StructField{}
   261  
   262  	for p.lex.peek() != '}' {
   263  		p.lex.eat(scanner.Ident)
   264  
   265  		fieldName := p.lex.tokenText()
   266  		optional := p.lex.eatIf('?')
   267  		p.lex.eat(':')
   268  		typ, err := p.parseType()
   269  
   270  		if err != nil {
   271  			return nil, err
   272  		}
   273  
   274  		fields = append(fields, types.StructField{
   275  			Name:     fieldName,
   276  			Type:     typ,
   277  			Optional: optional,
   278  		})
   279  
   280  		if p.lex.eatIf(',') {
   281  			continue
   282  		}
   283  
   284  		break
   285  	}
   286  	p.lex.eat('}')
   287  	return types.MakeStructType(name, fields...)
   288  }
   289  
   290  func (p *Parser) parseSingleElemType(allowEmptyUnion bool) (*types.Type, error) {
   291  	p.lex.eat('<')
   292  	if allowEmptyUnion && p.lex.eatIf('>') {
   293  		return types.MakeUnionType()
   294  	}
   295  	elemType, err := p.parseType()
   296  
   297  	if err != nil {
   298  		return nil, err
   299  	}
   300  
   301  	p.lex.eat('>')
   302  	return elemType, nil
   303  }
   304  
   305  func (p *Parser) parseCycleType() *types.Type {
   306  	p.lex.eat('<')
   307  	p.lex.eat(scanner.Ident)
   308  	name := p.lex.tokenText()
   309  	p.lex.eat('>')
   310  	return types.MakeCycleType(name)
   311  }
   312  
   313  func (p *Parser) parseMapType() (*types.Type, error) {
   314  	var keyType, valueType *types.Type
   315  	p.lex.eat('<')
   316  
   317  	if p.lex.eatIf('>') {
   318  		var err error
   319  		keyType, err = types.MakeUnionType()
   320  
   321  		if err != nil {
   322  			return nil, err
   323  		}
   324  
   325  		valueType = keyType
   326  	} else {
   327  		var err error
   328  		keyType, err = p.parseType()
   329  
   330  		if err != nil {
   331  			return nil, err
   332  		}
   333  
   334  		p.lex.eat(',')
   335  		valueType, err = p.parseType()
   336  
   337  		if err != nil {
   338  			return nil, err
   339  		}
   340  
   341  		p.lex.eat('>')
   342  	}
   343  	return types.MakeMapType(keyType, valueType)
   344  }
   345  
   346  // Value :
   347  //   Type
   348  //   Bool
   349  //   Float
   350  //   String
   351  //   List
   352  //   Set
   353  //   Map
   354  //   Struct
   355  //
   356  // Bool :
   357  //   `true`
   358  //   `false`
   359  //
   360  // Float :
   361  //   ...
   362  //
   363  // String :
   364  //   ...
   365  //
   366  // List :
   367  //   `[` Values? `]`
   368  //
   369  // Values :
   370  //   Value
   371  //   Value `,` Values?
   372  //
   373  // Set :
   374  //   `set` `{` Values? `}`
   375  //
   376  // Map :
   377  //   `map` `{` MapEntries? `}`
   378  //
   379  // MapEntries :
   380  //   MapEntry
   381  //   MapEntry `,` MapEntries?
   382  //
   383  // MapEntry :
   384  //   Value `:` Value
   385  //
   386  // Struct :
   387  //   `struct` StructName? `{` StructFields? `}`
   388  //
   389  // StructFields :
   390  //   StructField
   391  //   StructField `,` StructFields?
   392  //
   393  // StructField :
   394  //   StructFieldName `:` Value
   395  func (p *Parser) parseValue(ctx context.Context) (types.Value, error) {
   396  	tok := p.lex.next()
   397  	switch tok {
   398  	case scanner.Ident:
   399  		switch tokenText := p.lex.tokenText(); tokenText {
   400  		case "true":
   401  			return types.Bool(true), nil
   402  		case "false":
   403  			return types.Bool(false), nil
   404  		case "set":
   405  			return p.parseSet(ctx)
   406  		case "map":
   407  			return p.parseMap(ctx)
   408  		case "struct":
   409  			return p.parseStruct(ctx)
   410  		case "blob":
   411  			return p.parseBlob(ctx)
   412  		default:
   413  			return p.parseTypeWithToken(tok, tokenText)
   414  		}
   415  	case scanner.Float, scanner.Int:
   416  		f := p.parseFloat()
   417  		return types.Float(f), nil
   418  	case '-':
   419  		if !p.lex.eatIf(scanner.Float) {
   420  			p.lex.eat(scanner.Int)
   421  		}
   422  		n := p.parseFloat()
   423  		return types.Float(-float64(n)), nil
   424  	case '+':
   425  		if !p.lex.eatIf(scanner.Float) {
   426  			p.lex.eat(scanner.Int)
   427  		}
   428  		return p.parseFloat(), nil
   429  	case '[':
   430  		return p.parseList(ctx)
   431  	case scanner.String:
   432  		s := p.lex.tokenText()
   433  		s2, err := strconv.Unquote(s)
   434  		if err != nil {
   435  			raiseSyntaxError(fmt.Sprintf("Invalid string %s", s), p.lex.pos())
   436  		}
   437  		return types.String(s2), nil
   438  	}
   439  
   440  	p.lex.unexpectedToken(tok)
   441  
   442  	panic("unreachable")
   443  }
   444  
   445  func (p *Parser) parseFloat() types.Float {
   446  	s := p.lex.tokenText()
   447  	f, _ := strconv.ParseFloat(s, 64)
   448  	return types.Float(f)
   449  }
   450  
   451  func (p *Parser) parseList(ctx context.Context) (types.List, error) {
   452  	// already swallowed '['
   453  	l, err := types.NewList(ctx, p.vrw)
   454  
   455  	if err != nil {
   456  		return types.EmptyList, err
   457  	}
   458  
   459  	le := l.Edit()
   460  
   461  	for p.lex.peek() != ']' {
   462  		v, err := p.parseValue(ctx)
   463  
   464  		if err != nil {
   465  			return types.EmptyList, err
   466  		}
   467  		le.Append(v)
   468  
   469  		if p.lex.eatIf(',') {
   470  			continue
   471  		}
   472  
   473  		break
   474  	}
   475  	p.lex.eat(']')
   476  	return le.List(ctx)
   477  }
   478  
   479  func (p *Parser) parseSet(ctx context.Context) (types.Set, error) {
   480  	// already swallowed 'set'
   481  	p.lex.eat('{')
   482  	s, err := types.NewSet(ctx, p.vrw)
   483  
   484  	if err != nil {
   485  		return types.EmptySet, err
   486  	}
   487  
   488  	se := s.Edit()
   489  
   490  	for p.lex.peek() != '}' {
   491  		v, err := p.parseValue(ctx)
   492  
   493  		if err != nil {
   494  			return types.EmptySet, err
   495  		}
   496  
   497  		se, err = se.Insert(v)
   498  
   499  		if err != nil {
   500  			return types.EmptySet, err
   501  		}
   502  
   503  		if p.lex.eatIf(',') {
   504  			continue
   505  		}
   506  
   507  		break
   508  	}
   509  	p.lex.eat('}')
   510  	return se.Set(ctx)
   511  }
   512  
   513  func (p *Parser) parseMap(ctx context.Context) (types.Map, error) {
   514  	// already swallowed 'map'
   515  	p.lex.eat('{')
   516  	m, err := types.NewMap(ctx, p.vrw)
   517  
   518  	if err != nil {
   519  		return types.EmptyMap, err
   520  	}
   521  
   522  	me := m.Edit()
   523  
   524  	for p.lex.peek() != '}' {
   525  		key, err := p.parseValue(ctx)
   526  
   527  		if err != nil {
   528  			return types.EmptyMap, err
   529  		}
   530  
   531  		p.lex.eat(':')
   532  		value, err := p.parseValue(ctx)
   533  		if err != nil {
   534  			return types.EmptyMap, err
   535  		}
   536  
   537  		me = me.Set(key, value)
   538  
   539  		if p.lex.eatIf(',') {
   540  			continue
   541  		}
   542  
   543  		break
   544  	}
   545  	p.lex.eat('}')
   546  	return me.Map(ctx)
   547  }
   548  
   549  func (p *Parser) blobString(s string) []byte {
   550  	raise := func() {
   551  		raiseSyntaxError(fmt.Sprintf("Invalid blob \"%s\"", s), p.lex.pos())
   552  	}
   553  
   554  	if len(s)%2 != 0 {
   555  		raise()
   556  	}
   557  
   558  	var buff bytes.Buffer
   559  	for i := 0; i < len(s); i += 2 {
   560  		n, err := strconv.ParseUint(s[i:i+2], 16, 8)
   561  		if err != nil {
   562  			raise()
   563  		}
   564  		buff.WriteByte(uint8(n))
   565  	}
   566  	return buff.Bytes()
   567  }
   568  
   569  func (p *Parser) parseBlob(ctx context.Context) (types.Blob, error) {
   570  	// already swallowed 'blob'
   571  	p.lex.eat('{')
   572  	var buff bytes.Buffer
   573  
   574  	for p.lex.peek() != '}' {
   575  		tok := p.lex.next()
   576  		switch tok {
   577  		case scanner.Ident, scanner.Int:
   578  			s := p.lex.tokenText()
   579  			buff.Write(p.blobString(s))
   580  		default:
   581  			p.lex.unexpectedToken(tok)
   582  		}
   583  
   584  	}
   585  	p.lex.eat('}')
   586  	return types.NewBlob(ctx, p.vrw, bytes.NewReader(buff.Bytes()))
   587  }
   588  
   589  func (p *Parser) parseStruct(ctx context.Context) (types.Struct, error) {
   590  	// already swallowed 'struct'
   591  	tok := p.lex.next()
   592  	name := ""
   593  	if tok == scanner.Ident {
   594  		name = p.lex.tokenText()
   595  		p.lex.eat('{')
   596  	} else {
   597  		p.lex.check('{', tok)
   598  	}
   599  	data := types.StructData{}
   600  
   601  	for p.lex.peek() != '}' {
   602  		p.lex.eat(scanner.Ident)
   603  
   604  		fieldName := p.lex.tokenText()
   605  		p.lex.eat(':')
   606  		v, err := p.parseValue(ctx)
   607  
   608  		if err != nil {
   609  			return types.EmptyStruct(types.Format_Default), err
   610  		}
   611  
   612  		data[fieldName] = v
   613  
   614  		if p.lex.eatIf(',') {
   615  			continue
   616  		}
   617  
   618  		break
   619  	}
   620  	p.lex.eat('}')
   621  	return types.NewStruct(p.vrw.Format(), name, data)
   622  }