// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nomdl

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"strconv"
	"strings"
	"text/scanner"

	"github.com/dolthub/dolt/go/store/d"
	"github.com/dolthub/dolt/go/store/types"
)

// Parser provides ways to parse Noms types and values from their textual
// form. Create one with New, or use the ParseType / Parse helpers.
type Parser struct {
	// lex is the token source the parse methods read from.
	lex *lexer
	// vrw is handed to the types constructors (NewList, NewSet, NewMap,
	// NewBlob) and queried for its format when building structs. ParseType
	// passes nil here, since parsing a type never touches it.
	vrw types.ValueReadWriter
}

// ParserOptions allows passing options into New.
type ParserOptions struct {
	// Filename is the name of the file we are currently parsing. New copies
	// it into the underlying scanner's Filename field.
	Filename string
}

// New creates a new Parser.
50 func New(vrw types.ValueReadWriter, r io.Reader, options ParserOptions) *Parser { 51 s := scanner.Scanner{} 52 s.Init(r) 53 s.Filename = options.Filename 54 s.Mode = scanner.ScanIdents | scanner.ScanComments | scanner.SkipComments | scanner.ScanFloats | scanner.ScanStrings // | scanner.ScanRawStrings 55 s.Error = func(s *scanner.Scanner, msg string) {} 56 lex := lexer{scanner: &s} 57 return &Parser{&lex, vrw} 58 } 59 60 // ParseType parses a string describing a Noms type. 61 func ParseType(code string) (typ *types.Type, err error) { 62 p := New(nil, strings.NewReader(code), ParserOptions{}) 63 var typeErr error 64 err = catchSyntaxError(func() { 65 typ, typeErr = p.parseType() 66 p.ensureAtEnd() 67 }) 68 69 if err == nil && typeErr != nil { 70 return nil, typeErr 71 } 72 73 return typ, err 74 } 75 76 // MustParseType parses a string describing a Noms type and panics if there 77 // is an error. 78 func MustParseType(code string) *types.Type { 79 typ, err := ParseType(code) 80 d.PanicIfError(err) 81 return typ 82 } 83 84 // Parse parses a string describing a Noms value. 85 func Parse(ctx context.Context, vrw types.ValueReadWriter, code string) (v types.Value, err error) { 86 p := New(vrw, strings.NewReader(code), ParserOptions{}) 87 var parseErr error 88 err = catchSyntaxError(func() { 89 v, parseErr = p.parseValue(ctx) 90 p.ensureAtEnd() 91 }) 92 93 if err == nil && parseErr != nil { 94 return nil, parseErr 95 } 96 97 return v, err 98 } 99 100 // MustParse parses a string describing a Noms value and panics if there 101 // is an error. 
102 func MustParse(ctx context.Context, vrw types.ValueReadWriter, code string) types.Value { 103 v, err := Parse(ctx, vrw, code) 104 d.PanicIfError(err) 105 return v 106 } 107 108 func (p *Parser) ensureAtEnd() { 109 p.lex.eat(scanner.EOF) 110 } 111 112 // Type : 113 // TypeWithoutUnion (`|` TypeWithoutUnion)* 114 // 115 // TypeWithoutUnion : 116 // `Blob` 117 // `Bool` 118 // `Float` 119 // `String` 120 // `Type` 121 // `Value` 122 // CycleType 123 // ListType 124 // MapType 125 // RefType 126 // SetType 127 // StructType 128 // 129 // CycleType : 130 // `Cycle` `<` StructName `>` 131 // 132 // ListType : 133 // `List` `<` Type? `>` 134 // 135 // MapType : 136 // `Map` `<` (Type `,` Type)? `>` 137 // 138 // RefType : 139 // `Set` `<` Type `>` 140 // 141 // SetType : 142 // `Set` `<` Type? `>` 143 // 144 // StructType : 145 // `Struct` StructName? `{` StructTypeFields? `}` 146 // 147 // StructTypeFields : 148 // StructTypeField 149 // StructTypeField `,` StructTypeFields? 150 // 151 // StructName : 152 // Ident 153 // 154 // StructTypeField : 155 // StructFieldName `?`? `:` Type 156 // 157 // StructFieldName : 158 // Ident 159 160 func (p *Parser) parseType() (*types.Type, error) { 161 tok := p.lex.eat(scanner.Ident) 162 return p.parseTypeWithToken(tok, p.lex.tokenText()) 163 } 164 165 func (p *Parser) parseTypeWithToken(tok rune, tokenText string) (*types.Type, error) { 166 t, err := p.parseSingleTypeWithToken(tok, tokenText) 167 168 if err != nil { 169 return nil, err 170 } 171 172 tok = p.lex.peek() 173 if tok != '|' { 174 return t, nil 175 } 176 unionTypes := []*types.Type{t} 177 178 for { 179 tok = p.lex.peek() 180 if tok == '|' { 181 p.lex.next() 182 } else { 183 break 184 } 185 st, err := p.parseSingleType() 186 187 if err != nil { 188 return nil, err 189 } 190 191 unionTypes = append(unionTypes, st) 192 } 193 return types.MakeUnionType(unionTypes...) 
194 } 195 196 func (p *Parser) parseSingleType() (*types.Type, error) { 197 tok := p.lex.eat(scanner.Ident) 198 return p.parseSingleTypeWithToken(tok, p.lex.tokenText()) 199 } 200 201 func (p *Parser) parseSingleTypeWithToken(tok rune, tokenText string) (*types.Type, error) { 202 switch tokenText { 203 case "Bool": 204 return types.PrimitiveTypeMap[types.BoolKind], nil 205 case "Blob": 206 return types.PrimitiveTypeMap[types.BlobKind], nil 207 case "Float": 208 return types.PrimitiveTypeMap[types.FloatKind], nil 209 case "String": 210 return types.PrimitiveTypeMap[types.StringKind], nil 211 case "Type": 212 return types.PrimitiveTypeMap[types.TypeKind], nil 213 case "Value": 214 return types.PrimitiveTypeMap[types.ValueKind], nil 215 case "Struct": 216 return p.parseStructType() 217 case "Map": 218 return p.parseMapType() 219 case "List": 220 elemType, err := p.parseSingleElemType(true) 221 222 if err != nil { 223 return nil, err 224 } 225 226 return types.MakeListType(elemType) 227 case "Set": 228 elemType, err := p.parseSingleElemType(true) 229 230 if err != nil { 231 return nil, err 232 } 233 234 return types.MakeSetType(elemType) 235 case "Ref": 236 elemType, err := p.parseSingleElemType(false) 237 238 if err != nil { 239 return nil, err 240 } 241 242 return types.MakeRefType(elemType) 243 case "Cycle": 244 return p.parseCycleType(), nil 245 } 246 247 p.lex.unexpectedToken(tok) 248 return nil, types.ErrUnknownType 249 } 250 251 func (p *Parser) parseStructType() (*types.Type, error) { 252 tok := p.lex.next() 253 name := "" 254 if tok == scanner.Ident { 255 name = p.lex.tokenText() 256 p.lex.eat('{') 257 } else { 258 p.lex.check('{', tok) 259 } 260 fields := []types.StructField{} 261 262 for p.lex.peek() != '}' { 263 p.lex.eat(scanner.Ident) 264 265 fieldName := p.lex.tokenText() 266 optional := p.lex.eatIf('?') 267 p.lex.eat(':') 268 typ, err := p.parseType() 269 270 if err != nil { 271 return nil, err 272 } 273 274 fields = append(fields, types.StructField{ 
275 Name: fieldName, 276 Type: typ, 277 Optional: optional, 278 }) 279 280 if p.lex.eatIf(',') { 281 continue 282 } 283 284 break 285 } 286 p.lex.eat('}') 287 return types.MakeStructType(name, fields...) 288 } 289 290 func (p *Parser) parseSingleElemType(allowEmptyUnion bool) (*types.Type, error) { 291 p.lex.eat('<') 292 if allowEmptyUnion && p.lex.eatIf('>') { 293 return types.MakeUnionType() 294 } 295 elemType, err := p.parseType() 296 297 if err != nil { 298 return nil, err 299 } 300 301 p.lex.eat('>') 302 return elemType, nil 303 } 304 305 func (p *Parser) parseCycleType() *types.Type { 306 p.lex.eat('<') 307 p.lex.eat(scanner.Ident) 308 name := p.lex.tokenText() 309 p.lex.eat('>') 310 return types.MakeCycleType(name) 311 } 312 313 func (p *Parser) parseMapType() (*types.Type, error) { 314 var keyType, valueType *types.Type 315 p.lex.eat('<') 316 317 if p.lex.eatIf('>') { 318 var err error 319 keyType, err = types.MakeUnionType() 320 321 if err != nil { 322 return nil, err 323 } 324 325 valueType = keyType 326 } else { 327 var err error 328 keyType, err = p.parseType() 329 330 if err != nil { 331 return nil, err 332 } 333 334 p.lex.eat(',') 335 valueType, err = p.parseType() 336 337 if err != nil { 338 return nil, err 339 } 340 341 p.lex.eat('>') 342 } 343 return types.MakeMapType(keyType, valueType) 344 } 345 346 // Value : 347 // Type 348 // Bool 349 // Float 350 // String 351 // List 352 // Set 353 // Map 354 // Struct 355 // 356 // Bool : 357 // `true` 358 // `false` 359 // 360 // Float : 361 // ... 362 // 363 // String : 364 // ... 365 // 366 // List : 367 // `[` Values? `]` 368 // 369 // Values : 370 // Value 371 // Value `,` Values? 372 // 373 // Set : 374 // `set` `{` Values? `}` 375 // 376 // Map : 377 // `map` `{` MapEntries? `}` 378 // 379 // MapEntries : 380 // MapEntry 381 // MapEntry `,` MapEntries? 382 // 383 // MapEntry : 384 // Value `:` Value 385 // 386 // Struct : 387 // `struct` StructName? `{` StructFields? 
`}` 388 // 389 // StructFields : 390 // StructField 391 // StructField `,` StructFields? 392 // 393 // StructField : 394 // StructFieldName `:` Value 395 func (p *Parser) parseValue(ctx context.Context) (types.Value, error) { 396 tok := p.lex.next() 397 switch tok { 398 case scanner.Ident: 399 switch tokenText := p.lex.tokenText(); tokenText { 400 case "true": 401 return types.Bool(true), nil 402 case "false": 403 return types.Bool(false), nil 404 case "set": 405 return p.parseSet(ctx) 406 case "map": 407 return p.parseMap(ctx) 408 case "struct": 409 return p.parseStruct(ctx) 410 case "blob": 411 return p.parseBlob(ctx) 412 default: 413 return p.parseTypeWithToken(tok, tokenText) 414 } 415 case scanner.Float, scanner.Int: 416 f := p.parseFloat() 417 return types.Float(f), nil 418 case '-': 419 if !p.lex.eatIf(scanner.Float) { 420 p.lex.eat(scanner.Int) 421 } 422 n := p.parseFloat() 423 return types.Float(-float64(n)), nil 424 case '+': 425 if !p.lex.eatIf(scanner.Float) { 426 p.lex.eat(scanner.Int) 427 } 428 return p.parseFloat(), nil 429 case '[': 430 return p.parseList(ctx) 431 case scanner.String: 432 s := p.lex.tokenText() 433 s2, err := strconv.Unquote(s) 434 if err != nil { 435 raiseSyntaxError(fmt.Sprintf("Invalid string %s", s), p.lex.pos()) 436 } 437 return types.String(s2), nil 438 } 439 440 p.lex.unexpectedToken(tok) 441 442 panic("unreachable") 443 } 444 445 func (p *Parser) parseFloat() types.Float { 446 s := p.lex.tokenText() 447 f, _ := strconv.ParseFloat(s, 64) 448 return types.Float(f) 449 } 450 451 func (p *Parser) parseList(ctx context.Context) (types.List, error) { 452 // already swallowed '[' 453 l, err := types.NewList(ctx, p.vrw) 454 455 if err != nil { 456 return types.EmptyList, err 457 } 458 459 le := l.Edit() 460 461 for p.lex.peek() != ']' { 462 v, err := p.parseValue(ctx) 463 464 if err != nil { 465 return types.EmptyList, err 466 } 467 le.Append(v) 468 469 if p.lex.eatIf(',') { 470 continue 471 } 472 473 break 474 } 475 p.lex.eat(']') 
476 return le.List(ctx) 477 } 478 479 func (p *Parser) parseSet(ctx context.Context) (types.Set, error) { 480 // already swallowed 'set' 481 p.lex.eat('{') 482 s, err := types.NewSet(ctx, p.vrw) 483 484 if err != nil { 485 return types.EmptySet, err 486 } 487 488 se := s.Edit() 489 490 for p.lex.peek() != '}' { 491 v, err := p.parseValue(ctx) 492 493 if err != nil { 494 return types.EmptySet, err 495 } 496 497 se, err = se.Insert(v) 498 499 if err != nil { 500 return types.EmptySet, err 501 } 502 503 if p.lex.eatIf(',') { 504 continue 505 } 506 507 break 508 } 509 p.lex.eat('}') 510 return se.Set(ctx) 511 } 512 513 func (p *Parser) parseMap(ctx context.Context) (types.Map, error) { 514 // already swallowed 'map' 515 p.lex.eat('{') 516 m, err := types.NewMap(ctx, p.vrw) 517 518 if err != nil { 519 return types.EmptyMap, err 520 } 521 522 me := m.Edit() 523 524 for p.lex.peek() != '}' { 525 key, err := p.parseValue(ctx) 526 527 if err != nil { 528 return types.EmptyMap, err 529 } 530 531 p.lex.eat(':') 532 value, err := p.parseValue(ctx) 533 if err != nil { 534 return types.EmptyMap, err 535 } 536 537 me = me.Set(key, value) 538 539 if p.lex.eatIf(',') { 540 continue 541 } 542 543 break 544 } 545 p.lex.eat('}') 546 return me.Map(ctx) 547 } 548 549 func (p *Parser) blobString(s string) []byte { 550 raise := func() { 551 raiseSyntaxError(fmt.Sprintf("Invalid blob \"%s\"", s), p.lex.pos()) 552 } 553 554 if len(s)%2 != 0 { 555 raise() 556 } 557 558 var buff bytes.Buffer 559 for i := 0; i < len(s); i += 2 { 560 n, err := strconv.ParseUint(s[i:i+2], 16, 8) 561 if err != nil { 562 raise() 563 } 564 buff.WriteByte(uint8(n)) 565 } 566 return buff.Bytes() 567 } 568 569 func (p *Parser) parseBlob(ctx context.Context) (types.Blob, error) { 570 // already swallowed 'blob' 571 p.lex.eat('{') 572 var buff bytes.Buffer 573 574 for p.lex.peek() != '}' { 575 tok := p.lex.next() 576 switch tok { 577 case scanner.Ident, scanner.Int: 578 s := p.lex.tokenText() 579 
buff.Write(p.blobString(s)) 580 default: 581 p.lex.unexpectedToken(tok) 582 } 583 584 } 585 p.lex.eat('}') 586 return types.NewBlob(ctx, p.vrw, bytes.NewReader(buff.Bytes())) 587 } 588 589 func (p *Parser) parseStruct(ctx context.Context) (types.Struct, error) { 590 // already swallowed 'struct' 591 tok := p.lex.next() 592 name := "" 593 if tok == scanner.Ident { 594 name = p.lex.tokenText() 595 p.lex.eat('{') 596 } else { 597 p.lex.check('{', tok) 598 } 599 data := types.StructData{} 600 601 for p.lex.peek() != '}' { 602 p.lex.eat(scanner.Ident) 603 604 fieldName := p.lex.tokenText() 605 p.lex.eat(':') 606 v, err := p.parseValue(ctx) 607 608 if err != nil { 609 return types.EmptyStruct(types.Format_Default), err 610 } 611 612 data[fieldName] = v 613 614 if p.lex.eatIf(',') { 615 continue 616 } 617 618 break 619 } 620 p.lex.eat('}') 621 return types.NewStruct(p.vrw.Format(), name, data) 622 }