github.com/ipld/go-ipld-prime@v0.21.0/schema/dsl/parse.go (about) 1 package schemadsl 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "io" 8 "os" 9 "reflect" 10 "strconv" 11 "strings" 12 13 dmt "github.com/ipld/go-ipld-prime/schema/dmt" 14 ) 15 16 var globalTrue = true 17 18 // TODO: fuzz testing 19 20 func ParseBytes(src []byte) (*dmt.Schema, error) { 21 return Parse("", bytes.NewReader(src)) 22 } 23 24 func ParseFile(path string) (*dmt.Schema, error) { 25 f, err := os.Open(path) 26 if err != nil { 27 return nil, err 28 } 29 defer f.Close() 30 return Parse(path, f) 31 } 32 33 func Parse(name string, r io.Reader) (*dmt.Schema, error) { 34 p := &parser{ 35 path: name, 36 br: bufio.NewReader(r), 37 line: 1, 38 col: 1, 39 } 40 41 sch := &dmt.Schema{} 42 sch.Types.Values = make(map[string]dmt.TypeDefn) 43 44 for { 45 tok, err := p.consumeToken() 46 if err == io.EOF { 47 break 48 } 49 50 switch tok { 51 case "type": 52 name, err := p.consumeName() 53 if err != nil { 54 return nil, err 55 } 56 defn, err := p.typeDefn() 57 if err != nil { 58 return nil, err 59 } 60 mapAppend(&sch.Types, name, defn) 61 case "advanced": 62 return nil, p.errf("TODO: advanced") 63 default: 64 return nil, p.errf("unexpected token: %q", tok) 65 } 66 } 67 return sch, nil 68 } 69 70 func mapAppend(mapPtr, k, v interface{}) { 71 // TODO: delete with generics 72 // TODO: error on dupes 73 74 mval := reflect.ValueOf(mapPtr).Elem() 75 kval := reflect.ValueOf(k) 76 vval := reflect.ValueOf(v) 77 78 keys := mval.FieldByName("Keys") 79 keys.Set(reflect.Append(keys, kval)) 80 81 values := mval.FieldByName("Values") 82 if values.IsNil() { 83 values.Set(reflect.MakeMap(values.Type())) 84 } 85 values.SetMapIndex(kval, vval) 86 } 87 88 type parser struct { 89 path string 90 br *bufio.Reader 91 92 peekedToken string 93 94 line, col int 95 } 96 97 func (p *parser) forwardError(err error) error { 98 var prefix string 99 if p.path != "" { 100 prefix = p.path + ":" 101 } 102 return fmt.Errorf("%s%d:%d: %s", prefix, p.line, p.col, err) 103 } 104 105 func (p *parser) errf(format string, args ...interface{}) error { 106 return p.forwardError(fmt.Errorf(format, args...)) 107 } 108 109 func (p *parser) consumeToken() (string, error) { 110 if tok := p.peekedToken; tok != "" { 111 p.peekedToken = "" 112 return tok, nil 113 } 114 for { 115 // TODO: use runes for better unicode support 116 b, err := p.br.ReadByte() 117 if err == io.EOF { 118 return "", err // TODO: ErrUnexpectedEOF? 119 } 120 if err != nil { 121 return "", p.forwardError(err) 122 } 123 p.col++ 124 switch b { 125 case ' ', '\t', '\r': // skip whitespace 126 continue 127 case '\n': // skip newline 128 // TODO: should we require a newline after each type def, struct field, etc? 129 p.line++ 130 p.col = 1 131 continue 132 case '"': // quoted string 133 quoted, err := p.br.ReadString('"') 134 if err != nil { 135 return "", p.forwardError(err) 136 } 137 return "\"" + quoted, nil 138 case '{', '}', '[', ']', '(', ')', ':', '&': // simple token 139 return string(b), nil 140 case '#': // comment 141 _, err := p.br.ReadString('\n') 142 if err != nil { 143 return "", p.forwardError(err) 144 } 145 // tokenize the newline 146 if err := p.br.UnreadByte(); err != nil { 147 panic(err) // should never happen 148 } 149 continue 150 default: // string token or name 151 var sb strings.Builder 152 sb.WriteByte(b) 153 for { 154 b, err := p.br.ReadByte() 155 if err == io.EOF { 156 // Token ends at the end of the whole input. 157 return sb.String(), nil 158 } 159 if err != nil { 160 return "", p.forwardError(err) 161 } 162 // TODO: should probably allow unicode letters and numbers, like Go? 163 switch { 164 case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z': 165 case b >= '0' && b <= '9': 166 case b == '_': 167 default: 168 if err := p.br.UnreadByte(); err != nil { 169 panic(err) // should never happen 170 } 171 return sb.String(), nil 172 } 173 sb.WriteByte(b) 174 } 175 } 176 } 177 } 178 179 func (p *parser) consumePeeked() { 180 if p.peekedToken == "" { 181 panic("consumePeeked requires a peeked token to be present") 182 } 183 p.peekedToken = "" 184 } 185 186 func (p *parser) peekToken() (string, error) { 187 if tok := p.peekedToken; tok != "" { 188 return tok, nil 189 } 190 tok, err := p.consumeToken() 191 if err != nil { 192 if err == io.EOF { 193 // peekToken is often used when a token is optional. 194 // If we hit io.EOF, that's not an error. 195 // TODO: consider making peekToken just not return an error? 196 return "", nil 197 } 198 return "", err 199 } 200 p.peekedToken = tok 201 return tok, nil 202 } 203 204 func (p *parser) consumeName() (string, error) { 205 tok, err := p.consumeToken() 206 if err != nil { 207 return "", err 208 } 209 switch tok { 210 case "\"", "{", "}", "[", "]", "(", ")", ":": 211 return "", p.errf("expected a name, got %q", tok) 212 } 213 if tok[0] == '"' { 214 return "", p.errf("expected a name, got string %s", tok) 215 } 216 return tok, nil 217 } 218 219 func (p *parser) consumeString() (string, error) { 220 tok, err := p.consumeToken() 221 if err != nil { 222 return "", err 223 } 224 if tok[0] != '"' { 225 return "", p.errf("expected a string, got %q", tok) 226 } 227 // Unquote, too. 228 return tok[1 : len(tok)-1], nil 229 } 230 231 func (p *parser) consumeStringMap() (map[string]string, error) { 232 result := map[string]string{} 233 loop: 234 for { 235 tok, err := p.peekToken() 236 if err != nil { 237 return result, err 238 } 239 switch tok { 240 case "{": 241 p.consumePeeked() 242 case "}": 243 p.consumePeeked() 244 break loop 245 default: 246 key, err := p.consumeName() 247 if err != nil { 248 return result, err 249 } 250 value, err := p.consumeString() 251 if err != nil { 252 return result, err 253 } 254 result[key] = value 255 } 256 } 257 return result, nil 258 } 259 260 func (p *parser) consumeRequired(tok string) error { 261 got, err := p.consumeToken() 262 if err != nil { 263 return err 264 } 265 if got != tok { 266 return p.errf("expected %q, got %q", tok, got) 267 } 268 return nil 269 } 270 271 func (p *parser) typeDefn() (dmt.TypeDefn, error) { 272 var defn dmt.TypeDefn 273 kind, err := p.consumeToken() 274 if err != nil { 275 return defn, err 276 } 277 278 switch kind { 279 case "struct": 280 if err := p.consumeRequired("{"); err != nil { 281 return defn, err 282 } 283 defn.TypeDefnStruct, err = p.typeStruct() 284 case "union": 285 if err := p.consumeRequired("{"); err != nil { 286 return defn, err 287 } 288 defn.TypeDefnUnion, err = p.typeUnion() 289 case "enum": 290 if err := p.consumeRequired("{"); err != nil { 291 return defn, err 292 } 293 defn.TypeDefnEnum, err = p.typeEnum() 294 case "bool": 295 defn.TypeDefnBool = &dmt.TypeDefnBool{} 296 case "bytes": 297 defn.TypeDefnBytes = &dmt.TypeDefnBytes{} 298 case "float": 299 defn.TypeDefnFloat = &dmt.TypeDefnFloat{} 300 case "int": 301 defn.TypeDefnInt = &dmt.TypeDefnInt{} 302 case "link": 303 defn.TypeDefnLink = &dmt.TypeDefnLink{} 304 case "any": 305 defn.TypeDefnAny = &dmt.TypeDefnAny{} 306 case "&": 307 target, err := p.consumeName() 308 if err != nil { 309 return defn, err 310 } 311 defn.TypeDefnLink = &dmt.TypeDefnLink{ExpectedType: &target} 312 case "string": 313 defn.TypeDefnString = &dmt.TypeDefnString{} 314 case "{": 315 defn.TypeDefnMap, err = p.typeMap() 316 case "[": 317 defn.TypeDefnList, err = p.typeList() 318 case "=": 319 from, err := p.consumeName() 320 if err != nil { 321 return defn, err 322 } 323 defn.TypeDefnCopy = &dmt.TypeDefnCopy{FromType: from} 324 default: 325 err = p.errf("unknown type keyword: %q", kind) 326 } 327 328 return defn, err 329 } 330 331 func (p *parser) typeStruct() (*dmt.TypeDefnStruct, error) { 332 repr := &dmt.StructRepresentation_Map{} 333 repr.Fields = &dmt.Map__FieldName__StructRepresentation_Map_FieldDetails{} 334 335 defn := &dmt.TypeDefnStruct{} 336 for { 337 tok, err := p.consumeToken() 338 if err != nil { 339 return nil, err 340 } 341 342 if tok == "}" { 343 break 344 } 345 name := tok 346 347 var field dmt.StructField 348 loop: 349 for { 350 tok, err := p.peekToken() 351 if err != nil { 352 return nil, err 353 } 354 switch tok { 355 case "optional": 356 if field.Optional != nil { 357 return nil, p.errf("multiple optional keywords") 358 } 359 field.Optional = &globalTrue 360 p.consumePeeked() 361 case "nullable": 362 if field.Nullable != nil { 363 return nil, p.errf("multiple nullable keywords") 364 } 365 field.Nullable = &globalTrue 366 p.consumePeeked() 367 default: 368 var err error 369 field.Type, err = p.typeNameOrInlineDefn() 370 if err != nil { 371 return nil, err 372 } 373 break loop 374 } 375 } 376 tok, err = p.peekToken() 377 if err != nil { 378 return nil, err 379 } 380 if tok == "(" { 381 details := dmt.StructRepresentation_Map_FieldDetails{} 382 p.consumePeeked() 383 parenLoop: 384 for { 385 tok, err = p.consumeToken() 386 if err != nil { 387 return nil, err 388 } 389 switch tok { 390 case ")": 391 break parenLoop 392 case "rename": 393 str, err := p.consumeString() 394 if err != nil { 395 return nil, err 396 } 397 details.Rename = &str 398 case "implicit": 399 scalar, err := p.consumeToken() 400 if err != nil { 401 return nil, err 402 } 403 var anyScalar dmt.AnyScalar 404 switch { 405 case scalar[0] == '"': // string 406 s, err := strconv.Unquote(scalar) 407 if err != nil { 408 return nil, p.forwardError(err) 409 } 410 anyScalar.String = &s 411 case scalar == "true", scalar == "false": // bool 412 t := scalar == "true" 413 anyScalar.Bool = &t 414 case scalar[0] >= '0' && scalar[0] <= '0': 415 n, err := strconv.Atoi(scalar) 416 if err != nil { 417 return nil, p.forwardError(err) 418 } 419 anyScalar.Int = &n 420 default: 421 return nil, p.errf("unsupported implicit scalar: %s", scalar) 422 } 423 424 details.Implicit = &anyScalar 425 } 426 } 427 mapAppend(repr.Fields, name, details) 428 } 429 430 mapAppend(&defn.Fields, name, field) 431 } 432 433 reprName := "map" // default repr 434 if tok, err := p.peekToken(); err == nil && tok == "representation" { 435 p.consumePeeked() 436 name, err := p.consumeName() 437 if err != nil { 438 return nil, err 439 } 440 reprName = name 441 } 442 if reprName != "map" && len(repr.Fields.Keys) > 0 { 443 return nil, p.errf("rename and implicit are only supported for struct map representations") 444 } 445 switch reprName { 446 case "map": 447 if len(repr.Fields.Keys) == 0 { 448 // Fields is optional; omit it if empty. 449 repr.Fields = nil 450 } 451 defn.Representation.StructRepresentation_Map = repr 452 return defn, nil 453 case "tuple": 454 defn.Representation.StructRepresentation_Tuple = &dmt.StructRepresentation_Tuple{} 455 return defn, nil 456 // TODO: support custom fieldorder 457 case "stringjoin": 458 optMap, err := p.consumeStringMap() 459 if err != nil { 460 return nil, err 461 } 462 join, hasJoin := optMap["join"] 463 if !hasJoin { 464 return nil, p.errf("no join value provided for stringjoin repr") 465 } 466 defn.Representation.StructRepresentation_Stringjoin = &dmt.StructRepresentation_Stringjoin{ 467 Join: join, 468 } 469 return defn, nil 470 case "listpairs": 471 defn.Representation.StructRepresentation_Listpairs = &dmt.StructRepresentation_Listpairs{} 472 return defn, nil 473 default: 474 return nil, p.errf("unknown struct repr: %q", reprName) 475 } 476 } 477 478 func (p *parser) typeNameOrInlineDefn() (dmt.TypeNameOrInlineDefn, error) { 479 var typ dmt.TypeNameOrInlineDefn 480 tok, err := p.consumeToken() 481 if err != nil { 482 return typ, err 483 } 484 485 switch tok { 486 case "&": 487 expectedName, err := p.consumeName() 488 if err != nil { 489 return typ, err 490 } 491 typ.InlineDefn = &dmt.InlineDefn{TypeDefnLink: &dmt.TypeDefnLink{ExpectedType: &expectedName}} 492 case "[": 493 tlist, err := p.typeList() 494 if err != nil { 495 return typ, err 496 } 497 typ.InlineDefn = &dmt.InlineDefn{TypeDefnList: tlist} 498 case "{": 499 tmap, err := p.typeMap() 500 if err != nil { 501 return typ, err 502 } 503 typ.InlineDefn = &dmt.InlineDefn{TypeDefnMap: tmap} 504 default: 505 typ.TypeName = &tok 506 } 507 return typ, nil 508 } 509 510 func (p *parser) typeList() (*dmt.TypeDefnList, error) { 511 defn := &dmt.TypeDefnList{} 512 tok, err := p.peekToken() 513 if err != nil { 514 return nil, err 515 } 516 if tok == "nullable" { 517 defn.ValueNullable = &globalTrue 518 p.consumePeeked() 519 } 520 521 defn.ValueType, err = p.typeNameOrInlineDefn() 522 if err != nil { 523 return nil, err 524 } 525 526 if err := p.consumeRequired("]"); err != nil { 527 return defn, err 528 } 529 530 // TODO: repr 531 return defn, nil 532 } 533 534 func (p *parser) typeMap() (*dmt.TypeDefnMap, error) { 535 defn := &dmt.TypeDefnMap{} 536 537 var err error 538 defn.KeyType, err = p.consumeName() 539 if err != nil { 540 return nil, err 541 } 542 if err := p.consumeRequired(":"); err != nil { 543 return defn, err 544 } 545 546 tok, err := p.peekToken() 547 if err != nil { 548 return nil, err 549 } 550 if tok == "nullable" { 551 defn.ValueNullable = &globalTrue 552 p.consumePeeked() 553 } 554 555 defn.ValueType, err = p.typeNameOrInlineDefn() 556 if err != nil { 557 return nil, err 558 } 559 560 if err := p.consumeRequired("}"); err != nil { 561 return defn, err 562 } 563 564 return defn, nil 565 } 566 567 func (p *parser) typeUnion() (*dmt.TypeDefnUnion, error) { 568 defn := &dmt.TypeDefnUnion{} 569 var reprKeys []string 570 571 for { 572 tok, err := p.consumeToken() 573 if err != nil { 574 return nil, err 575 } 576 if tok == "}" { 577 break 578 } 579 if tok != "|" { 580 return nil, p.errf("expected %q or %q, got %q", "}", "|", tok) 581 } 582 var member dmt.UnionMember 583 nameOrInline, err := p.typeNameOrInlineDefn() 584 if err != nil { 585 return nil, err 586 } 587 588 if nameOrInline.TypeName != nil { 589 member.TypeName = nameOrInline.TypeName 590 } else { 591 if nameOrInline.InlineDefn.TypeDefnLink != nil { 592 member.UnionMemberInlineDefn = &dmt.UnionMemberInlineDefn{TypeDefnLink: nameOrInline.InlineDefn.TypeDefnLink} 593 } else { 594 return nil, p.errf("expected a name or inline link, got neither") 595 } 596 } 597 defn.Members = append(defn.Members, member) 598 599 key, err := p.consumeToken() 600 if err != nil { 601 return nil, err 602 } 603 reprKeys = append(reprKeys, key) 604 } 605 if err := p.consumeRequired("representation"); err != nil { 606 return nil, err 607 } 608 reprName, err := p.consumeName() 609 if err != nil { 610 return nil, err 611 } 612 switch reprName { 613 case "keyed": 614 repr := &dmt.UnionRepresentation_Keyed{} 615 for i, keyStr := range reprKeys { 616 key, err := strconv.Unquote(keyStr) 617 if err != nil { 618 return nil, p.forwardError(err) 619 } 620 mapAppend(repr, key, defn.Members[i]) 621 } 622 defn.Representation.UnionRepresentation_Keyed = repr 623 case "kinded": 624 repr := &dmt.UnionRepresentation_Kinded{} 625 // TODO: verify keys are valid kinds? enum should do it for us? 626 for i, key := range reprKeys { 627 mapAppend(repr, key, defn.Members[i]) 628 } 629 defn.Representation.UnionRepresentation_Kinded = repr 630 case "stringprefix": 631 repr := &dmt.UnionRepresentation_StringPrefix{ 632 Prefixes: dmt.Map__String__TypeName{ 633 Values: map[string]string{}, 634 }, 635 } 636 for i, key := range reprKeys { 637 // unquote prefix string 638 if len(key) < 2 || key[0] != '"' || key[len(key)-1] != '"' { 639 return nil, p.errf("invalid stringprefix %q", key) 640 } 641 key = key[1 : len(key)-1] 642 643 // add prefix to prefixes map 644 repr.Prefixes.Keys = append(repr.Prefixes.Keys, key) 645 repr.Prefixes.Values[key] = *defn.Members[i].TypeName 646 } 647 defn.Representation.UnionRepresentation_StringPrefix = repr 648 case "inline": 649 optMap, err := p.consumeStringMap() 650 if err != nil { 651 return nil, err 652 } 653 discriminantKey, hasDiscriminantKey := optMap["discriminantKey"] 654 if !hasDiscriminantKey { 655 return nil, p.errf("no discriminantKey value provided for inline repr") 656 } 657 repr := &dmt.UnionRepresentation_Inline{ 658 DiscriminantKey: discriminantKey, 659 DiscriminantTable: dmt.Map__String__TypeName{ 660 Values: map[string]string{}, 661 }, 662 } 663 // TODO: verify member types all have map representation 664 for i, qkey := range reprKeys { 665 key, err := strconv.Unquote(qkey) 666 if err != nil { 667 return nil, fmt.Errorf("invalid discriminant key %q: %w", key, err) 668 } 669 repr.DiscriminantTable.Keys = append(repr.DiscriminantTable.Keys, key) 670 repr.DiscriminantTable.Values[key] = *defn.Members[i].TypeName 671 } 672 defn.Representation.UnionRepresentation_Inline = repr 673 default: 674 return nil, p.errf("TODO: union repr %q", reprName) 675 } 676 return defn, nil 677 } 678 679 func (p *parser) typeEnum() (*dmt.TypeDefnEnum, error) { 680 defn := &dmt.TypeDefnEnum{} 681 var reprKeys []string 682 683 for { 684 tok, err := p.consumeToken() 685 if err != nil { 686 return nil, err 687 } 688 if tok == "}" { 689 break 690 } 691 if tok != "|" { 692 return nil, p.errf("expected %q or %q, got %q", "}", "|", tok) 693 } 694 name, err := p.consumeToken() 695 if err != nil { 696 return nil, err 697 } 698 defn.Members = append(defn.Members, name) 699 700 if tok, err := p.peekToken(); err == nil && tok == "(" { 701 p.consumePeeked() 702 key, err := p.consumeToken() 703 if err != nil { 704 return nil, err 705 } 706 reprKeys = append(reprKeys, key) 707 if err := p.consumeRequired(")"); err != nil { 708 return defn, err 709 } 710 } else { 711 reprKeys = append(reprKeys, "") 712 } 713 } 714 715 reprName := "string" // default repr 716 if tok, err := p.peekToken(); err == nil && tok == "representation" { 717 p.consumePeeked() 718 name, err := p.consumeName() 719 if err != nil { 720 return nil, err 721 } 722 reprName = name 723 } 724 switch reprName { 725 case "string": 726 repr := &dmt.EnumRepresentation_String{} 727 for i, key := range reprKeys { 728 if key == "" { 729 continue // no key; defaults to the name 730 } 731 if key[0] != '"' { 732 return nil, p.errf("enum string representation used with non-string key: %s", key) 733 } 734 unquoted, err := strconv.Unquote(key) 735 if err != nil { 736 return nil, p.forwardError(err) 737 } 738 mapAppend(repr, defn.Members[i], unquoted) 739 } 740 defn.Representation.EnumRepresentation_String = repr 741 case "int": 742 repr := &dmt.EnumRepresentation_Int{} 743 for i, key := range reprKeys { 744 if key[0] != '"' { 745 return nil, p.errf("enum int representation used with non-string key: %s", key) 746 } 747 unquoted, err := strconv.Unquote(key) 748 if err != nil { 749 return nil, p.forwardError(err) 750 } 751 parsed, err := strconv.Atoi(unquoted) 752 if err != nil { 753 return nil, p.forwardError(err) 754 } 755 mapAppend(repr, defn.Members[i], parsed) 756 } 757 defn.Representation.EnumRepresentation_Int = repr 758 default: 759 return nil, p.errf("unknown enum repr: %q", reprName) 760 } 761 return defn, nil 762 }