// github.com/aacfactory/avro@v1.2.12/internal/base/schema.go

package base

import (
	"bytes"
	"crypto/md5"
	"crypto/sha256"
	"errors"
	"fmt"
	"github.com/aacfactory/avro/internal/hashs/crc64"
	"golang.org/x/sync/singleflight"
	"hash"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"

	jsoniter "github.com/json-iterator/go"
)

// nullDefault is the sentinel that Field.Default translates back to nil.
var nullDefault = struct{}{}

var (
	// schemaReserved lists the attribute names that are filtered out of the
	// custom properties of a schema.
	schemaReserved = []string{
		"doc", "fields", "items", "name", "namespace", "size", "symbols",
		"values", "type", "aliases", "logicalType", "precision", "scale",
	}
	// fieldReserved lists the attribute names that are filtered out of the
	// custom properties of a record field.
	fieldReserved = []string{"default", "doc", "name", "order", "type", "aliases"}
)

// Type is a schema type.
type Type string

// Schema type constants.
const (
	Record  Type = "record"
	Error   Type = "error"
	Ref     Type = "<ref>"
	Enum    Type = "enum"
	Array   Type = "array"
	Map     Type = "map"
	Union   Type = "union"
	Fixed   Type = "fixed"
	String  Type = "string"
	Bytes   Type = "bytes"
	Int     Type = "int"
	Long    Type = "long"
	Float   Type = "float"
	Double  Type = "double"
	Boolean Type = "boolean"
	Null    Type = "null"
	Raw     Type = "raw"
)

// Order is a field order.
type Order string

// Field orders.
const (
	Asc    Order = "ascending"
	Desc   Order = "descending"
	Ignore Order = "ignore"
)

// LogicalType is a schema logical type.
type LogicalType string

// Schema logical type constants.
const (
	Decimal         LogicalType = "decimal"
	UUID            LogicalType = "uuid"
	Date            LogicalType = "date"
	TimeMillis      LogicalType = "time-millis"
	TimeMicros      LogicalType = "time-micros"
	TimestampMillis LogicalType = "timestamp-millis"
	TimestampMicros LogicalType = "timestamp-micros"
	Duration        LogicalType = "duration"
)

// FingerprintType is a fingerprinting algorithm.
type FingerprintType string

// Fingerprint type constants.
84 const ( 85 CRC64Avro FingerprintType = "CRC64-AVRO" 86 MD5 FingerprintType = "MD5" 87 SHA256 FingerprintType = "SHA256" 88 ) 89 90 var fingerprinters = map[FingerprintType]hash.Hash{ 91 CRC64Avro: crc64.New(), 92 MD5: md5.New(), 93 SHA256: sha256.New(), 94 } 95 96 // SchemaCache is a cache of schemas. 97 type SchemaCache struct { 98 cache sync.Map // map[string]Schema 99 processingGroup singleflight.Group 100 processingCache sync.Map 101 } 102 103 // Add adds a schema to the cache with the given name. 104 func (c *SchemaCache) Add(name string, schema Schema) { 105 c.cache.Store(name, schema) 106 } 107 108 // Get returns the Schema if it exists. 109 func (c *SchemaCache) Get(name string) Schema { 110 if v, ok := c.cache.Load(name); ok { 111 return v.(Schema) 112 } 113 return nil 114 } 115 116 func (c *SchemaCache) addProcessing(name string, schema Schema) { 117 c.processingCache.Store(name, schema) 118 } 119 120 func (c *SchemaCache) getProcessing(name string) Schema { 121 if v, ok := c.cache.Load(name); ok { 122 return v.(Schema) 123 } 124 if v, ok := c.processingCache.Load(name); ok { 125 return v.(Schema) 126 } 127 return nil 128 } 129 130 // Schemas is a slice of Schemas. 131 type Schemas []Schema 132 133 // Get gets a schema and position by type or name if it is a named schema. 134 func (s Schemas) Get(name string) (Schema, int) { 135 for i, schema := range s { 136 if schemaTypeName(schema) == name { 137 return schema, i 138 } 139 } 140 141 return nil, -1 142 } 143 144 // Schema represents an Avro schema. 145 type Schema interface { 146 // Type returns the type of the schema. 147 Type() Type 148 149 // String returns the canonical form of the schema. 150 String() string 151 152 // Fingerprint returns the SHA256 fingerprint of the schema. 153 Fingerprint() [32]byte 154 155 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 
156 FingerprintUsing(FingerprintType) ([]byte, error) 157 } 158 159 // LogicalSchema represents an Avro schema with a logical type. 160 type LogicalSchema interface { 161 // Type returns the type of the logical schema. 162 Type() LogicalType 163 164 // String returns the canonical form of the logical schema. 165 String() string 166 } 167 168 // PropertySchema represents a schema with properties. 169 type PropertySchema interface { 170 // Prop gets a property from the schema. 171 Prop(string) any 172 } 173 174 // NamedSchema represents a schema with a name. 175 type NamedSchema interface { 176 Schema 177 PropertySchema 178 179 // Name returns the name of the schema. 180 Name() string 181 182 // Namespace returns the namespace of a schema. 183 Namespace() string 184 185 // FullName returns the full qualified name of a schema. 186 FullName() string 187 } 188 189 // LogicalTypeSchema represents a schema that can contain a logical type. 190 type LogicalTypeSchema interface { 191 // Logical returns the logical schema or nil. 192 Logical() LogicalSchema 193 } 194 195 type name struct { 196 name string 197 namespace string 198 full string 199 aliases []string 200 } 201 202 func newName(n, ns string, aliases []string) (name, error) { 203 if idx := strings.LastIndexByte(n, '.'); idx > -1 { 204 ns = n[:idx] 205 n = n[idx+1:] 206 } 207 208 full := n 209 if ns != "" { 210 full = ns + "." 
+ n 211 } 212 213 for _, part := range strings.Split(full, ".") { 214 if err := validateName(part); err != nil { 215 return name{}, fmt.Errorf("avro: invalid name part %q in name %q: %w", full, part, err) 216 } 217 } 218 219 a := make([]string, 0, len(aliases)) 220 for _, alias := range aliases { 221 if !strings.Contains(alias, ".") { 222 if err := validateName(alias); err != nil { 223 return name{}, fmt.Errorf("avro: invalid name %q: %w", alias, err) 224 } 225 if ns == "" { 226 a = append(a, alias) 227 continue 228 } 229 a = append(a, ns+"."+alias) 230 continue 231 } 232 233 for _, part := range strings.Split(alias, ".") { 234 if err := validateName(part); err != nil { 235 return name{}, fmt.Errorf("avro: invalid name part %q in name %q: %w", full, part, err) 236 } 237 } 238 a = append(a, alias) 239 } 240 241 return name{ 242 name: n, 243 namespace: ns, 244 full: full, 245 aliases: a, 246 }, nil 247 } 248 249 // Name returns the name of a schema. 250 func (n name) Name() string { 251 return n.name 252 } 253 254 // Namespace returns the namespace of a schema. 255 func (n name) Namespace() string { 256 return n.namespace 257 } 258 259 // FullName returns the fully qualified name of a schema. 260 func (n name) FullName() string { 261 return n.full 262 } 263 264 // Aliases returns the fully qualified aliases of a schema. 265 func (n name) Aliases() []string { 266 return n.aliases 267 } 268 269 type fingerprinter struct { 270 fingerprint atomic.Value // [32]byte 271 cache sync.Map // map[FingerprintType][]byte 272 } 273 274 // Fingerprint returns the SHA256 fingerprint of the schema. 275 func (f *fingerprinter) Fingerprint(stringer fmt.Stringer) [32]byte { 276 if v := f.fingerprint.Load(); v != nil { 277 return v.([32]byte) 278 } 279 280 fingerprint := sha256.Sum256([]byte(stringer.String())) 281 f.fingerprint.Store(fingerprint) 282 return fingerprint 283 } 284 285 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 
286 func (f *fingerprinter) FingerprintUsing(typ FingerprintType, stringer fmt.Stringer) ([]byte, error) { 287 if v, ok := f.cache.Load(typ); ok { 288 return v.([]byte), nil 289 } 290 291 h, ok := fingerprinters[typ] 292 if !ok { 293 return nil, fmt.Errorf("avro: unknown fingerprint algorithm %s", typ) 294 } 295 296 h.Reset() 297 _, _ = h.Write([]byte(stringer.String())) 298 fingerprint := h.Sum(make([]byte, 0, h.Size())) 299 f.cache.Store(typ, fingerprint) 300 return fingerprint, nil 301 } 302 303 type properties struct { 304 props map[string]any 305 } 306 307 func newProperties(props map[string]any, res []string) properties { 308 p := properties{props: map[string]any{}} 309 for k, v := range props { 310 if isReserved(res, k) { 311 continue 312 } 313 p.props[k] = v 314 } 315 return p 316 } 317 318 func isReserved(res []string, k string) bool { 319 for _, r := range res { 320 if k == r { 321 return true 322 } 323 } 324 return false 325 } 326 327 // Prop gets a property from the schema. 328 func (p properties) Prop(name string) any { 329 if p.props == nil { 330 return nil 331 } 332 333 return p.props[name] 334 } 335 336 func (p properties) marshalPropertiesToJSON(buf *bytes.Buffer) error { 337 sortedPropertyKeys := make([]string, 0, len(p.props)) 338 for k := range p.props { 339 sortedPropertyKeys = append(sortedPropertyKeys, k) 340 } 341 sort.Strings(sortedPropertyKeys) 342 for _, k := range sortedPropertyKeys { 343 vv, err := jsoniter.Marshal(p.props[k]) 344 if err != nil { 345 return err 346 } 347 buf.WriteString(`,"` + k + `":`) 348 buf.Write(vv) 349 } 350 return nil 351 } 352 353 type schemaConfig struct { 354 aliases []string 355 doc string 356 def any 357 order Order 358 props map[string]any 359 } 360 361 // SchemaOption is a function that sets a schema option. 362 type SchemaOption func(*schemaConfig) 363 364 // WithAliases sets the aliases on a schema. 
365 func WithAliases(aliases []string) SchemaOption { 366 return func(opts *schemaConfig) { 367 opts.aliases = aliases 368 } 369 } 370 371 // WithDoc sets the doc on a schema. 372 func WithDoc(doc string) SchemaOption { 373 return func(opts *schemaConfig) { 374 opts.doc = doc 375 } 376 } 377 378 // WithDefault sets the default on a schema. 379 func WithDefault(def any) SchemaOption { 380 return func(opts *schemaConfig) { 381 opts.def = def 382 } 383 } 384 385 // WithOrder sets the order on a schema. 386 func WithOrder(order Order) SchemaOption { 387 return func(opts *schemaConfig) { 388 opts.order = order 389 } 390 } 391 392 // WithProps sets the properties on a schema. 393 func WithProps(props map[string]any) SchemaOption { 394 return func(opts *schemaConfig) { 395 opts.props = props 396 } 397 } 398 399 // PrimitiveSchema is an Avro primitive type schema. 400 type PrimitiveSchema struct { 401 properties 402 fingerprinter 403 404 typ Type 405 logical LogicalSchema 406 } 407 408 // NewPrimitiveSchema creates a new PrimitiveSchema. 409 func NewPrimitiveSchema(t Type, l LogicalSchema, opts ...SchemaOption) *PrimitiveSchema { 410 var cfg schemaConfig 411 for _, opt := range opts { 412 opt(&cfg) 413 } 414 415 return &PrimitiveSchema{ 416 properties: newProperties(cfg.props, schemaReserved), 417 typ: t, 418 logical: l, 419 } 420 } 421 422 // Type returns the type of the schema. 423 func (s *PrimitiveSchema) Type() Type { 424 return s.typ 425 } 426 427 // Logical returns the logical schema or nil. 428 func (s *PrimitiveSchema) Logical() LogicalSchema { 429 return s.logical 430 } 431 432 // String returns the canonical form of the schema. 433 func (s *PrimitiveSchema) String() string { 434 if s.logical == nil { 435 return `"` + string(s.typ) + `"` 436 } 437 438 return `{"type":"` + string(s.typ) + `",` + s.logical.String() + `}` 439 } 440 441 // MarshalJSON marshals the schema to json. 
442 func (s *PrimitiveSchema) MarshalJSON() ([]byte, error) { 443 if s.logical == nil && len(s.props) == 0 { 444 return jsoniter.Marshal(s.typ) 445 } 446 447 buf := new(bytes.Buffer) 448 buf.WriteString(`{"type":"` + string(s.typ) + `"`) 449 if s.logical != nil { 450 buf.WriteString(`,"logicalType":"` + string(s.logical.Type()) + `"`) 451 if d, ok := s.logical.(*DecimalLogicalSchema); ok { 452 buf.WriteString(`,"precision":` + strconv.Itoa(d.prec)) 453 if d.scale > 0 { 454 buf.WriteString(`,"scale":` + strconv.Itoa(d.scale)) 455 } 456 } 457 } 458 if err := s.marshalPropertiesToJSON(buf); err != nil { 459 return nil, err 460 } 461 buf.WriteString("}") 462 return buf.Bytes(), nil 463 } 464 465 // Fingerprint returns the SHA256 fingerprint of the schema. 466 func (s *PrimitiveSchema) Fingerprint() [32]byte { 467 return s.fingerprinter.Fingerprint(s) 468 } 469 470 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 471 func (s *PrimitiveSchema) FingerprintUsing(typ FingerprintType) ([]byte, error) { 472 return s.fingerprinter.FingerprintUsing(typ, s) 473 } 474 475 // RecordSchema is an Avro record type schema. 476 type RecordSchema struct { 477 name 478 properties 479 fingerprinter 480 481 isError bool 482 fields []*Field 483 doc string 484 } 485 486 // NewRecordSchema creates a new record schema instance. 487 func NewRecordSchema(name, namespace string, fields []*Field, opts ...SchemaOption) (*RecordSchema, error) { 488 var cfg schemaConfig 489 for _, opt := range opts { 490 opt(&cfg) 491 } 492 493 n, err := newName(name, namespace, cfg.aliases) 494 if err != nil { 495 return nil, err 496 } 497 498 return &RecordSchema{ 499 name: n, 500 properties: newProperties(cfg.props, schemaReserved), 501 fields: fields, 502 doc: cfg.doc, 503 }, nil 504 } 505 506 // NewErrorRecordSchema creates a new error record schema instance. 
507 func NewErrorRecordSchema(name, namespace string, fields []*Field, opts ...SchemaOption) (*RecordSchema, error) { 508 rec, err := NewRecordSchema(name, namespace, fields, opts...) 509 if err != nil { 510 return nil, err 511 } 512 513 rec.isError = true 514 515 return rec, nil 516 } 517 518 // Type returns the type of the schema. 519 func (s *RecordSchema) Type() Type { 520 return Record 521 } 522 523 // Doc returns the documentation of a record. 524 func (s *RecordSchema) Doc() string { 525 return s.doc 526 } 527 528 // IsError determines is this is an error record. 529 func (s *RecordSchema) IsError() bool { 530 return s.isError 531 } 532 533 // Fields returns the fields of a record. 534 func (s *RecordSchema) Fields() []*Field { 535 return s.fields 536 } 537 538 // String returns the canonical form of the schema. 539 func (s *RecordSchema) String() string { 540 typ := "record" 541 if s.isError { 542 typ = "error" 543 } 544 545 fields := "" 546 for _, f := range s.fields { 547 fields += f.String() + "," 548 } 549 if len(fields) > 0 { 550 fields = fields[:len(fields)-1] 551 } 552 553 return `{"name":"` + s.FullName() + `","type":"` + typ + `","fields":[` + fields + `]}` 554 } 555 556 // MarshalJSON marshals the schema to json. 
557 func (s *RecordSchema) MarshalJSON() ([]byte, error) { 558 buf := new(bytes.Buffer) 559 buf.WriteString(`{"name":"` + s.full + `"`) 560 if len(s.aliases) > 0 { 561 aliasesJSON, err := jsoniter.Marshal(s.aliases) 562 if err != nil { 563 return nil, err 564 } 565 buf.WriteString(`,"aliases":`) 566 buf.Write(aliasesJSON) 567 } 568 if s.doc != "" { 569 buf.WriteString(`,"doc":"` + s.doc + `"`) 570 } 571 if s.isError { 572 buf.WriteString(`,"type":"error"`) 573 } else { 574 buf.WriteString(`,"type":"record"`) 575 } 576 fieldsJSON, err := jsoniter.Marshal(s.fields) 577 if err != nil { 578 return nil, err 579 } 580 buf.WriteString(`,"fields":`) 581 buf.Write(fieldsJSON) 582 if err := s.marshalPropertiesToJSON(buf); err != nil { 583 return nil, err 584 } 585 buf.WriteString("}") 586 return buf.Bytes(), nil 587 } 588 589 // Fingerprint returns the SHA256 fingerprint of the schema. 590 func (s *RecordSchema) Fingerprint() [32]byte { 591 return s.fingerprinter.Fingerprint(s) 592 } 593 594 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 595 func (s *RecordSchema) FingerprintUsing(typ FingerprintType) ([]byte, error) { 596 return s.fingerprinter.FingerprintUsing(typ, s) 597 } 598 599 // Field is an Avro record type field. 600 type Field struct { 601 properties 602 603 name string 604 aliases []string 605 doc string 606 typ Schema 607 hasDef bool 608 def any 609 order Order 610 } 611 612 type noDef struct{} 613 614 // NoDefault is used when no default exists for a field. 615 var NoDefault = noDef{} 616 617 // NewField creates a new field instance. 
618 func NewField(name string, typ Schema, opts ...SchemaOption) (*Field, error) { 619 cfg := schemaConfig{def: NoDefault} 620 for _, opt := range opts { 621 opt(&cfg) 622 } 623 624 if err := validateName(name); err != nil { 625 return nil, err 626 } 627 for _, a := range cfg.aliases { 628 if err := validateName(a); err != nil { 629 return nil, err 630 } 631 } 632 633 switch cfg.order { 634 case "": 635 cfg.order = Asc 636 case Asc, Desc, Ignore: 637 default: 638 return nil, fmt.Errorf("avro: field %q order %q is invalid", name, cfg.order) 639 } 640 641 f := &Field{ 642 properties: newProperties(cfg.props, fieldReserved), 643 name: name, 644 aliases: cfg.aliases, 645 doc: cfg.doc, 646 typ: typ, 647 order: cfg.order, 648 } 649 650 if cfg.def != NoDefault { 651 def, err := validateDefault(name, typ, cfg.def) 652 if err != nil { 653 return nil, err 654 } 655 f.def = def 656 f.hasDef = true 657 } 658 659 return f, nil 660 } 661 662 // Name returns the name of a field. 663 func (f *Field) Name() string { 664 return f.name 665 } 666 667 // Aliases return the field aliases. 668 func (f *Field) Aliases() []string { 669 return f.aliases 670 } 671 672 // Type returns the schema of a field. 673 func (f *Field) Type() Schema { 674 return f.typ 675 } 676 677 // HasDefault determines if the field has a default value. 678 func (f *Field) HasDefault() bool { 679 return f.hasDef 680 } 681 682 // Default returns the default of a field or nil. 683 // 684 // The only time a nil default is valid is for a Null Type. 685 func (f *Field) Default() any { 686 if f.def == nullDefault { 687 return nil 688 } 689 690 return f.def 691 } 692 693 // Doc returns the documentation of a field. 694 func (f *Field) Doc() string { 695 return f.doc 696 } 697 698 // Order returns the field order. 699 func (f *Field) Order() Order { 700 return f.order 701 } 702 703 // String returns the canonical form of a field. 
704 func (f *Field) String() string { 705 return `{"name":"` + f.name + `","type":` + f.typ.String() + `}` 706 } 707 708 // MarshalJSON marshals the schema to json. 709 func (f *Field) MarshalJSON() ([]byte, error) { 710 buf := new(bytes.Buffer) 711 buf.WriteString(`{"name":"` + f.name + `"`) 712 if len(f.aliases) > 0 { 713 aliasesJSON, err := jsoniter.Marshal(f.aliases) 714 if err != nil { 715 return nil, err 716 } 717 buf.WriteString(`,"aliases":`) 718 buf.Write(aliasesJSON) 719 } 720 if f.doc != "" { 721 buf.WriteString(`,"doc":"` + f.doc + `"`) 722 } 723 typeJSON, err := jsoniter.Marshal(f.typ) 724 if err != nil { 725 return nil, err 726 } 727 buf.WriteString(`,"type":`) 728 buf.Write(typeJSON) 729 if f.hasDef { 730 defaultValueJSON, err := jsoniter.Marshal(f.Default()) 731 if err != nil { 732 return nil, err 733 } 734 buf.WriteString(`,"default":`) 735 buf.Write(defaultValueJSON) 736 } 737 if f.order != "" && f.order != Asc { 738 buf.WriteString(`,"order":"` + string(f.order) + `"`) 739 } 740 if err := f.marshalPropertiesToJSON(buf); err != nil { 741 return nil, err 742 } 743 buf.WriteString("}") 744 return buf.Bytes(), nil 745 } 746 747 // EnumSchema is an Avro enum type schema. 748 type EnumSchema struct { 749 name 750 properties 751 fingerprinter 752 753 symbols []string 754 def string 755 756 doc string 757 } 758 759 // NewEnumSchema creates a new enum schema instance. 
760 func NewEnumSchema(name, namespace string, symbols []string, opts ...SchemaOption) (*EnumSchema, error) { 761 var cfg schemaConfig 762 for _, opt := range opts { 763 opt(&cfg) 764 } 765 766 n, err := newName(name, namespace, cfg.aliases) 767 if err != nil { 768 return nil, err 769 } 770 771 if len(symbols) == 0 { 772 return nil, errors.New("avro: enum must have a non-empty array of symbols") 773 } 774 for _, sym := range symbols { 775 if err = validateName(sym); err != nil { 776 return nil, fmt.Errorf("avro: invalid symnol %q", sym) 777 } 778 } 779 780 var def string 781 if d, ok := cfg.def.(string); ok && d != "" { 782 if !hasSymbol(symbols, d) { 783 return nil, fmt.Errorf("avro: symbol default %q must be a symbol", d) 784 } 785 def = d 786 } 787 788 return &EnumSchema{ 789 name: n, 790 properties: newProperties(cfg.props, schemaReserved), 791 symbols: symbols, 792 def: def, 793 doc: cfg.doc, 794 }, nil 795 } 796 797 func hasSymbol(symbols []string, sym string) bool { 798 for _, s := range symbols { 799 if s == sym { 800 return true 801 } 802 } 803 return false 804 } 805 806 // Type returns the type of the schema. 807 func (s *EnumSchema) Type() Type { 808 return Enum 809 } 810 811 // Doc returns the schema doc. 812 func (s *EnumSchema) Doc() string { 813 return s.doc 814 } 815 816 // Symbols returns the symbols of an enum. 817 func (s *EnumSchema) Symbols() []string { 818 return s.symbols 819 } 820 821 // Default returns the default of an enum or an empty string. 822 func (s *EnumSchema) Default() string { 823 return s.def 824 } 825 826 // String returns the canonical form of the schema. 827 func (s *EnumSchema) String() string { 828 symbols := "" 829 for _, sym := range s.symbols { 830 symbols += `"` + sym + `",` 831 } 832 if len(symbols) > 0 { 833 symbols = symbols[:len(symbols)-1] 834 } 835 836 return `{"name":"` + s.FullName() + `","type":"enum","symbols":[` + symbols + `]}` 837 } 838 839 // MarshalJSON marshals the schema to json. 
840 func (s *EnumSchema) MarshalJSON() ([]byte, error) { 841 buf := new(bytes.Buffer) 842 buf.WriteString(`{"name":"` + s.full + `"`) 843 if len(s.aliases) > 0 { 844 aliasesJSON, err := jsoniter.Marshal(s.aliases) 845 if err != nil { 846 return nil, err 847 } 848 buf.WriteString(`,"aliases":`) 849 buf.Write(aliasesJSON) 850 } 851 if s.doc != "" { 852 buf.WriteString(`,"doc":"` + s.doc + `"`) 853 } 854 buf.WriteString(`,"type":"enum"`) 855 symbolsJSON, err := jsoniter.Marshal(s.symbols) 856 if err != nil { 857 return nil, err 858 } 859 buf.WriteString(`,"symbols":`) 860 buf.Write(symbolsJSON) 861 if s.def != "" { 862 buf.WriteString(`,"default":"` + s.def + `"`) 863 } 864 if err := s.marshalPropertiesToJSON(buf); err != nil { 865 return nil, err 866 } 867 buf.WriteString("}") 868 return buf.Bytes(), nil 869 } 870 871 // Fingerprint returns the SHA256 fingerprint of the schema. 872 func (s *EnumSchema) Fingerprint() [32]byte { 873 return s.fingerprinter.Fingerprint(s) 874 } 875 876 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 877 func (s *EnumSchema) FingerprintUsing(typ FingerprintType) ([]byte, error) { 878 return s.fingerprinter.FingerprintUsing(typ, s) 879 } 880 881 // ArraySchema is an Avro array type schema. 882 type ArraySchema struct { 883 properties 884 fingerprinter 885 886 items Schema 887 } 888 889 // NewArraySchema creates an array schema instance. 890 func NewArraySchema(items Schema, opts ...SchemaOption) *ArraySchema { 891 var cfg schemaConfig 892 for _, opt := range opts { 893 opt(&cfg) 894 } 895 896 return &ArraySchema{ 897 properties: newProperties(cfg.props, schemaReserved), 898 items: items, 899 } 900 } 901 902 // Type returns the type of the schema. 903 func (s *ArraySchema) Type() Type { 904 return Array 905 } 906 907 // Items returns the items schema of an array. 908 func (s *ArraySchema) Items() Schema { 909 return s.items 910 } 911 912 // String returns the canonical form of the schema. 
913 func (s *ArraySchema) String() string { 914 return `{"type":"array","items":` + s.items.String() + `}` 915 } 916 917 // MarshalJSON marshals the schema to json. 918 func (s *ArraySchema) MarshalJSON() ([]byte, error) { 919 buf := new(bytes.Buffer) 920 buf.WriteString(`{"type":"array"`) 921 itemsJSON, err := jsoniter.Marshal(s.items) 922 if err != nil { 923 return nil, err 924 } 925 buf.WriteString(`,"items":`) 926 buf.Write(itemsJSON) 927 if err = s.marshalPropertiesToJSON(buf); err != nil { 928 return nil, err 929 } 930 buf.WriteString("}") 931 return buf.Bytes(), nil 932 } 933 934 // Fingerprint returns the SHA256 fingerprint of the schema. 935 func (s *ArraySchema) Fingerprint() [32]byte { 936 return s.fingerprinter.Fingerprint(s) 937 } 938 939 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 940 func (s *ArraySchema) FingerprintUsing(typ FingerprintType) ([]byte, error) { 941 return s.fingerprinter.FingerprintUsing(typ, s) 942 } 943 944 // MapSchema is an Avro map type schema. 945 type MapSchema struct { 946 properties 947 fingerprinter 948 949 values Schema 950 } 951 952 // NewMapSchema creates a map schema instance. 953 func NewMapSchema(values Schema, opts ...SchemaOption) *MapSchema { 954 var cfg schemaConfig 955 for _, opt := range opts { 956 opt(&cfg) 957 } 958 959 return &MapSchema{ 960 properties: newProperties(cfg.props, schemaReserved), 961 values: values, 962 } 963 } 964 965 // Type returns the type of the schema. 966 func (s *MapSchema) Type() Type { 967 return Map 968 } 969 970 // Values returns the values schema of a map. 971 func (s *MapSchema) Values() Schema { 972 return s.values 973 } 974 975 // String returns the canonical form of the schema. 976 func (s *MapSchema) String() string { 977 return `{"type":"map","values":` + s.values.String() + `}` 978 } 979 980 // MarshalJSON marshals the schema to json. 
981 func (s *MapSchema) MarshalJSON() ([]byte, error) { 982 buf := new(bytes.Buffer) 983 buf.WriteString(`{"type":"map"`) 984 valuesJSON, err := jsoniter.Marshal(s.values) 985 if err != nil { 986 return nil, err 987 } 988 buf.WriteString(`,"values":`) 989 buf.Write(valuesJSON) 990 if err := s.marshalPropertiesToJSON(buf); err != nil { 991 return nil, err 992 } 993 buf.WriteString("}") 994 return buf.Bytes(), nil 995 } 996 997 // Fingerprint returns the SHA256 fingerprint of the schema. 998 func (s *MapSchema) Fingerprint() [32]byte { 999 return s.fingerprinter.Fingerprint(s) 1000 } 1001 1002 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 1003 func (s *MapSchema) FingerprintUsing(typ FingerprintType) ([]byte, error) { 1004 return s.fingerprinter.FingerprintUsing(typ, s) 1005 } 1006 1007 // UnionSchema is an Avro union type schema. 1008 type UnionSchema struct { 1009 fingerprinter 1010 1011 types Schemas 1012 } 1013 1014 // NewUnionSchema creates a union schema instance. 1015 func NewUnionSchema(types []Schema) (*UnionSchema, error) { 1016 seen := map[string]bool{} 1017 for _, schema := range types { 1018 if schema.Type() == Union { 1019 return nil, errors.New("avro: union type cannot be a union") 1020 } 1021 1022 strType := schemaTypeName(schema) 1023 1024 if seen[strType] { 1025 return nil, errors.New("avro: union type must be unique") 1026 } 1027 seen[strType] = true 1028 } 1029 1030 return &UnionSchema{ 1031 types: types, 1032 }, nil 1033 } 1034 1035 // Type returns the type of the schema. 1036 func (s *UnionSchema) Type() Type { 1037 return Union 1038 } 1039 1040 // Types returns the types of a union. 1041 func (s *UnionSchema) Types() Schemas { 1042 return s.types 1043 } 1044 1045 // Nullable returns the Schema if the union is nullable, otherwise nil. 
1046 func (s *UnionSchema) Nullable() bool { 1047 if len(s.types) != 2 || s.types[0].Type() != Null && s.types[1].Type() != Null { 1048 return false 1049 } 1050 1051 return true 1052 } 1053 1054 // Indices returns the index of the null and type schemas for a 1055 // nullable schema. For non-nullable schemas 0 is returned for 1056 // both. 1057 func (s *UnionSchema) Indices() (null, typ int) { 1058 if !s.Nullable() { 1059 return 0, 0 1060 } 1061 if s.types[0].Type() == Null { 1062 return 0, 1 1063 } 1064 return 1, 0 1065 } 1066 1067 // String returns the canonical form of the schema. 1068 func (s *UnionSchema) String() string { 1069 types := "" 1070 for _, typ := range s.types { 1071 types += typ.String() + "," 1072 } 1073 if len(types) > 0 { 1074 types = types[:len(types)-1] 1075 } 1076 1077 return `[` + types + `]` 1078 } 1079 1080 // MarshalJSON marshals the schema to json. 1081 func (s *UnionSchema) MarshalJSON() ([]byte, error) { 1082 return jsoniter.Marshal(s.types) 1083 } 1084 1085 // Fingerprint returns the SHA256 fingerprint of the schema. 1086 func (s *UnionSchema) Fingerprint() [32]byte { 1087 return s.fingerprinter.Fingerprint(s) 1088 } 1089 1090 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 1091 func (s *UnionSchema) FingerprintUsing(typ FingerprintType) ([]byte, error) { 1092 return s.fingerprinter.FingerprintUsing(typ, s) 1093 } 1094 1095 // FixedSchema is an Avro fixed type schema. 1096 type FixedSchema struct { 1097 name 1098 properties 1099 fingerprinter 1100 1101 size int 1102 logical LogicalSchema 1103 } 1104 1105 // NewFixedSchema creates a new fixed schema instance. 
1106 func NewFixedSchema( 1107 name, namespace string, 1108 size int, 1109 logical LogicalSchema, 1110 opts ...SchemaOption, 1111 ) (*FixedSchema, error) { 1112 var cfg schemaConfig 1113 for _, opt := range opts { 1114 opt(&cfg) 1115 } 1116 1117 n, err := newName(name, namespace, cfg.aliases) 1118 if err != nil { 1119 return nil, err 1120 } 1121 1122 return &FixedSchema{ 1123 name: n, 1124 properties: newProperties(cfg.props, schemaReserved), 1125 size: size, 1126 logical: logical, 1127 }, nil 1128 } 1129 1130 // Type returns the type of the schema. 1131 func (s *FixedSchema) Type() Type { 1132 return Fixed 1133 } 1134 1135 // Size returns the number of bytes of the fixed schema. 1136 func (s *FixedSchema) Size() int { 1137 return s.size 1138 } 1139 1140 // Logical returns the logical schema or nil. 1141 func (s *FixedSchema) Logical() LogicalSchema { 1142 return s.logical 1143 } 1144 1145 // String returns the canonical form of the schema. 1146 func (s *FixedSchema) String() string { 1147 size := strconv.Itoa(s.size) 1148 1149 var logical string 1150 if s.logical != nil { 1151 logical = "," + s.logical.String() 1152 } 1153 1154 return `{"name":"` + s.FullName() + `","type":"fixed","size":` + size + logical + `}` 1155 } 1156 1157 // MarshalJSON marshals the schema to json. 
1158 func (s *FixedSchema) MarshalJSON() ([]byte, error) { 1159 buf := new(bytes.Buffer) 1160 buf.WriteString(`{"name":"` + s.full + `"`) 1161 if len(s.aliases) > 0 { 1162 aliasesJSON, err := jsoniter.Marshal(s.aliases) 1163 if err != nil { 1164 return nil, err 1165 } 1166 buf.WriteString(`,"aliases":`) 1167 buf.Write(aliasesJSON) 1168 } 1169 buf.WriteString(`,"type":"fixed"`) 1170 buf.WriteString(`,"size":` + strconv.Itoa(s.size)) 1171 if s.logical != nil { 1172 buf.WriteString(`,"logicalType":"` + string(s.logical.Type()) + `"`) 1173 if d, ok := s.logical.(*DecimalLogicalSchema); ok { 1174 buf.WriteString(`,"precision":` + strconv.Itoa(d.prec)) 1175 if d.scale > 0 { 1176 buf.WriteString(`,"scale":` + strconv.Itoa(d.scale)) 1177 } 1178 } 1179 } 1180 if err := s.marshalPropertiesToJSON(buf); err != nil { 1181 return nil, err 1182 } 1183 buf.WriteString("}") 1184 return buf.Bytes(), nil 1185 } 1186 1187 // Fingerprint returns the SHA256 fingerprint of the schema. 1188 func (s *FixedSchema) Fingerprint() [32]byte { 1189 return s.fingerprinter.Fingerprint(s) 1190 } 1191 1192 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 1193 func (s *FixedSchema) FingerprintUsing(typ FingerprintType) ([]byte, error) { 1194 return s.fingerprinter.FingerprintUsing(typ, s) 1195 } 1196 1197 // NullSchema is an Avro null type schema. 1198 type NullSchema struct { 1199 fingerprinter 1200 } 1201 1202 // Type returns the type of the schema. 1203 func (s *NullSchema) Type() Type { 1204 return Null 1205 } 1206 1207 // String returns the canonical form of the schema. 1208 func (s *NullSchema) String() string { 1209 return `"null"` 1210 } 1211 1212 // MarshalJSON marshals the schema to json. 1213 func (s *NullSchema) MarshalJSON() ([]byte, error) { 1214 return []byte(`"null"`), nil 1215 } 1216 1217 // Fingerprint returns the SHA256 fingerprint of the schema. 
1218 func (s *NullSchema) Fingerprint() [32]byte { 1219 return s.fingerprinter.Fingerprint(s) 1220 } 1221 1222 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 1223 func (s *NullSchema) FingerprintUsing(typ FingerprintType) ([]byte, error) { 1224 return s.fingerprinter.FingerprintUsing(typ, s) 1225 } 1226 1227 // RefSchema is a reference to a named Avro schema. 1228 type RefSchema struct { 1229 actual NamedSchema 1230 } 1231 1232 // NewRefSchema creates a ref schema instance. 1233 func NewRefSchema(schema NamedSchema) *RefSchema { 1234 return &RefSchema{ 1235 actual: schema, 1236 } 1237 } 1238 1239 // Type returns the type of the schema. 1240 func (s *RefSchema) Type() Type { 1241 return Ref 1242 } 1243 1244 // Schema returns the schema being referenced. 1245 func (s *RefSchema) Schema() NamedSchema { 1246 return s.actual 1247 } 1248 1249 // String returns the canonical form of the schema. 1250 func (s *RefSchema) String() string { 1251 return `"` + s.actual.FullName() + `"` 1252 } 1253 1254 // MarshalJSON marshals the schema to json. 1255 func (s *RefSchema) MarshalJSON() ([]byte, error) { 1256 return []byte(`"` + s.actual.FullName() + `"`), nil 1257 } 1258 1259 // Fingerprint returns the SHA256 fingerprint of the schema. 1260 func (s *RefSchema) Fingerprint() [32]byte { 1261 return s.actual.Fingerprint() 1262 } 1263 1264 // FingerprintUsing returns the fingerprint of the schema using the given algorithm or an error. 1265 func (s *RefSchema) FingerprintUsing(typ FingerprintType) ([]byte, error) { 1266 return s.actual.FingerprintUsing(typ) 1267 } 1268 1269 // PrimitiveLogicalSchema is a logical type with no properties. 1270 type PrimitiveLogicalSchema struct { 1271 typ LogicalType 1272 } 1273 1274 // NewPrimitiveLogicalSchema creates a new primitive logical schema instance. 
1275 func NewPrimitiveLogicalSchema(typ LogicalType) *PrimitiveLogicalSchema { 1276 return &PrimitiveLogicalSchema{ 1277 typ: typ, 1278 } 1279 } 1280 1281 // Type returns the type of the logical schema. 1282 func (s *PrimitiveLogicalSchema) Type() LogicalType { 1283 return s.typ 1284 } 1285 1286 // String returns the canonical form of the logical schema. 1287 func (s *PrimitiveLogicalSchema) String() string { 1288 return `"logicalType":"` + string(s.typ) + `"` 1289 } 1290 1291 // DecimalLogicalSchema is a decimal logical type. 1292 type DecimalLogicalSchema struct { 1293 prec int 1294 scale int 1295 } 1296 1297 // NewDecimalLogicalSchema creates a new decimal logical schema instance. 1298 func NewDecimalLogicalSchema(prec, scale int) *DecimalLogicalSchema { 1299 return &DecimalLogicalSchema{ 1300 prec: prec, 1301 scale: scale, 1302 } 1303 } 1304 1305 // Type returns the type of the logical schema. 1306 func (s *DecimalLogicalSchema) Type() LogicalType { 1307 return Decimal 1308 } 1309 1310 // Precision returns the precision of the decimal logical schema. 1311 func (s *DecimalLogicalSchema) Precision() int { 1312 return s.prec 1313 } 1314 1315 // Scale returns the scale of the decimal logical schema. 1316 func (s *DecimalLogicalSchema) Scale() int { 1317 return s.scale 1318 } 1319 1320 // String returns the canonical form of the logical schema. 
1321 func (s *DecimalLogicalSchema) String() string { 1322 var scale string 1323 if s.scale > 0 { 1324 scale = `,"scale":` + strconv.Itoa(s.scale) 1325 } 1326 precision := strconv.Itoa(s.prec) 1327 1328 return `"logicalType":"` + string(Decimal) + `","precision":` + precision + scale 1329 } 1330 1331 func invalidNameFirstChar(r rune) bool { 1332 return (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') && r != '_' 1333 } 1334 1335 func invalidNameOtherChar(r rune) bool { 1336 return invalidNameFirstChar(r) && (r < '0' || r > '9') 1337 } 1338 1339 func validateName(name string) error { 1340 if name == "" { 1341 return errors.New("name must be a non-empty") 1342 } 1343 1344 if strings.IndexFunc(name[:1], invalidNameFirstChar) > -1 { 1345 return fmt.Errorf("invalid name %s", name) 1346 } 1347 if strings.IndexFunc(name[1:], invalidNameOtherChar) > -1 { 1348 return fmt.Errorf("invalid name %s", name) 1349 } 1350 1351 return nil 1352 } 1353 1354 func validateDefault(name string, schema Schema, def any) (any, error) { 1355 def, ok := isValidDefault(schema, def) 1356 if !ok { 1357 return nil, fmt.Errorf("avro: invalid default for field %s. 
%+v not a %s", name, def, schema.Type()) 1358 } 1359 return def, nil 1360 } 1361 1362 func isValidDefault(schema Schema, def any) (any, bool) { 1363 switch schema.Type() { 1364 case Ref: 1365 ref := schema.(*RefSchema) 1366 return isValidDefault(ref.Schema(), def) 1367 case Null: 1368 return nullDefault, def == nil 1369 case Enum: 1370 v, ok := def.(string) 1371 if !ok || len(v) == 0 { 1372 return def, false 1373 } 1374 1375 var found bool 1376 for _, sym := range schema.(*EnumSchema).symbols { 1377 if def == sym { 1378 found = true 1379 break 1380 } 1381 } 1382 return def, found 1383 case String, Bytes, Fixed: 1384 if _, ok := def.(string); ok { 1385 return def, true 1386 } 1387 case Boolean: 1388 if _, ok := def.(bool); ok { 1389 return def, true 1390 } 1391 case Int: 1392 if i, ok := def.(int8); ok { 1393 return int(i), true 1394 } 1395 if i, ok := def.(int16); ok { 1396 return int(i), true 1397 } 1398 if i, ok := def.(int32); ok { 1399 return int(i), true 1400 } 1401 if _, ok := def.(int); ok { 1402 return def, true 1403 } 1404 if f, ok := def.(float64); ok { 1405 return int(f), true 1406 } 1407 case Long: 1408 if _, ok := def.(int64); ok { 1409 return def, true 1410 } 1411 if f, ok := def.(float64); ok { 1412 return int64(f), true 1413 } 1414 case Float: 1415 if _, ok := def.(float32); ok { 1416 return def, true 1417 } 1418 if f, ok := def.(float64); ok { 1419 return float32(f), true 1420 } 1421 case Double: 1422 if _, ok := def.(float64); ok { 1423 return def, true 1424 } 1425 case Array: 1426 arr, ok := def.([]any) 1427 if !ok { 1428 return nil, false 1429 } 1430 1431 as := schema.(*ArraySchema) 1432 for i, v := range arr { 1433 v, ok := isValidDefault(as.Items(), v) 1434 if !ok { 1435 return nil, false 1436 } 1437 arr[i] = v 1438 } 1439 return arr, true 1440 case Map: 1441 m, ok := def.(map[string]any) 1442 if !ok { 1443 return nil, false 1444 } 1445 1446 ms := schema.(*MapSchema) 1447 for k, v := range m { 1448 v, ok := isValidDefault(ms.Values(), v) 1449 
if !ok { 1450 return nil, false 1451 } 1452 1453 m[k] = v 1454 } 1455 return m, true 1456 case Union: 1457 unionSchema := schema.(*UnionSchema) 1458 return isValidDefault(unionSchema.Types()[0], def) 1459 case Record: 1460 m, ok := def.(map[string]any) 1461 if !ok { 1462 return nil, false 1463 } 1464 1465 for _, field := range schema.(*RecordSchema).Fields() { 1466 fieldDef := field.Default() 1467 if newDef, ok := m[field.Name()]; ok { 1468 fieldDef = newDef 1469 } 1470 1471 v, ok := isValidDefault(field.Type(), fieldDef) 1472 if !ok { 1473 return nil, false 1474 } 1475 1476 m[field.Name()] = v 1477 } 1478 return m, true 1479 } 1480 return nil, false 1481 } 1482 1483 func schemaTypeName(schema Schema) string { 1484 if schema.Type() == Ref { 1485 schema = schema.(*RefSchema).Schema() 1486 } 1487 1488 if n, ok := schema.(NamedSchema); ok { 1489 return n.FullName() 1490 } 1491 1492 sname := string(schema.Type()) 1493 if lt := getLogicalType(schema); lt != "" { 1494 sname += "." + string(lt) 1495 } 1496 return sname 1497 }