github.com/hamba/avro@v1.8.0/schema_parse.go (about)

     1  package avro
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"math"
     8  	"path/filepath"
     9  	"strings"
    10  
    11  	jsoniter "github.com/json-iterator/go"
    12  )
    13  
    14  var (
    15  	schemaReserved = []string{
    16  		"doc", "fields", "items", "name", "namespace", "size", "symbols",
    17  		"values", "type", "aliases", "logicalType", "precision", "scale",
    18  	}
    19  	fieldReserved = []string{"default", "doc", "name", "order", "type", "aliases"}
    20  )
    21  
    22  // DefaultSchemaCache is the default cache for schemas.
    23  var DefaultSchemaCache = &SchemaCache{}
    24  
    25  // Parse parses a schema string.
    26  func Parse(schema string) (Schema, error) {
    27  	return ParseWithCache(schema, "", DefaultSchemaCache)
    28  }
    29  
    30  // ParseWithCache parses a schema string using the given namespace and  schema cache.
    31  func ParseWithCache(schema, namespace string, cache *SchemaCache) (Schema, error) {
    32  	var json interface{}
    33  	if err := jsoniter.Unmarshal([]byte(schema), &json); err != nil {
    34  		json = schema
    35  	}
    36  
    37  	return parseType(namespace, json, cache)
    38  }
    39  
    40  // MustParse parses a schema string, panicing if there is an error.
    41  func MustParse(schema string) Schema {
    42  	parsed, err := Parse(schema)
    43  	if err != nil {
    44  		panic(err)
    45  	}
    46  
    47  	return parsed
    48  }
    49  
    50  // ParseFiles parses the schemas in the files, in the order they appear, returning the last schema.
    51  //
    52  // This is useful when your schemas rely on other schemas.
    53  func ParseFiles(paths ...string) (Schema, error) {
    54  	var schema Schema
    55  	for _, path := range paths {
    56  		s, err := ioutil.ReadFile(filepath.Clean(path))
    57  		if err != nil {
    58  			return nil, err
    59  		}
    60  
    61  		schema, err = Parse(string(s))
    62  		if err != nil {
    63  			return nil, err
    64  		}
    65  	}
    66  
    67  	return schema, nil
    68  }
    69  
    70  func parseType(namespace string, v interface{}, cache *SchemaCache) (Schema, error) {
    71  	switch val := v.(type) {
    72  	case nil:
    73  		return &NullSchema{}, nil
    74  
    75  	case string:
    76  		return parsePrimitiveType(namespace, val, cache)
    77  
    78  	case map[string]interface{}:
    79  		return parseComplexType(namespace, val, cache)
    80  
    81  	case []interface{}:
    82  		return parseUnion(namespace, val, cache)
    83  	}
    84  
    85  	return nil, fmt.Errorf("avro: unknown type: %v", v)
    86  }
    87  
    88  func parsePrimitiveType(namespace, s string, cache *SchemaCache) (Schema, error) {
    89  	typ := Type(s)
    90  	switch typ {
    91  	case Null:
    92  		return &NullSchema{}, nil
    93  
    94  	case String, Bytes, Int, Long, Float, Double, Boolean:
    95  		return parsePrimitive(typ, nil)
    96  
    97  	default:
    98  		schema := cache.Get(fullName(namespace, s))
    99  		if schema != nil {
   100  			return schema, nil
   101  		}
   102  
   103  		return nil, fmt.Errorf("avro: unknown type: %s", s)
   104  	}
   105  }
   106  
   107  func parseComplexType(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) {
   108  	if val, ok := m["type"].([]interface{}); ok {
   109  		return parseUnion(namespace, val, cache)
   110  	}
   111  
   112  	str, ok := m["type"].(string)
   113  	if !ok {
   114  		return nil, fmt.Errorf("avro: unknown type: %+v", m)
   115  	}
   116  	typ := Type(str)
   117  
   118  	switch typ {
   119  	case Null:
   120  		return &NullSchema{}, nil
   121  
   122  	case String, Bytes, Int, Long, Float, Double, Boolean:
   123  		return parsePrimitive(typ, m)
   124  
   125  	case Record, Error:
   126  		return parseRecord(typ, namespace, m, cache)
   127  
   128  	case Enum:
   129  		return parseEnum(namespace, m, cache)
   130  
   131  	case Array:
   132  		return parseArray(namespace, m, cache)
   133  
   134  	case Map:
   135  		return parseMap(namespace, m, cache)
   136  
   137  	case Fixed:
   138  		return parseFixed(namespace, m, cache)
   139  
   140  	default:
   141  		return parseType(namespace, string(typ), cache)
   142  	}
   143  }
   144  
   145  func parsePrimitive(typ Type, m map[string]interface{}) (Schema, error) {
   146  	logical := parsePrimitiveLogicalType(typ, m)
   147  
   148  	prim := NewPrimitiveSchema(typ, logical)
   149  
   150  	for k, v := range m {
   151  		prim.AddProp(k, v)
   152  	}
   153  
   154  	return prim, nil
   155  }
   156  
   157  func parsePrimitiveLogicalType(typ Type, m map[string]interface{}) LogicalSchema {
   158  	if m == nil {
   159  		return nil
   160  	}
   161  
   162  	lt, ok := m["logicalType"].(string)
   163  	if !ok {
   164  		return nil
   165  	}
   166  
   167  	ltyp := LogicalType(lt)
   168  	if (typ == String && ltyp == UUID) ||
   169  		(typ == Int && ltyp == Date) ||
   170  		(typ == Int && ltyp == TimeMillis) ||
   171  		(typ == Long && ltyp == TimeMicros) ||
   172  		(typ == Long && ltyp == TimestampMillis) ||
   173  		(typ == Long && ltyp == TimestampMicros) {
   174  		return NewPrimitiveLogicalSchema(ltyp)
   175  	}
   176  
   177  	if typ == Bytes && ltyp == Decimal {
   178  		return parseDecimalLogicalType(-1, m)
   179  	}
   180  
   181  	return nil
   182  }
   183  
   184  func parseRecord(typ Type, namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) {
   185  	name, newNamespace, err := resolveFullName(m)
   186  	if err != nil {
   187  		return nil, err
   188  	}
   189  	if newNamespace != "" {
   190  		namespace = newNamespace
   191  	}
   192  
   193  	fs, ok := m["fields"].([]interface{})
   194  	if !ok {
   195  		return nil, errors.New("avro: record must have an array of fields")
   196  	}
   197  	fields := make([]*Field, len(fs))
   198  
   199  	var rec *RecordSchema
   200  	switch typ {
   201  	case Record:
   202  		rec, err = NewRecordSchema(name, namespace, fields)
   203  	case Error:
   204  		rec, err = NewErrorRecordSchema(name, namespace, fields)
   205  	}
   206  	if err != nil {
   207  		return nil, err
   208  	}
   209  
   210  	doc := resolveDoc(m)
   211  	rec.AddDoc(doc)
   212  
   213  	cache.Add(rec.FullName(), NewRefSchema(rec))
   214  
   215  	for k, v := range m {
   216  		rec.AddProp(k, v)
   217  	}
   218  
   219  	for i, f := range fs {
   220  		field, err := parseField(namespace, f, cache)
   221  		if err != nil {
   222  			return nil, err
   223  		}
   224  
   225  		fields[i] = field
   226  	}
   227  
   228  	return rec, nil
   229  }
   230  
   231  func parseField(namespace string, v interface{}, cache *SchemaCache) (*Field, error) {
   232  	m, ok := v.(map[string]interface{})
   233  	if !ok {
   234  		return nil, fmt.Errorf("avro: invalid field: %+v", v)
   235  	}
   236  
   237  	name, err := resolveName(m)
   238  	if err != nil {
   239  		return nil, err
   240  	}
   241  
   242  	if _, ok := m["type"]; !ok {
   243  		return nil, errors.New("avro: field requires a type")
   244  	}
   245  	typ, err := parseType(namespace, m["type"], cache)
   246  	if err != nil {
   247  		return nil, err
   248  	}
   249  
   250  	def, ok := m["default"]
   251  	if !ok {
   252  		def = NoDefault
   253  	}
   254  
   255  	field, err := NewField(name, typ, def)
   256  	if err != nil {
   257  		return nil, err
   258  	}
   259  
   260  	doc := resolveDoc(m)
   261  	field.AddDoc(doc)
   262  
   263  	for k, v := range m {
   264  		field.AddProp(k, v)
   265  	}
   266  
   267  	return field, nil
   268  }
   269  
   270  func parseEnum(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) {
   271  	name, newNamespace, err := resolveFullName(m)
   272  	if err != nil {
   273  		return nil, err
   274  	}
   275  	if newNamespace != "" {
   276  		namespace = newNamespace
   277  	}
   278  
   279  	syms, ok := m["symbols"].([]interface{})
   280  	if !ok {
   281  		return nil, errors.New("avro: enum must have a non-empty array of symbols")
   282  	}
   283  
   284  	symbols := make([]string, len(syms))
   285  	for i, sym := range syms {
   286  		str, ok := sym.(string)
   287  		if !ok {
   288  			return nil, fmt.Errorf("avro: invalid symbol: %+v", sym)
   289  		}
   290  
   291  		symbols[i] = str
   292  	}
   293  
   294  	enum, err := NewEnumSchema(name, namespace, symbols)
   295  	if err != nil {
   296  		return nil, err
   297  	}
   298  
   299  	cache.Add(enum.FullName(), enum)
   300  
   301  	for k, v := range m {
   302  		enum.AddProp(k, v)
   303  	}
   304  
   305  	return enum, nil
   306  }
   307  
   308  func parseArray(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) {
   309  	items, ok := m["items"]
   310  	if !ok {
   311  		return nil, errors.New("avro: array must have an items key")
   312  	}
   313  
   314  	schema, err := parseType(namespace, items, cache)
   315  	if err != nil {
   316  		return nil, err
   317  	}
   318  
   319  	arr := NewArraySchema(schema)
   320  
   321  	for k, v := range m {
   322  		arr.AddProp(k, v)
   323  	}
   324  
   325  	return arr, nil
   326  }
   327  
   328  func parseMap(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) {
   329  	values, ok := m["values"]
   330  	if !ok {
   331  		return nil, errors.New("avro: map must have an values key")
   332  	}
   333  
   334  	schema, err := parseType(namespace, values, cache)
   335  	if err != nil {
   336  		return nil, err
   337  	}
   338  
   339  	ms := NewMapSchema(schema)
   340  
   341  	for k, v := range m {
   342  		ms.AddProp(k, v)
   343  	}
   344  
   345  	return ms, nil
   346  }
   347  
   348  func parseUnion(namespace string, v []interface{}, cache *SchemaCache) (Schema, error) {
   349  	var err error
   350  	types := make([]Schema, len(v))
   351  	for i := range v {
   352  		types[i], err = parseType(namespace, v[i], cache)
   353  		if err != nil {
   354  			return nil, err
   355  		}
   356  	}
   357  
   358  	return NewUnionSchema(types)
   359  }
   360  
   361  func parseFixed(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) {
   362  	name, newNamespace, err := resolveFullName(m)
   363  	if err != nil {
   364  		return nil, err
   365  	}
   366  	if newNamespace != "" {
   367  		namespace = newNamespace
   368  	}
   369  
   370  	size, ok := m["size"].(float64)
   371  	if !ok {
   372  		return nil, errors.New("avro: fixed must have a size")
   373  	}
   374  
   375  	logical := parseFixedLogicalType(int(size), m)
   376  
   377  	fixed, err := NewFixedSchema(name, namespace, int(size), logical)
   378  	if err != nil {
   379  		return nil, err
   380  	}
   381  
   382  	cache.Add(fixed.FullName(), fixed)
   383  
   384  	for k, v := range m {
   385  		fixed.AddProp(k, v)
   386  	}
   387  
   388  	return fixed, nil
   389  }
   390  
   391  func parseFixedLogicalType(size int, m map[string]interface{}) LogicalSchema {
   392  	lt, ok := m["logicalType"].(string)
   393  	if !ok {
   394  		return nil
   395  	}
   396  
   397  	ltyp := LogicalType(lt)
   398  	if ltyp == Duration && size == 12 {
   399  		return NewPrimitiveLogicalSchema(Duration)
   400  	}
   401  
   402  	if ltyp == Decimal {
   403  		return parseDecimalLogicalType(size, m)
   404  	}
   405  
   406  	return nil
   407  }
   408  
   409  func parseDecimalLogicalType(size int, m map[string]interface{}) LogicalSchema {
   410  	prec, ok := m["precision"].(float64)
   411  	if !ok || prec <= 0 {
   412  		return nil
   413  	}
   414  
   415  	if size > 0 {
   416  		maxPrecision := math.Round(math.Floor(math.Log10(2) * (8*float64(size) - 1)))
   417  		if prec > maxPrecision {
   418  			return nil
   419  		}
   420  	}
   421  
   422  	scale, _ := m["scale"].(float64)
   423  	if scale < 0 {
   424  		return nil
   425  	}
   426  
   427  	// Scale may not be bigger than precision
   428  	if scale > prec {
   429  		return nil
   430  	}
   431  
   432  	return NewDecimalLogicalSchema(int(prec), int(scale))
   433  }
   434  
   435  func fullName(namespace, name string) string {
   436  	if len(namespace) == 0 || strings.ContainsRune(name, '.') {
   437  		return name
   438  	}
   439  
   440  	return namespace + "." + name
   441  }
   442  
   443  func resolveName(m map[string]interface{}) (string, error) {
   444  	name, ok := m["name"].(string)
   445  	if !ok {
   446  		return "", errors.New("avro: name key required")
   447  	}
   448  
   449  	return name, nil
   450  }
   451  
   452  func resolveDoc(m map[string]interface{}) string {
   453  	doc, ok := m["doc"].(string)
   454  	if !ok {
   455  		return ""
   456  	}
   457  	return doc
   458  }
   459  
   460  func resolveFullName(m map[string]interface{}) (string, string, error) {
   461  	name, err := resolveName(m)
   462  	if err != nil {
   463  		return "", "", err
   464  	}
   465  
   466  	namespace, ok := m["namespace"].(string)
   467  	if !ok {
   468  		return name, "", nil
   469  	}
   470  	if namespace == "" {
   471  		return "", "", errors.New("avro: namespace key must be non-empty or omitted")
   472  	}
   473  
   474  	return name, namespace, nil
   475  }