github.com/hamba/avro@v1.8.0/schema_parse.go (about) 1 package avro 2 3 import ( 4 "errors" 5 "fmt" 6 "io/ioutil" 7 "math" 8 "path/filepath" 9 "strings" 10 11 jsoniter "github.com/json-iterator/go" 12 ) 13 14 var ( 15 schemaReserved = []string{ 16 "doc", "fields", "items", "name", "namespace", "size", "symbols", 17 "values", "type", "aliases", "logicalType", "precision", "scale", 18 } 19 fieldReserved = []string{"default", "doc", "name", "order", "type", "aliases"} 20 ) 21 22 // DefaultSchemaCache is the default cache for schemas. 23 var DefaultSchemaCache = &SchemaCache{} 24 25 // Parse parses a schema string. 26 func Parse(schema string) (Schema, error) { 27 return ParseWithCache(schema, "", DefaultSchemaCache) 28 } 29 30 // ParseWithCache parses a schema string using the given namespace and schema cache. 31 func ParseWithCache(schema, namespace string, cache *SchemaCache) (Schema, error) { 32 var json interface{} 33 if err := jsoniter.Unmarshal([]byte(schema), &json); err != nil { 34 json = schema 35 } 36 37 return parseType(namespace, json, cache) 38 } 39 40 // MustParse parses a schema string, panicing if there is an error. 41 func MustParse(schema string) Schema { 42 parsed, err := Parse(schema) 43 if err != nil { 44 panic(err) 45 } 46 47 return parsed 48 } 49 50 // ParseFiles parses the schemas in the files, in the order they appear, returning the last schema. 51 // 52 // This is useful when your schemas rely on other schemas. 53 func ParseFiles(paths ...string) (Schema, error) { 54 var schema Schema 55 for _, path := range paths { 56 s, err := ioutil.ReadFile(filepath.Clean(path)) 57 if err != nil { 58 return nil, err 59 } 60 61 schema, err = Parse(string(s)) 62 if err != nil { 63 return nil, err 64 } 65 } 66 67 return schema, nil 68 } 69 70 func parseType(namespace string, v interface{}, cache *SchemaCache) (Schema, error) { 71 switch val := v.(type) { 72 case nil: 73 return &NullSchema{}, nil 74 75 case string: 76 return parsePrimitiveType(namespace, val, cache) 77 78 case map[string]interface{}: 79 return parseComplexType(namespace, val, cache) 80 81 case []interface{}: 82 return parseUnion(namespace, val, cache) 83 } 84 85 return nil, fmt.Errorf("avro: unknown type: %v", v) 86 } 87 88 func parsePrimitiveType(namespace, s string, cache *SchemaCache) (Schema, error) { 89 typ := Type(s) 90 switch typ { 91 case Null: 92 return &NullSchema{}, nil 93 94 case String, Bytes, Int, Long, Float, Double, Boolean: 95 return parsePrimitive(typ, nil) 96 97 default: 98 schema := cache.Get(fullName(namespace, s)) 99 if schema != nil { 100 return schema, nil 101 } 102 103 return nil, fmt.Errorf("avro: unknown type: %s", s) 104 } 105 } 106 107 func parseComplexType(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) { 108 if val, ok := m["type"].([]interface{}); ok { 109 return parseUnion(namespace, val, cache) 110 } 111 112 str, ok := m["type"].(string) 113 if !ok { 114 return nil, fmt.Errorf("avro: unknown type: %+v", m) 115 } 116 typ := Type(str) 117 118 switch typ { 119 case Null: 120 return &NullSchema{}, nil 121 122 case String, Bytes, Int, Long, Float, Double, Boolean: 123 return parsePrimitive(typ, m) 124 125 case Record, Error: 126 return parseRecord(typ, namespace, m, cache) 127 128 case Enum: 129 return parseEnum(namespace, m, cache) 130 131 case Array: 132 return parseArray(namespace, m, cache) 133 134 case Map: 135 return parseMap(namespace, m, cache) 136 137 case Fixed: 138 return parseFixed(namespace, m, cache) 139 140 default: 141 return parseType(namespace, string(typ), cache) 142 } 143 } 144 145 func parsePrimitive(typ Type, m map[string]interface{}) (Schema, error) { 146 logical := parsePrimitiveLogicalType(typ, m) 147 148 prim := NewPrimitiveSchema(typ, logical) 149 150 for k, v := range m { 151 prim.AddProp(k, v) 152 } 153 154 return prim, nil 155 } 156 157 func parsePrimitiveLogicalType(typ Type, m map[string]interface{}) LogicalSchema { 158 if m == nil { 159 return nil 160 } 161 162 lt, ok := m["logicalType"].(string) 163 if !ok { 164 return nil 165 } 166 167 ltyp := LogicalType(lt) 168 if (typ == String && ltyp == UUID) || 169 (typ == Int && ltyp == Date) || 170 (typ == Int && ltyp == TimeMillis) || 171 (typ == Long && ltyp == TimeMicros) || 172 (typ == Long && ltyp == TimestampMillis) || 173 (typ == Long && ltyp == TimestampMicros) { 174 return NewPrimitiveLogicalSchema(ltyp) 175 } 176 177 if typ == Bytes && ltyp == Decimal { 178 return parseDecimalLogicalType(-1, m) 179 } 180 181 return nil 182 } 183 184 func parseRecord(typ Type, namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) { 185 name, newNamespace, err := resolveFullName(m) 186 if err != nil { 187 return nil, err 188 } 189 if newNamespace != "" { 190 namespace = newNamespace 191 } 192 193 fs, ok := m["fields"].([]interface{}) 194 if !ok { 195 return nil, errors.New("avro: record must have an array of fields") 196 } 197 fields := make([]*Field, len(fs)) 198 199 var rec *RecordSchema 200 switch typ { 201 case Record: 202 rec, err = NewRecordSchema(name, namespace, fields) 203 case Error: 204 rec, err = NewErrorRecordSchema(name, namespace, fields) 205 } 206 if err != nil { 207 return nil, err 208 } 209 210 doc := resolveDoc(m) 211 rec.AddDoc(doc) 212 213 cache.Add(rec.FullName(), NewRefSchema(rec)) 214 215 for k, v := range m { 216 rec.AddProp(k, v) 217 } 218 219 for i, f := range fs { 220 field, err := parseField(namespace, f, cache) 221 if err != nil { 222 return nil, err 223 } 224 225 fields[i] = field 226 } 227 228 return rec, nil 229 } 230 231 func parseField(namespace string, v interface{}, cache *SchemaCache) (*Field, error) { 232 m, ok := v.(map[string]interface{}) 233 if !ok { 234 return nil, fmt.Errorf("avro: invalid field: %+v", v) 235 } 236 237 name, err := resolveName(m) 238 if err != nil { 239 return nil, err 240 } 241 242 if _, ok := m["type"]; !ok { 243 return nil, errors.New("avro: field requires a type") 244 } 245 typ, err := parseType(namespace, m["type"], cache) 246 if err != nil { 247 return nil, err 248 } 249 250 def, ok := m["default"] 251 if !ok { 252 def = NoDefault 253 } 254 255 field, err := NewField(name, typ, def) 256 if err != nil { 257 return nil, err 258 } 259 260 doc := resolveDoc(m) 261 field.AddDoc(doc) 262 263 for k, v := range m { 264 field.AddProp(k, v) 265 } 266 267 return field, nil 268 } 269 270 func parseEnum(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) { 271 name, newNamespace, err := resolveFullName(m) 272 if err != nil { 273 return nil, err 274 } 275 if newNamespace != "" { 276 namespace = newNamespace 277 } 278 279 syms, ok := m["symbols"].([]interface{}) 280 if !ok { 281 return nil, errors.New("avro: enum must have a non-empty array of symbols") 282 } 283 284 symbols := make([]string, len(syms)) 285 for i, sym := range syms { 286 str, ok := sym.(string) 287 if !ok { 288 return nil, fmt.Errorf("avro: invalid symbol: %+v", sym) 289 } 290 291 symbols[i] = str 292 } 293 294 enum, err := NewEnumSchema(name, namespace, symbols) 295 if err != nil { 296 return nil, err 297 } 298 299 cache.Add(enum.FullName(), enum) 300 301 for k, v := range m { 302 enum.AddProp(k, v) 303 } 304 305 return enum, nil 306 } 307 308 func parseArray(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) { 309 items, ok := m["items"] 310 if !ok { 311 return nil, errors.New("avro: array must have an items key") 312 } 313 314 schema, err := parseType(namespace, items, cache) 315 if err != nil { 316 return nil, err 317 } 318 319 arr := NewArraySchema(schema) 320 321 for k, v := range m { 322 arr.AddProp(k, v) 323 } 324 325 return arr, nil 326 } 327 328 func parseMap(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) { 329 values, ok := m["values"] 330 if !ok { 331 return nil, errors.New("avro: map must have an values key") 332 } 333 334 schema, err := parseType(namespace, values, cache) 335 if err != nil { 336 return nil, err 337 } 338 339 ms := NewMapSchema(schema) 340 341 for k, v := range m { 342 ms.AddProp(k, v) 343 } 344 345 return ms, nil 346 } 347 348 func parseUnion(namespace string, v []interface{}, cache *SchemaCache) (Schema, error) { 349 var err error 350 types := make([]Schema, len(v)) 351 for i := range v { 352 types[i], err = parseType(namespace, v[i], cache) 353 if err != nil { 354 return nil, err 355 } 356 } 357 358 return NewUnionSchema(types) 359 } 360 361 func parseFixed(namespace string, m map[string]interface{}, cache *SchemaCache) (Schema, error) { 362 name, newNamespace, err := resolveFullName(m) 363 if err != nil { 364 return nil, err 365 } 366 if newNamespace != "" { 367 namespace = newNamespace 368 } 369 370 size, ok := m["size"].(float64) 371 if !ok { 372 return nil, errors.New("avro: fixed must have a size") 373 } 374 375 logical := parseFixedLogicalType(int(size), m) 376 377 fixed, err := NewFixedSchema(name, namespace, int(size), logical) 378 if err != nil { 379 return nil, err 380 } 381 382 cache.Add(fixed.FullName(), fixed) 383 384 for k, v := range m { 385 fixed.AddProp(k, v) 386 } 387 388 return fixed, nil 389 } 390 391 func parseFixedLogicalType(size int, m map[string]interface{}) LogicalSchema { 392 lt, ok := m["logicalType"].(string) 393 if !ok { 394 return nil 395 } 396 397 ltyp := LogicalType(lt) 398 if ltyp == Duration && size == 12 { 399 return NewPrimitiveLogicalSchema(Duration) 400 } 401 402 if ltyp == Decimal { 403 return parseDecimalLogicalType(size, m) 404 } 405 406 return nil 407 } 408 409 func parseDecimalLogicalType(size int, m map[string]interface{}) LogicalSchema { 410 prec, ok := m["precision"].(float64) 411 if !ok || prec <= 0 { 412 return nil 413 } 414 415 if size > 0 { 416 maxPrecision := math.Round(math.Floor(math.Log10(2) * (8*float64(size) - 1))) 417 if prec > maxPrecision { 418 return nil 419 } 420 } 421 422 scale, _ := m["scale"].(float64) 423 if scale < 0 { 424 return nil 425 } 426 427 // Scale may not be bigger than precision 428 if scale > prec { 429 return nil 430 } 431 432 return NewDecimalLogicalSchema(int(prec), int(scale)) 433 } 434 435 func fullName(namespace, name string) string { 436 if len(namespace) == 0 || strings.ContainsRune(name, '.') { 437 return name 438 } 439 440 return namespace + "." + name 441 } 442 443 func resolveName(m map[string]interface{}) (string, error) { 444 name, ok := m["name"].(string) 445 if !ok { 446 return "", errors.New("avro: name key required") 447 } 448 449 return name, nil 450 } 451 452 func resolveDoc(m map[string]interface{}) string { 453 doc, ok := m["doc"].(string) 454 if !ok { 455 return "" 456 } 457 return doc 458 } 459 460 func resolveFullName(m map[string]interface{}) (string, string, error) { 461 name, err := resolveName(m) 462 if err != nil { 463 return "", "", err 464 } 465 466 namespace, ok := m["namespace"].(string) 467 if !ok { 468 return name, "", nil 469 } 470 if namespace == "" { 471 return "", "", errors.New("avro: namespace key must be non-empty or omitted") 472 } 473 474 return name, namespace, nil 475 }