// Source: github.com/willyham/dosa@v2.3.1-0.20171024181418-1e446d37ee71+incompatible/schema/avro/avro.go

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

20 21 package avro 22 23 import ( 24 "encoding/json" 25 "fmt" 26 27 gv "github.com/elodina/go-avro" 28 "github.com/pkg/errors" 29 "github.com/uber-go/dosa" 30 ) 31 32 const ( 33 clusteringKeys = "ClusteringKeys" 34 partitionKeys = "PartitionKeys" 35 nameKey = "Name" 36 descendingKey = "Descending" 37 dosaTypeKey = "dosaType" 38 indexKeys = "Indexes" 39 indexKeyField = "Key" 40 ) 41 42 // map from dosa type to avro type 43 var avroTypes = map[dosa.Type]gv.Schema{ 44 dosa.String: &gv.StringSchema{}, 45 dosa.Blob: &gv.BytesSchema{}, 46 dosa.Bool: &gv.BooleanSchema{}, 47 dosa.Double: &gv.DoubleSchema{}, 48 dosa.Int32: &gv.IntSchema{}, 49 dosa.Int64: &gv.LongSchema{}, 50 dosa.Timestamp: &gv.LongSchema{}, 51 dosa.TUUID: &gv.StringSchema{}, 52 } 53 54 // Record implements Schema and represents Avro record type. 55 type Record struct { 56 Name string `json:"name,omitempty"` 57 Namespace string `json:"namespace,omitempty"` 58 Doc string `json:"doc,omitempty"` 59 Aliases []string `json:"aliases,omitempty"` 60 Properties map[string]interface{} `json:"meta, omitempty"` 61 Fields []*Field `json:"fields"` 62 } 63 64 // String returns a JSON representation of RecordSchema. 65 func (s *Record) String() string { 66 bytes, err := json.MarshalIndent(s, "", " ") 67 if err != nil { 68 panic(err) 69 } 70 71 return string(bytes) 72 } 73 74 // MarshalJSON serializes the given schema as JSON. 75 func (s *Record) MarshalJSON() ([]byte, error) { 76 m := make(map[string]interface{}) 77 78 m["type"] = "record" 79 80 if len(s.Name) > 0 { 81 m["name"] = s.Name 82 } 83 84 if len(s.Namespace) > 0 { 85 m["namespace"] = s.Namespace 86 } 87 88 if len(s.Doc) > 0 { 89 m["doc"] = s.Doc 90 } 91 if len(s.Aliases) > 0 { 92 m["aliases"] = s.Aliases 93 } 94 95 m["fields"] = s.Fields 96 for k, v := range s.Properties { 97 m[k] = v 98 } 99 return json.Marshal(m) 100 } 101 102 // Field represents a schema field for Avro record. 
103 type Field struct { 104 Name string `json:"name,omitempty"` 105 Doc string `json:"doc,omitempty"` 106 Default interface{} `json:"default"` 107 Type gv.Schema `json:"type,omitempty"` 108 Properties map[string]string 109 } 110 111 // MarshalJSON serializes the given schema field as JSON. 112 func (s *Field) MarshalJSON() ([]byte, error) { 113 m := make(map[string]interface{}) 114 if s.Type != nil { 115 m["type"] = s.Type 116 } 117 if len(s.Name) > 0 { 118 m["name"] = s.Name 119 } 120 121 if s.Type.Type() == gv.Null || (s.Type.Type() == gv.Union && s.Type.(*gv.UnionSchema).Types[0].Type() == gv.Null) || s.Default != nil { 122 m["default"] = s.Default 123 } 124 if len(s.Doc) > 0 { 125 m["doc"] = s.Doc 126 } 127 128 for k, v := range s.Properties { 129 m[k] = v 130 } 131 return json.Marshal(m) 132 } 133 134 // ToAvro converts dosa entity definition to avro schema 135 func ToAvro(fqn dosa.FQN, ed *dosa.EntityDefinition) ([]byte, error) { 136 fields := make([]*Field, len(ed.Columns)) 137 for i, c := range ed.Columns { 138 props := make(map[string]string) 139 props[dosaTypeKey] = c.Type.String() 140 // TODO add tags 141 fields[i] = &Field{ 142 Name: c.Name, 143 Type: avroTypes[c.Type], 144 Properties: props, 145 Default: nil, 146 } 147 } 148 149 meta := make(map[string]interface{}) 150 meta[partitionKeys] = ed.Key.PartitionKeys 151 meta[clusteringKeys] = ed.Key.ClusteringKeys 152 meta[indexKeys] = ed.Indexes 153 154 ar := &Record{ 155 Name: ed.Name, 156 Namespace: fqn.String(), 157 Fields: fields, 158 Properties: meta, 159 } 160 161 bs, err := ar.MarshalJSON() 162 if err != nil { 163 return nil, errors.Wrap(err, "failed to serialize avro schema into json") 164 } 165 return bs, nil 166 } 167 168 // FromAvro converts avro schema to dosa entity definition 169 func FromAvro(data string) (*dosa.EntityDefinition, error) { 170 schema, err := gv.ParseSchema(data) 171 if err != nil { 172 return nil, errors.Wrap(err, "failed to parse avro schema from json") 173 } 174 175 pks, 
err := decodePartitionKeysFromSchema(schema) 176 if err != nil { 177 return nil, errors.Wrap(err, "failed to parse avro schema for partition keys") 178 } 179 180 cks, err := decodeClusteringKeysFromSchema(schema) 181 if err != nil { 182 return nil, errors.Wrap(err, "failed to parse avro schema for clustering keys") 183 } 184 185 rs, ok := schema.(*gv.RecordSchema) 186 if !ok { 187 return nil, errors.New("fail to parse avro schema") 188 } 189 190 cols, err := decodeFields(rs.Fields) 191 if err != nil { 192 return nil, errors.Wrap(err, "failed to parse avro schema for fields") 193 } 194 195 // index 196 idx, err := decodeIndexes(schema) 197 if err != nil { 198 return nil, errors.Wrap(err, "failed to parse avro schema for index") 199 } 200 201 return &dosa.EntityDefinition{ 202 Name: schema.GetName(), 203 Key: &dosa.PrimaryKey{ 204 PartitionKeys: pks, 205 ClusteringKeys: cks, 206 }, 207 Columns: cols, 208 Indexes: idx, 209 }, nil 210 } 211 212 func decodeFields(fields []*gv.SchemaField) ([]*dosa.ColumnDefinition, error) { 213 cols := make([]*dosa.ColumnDefinition, len(fields)) 214 for i, f := range fields { 215 dosaType, ok := f.Prop(dosaTypeKey) 216 if !ok { 217 return nil, fmt.Errorf("cannot find %s key in the field", dosaTypeKey) 218 } 219 220 t, ok := dosaType.(string) 221 if !ok { 222 return nil, fmt.Errorf("failed to convert %s to string", dosaTypeKey) 223 } 224 225 col := &dosa.ColumnDefinition{ 226 Name: f.Name, 227 Type: dosa.FromString(t), 228 } 229 cols[i] = col 230 } 231 return cols, nil 232 } 233 234 func decodePartitionKeysFromSchema(schema gv.Schema) ([]string, error) { 235 if prop, ok := schema.Prop(partitionKeys); ok { 236 return decodePartitionKeys(prop) 237 } 238 return nil, fmt.Errorf("cannot find %s key in the schema", partitionKeys) 239 } 240 241 func decodePartitionKeys(prop interface{}) ([]string, error) { 242 realPks, ok := prop.([]interface{}) 243 if !ok { 244 return nil, fmt.Errorf("failed to parse partition keys: %v", prop) 245 } 246 247 
pks := make([]string, len(realPks)) 248 for i, v := range realPks { 249 pks[i], ok = v.(string) 250 if !ok { 251 return nil, fmt.Errorf("failed to parse partition keys: %v", prop) 252 } 253 } 254 return pks, nil 255 } 256 257 func decodeIndexes(schema gv.Schema) (map[string]*dosa.IndexDefinition, error) { 258 idx := make(map[string]*dosa.IndexDefinition) 259 if prop, ok := schema.Prop(indexKeys); ok { 260 realIndexes, ok := prop.(map[string]interface{}) 261 if !ok { 262 return nil, fmt.Errorf("failed to parse index: %v", prop) 263 } 264 265 for key, index := range realIndexes { 266 realIndex, ok := index.(map[string]interface{}) 267 if !ok { 268 return nil, fmt.Errorf("failed to parse index: %v", index) 269 } 270 271 realKey, ok := realIndex[indexKeyField].(map[string]interface{}) 272 if !ok { 273 return nil, fmt.Errorf("failed to parse index key: %v", index) 274 } 275 276 pks, err := decodePartitionKeys(realKey[partitionKeys]) 277 if err != nil { 278 return nil, errors.Wrap(err, "failed to parse avro index partition keys") 279 } 280 281 cks, err := decodeClusteringKeys(realKey[clusteringKeys]) 282 if err != nil { 283 return nil, errors.Wrap(err, "failed to parse avro index clustering keys") 284 } 285 286 idx[key] = &dosa.IndexDefinition{ 287 Key: &dosa.PrimaryKey{ 288 PartitionKeys: pks, 289 ClusteringKeys: cks, 290 }, 291 } 292 } 293 return idx, nil 294 } 295 return nil, fmt.Errorf("cannot find %s index in the schema", indexKeys) 296 } 297 298 func decodeClusteringKeysFromSchema(schema gv.Schema) ([]*dosa.ClusteringKey, error) { 299 if prop, ok := schema.Prop(clusteringKeys); ok { 300 return decodeClusteringKeys(prop) 301 } 302 return nil, fmt.Errorf("cannot find %s key in the schema", clusteringKeys) 303 } 304 305 func decodeClusteringKeys(prop interface{}) ([]*dosa.ClusteringKey, error) { 306 // cluster key is optional 307 if prop == nil { 308 return nil, nil 309 } 310 311 realCks, ok := prop.([]interface{}) 312 if !ok { 313 return nil, fmt.Errorf("failed to 
parse clustering keys: %v", prop) 314 } 315 316 cks := make([]*dosa.ClusteringKey, len(realCks)) 317 for i, v := range realCks { 318 pair, ok := v.(map[string]interface{}) 319 if !ok { 320 return nil, fmt.Errorf("failed to parse clustering key: %v", v) 321 } 322 323 name, ok := pair[nameKey] 324 if !ok { 325 return nil, fmt.Errorf("cannot find %s key in %v", nameKey, pair) 326 } 327 ck := &dosa.ClusteringKey{} 328 ck.Name, ok = name.(string) 329 if !ok { 330 return nil, fmt.Errorf("failed to convert %v to string", name) 331 } 332 333 descending, ok := pair[descendingKey] 334 if !ok { 335 return nil, fmt.Errorf("cannot find %s key in %v", descendingKey, pair) 336 } 337 ck.Descending, ok = descending.(bool) 338 if !ok { 339 return nil, fmt.Errorf("failed to convert %v to bool", descending) 340 } 341 342 cks[i] = ck 343 } 344 345 return cks, nil 346 }