github.com/willyham/dosa@v2.3.1-0.20171024181418-1e446d37ee71+incompatible/schema/avro/avro.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package avro
    22  
    23  import (
    24  	"encoding/json"
    25  	"fmt"
    26  
    27  	gv "github.com/elodina/go-avro"
    28  	"github.com/pkg/errors"
    29  	"github.com/uber-go/dosa"
    30  )
    31  
    32  const (
    33  	clusteringKeys = "ClusteringKeys"
    34  	partitionKeys  = "PartitionKeys"
    35  	nameKey        = "Name"
    36  	descendingKey  = "Descending"
    37  	dosaTypeKey    = "dosaType"
    38  	indexKeys      = "Indexes"
    39  	indexKeyField  = "Key"
    40  )
    41  
    42  // map from dosa type to avro type
    43  var avroTypes = map[dosa.Type]gv.Schema{
    44  	dosa.String:    &gv.StringSchema{},
    45  	dosa.Blob:      &gv.BytesSchema{},
    46  	dosa.Bool:      &gv.BooleanSchema{},
    47  	dosa.Double:    &gv.DoubleSchema{},
    48  	dosa.Int32:     &gv.IntSchema{},
    49  	dosa.Int64:     &gv.LongSchema{},
    50  	dosa.Timestamp: &gv.LongSchema{},
    51  	dosa.TUUID:     &gv.StringSchema{},
    52  }
    53  
    54  // Record implements Schema and represents Avro record type.
    55  type Record struct {
    56  	Name       string                 `json:"name,omitempty"`
    57  	Namespace  string                 `json:"namespace,omitempty"`
    58  	Doc        string                 `json:"doc,omitempty"`
    59  	Aliases    []string               `json:"aliases,omitempty"`
    60  	Properties map[string]interface{} `json:"meta, omitempty"`
    61  	Fields     []*Field               `json:"fields"`
    62  }
    63  
    64  // String returns a JSON representation of RecordSchema.
    65  func (s *Record) String() string {
    66  	bytes, err := json.MarshalIndent(s, "", "    ")
    67  	if err != nil {
    68  		panic(err)
    69  	}
    70  
    71  	return string(bytes)
    72  }
    73  
    74  // MarshalJSON serializes the given schema as JSON.
    75  func (s *Record) MarshalJSON() ([]byte, error) {
    76  	m := make(map[string]interface{})
    77  
    78  	m["type"] = "record"
    79  
    80  	if len(s.Name) > 0 {
    81  		m["name"] = s.Name
    82  	}
    83  
    84  	if len(s.Namespace) > 0 {
    85  		m["namespace"] = s.Namespace
    86  	}
    87  
    88  	if len(s.Doc) > 0 {
    89  		m["doc"] = s.Doc
    90  	}
    91  	if len(s.Aliases) > 0 {
    92  		m["aliases"] = s.Aliases
    93  	}
    94  
    95  	m["fields"] = s.Fields
    96  	for k, v := range s.Properties {
    97  		m[k] = v
    98  	}
    99  	return json.Marshal(m)
   100  }
   101  
   102  // Field represents a schema field for Avro record.
   103  type Field struct {
   104  	Name       string      `json:"name,omitempty"`
   105  	Doc        string      `json:"doc,omitempty"`
   106  	Default    interface{} `json:"default"`
   107  	Type       gv.Schema   `json:"type,omitempty"`
   108  	Properties map[string]string
   109  }
   110  
   111  // MarshalJSON serializes the given schema field as JSON.
   112  func (s *Field) MarshalJSON() ([]byte, error) {
   113  	m := make(map[string]interface{})
   114  	if s.Type != nil {
   115  		m["type"] = s.Type
   116  	}
   117  	if len(s.Name) > 0 {
   118  		m["name"] = s.Name
   119  	}
   120  
   121  	if s.Type.Type() == gv.Null || (s.Type.Type() == gv.Union && s.Type.(*gv.UnionSchema).Types[0].Type() == gv.Null) || s.Default != nil {
   122  		m["default"] = s.Default
   123  	}
   124  	if len(s.Doc) > 0 {
   125  		m["doc"] = s.Doc
   126  	}
   127  
   128  	for k, v := range s.Properties {
   129  		m[k] = v
   130  	}
   131  	return json.Marshal(m)
   132  }
   133  
   134  // ToAvro converts dosa entity definition to avro schema
   135  func ToAvro(fqn dosa.FQN, ed *dosa.EntityDefinition) ([]byte, error) {
   136  	fields := make([]*Field, len(ed.Columns))
   137  	for i, c := range ed.Columns {
   138  		props := make(map[string]string)
   139  		props[dosaTypeKey] = c.Type.String()
   140  		// TODO add tags
   141  		fields[i] = &Field{
   142  			Name:       c.Name,
   143  			Type:       avroTypes[c.Type],
   144  			Properties: props,
   145  			Default:    nil,
   146  		}
   147  	}
   148  
   149  	meta := make(map[string]interface{})
   150  	meta[partitionKeys] = ed.Key.PartitionKeys
   151  	meta[clusteringKeys] = ed.Key.ClusteringKeys
   152  	meta[indexKeys] = ed.Indexes
   153  
   154  	ar := &Record{
   155  		Name:       ed.Name,
   156  		Namespace:  fqn.String(),
   157  		Fields:     fields,
   158  		Properties: meta,
   159  	}
   160  
   161  	bs, err := ar.MarshalJSON()
   162  	if err != nil {
   163  		return nil, errors.Wrap(err, "failed to serialize avro schema into json")
   164  	}
   165  	return bs, nil
   166  }
   167  
   168  // FromAvro converts avro schema to dosa entity definition
   169  func FromAvro(data string) (*dosa.EntityDefinition, error) {
   170  	schema, err := gv.ParseSchema(data)
   171  	if err != nil {
   172  		return nil, errors.Wrap(err, "failed to parse avro schema from json")
   173  	}
   174  
   175  	pks, err := decodePartitionKeysFromSchema(schema)
   176  	if err != nil {
   177  		return nil, errors.Wrap(err, "failed to parse avro schema for partition keys")
   178  	}
   179  
   180  	cks, err := decodeClusteringKeysFromSchema(schema)
   181  	if err != nil {
   182  		return nil, errors.Wrap(err, "failed to parse avro schema for clustering keys")
   183  	}
   184  
   185  	rs, ok := schema.(*gv.RecordSchema)
   186  	if !ok {
   187  		return nil, errors.New("fail to parse avro schema")
   188  	}
   189  
   190  	cols, err := decodeFields(rs.Fields)
   191  	if err != nil {
   192  		return nil, errors.Wrap(err, "failed to parse avro schema for fields")
   193  	}
   194  
   195  	// index
   196  	idx, err := decodeIndexes(schema)
   197  	if err != nil {
   198  		return nil, errors.Wrap(err, "failed to parse avro schema for index")
   199  	}
   200  
   201  	return &dosa.EntityDefinition{
   202  		Name: schema.GetName(),
   203  		Key: &dosa.PrimaryKey{
   204  			PartitionKeys:  pks,
   205  			ClusteringKeys: cks,
   206  		},
   207  		Columns: cols,
   208  		Indexes: idx,
   209  	}, nil
   210  }
   211  
   212  func decodeFields(fields []*gv.SchemaField) ([]*dosa.ColumnDefinition, error) {
   213  	cols := make([]*dosa.ColumnDefinition, len(fields))
   214  	for i, f := range fields {
   215  		dosaType, ok := f.Prop(dosaTypeKey)
   216  		if !ok {
   217  			return nil, fmt.Errorf("cannot find %s key in the field", dosaTypeKey)
   218  		}
   219  
   220  		t, ok := dosaType.(string)
   221  		if !ok {
   222  			return nil, fmt.Errorf("failed to convert %s to string", dosaTypeKey)
   223  		}
   224  
   225  		col := &dosa.ColumnDefinition{
   226  			Name: f.Name,
   227  			Type: dosa.FromString(t),
   228  		}
   229  		cols[i] = col
   230  	}
   231  	return cols, nil
   232  }
   233  
   234  func decodePartitionKeysFromSchema(schema gv.Schema) ([]string, error) {
   235  	if prop, ok := schema.Prop(partitionKeys); ok {
   236  		return decodePartitionKeys(prop)
   237  	}
   238  	return nil, fmt.Errorf("cannot find %s key in the schema", partitionKeys)
   239  }
   240  
   241  func decodePartitionKeys(prop interface{}) ([]string, error) {
   242  	realPks, ok := prop.([]interface{})
   243  	if !ok {
   244  		return nil, fmt.Errorf("failed to parse partition keys: %v", prop)
   245  	}
   246  
   247  	pks := make([]string, len(realPks))
   248  	for i, v := range realPks {
   249  		pks[i], ok = v.(string)
   250  		if !ok {
   251  			return nil, fmt.Errorf("failed to parse partition keys: %v", prop)
   252  		}
   253  	}
   254  	return pks, nil
   255  }
   256  
   257  func decodeIndexes(schema gv.Schema) (map[string]*dosa.IndexDefinition, error) {
   258  	idx := make(map[string]*dosa.IndexDefinition)
   259  	if prop, ok := schema.Prop(indexKeys); ok {
   260  		realIndexes, ok := prop.(map[string]interface{})
   261  		if !ok {
   262  			return nil, fmt.Errorf("failed to parse index: %v", prop)
   263  		}
   264  
   265  		for key, index := range realIndexes {
   266  			realIndex, ok := index.(map[string]interface{})
   267  			if !ok {
   268  				return nil, fmt.Errorf("failed to parse index: %v", index)
   269  			}
   270  
   271  			realKey, ok := realIndex[indexKeyField].(map[string]interface{})
   272  			if !ok {
   273  				return nil, fmt.Errorf("failed to parse index key: %v", index)
   274  			}
   275  
   276  			pks, err := decodePartitionKeys(realKey[partitionKeys])
   277  			if err != nil {
   278  				return nil, errors.Wrap(err, "failed to parse avro index partition keys")
   279  			}
   280  
   281  			cks, err := decodeClusteringKeys(realKey[clusteringKeys])
   282  			if err != nil {
   283  				return nil, errors.Wrap(err, "failed to parse avro index clustering keys")
   284  			}
   285  
   286  			idx[key] = &dosa.IndexDefinition{
   287  				Key: &dosa.PrimaryKey{
   288  					PartitionKeys:  pks,
   289  					ClusteringKeys: cks,
   290  				},
   291  			}
   292  		}
   293  		return idx, nil
   294  	}
   295  	return nil, fmt.Errorf("cannot find %s index in the schema", indexKeys)
   296  }
   297  
   298  func decodeClusteringKeysFromSchema(schema gv.Schema) ([]*dosa.ClusteringKey, error) {
   299  	if prop, ok := schema.Prop(clusteringKeys); ok {
   300  		return decodeClusteringKeys(prop)
   301  	}
   302  	return nil, fmt.Errorf("cannot find %s key in the schema", clusteringKeys)
   303  }
   304  
   305  func decodeClusteringKeys(prop interface{}) ([]*dosa.ClusteringKey, error) {
   306  	// cluster key is optional
   307  	if prop == nil {
   308  		return nil, nil
   309  	}
   310  
   311  	realCks, ok := prop.([]interface{})
   312  	if !ok {
   313  		return nil, fmt.Errorf("failed to parse clustering keys: %v", prop)
   314  	}
   315  
   316  	cks := make([]*dosa.ClusteringKey, len(realCks))
   317  	for i, v := range realCks {
   318  		pair, ok := v.(map[string]interface{})
   319  		if !ok {
   320  			return nil, fmt.Errorf("failed to parse clustering key: %v", v)
   321  		}
   322  
   323  		name, ok := pair[nameKey]
   324  		if !ok {
   325  			return nil, fmt.Errorf("cannot find %s key in %v", nameKey, pair)
   326  		}
   327  		ck := &dosa.ClusteringKey{}
   328  		ck.Name, ok = name.(string)
   329  		if !ok {
   330  			return nil, fmt.Errorf("failed to convert %v to string", name)
   331  		}
   332  
   333  		descending, ok := pair[descendingKey]
   334  		if !ok {
   335  			return nil, fmt.Errorf("cannot find %s key in %v", descendingKey, pair)
   336  		}
   337  		ck.Descending, ok = descending.(bool)
   338  		if !ok {
   339  			return nil, fmt.Errorf("failed to convert %v to bool", descending)
   340  		}
   341  
   342  		cks[i] = ck
   343  	}
   344  
   345  	return cks, nil
   346  }