github.com/willyham/dosa@v2.3.1-0.20171024181418-1e446d37ee71+incompatible/entity.go

github.com/willyham/dosa@v2.3.1-0.20171024181418-1e446d37ee71+incompatible/entity.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package dosa
    22  
    23  import (
    24  	"bytes"
    25  	"strings"
    26  
    27  	"reflect"
    28  
    29  	"github.com/pkg/errors"
    30  )
    31  
// Table represents a parsed entity format on the client side.
// In addition to the shared EntityDefinition, it records the struct name and
// the two-way mapping between column names and field names.
type Table struct {
	EntityDefinition
	StructName string
	ColToField map[string]string // map from column name -> field name
	FieldToCol map[string]string // map from field name -> column name
}
    40  
    41  // ClusteringKey stores name and ordering of a clustering key
    42  type ClusteringKey struct {
    43  	Name       string
    44  	Descending bool
    45  }
    46  
    47  // String takes a ClusteringKey and returns "column-name ASC|DESC"
    48  func (ck ClusteringKey) String() string {
    49  	if ck.Descending {
    50  		return ck.Name + " DESC"
    51  	}
    52  	return ck.Name + " ASC"
    53  }
    54  
// PrimaryKey stores information about partition keys and clustering keys.
type PrimaryKey struct {
	// PartitionKeys holds the names of the partition key columns.
	PartitionKeys []string
	// ClusteringKeys holds the clustering keys, each with its own ordering.
	ClusteringKeys []*ClusteringKey
}
    60  
    61  // Clone returns a deep copy of PrimaryKey
    62  func (pk PrimaryKey) Clone() *PrimaryKey {
    63  	npk := &PrimaryKey{}
    64  	if pk.PartitionKeys != nil {
    65  		npk.PartitionKeys = make([]string, len(pk.PartitionKeys))
    66  
    67  		for i, k := range pk.PartitionKeys {
    68  			npk.PartitionKeys[i] = k
    69  		}
    70  
    71  	}
    72  
    73  	if pk.ClusteringKeys != nil {
    74  		npk.ClusteringKeys = make([]*ClusteringKey, len(pk.ClusteringKeys))
    75  		for i, c := range pk.ClusteringKeys {
    76  			npk.ClusteringKeys[i] = &ClusteringKey{
    77  				Name:       c.Name,
    78  				Descending: c.Descending,
    79  			}
    80  		}
    81  	}
    82  
    83  	return npk
    84  }
    85  
    86  // ClusteringKeySet returns a set of all clustering keys.
    87  func (pk PrimaryKey) ClusteringKeySet() map[string]struct{} {
    88  	m := make(map[string]struct{})
    89  	for _, c := range pk.ClusteringKeys {
    90  		m[c.Name] = struct{}{}
    91  	}
    92  	return m
    93  }
    94  
    95  // PartitionKeySet returns the set of partition keys
    96  func (pk PrimaryKey) PartitionKeySet() map[string]struct{} {
    97  	m := make(map[string]struct{})
    98  	for _, p := range pk.PartitionKeys {
    99  		m[p] = struct{}{}
   100  	}
   101  	return m
   102  }
   103  
   104  // PrimaryKeySet returns the union of the set of partition keys and clustering keys
   105  func (pk PrimaryKey) PrimaryKeySet() map[string]struct{} {
   106  	m := pk.ClusteringKeySet()
   107  	for _, p := range pk.PartitionKeys {
   108  		m[p] = struct{}{}
   109  	}
   110  	return m
   111  }
   112  
   113  // formatClusteringKeys takes an array of ClusteringKeys and returns
   114  // a string that shows all of them, separated by commas
   115  func formatClusteringKeys(keys []*ClusteringKey) string {
   116  	pieces := make([]string, len(keys))
   117  	for index, ck := range keys {
   118  		pieces[index] = ck.String()
   119  	}
   120  	return strings.Join(pieces, ", ")
   121  }
   122  
   123  func formatPartitionKeys(keys []string) string {
   124  	if len(keys) > 1 {
   125  		return "(" + strings.Join(keys, ", ") + ")"
   126  	}
   127  	return keys[0]
   128  }
   129  
   130  // String method produces the following output:
   131  // for multiple partition keys: ((partition-key, ...), clustering-key ASC/DESC, ...)
   132  // for one partition key: (partition-key, clustering-key ASC/DESC, ...)
   133  func (pk PrimaryKey) String() string {
   134  	var b bytes.Buffer
   135  	b.WriteByte('(')
   136  	b.WriteString(formatPartitionKeys(pk.PartitionKeys))
   137  	if pk.ClusteringKeys != nil && len(pk.ClusteringKeys) > 0 {
   138  		b.WriteString(", ")
   139  		b.WriteString(formatClusteringKeys(pk.ClusteringKeys))
   140  	}
   141  	b.WriteByte(')')
   142  	return b.String()
   143  }
   144  
// ColumnDefinition stores information about a column.
type ColumnDefinition struct {
	Name      string // normalized column name
	Type      Type   // column data type
	IsPointer bool   // used by client only to indicate whether this field is pointer
	// TODO: change as need to support tags like pii, etc
	// currently it's in the form of a map from tag name to (optional) tag value
	Tags map[string]string
}
   154  
   155  // Clone returns a deep copy of ColumnDefinition
   156  func (cd *ColumnDefinition) Clone() *ColumnDefinition {
   157  	// TODO: clone tag
   158  	return &ColumnDefinition{
   159  		Name: cd.Name,
   160  		Type: cd.Type,
   161  	}
   162  }
   163  
// IndexDefinition stores information about a DOSA entity's index.
type IndexDefinition struct {
	// Key is the index's own primary key (partition and clustering keys).
	Key *PrimaryKey
}
   168  
   169  // Clone returns a deep copy of IndexDefinition
   170  func (id *IndexDefinition) Clone() *IndexDefinition {
   171  	return &IndexDefinition{
   172  		Key: id.Key.Clone(),
   173  	}
   174  }
   175  
// EntityDefinition stores information about a DOSA entity.
type EntityDefinition struct {
	Name    string                      // normalized entity name
	Key     *PrimaryKey                 // primary key of the entity
	Columns []*ColumnDefinition         // column definitions of the entity
	Indexes map[string]*IndexDefinition // index definitions, keyed by index name
}
   183  
   184  // Clone returns a deep copy of EntityDefinition
   185  func (e *EntityDefinition) Clone() *EntityDefinition {
   186  	newEd := &EntityDefinition{
   187  		Name: e.Name,
   188  		Key:  e.Key.Clone(),
   189  	}
   190  
   191  	if e.Columns != nil {
   192  		newEd.Columns = make([]*ColumnDefinition, len(e.Columns))
   193  		for i, col := range e.Columns {
   194  			newEd.Columns[i] = col.Clone()
   195  		}
   196  	}
   197  
   198  	if e.Indexes != nil {
   199  		newEd.Indexes = make(map[string]*IndexDefinition)
   200  		if e.Indexes == nil {
   201  			newEd.Indexes = nil
   202  		}
   203  		for k, index := range e.Indexes {
   204  			newEd.Indexes[k] = index.Clone()
   205  		}
   206  	}
   207  
   208  	return newEd
   209  }
   210  
   211  // EnsureValid ensures the entity definition is valid.
   212  // All the names used (entity name, column name) must be valid.
   213  // No duplicate names can be used in column names or key names.
   214  // The primary key must not be nil and must contain at least one partition key.
   215  func (e *EntityDefinition) EnsureValid() error {
   216  	if e == nil {
   217  		return errors.New("EntityDefinition is nil")
   218  	}
   219  
   220  	if err := IsValidName(e.Name); err != nil {
   221  		return errors.Wrap(err, "EntityDefinition has invalid name")
   222  	}
   223  
   224  	columnNamesSeen := map[string]struct{}{}
   225  	for _, c := range e.Columns {
   226  		if c == nil {
   227  			return errors.New("EntityDefinition has nil column")
   228  		}
   229  		if err := IsValidName(c.Name); err != nil {
   230  			return errors.Wrap(err, "EntityDefinition has invalid column name")
   231  		}
   232  		if _, ok := columnNamesSeen[c.Name]; ok {
   233  			return errors.Errorf("duplicated column found: %q", c.Name)
   234  		}
   235  		if c.Type == Invalid {
   236  			return errors.Errorf("invalid type for column: %q", c.Name)
   237  		}
   238  		columnNamesSeen[c.Name] = struct{}{}
   239  	}
   240  
   241  	if e.Key == nil {
   242  		return errors.New("EntityDefinition has nil primary key")
   243  	}
   244  
   245  	if len(e.Key.PartitionKeys) == 0 {
   246  		return errors.New("EntityDefinition does not have partition key")
   247  	}
   248  
   249  	keyNamesSeen := map[string]struct{}{}
   250  	for _, p := range e.Key.PartitionKeys {
   251  		if _, ok := columnNamesSeen[p]; !ok {
   252  			return errors.Errorf("partition key does not refer to a column: %q", p)
   253  		}
   254  		if _, ok := keyNamesSeen[p]; ok {
   255  			return errors.Errorf("a column cannot be used twice in key: %q", p)
   256  		}
   257  		keyNamesSeen[p] = struct{}{}
   258  	}
   259  
   260  	for _, c := range e.Key.ClusteringKeys {
   261  		if c == nil {
   262  			return errors.New("EntityDefinition has invalid nil clustering key")
   263  		}
   264  
   265  		if _, ok := columnNamesSeen[c.Name]; !ok {
   266  			return errors.Errorf("clustering key does not refer to a column: %q", c.Name)
   267  		}
   268  
   269  		if _, ok := keyNamesSeen[c.Name]; ok {
   270  			return errors.Errorf("a column cannot be used twice in key: %q", c.Name)
   271  		}
   272  		keyNamesSeen[c.Name] = struct{}{}
   273  	}
   274  
   275  	if err := e.ensureNonNullablePrimaryKeys(); err != nil {
   276  		return err
   277  	}
   278  
   279  	// validate index
   280  	for indexName, index := range e.Indexes {
   281  		if err := IsValidName(indexName); err != nil {
   282  			return errors.Wrap(err, "IndexDefinition has invalid name")
   283  		}
   284  
   285  		if index == nil {
   286  			return errors.New("IndexDefinition is nil")
   287  		}
   288  
   289  		if index.Key == nil {
   290  			return errors.New("IndexDefinition has nil key")
   291  		}
   292  
   293  		if len(index.Key.PartitionKeys) == 0 {
   294  			return errors.New("index does not have partition key")
   295  		}
   296  
   297  		keyNamesSeen := map[string]struct{}{}
   298  		for _, p := range index.Key.PartitionKeys {
   299  			if _, ok := columnNamesSeen[p]; !ok {
   300  				return errors.Errorf("index partition key does not refer to a column: %q", p)
   301  			}
   302  			if _, ok := keyNamesSeen[p]; ok {
   303  				return errors.Errorf("a column cannot be used twice in index key: %q", p)
   304  			}
   305  			keyNamesSeen[p] = struct{}{}
   306  		}
   307  
   308  		for _, c := range index.Key.ClusteringKeys {
   309  			if c == nil {
   310  				return errors.New("IndexDefinition has invalid nil clustering key")
   311  			}
   312  
   313  			if _, ok := columnNamesSeen[c.Name]; !ok {
   314  				return errors.Errorf("clustering key does not refer to a column: %q", c.Name)
   315  			}
   316  
   317  			if _, ok := keyNamesSeen[c.Name]; ok {
   318  				return errors.Errorf("a column cannot be used twice in index key: %q", c.Name)
   319  			}
   320  			keyNamesSeen[c.Name] = struct{}{}
   321  		}
   322  	}
   323  
   324  	return nil
   325  }
   326  
   327  func (e *EntityDefinition) ensureNonNullablePrimaryKeys() error {
   328  	columns := e.ColumnMap()
   329  
   330  	for k := range e.PartitionKeySet() {
   331  		if isInvalidPrimaryKeyType(columns[k]) {
   332  			return errors.Errorf("primary key is of nullable type: %q", k)
   333  		}
   334  	}
   335  
   336  	for k := range e.Key.ClusteringKeySet() {
   337  		if isInvalidPrimaryKeyType(columns[k]) {
   338  			return errors.Errorf("clustering key is of nullable type: %q", k)
   339  		}
   340  	}
   341  
   342  	return nil
   343  }
   344  
   345  // ColumnTypes returns a map of column name to column type for all columns.
   346  func (e *EntityDefinition) ColumnTypes() map[string]Type {
   347  	m := make(map[string]Type)
   348  	for _, c := range e.Columns {
   349  		m[c.Name] = c.Type
   350  	}
   351  	return m
   352  }
   353  
   354  // ColumnMap returns a map of column name to column definition for all columns.
   355  func (e *EntityDefinition) ColumnMap() map[string]*ColumnDefinition {
   356  	m := make(map[string]*ColumnDefinition)
   357  	for _, c := range e.Columns {
   358  		m[c.Name] = c
   359  	}
   360  	return m
   361  }
   362  
   363  // PartitionKeySet returns a set of all partition keys.
   364  func (e *EntityDefinition) PartitionKeySet() map[string]struct{} {
   365  	m := make(map[string]struct{})
   366  	for _, p := range e.Key.PartitionKeys {
   367  		m[p] = struct{}{}
   368  	}
   369  	return m
   370  }
   371  
   372  // KeySet returns a set of all keys, including partition keys and clustering keys.
   373  func (e *EntityDefinition) KeySet() map[string]struct{} {
   374  	m := e.Key.ClusteringKeySet()
   375  	pks := e.PartitionKeySet()
   376  	for p := range pks {
   377  		m[p] = struct{}{}
   378  	}
   379  	return m
   380  }
   381  
   382  // IsCompatible checks if two entity definitions are compatible or not.
   383  // e1.g. edA.IsCompatible(edB) return true, means edA is compatible with edB.
   384  // edA is the one to compare and edB is the one to be compared.
   385  func (e *EntityDefinition) IsCompatible(e2 *EntityDefinition) error {
   386  	// for better naming
   387  	e1 := e
   388  
   389  	// entity name should be the same
   390  	if e1.Name != e2.Name {
   391  		return errors.Errorf("entity name mismatch: (%s vs %s)", e1.Name, e2.Name)
   392  	}
   393  
   394  	// primary key should be exactly same
   395  	pks1 := e1.Key.PartitionKeys
   396  	pks2 := e2.Key.PartitionKeys
   397  
   398  	b := reflect.DeepEqual(pks1, pks2)
   399  	if !b {
   400  		return errors.Errorf("partition key mismatch: (%v vs %v)", pks1, pks2)
   401  	}
   402  
   403  	cks1 := e1.Key.ClusteringKeys
   404  	cks2 := e2.Key.ClusteringKeys
   405  	if len(cks2) != 0 || len(cks1) != 0 {
   406  		if !reflect.DeepEqual(cks1, cks2) {
   407  			return errors.Errorf("clustering key mismatch: (%v vs %v)", cks1, cks2)
   408  		}
   409  	}
   410  	// only allow to add new columns
   411  	colsMap1 := e1.ColumnTypes()
   412  	colsMap2 := e2.ColumnTypes()
   413  
   414  	for name, colType2 := range colsMap2 {
   415  		colType1, ok := colsMap1[name]
   416  		if !ok {
   417  			return errors.Errorf("the column %s in old entity %s but not in new entity", name, e2.Name)
   418  		}
   419  		if colType1 != colType2 {
   420  			return errors.Errorf("the type for column %s mismatch: (%v vs %v)", name, colType1, colType2)
   421  		}
   422  	}
   423  
   424  	// Index can only be added, not mutated
   425  	if len(e2.Indexes) > len(e1.Indexes) {
   426  		return errors.Errorf("Old entity %s has %d indexes but new entity has %d indexes", e2.Name, len(e2.Indexes), len(e1.Indexes))
   427  	}
   428  
   429  	if e2.Indexes != nil {
   430  		for name, index2 := range e2.Indexes {
   431  			index1, ok := e1.Indexes[name]
   432  			if !ok {
   433  				return errors.Errorf("Index %s in the old entity %s are missing in the new entity", name, e2.Name)
   434  			}
   435  
   436  			if !reflect.DeepEqual(index1, index2) {
   437  				return errors.Errorf("index mismatch: (%v vs %v)", index1, index2)
   438  			}
   439  		}
   440  	}
   441  	// TODO Handle tags in the future
   442  
   443  	return nil
   444  }
   445  
   446  // FindColumnDefinition finds the column definition by the column name
   447  func (e *EntityDefinition) FindColumnDefinition(name string) *ColumnDefinition {
   448  	for _, cd := range e.Columns {
   449  		if cd.Name == name {
   450  			return cd
   451  		}
   452  	}
   453  	return nil
   454  }
   455  
   456  // UniqueKey adds any missing keys from the entity's primary key to the keys
   457  // specified in the index, to guarantee that the returned key is unique
   458  // This method is used to create materialized views
   459  func (e *EntityDefinition) UniqueKey(oldKey *PrimaryKey) *PrimaryKey {
   460  	indexHas := oldKey.PrimaryKeySet()
   461  	result := *oldKey
   462  
   463  	// look for missing primary keys
   464  	for _, key := range e.Key.PartitionKeys {
   465  		if _, ok := indexHas[key]; !ok {
   466  			result.ClusteringKeys = append(result.ClusteringKeys, &ClusteringKey{
   467  				Name: key})
   468  		}
   469  	}
   470  
   471  	// look for missing clustering keys
   472  	for _, key := range e.Key.ClusteringKeys {
   473  		if _, ok := indexHas[key.Name]; !ok {
   474  			result.ClusteringKeys = append(result.ClusteringKeys, &ClusteringKey{
   475  				Name: key.Name})
   476  		}
   477  	}
   478  
   479  	return &result
   480  }