github.com/willyham/dosa@v2.3.1-0.20171024181418-1e446d37ee71+incompatible/entity.go (about) 1 // Copyright (c) 2017 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package dosa 22 23 import ( 24 "bytes" 25 "strings" 26 27 "reflect" 28 29 "github.com/pkg/errors" 30 ) 31 32 // Table represents a parsed entity format on the client side 33 // In addition to shared EntityDefinition, it records struct name and field names. 34 type Table struct { 35 EntityDefinition 36 StructName string 37 ColToField map[string]string // map from column name -> field name 38 FieldToCol map[string]string // map from field name -> column name 39 } 40 41 // ClusteringKey stores name and ordering of a clustering key 42 type ClusteringKey struct { 43 Name string 44 Descending bool 45 } 46 47 // String takes a ClusteringKey and returns "column-name ASC|DESC" 48 func (ck ClusteringKey) String() string { 49 if ck.Descending { 50 return ck.Name + " DESC" 51 } 52 return ck.Name + " ASC" 53 } 54 55 // PrimaryKey stores information about partition keys and clustering keys 56 type PrimaryKey struct { 57 PartitionKeys []string 58 ClusteringKeys []*ClusteringKey 59 } 60 61 // Clone returns a deep copy of PrimaryKey 62 func (pk PrimaryKey) Clone() *PrimaryKey { 63 npk := &PrimaryKey{} 64 if pk.PartitionKeys != nil { 65 npk.PartitionKeys = make([]string, len(pk.PartitionKeys)) 66 67 for i, k := range pk.PartitionKeys { 68 npk.PartitionKeys[i] = k 69 } 70 71 } 72 73 if pk.ClusteringKeys != nil { 74 npk.ClusteringKeys = make([]*ClusteringKey, len(pk.ClusteringKeys)) 75 for i, c := range pk.ClusteringKeys { 76 npk.ClusteringKeys[i] = &ClusteringKey{ 77 Name: c.Name, 78 Descending: c.Descending, 79 } 80 } 81 } 82 83 return npk 84 } 85 86 // ClusteringKeySet returns a set of all clustering keys. 87 func (pk PrimaryKey) ClusteringKeySet() map[string]struct{} { 88 m := make(map[string]struct{}) 89 for _, c := range pk.ClusteringKeys { 90 m[c.Name] = struct{}{} 91 } 92 return m 93 } 94 95 // PartitionKeySet returns the set of partition keys 96 func (pk PrimaryKey) PartitionKeySet() map[string]struct{} { 97 m := make(map[string]struct{}) 98 for _, p := range pk.PartitionKeys { 99 m[p] = struct{}{} 100 } 101 return m 102 } 103 104 // PrimaryKeySet returns the union of the set of partition keys and clustering keys 105 func (pk PrimaryKey) PrimaryKeySet() map[string]struct{} { 106 m := pk.ClusteringKeySet() 107 for _, p := range pk.PartitionKeys { 108 m[p] = struct{}{} 109 } 110 return m 111 } 112 113 // formatClusteringKeys takes an array of ClusteringKeys and returns 114 // a string that shows all of them, separated by commas 115 func formatClusteringKeys(keys []*ClusteringKey) string { 116 pieces := make([]string, len(keys)) 117 for index, ck := range keys { 118 pieces[index] = ck.String() 119 } 120 return strings.Join(pieces, ", ") 121 } 122 123 func formatPartitionKeys(keys []string) string { 124 if len(keys) > 1 { 125 return "(" + strings.Join(keys, ", ") + ")" 126 } 127 return keys[0] 128 } 129 130 // String method produces the following output: 131 // for multiple partition keys: ((partition-key, ...), clustering-key ASC/DESC, ...) 132 // for one partition key: (partition-key, clustering-key ASC/DESC, ...) 133 func (pk PrimaryKey) String() string { 134 var b bytes.Buffer 135 b.WriteByte('(') 136 b.WriteString(formatPartitionKeys(pk.PartitionKeys)) 137 if pk.ClusteringKeys != nil && len(pk.ClusteringKeys) > 0 { 138 b.WriteString(", ") 139 b.WriteString(formatClusteringKeys(pk.ClusteringKeys)) 140 } 141 b.WriteByte(')') 142 return b.String() 143 } 144 145 // ColumnDefinition stores information about a column 146 type ColumnDefinition struct { 147 Name string // normalized column name 148 Type Type 149 IsPointer bool // used by client only to indicate whether this field is pointer 150 // TODO: change as need to support tags like pii, etc 151 // currently it's in the form of a map from tag name to (optional) tag value 152 Tags map[string]string 153 } 154 155 // Clone returns a deep copy of ColumnDefinition 156 func (cd *ColumnDefinition) Clone() *ColumnDefinition { 157 // TODO: clone tag 158 return &ColumnDefinition{ 159 Name: cd.Name, 160 Type: cd.Type, 161 } 162 } 163 164 // IndexDefinition stores information about a DOSA entity's index 165 type IndexDefinition struct { 166 Key *PrimaryKey 167 } 168 169 // Clone returns a deep copy of IndexDefinition 170 func (id *IndexDefinition) Clone() *IndexDefinition { 171 return &IndexDefinition{ 172 Key: id.Key.Clone(), 173 } 174 } 175 176 // EntityDefinition stores information about a DOSA entity 177 type EntityDefinition struct { 178 Name string // normalized entity name 179 Key *PrimaryKey 180 Columns []*ColumnDefinition 181 Indexes map[string]*IndexDefinition 182 } 183 184 // Clone returns a deep copy of EntityDefinition 185 func (e *EntityDefinition) Clone() *EntityDefinition { 186 newEd := &EntityDefinition{ 187 Name: e.Name, 188 Key: e.Key.Clone(), 189 } 190 191 if e.Columns != nil { 192 newEd.Columns = make([]*ColumnDefinition, len(e.Columns)) 193 for i, col := range e.Columns { 194 newEd.Columns[i] = col.Clone() 195 } 196 } 197 198 if e.Indexes != nil { 199 newEd.Indexes = make(map[string]*IndexDefinition) 200 if e.Indexes == nil { 201 newEd.Indexes = nil 202 } 203 for k, index := range e.Indexes { 204 newEd.Indexes[k] = index.Clone() 205 } 206 } 207 208 return newEd 209 } 210 211 // EnsureValid ensures the entity definition is valid. 212 // All the names used (entity name, column name) must be valid. 213 // No duplicate names can be used in column names or key names. 214 // The primary key must not be nil and must contain at least one partition key. 215 func (e *EntityDefinition) EnsureValid() error { 216 if e == nil { 217 return errors.New("EntityDefinition is nil") 218 } 219 220 if err := IsValidName(e.Name); err != nil { 221 return errors.Wrap(err, "EntityDefinition has invalid name") 222 } 223 224 columnNamesSeen := map[string]struct{}{} 225 for _, c := range e.Columns { 226 if c == nil { 227 return errors.New("EntityDefinition has nil column") 228 } 229 if err := IsValidName(c.Name); err != nil { 230 return errors.Wrap(err, "EntityDefinition has invalid column name") 231 } 232 if _, ok := columnNamesSeen[c.Name]; ok { 233 return errors.Errorf("duplicated column found: %q", c.Name) 234 } 235 if c.Type == Invalid { 236 return errors.Errorf("invalid type for column: %q", c.Name) 237 } 238 columnNamesSeen[c.Name] = struct{}{} 239 } 240 241 if e.Key == nil { 242 return errors.New("EntityDefinition has nil primary key") 243 } 244 245 if len(e.Key.PartitionKeys) == 0 { 246 return errors.New("EntityDefinition does not have partition key") 247 } 248 249 keyNamesSeen := map[string]struct{}{} 250 for _, p := range e.Key.PartitionKeys { 251 if _, ok := columnNamesSeen[p]; !ok { 252 return errors.Errorf("partition key does not refer to a column: %q", p) 253 } 254 if _, ok := keyNamesSeen[p]; ok { 255 return errors.Errorf("a column cannot be used twice in key: %q", p) 256 } 257 keyNamesSeen[p] = struct{}{} 258 } 259 260 for _, c := range e.Key.ClusteringKeys { 261 if c == nil { 262 return errors.New("EntityDefinition has invalid nil clustering key") 263 } 264 265 if _, ok := columnNamesSeen[c.Name]; !ok { 266 return errors.Errorf("clustering key does not refer to a column: %q", c.Name) 267 } 268 269 if _, ok := keyNamesSeen[c.Name]; ok { 270 return errors.Errorf("a column cannot be used twice in key: %q", c.Name) 271 } 272 keyNamesSeen[c.Name] = struct{}{} 273 } 274 275 if err := e.ensureNonNullablePrimaryKeys(); err != nil { 276 return err 277 } 278 279 // validate index 280 for indexName, index := range e.Indexes { 281 if err := IsValidName(indexName); err != nil { 282 return errors.Wrap(err, "IndexDefinition has invalid name") 283 } 284 285 if index == nil { 286 return errors.New("IndexDefinition is nil") 287 } 288 289 if index.Key == nil { 290 return errors.New("IndexDefinition has nil key") 291 } 292 293 if len(index.Key.PartitionKeys) == 0 { 294 return errors.New("index does not have partition key") 295 } 296 297 keyNamesSeen := map[string]struct{}{} 298 for _, p := range index.Key.PartitionKeys { 299 if _, ok := columnNamesSeen[p]; !ok { 300 return errors.Errorf("index partition key does not refer to a column: %q", p) 301 } 302 if _, ok := keyNamesSeen[p]; ok { 303 return errors.Errorf("a column cannot be used twice in index key: %q", p) 304 } 305 keyNamesSeen[p] = struct{}{} 306 } 307 308 for _, c := range index.Key.ClusteringKeys { 309 if c == nil { 310 return errors.New("IndexDefinition has invalid nil clustering key") 311 } 312 313 if _, ok := columnNamesSeen[c.Name]; !ok { 314 return errors.Errorf("clustering key does not refer to a column: %q", c.Name) 315 } 316 317 if _, ok := keyNamesSeen[c.Name]; ok { 318 return errors.Errorf("a column cannot be used twice in index key: %q", c.Name) 319 } 320 keyNamesSeen[c.Name] = struct{}{} 321 } 322 } 323 324 return nil 325 } 326 327 func (e *EntityDefinition) ensureNonNullablePrimaryKeys() error { 328 columns := e.ColumnMap() 329 330 for k := range e.PartitionKeySet() { 331 if isInvalidPrimaryKeyType(columns[k]) { 332 return errors.Errorf("primary key is of nullable type: %q", k) 333 } 334 } 335 336 for k := range e.Key.ClusteringKeySet() { 337 if isInvalidPrimaryKeyType(columns[k]) { 338 return errors.Errorf("clustering key is of nullable type: %q", k) 339 } 340 } 341 342 return nil 343 } 344 345 // ColumnTypes returns a map of column name to column type for all columns. 346 func (e *EntityDefinition) ColumnTypes() map[string]Type { 347 m := make(map[string]Type) 348 for _, c := range e.Columns { 349 m[c.Name] = c.Type 350 } 351 return m 352 } 353 354 // ColumnMap returns a map of column name to column definition for all columns. 355 func (e *EntityDefinition) ColumnMap() map[string]*ColumnDefinition { 356 m := make(map[string]*ColumnDefinition) 357 for _, c := range e.Columns { 358 m[c.Name] = c 359 } 360 return m 361 } 362 363 // PartitionKeySet returns a set of all partition keys. 364 func (e *EntityDefinition) PartitionKeySet() map[string]struct{} { 365 m := make(map[string]struct{}) 366 for _, p := range e.Key.PartitionKeys { 367 m[p] = struct{}{} 368 } 369 return m 370 } 371 372 // KeySet returns a set of all keys, including partition keys and clustering keys. 373 func (e *EntityDefinition) KeySet() map[string]struct{} { 374 m := e.Key.ClusteringKeySet() 375 pks := e.PartitionKeySet() 376 for p := range pks { 377 m[p] = struct{}{} 378 } 379 return m 380 } 381 382 // IsCompatible checks if two entity definitions are compatible or not. 383 // e1.g. edA.IsCompatible(edB) return true, means edA is compatible with edB. 384 // edA is the one to compare and edB is the one to be compared. 385 func (e *EntityDefinition) IsCompatible(e2 *EntityDefinition) error { 386 // for better naming 387 e1 := e 388 389 // entity name should be the same 390 if e1.Name != e2.Name { 391 return errors.Errorf("entity name mismatch: (%s vs %s)", e1.Name, e2.Name) 392 } 393 394 // primary key should be exactly same 395 pks1 := e1.Key.PartitionKeys 396 pks2 := e2.Key.PartitionKeys 397 398 b := reflect.DeepEqual(pks1, pks2) 399 if !b { 400 return errors.Errorf("partition key mismatch: (%v vs %v)", pks1, pks2) 401 } 402 403 cks1 := e1.Key.ClusteringKeys 404 cks2 := e2.Key.ClusteringKeys 405 if len(cks2) != 0 || len(cks1) != 0 { 406 if !reflect.DeepEqual(cks1, cks2) { 407 return errors.Errorf("clustering key mismatch: (%v vs %v)", cks1, cks2) 408 } 409 } 410 // only allow to add new columns 411 colsMap1 := e1.ColumnTypes() 412 colsMap2 := e2.ColumnTypes() 413 414 for name, colType2 := range colsMap2 { 415 colType1, ok := colsMap1[name] 416 if !ok { 417 return errors.Errorf("the column %s in old entity %s but not in new entity", name, e2.Name) 418 } 419 if colType1 != colType2 { 420 return errors.Errorf("the type for column %s mismatch: (%v vs %v)", name, colType1, colType2) 421 } 422 } 423 424 // Index can only be added, not mutated 425 if len(e2.Indexes) > len(e1.Indexes) { 426 return errors.Errorf("Old entity %s has %d indexes but new entity has %d indexes", e2.Name, len(e2.Indexes), len(e1.Indexes)) 427 } 428 429 if e2.Indexes != nil { 430 for name, index2 := range e2.Indexes { 431 index1, ok := e1.Indexes[name] 432 if !ok { 433 return errors.Errorf("Index %s in the old entity %s are missing in the new entity", name, e2.Name) 434 } 435 436 if !reflect.DeepEqual(index1, index2) { 437 return errors.Errorf("index mismatch: (%v vs %v)", index1, index2) 438 } 439 } 440 } 441 // TODO Handle tags in the future 442 443 return nil 444 } 445 446 // FindColumnDefinition finds the column definition by the column name 447 func (e *EntityDefinition) FindColumnDefinition(name string) *ColumnDefinition { 448 for _, cd := range e.Columns { 449 if cd.Name == name { 450 return cd 451 } 452 } 453 return nil 454 } 455 456 // UniqueKey adds any missing keys from the entity's primary key to the keys 457 // specified in the index, to guarantee that the returned key is unique 458 // This method is used to create materialized views 459 func (e *EntityDefinition) UniqueKey(oldKey *PrimaryKey) *PrimaryKey { 460 indexHas := oldKey.PrimaryKeySet() 461 result := *oldKey 462 463 // look for missing primary keys 464 for _, key := range e.Key.PartitionKeys { 465 if _, ok := indexHas[key]; !ok { 466 result.ClusteringKeys = append(result.ClusteringKeys, &ClusteringKey{ 467 Name: key}) 468 } 469 } 470 471 // look for missing clustering keys 472 for _, key := range e.Key.ClusteringKeys { 473 if _, ok := indexHas[key.Name]; !ok { 474 result.ClusteringKeys = append(result.ClusteringKeys, &ClusteringKey{ 475 Name: key.Name}) 476 } 477 } 478 479 return &result 480 }