github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/schema/encoding/schema_marshaling.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package encoding 16 17 import ( 18 "context" 19 "errors" 20 "sync" 21 22 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 23 "github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo" 24 "github.com/dolthub/dolt/go/store/hash" 25 "github.com/dolthub/dolt/go/store/marshal" 26 "github.com/dolthub/dolt/go/store/types" 27 ) 28 29 // Correct Marshalling & Unmarshalling is essential to compatibility across Dolt versions 30 // any changes to the fields of Schema or other persisted objects must be append only, no 31 // fields can ever be removed without breaking compatibility. 32 // 33 // the marshalling annotations of new fields must have the "omitempty" option to allow newer 34 // versions of Dolt to read objects serialized by older Dolt versions where the field did not 35 // yet exists. However, all fields must always be written. 36 type encodedColumn struct { 37 Tag uint64 `noms:"tag" json:"tag"` 38 39 // Name is the name of the field 40 Name string `noms:"name" json:"name"` 41 42 // Kind is the type of the field. See types/noms_kind.go in the liquidata fork for valid values 43 Kind string `noms:"kind" json:"kind"` 44 45 IsPartOfPK bool `noms:"is_part_of_pk" json:"is_part_of_pk"` 46 47 TypeInfo encodedTypeInfo `noms:"typeinfo,omitempty" json:"typeinfo,omitempty"` 48 49 Default string `noms:"default,omitempty" json:"default,omitempty"` 50 51 AutoIncrement bool `noms:"auto_increment,omitempty" json:"auto_increment,omitempty"` 52 53 Comment string `noms:"comment,omitempty" json:"comment,omitempty"` 54 55 Constraints []encodedConstraint `noms:"col_constraints" json:"col_constraints"` 56 57 // NB: all new fields must have the 'omitempty' annotation. See comment above 58 } 59 60 func encodeAllColConstraints(constraints []schema.ColConstraint) []encodedConstraint { 61 nomsConstraints := make([]encodedConstraint, len(constraints)) 62 63 for i, c := range constraints { 64 nomsConstraints[i] = encodeColConstraint(c) 65 } 66 67 return nomsConstraints 68 } 69 70 func decodeAllColConstraint(encConstraints []encodedConstraint) []schema.ColConstraint { 71 if len(encConstraints) == 0 { 72 return nil 73 } 74 75 constraints := make([]schema.ColConstraint, len(encConstraints)) 76 77 for i, nc := range encConstraints { 78 c := nc.decodeColConstraint() 79 constraints[i] = c 80 } 81 82 return constraints 83 } 84 85 func encodeColumn(col schema.Column) encodedColumn { 86 return encodedColumn{ 87 Tag: col.Tag, 88 Name: col.Name, 89 Kind: col.KindString(), 90 IsPartOfPK: col.IsPartOfPK, 91 TypeInfo: encodeTypeInfo(col.TypeInfo), 92 Default: col.Default, 93 AutoIncrement: col.AutoIncrement, 94 Comment: col.Comment, 95 Constraints: encodeAllColConstraints(col.Constraints), 96 } 97 } 98 99 func (nfd encodedColumn) decodeColumn() (schema.Column, error) { 100 var typeInfo typeinfo.TypeInfo 101 var err error 102 if nfd.TypeInfo.Type != "" { 103 typeInfo, err = nfd.TypeInfo.decodeTypeInfo() 104 if err != nil { 105 return schema.Column{}, err 106 } 107 } else if nfd.Kind != "" { 108 typeInfo = typeinfo.FromKind(schema.LwrStrToKind[nfd.Kind]) 109 } else { 110 return schema.Column{}, errors.New("cannot decode column due to unknown schema format") 111 } 112 colConstraints := decodeAllColConstraint(nfd.Constraints) 113 return schema.NewColumnWithTypeInfo(nfd.Name, nfd.Tag, typeInfo, nfd.IsPartOfPK, nfd.Default, nfd.AutoIncrement, nfd.Comment, colConstraints...) 114 } 115 116 type encodedConstraint struct { 117 Type string `noms:"constraint_type" json:"constraint_type"` 118 Params map[string]string `noms:"params" json:"params"` 119 } 120 121 func encodeColConstraint(constraint schema.ColConstraint) encodedConstraint { 122 return encodedConstraint{constraint.GetConstraintType(), constraint.GetConstraintParams()} 123 } 124 125 func (encCnst encodedConstraint) decodeColConstraint() schema.ColConstraint { 126 return schema.ColConstraintFromTypeAndParams(encCnst.Type, encCnst.Params) 127 } 128 129 type encodedTypeInfo struct { 130 Type string `noms:"type" json:"type"` 131 Params map[string]string `noms:"params" json:"params"` 132 } 133 134 func encodeTypeInfo(ti typeinfo.TypeInfo) encodedTypeInfo { 135 return encodedTypeInfo{ti.GetTypeIdentifier().String(), ti.GetTypeParams()} 136 } 137 138 func (enc encodedTypeInfo) decodeTypeInfo() (typeinfo.TypeInfo, error) { 139 id := typeinfo.ParseIdentifier(enc.Type) 140 return typeinfo.FromTypeParams(id, enc.Params) 141 } 142 143 type encodedIndex struct { 144 Name string `noms:"name" json:"name"` 145 Tags []uint64 `noms:"tags" json:"tags"` 146 Comment string `noms:"comment" json:"comment"` 147 Unique bool `noms:"unique" json:"unique"` 148 IsSystemDefined bool `noms:"hidden,omitempty" json:"hidden,omitempty"` // Was previously named Hidden, do not change noms name 149 } 150 151 type encodedCheck struct { 152 Name string `noms:"name" json:"name"` 153 Expression string `noms:"expression" json:"expression"` 154 Enforced bool `noms:"enforced" json:"enforced"` 155 } 156 157 type schemaData struct { 158 Columns []encodedColumn `noms:"columns" json:"columns"` 159 IndexCollection []encodedIndex `noms:"idxColl,omitempty" json:"idxColl,omitempty"` 160 CheckConstraints []encodedCheck `noms:"checks,omitempty" json:"checks,omitempty"` 161 } 162 163 func (sd *schemaData) Copy() *schemaData { 164 var columns []encodedColumn 165 if sd.Columns != nil { 166 columns = make([]encodedColumn, len(sd.Columns)) 167 for i, column := range sd.Columns { 168 columns[i] = column 169 } 170 } 171 172 var idxCol []encodedIndex 173 if sd.IndexCollection != nil { 174 idxCol = make([]encodedIndex, len(sd.IndexCollection)) 175 for i, idx := range sd.IndexCollection { 176 idxCol[i] = idx 177 idxCol[i].Tags = make([]uint64, len(idx.Tags)) 178 for j, tag := range idx.Tags { 179 idxCol[i].Tags[j] = tag 180 } 181 } 182 } 183 184 var checks []encodedCheck 185 if sd.CheckConstraints != nil { 186 checks = make([]encodedCheck, len(sd.CheckConstraints)) 187 for i, check := range sd.CheckConstraints { 188 checks[i] = check 189 } 190 } 191 192 return &schemaData{ 193 Columns: columns, 194 IndexCollection: idxCol, 195 CheckConstraints: checks, 196 } 197 } 198 199 func toSchemaData(sch schema.Schema) (schemaData, error) { 200 allCols := sch.GetAllCols() 201 encCols := make([]encodedColumn, allCols.Size()) 202 203 i := 0 204 err := allCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) { 205 encCols[i] = encodeColumn(col) 206 i++ 207 208 return false, nil 209 }) 210 211 if err != nil { 212 return schemaData{}, err 213 } 214 215 encodedIndexes := make([]encodedIndex, sch.Indexes().Count()) 216 for i, index := range sch.Indexes().AllIndexes() { 217 encodedIndexes[i] = encodedIndex{ 218 Name: index.Name(), 219 Tags: index.IndexedColumnTags(), 220 Comment: index.Comment(), 221 Unique: index.IsUnique(), 222 IsSystemDefined: !index.IsUserDefined(), 223 } 224 } 225 226 encodedChecks := make([]encodedCheck, sch.Checks().Count()) 227 checks := sch.Checks() 228 for i, check := range checks.AllChecks() { 229 encodedChecks[i] = encodedCheck{ 230 Name: check.Name(), 231 Expression: check.Expression(), 232 Enforced: check.Enforced(), 233 } 234 } 235 236 return schemaData{ 237 Columns: encCols, 238 IndexCollection: encodedIndexes, 239 CheckConstraints: encodedChecks, 240 }, nil 241 } 242 243 func (sd schemaData) decodeSchema() (schema.Schema, error) { 244 numCols := len(sd.Columns) 245 cols := make([]schema.Column, numCols) 246 247 var err error 248 for i, col := range sd.Columns { 249 cols[i], err = col.decodeColumn() 250 if err != nil { 251 return nil, err 252 } 253 } 254 255 colColl := schema.NewColCollection(cols...) 256 257 sch, err := schema.SchemaFromCols(colColl) 258 if err != nil { 259 return nil, err 260 } 261 262 err = sd.addChecksAndIndexesToSchema(sch) 263 if err != nil { 264 return nil, err 265 } 266 267 return sch, nil 268 } 269 270 func (sd schemaData) addChecksAndIndexesToSchema(sch schema.Schema) error { 271 for _, encodedIndex := range sd.IndexCollection { 272 _, err := sch.Indexes().UnsafeAddIndexByColTags( 273 encodedIndex.Name, 274 encodedIndex.Tags, 275 schema.IndexProperties{ 276 IsUnique: encodedIndex.Unique, 277 IsUserDefined: !encodedIndex.IsSystemDefined, 278 Comment: encodedIndex.Comment, 279 }, 280 ) 281 if err != nil { 282 return err 283 } 284 } 285 286 for _, encodedCheck := range sd.CheckConstraints { 287 _, err := sch.Checks().AddCheck( 288 encodedCheck.Name, 289 encodedCheck.Expression, 290 encodedCheck.Enforced, 291 ) 292 if err != nil { 293 return err 294 } 295 } 296 return nil 297 } 298 299 // MarshalSchemaAsNomsValue takes a Schema and converts it to a types.Value 300 func MarshalSchemaAsNomsValue(ctx context.Context, vrw types.ValueReadWriter, sch schema.Schema) (types.Value, error) { 301 // Anyone calling this is going to serialize this to disk, so it's our last line of defense against defective schemas. 302 // Business logic should catch errors before this point, but this is a failsafe. 303 err := schema.ValidateForInsert(sch.GetAllCols()) 304 if err != nil { 305 return nil, err 306 } 307 308 sd, err := toSchemaData(sch) 309 310 if err != nil { 311 return types.EmptyStruct(vrw.Format()), err 312 } 313 314 val, err := marshal.Marshal(ctx, vrw, sd) 315 316 if err != nil { 317 return types.EmptyStruct(vrw.Format()), err 318 } 319 320 if _, ok := val.(types.Struct); ok { 321 return val, nil 322 } 323 324 return types.EmptyStruct(vrw.Format()), errors.New("Table Schema could not be converted to types.Struct") 325 } 326 327 type schCacheData struct { 328 all *schema.ColCollection 329 pk *schema.ColCollection 330 nonPK *schema.ColCollection 331 sd *schemaData 332 } 333 334 var schemaCacheMu *sync.Mutex = &sync.Mutex{} 335 var unmarshalledSchemaCache = map[hash.Hash]schCacheData{} 336 337 // UnmarshalSchemaNomsValue takes a types.Value instance and Unmarshalls it into a Schema. 338 func UnmarshalSchemaNomsValue(ctx context.Context, nbf *types.NomsBinFormat, schemaVal types.Value) (schema.Schema, error) { 339 h, err := schemaVal.Hash(nbf) 340 if err != nil { 341 return nil, err 342 } 343 344 schemaCacheMu.Lock() 345 cachedData, ok := unmarshalledSchemaCache[h] 346 schemaCacheMu.Unlock() 347 348 if ok { 349 cachedSch := schema.SchemaFromColCollections(cachedData.all, cachedData.pk, cachedData.nonPK) 350 sd := cachedData.sd.Copy() 351 err := sd.addChecksAndIndexesToSchema(cachedSch) 352 if err != nil { 353 return nil, err 354 } 355 356 return cachedSch, nil 357 } 358 359 var sd schemaData 360 err = marshal.Unmarshal(ctx, nbf, schemaVal, &sd) 361 362 if err != nil { 363 return nil, err 364 } 365 366 sch, err := sd.decodeSchema() 367 if err != nil { 368 return nil, err 369 } 370 371 d := schCacheData{ 372 all: sch.GetAllCols(), 373 pk: sch.GetPKCols(), 374 nonPK: sch.GetNonPKCols(), 375 sd: sd.Copy(), 376 } 377 378 schemaCacheMu.Lock() 379 unmarshalledSchemaCache[h] = d 380 schemaCacheMu.Unlock() 381 382 return sch, nil 383 } 384 385 type superSchemaData struct { 386 Columns []encodedColumn `noms:"columns" json:"columns"` 387 TagNames map[uint64][]string `noms:"col_constraints" json:"col_constraints"` 388 } 389 390 func toSuperSchemaData(ss *schema.SuperSchema) (superSchemaData, error) { 391 encCols := make([]encodedColumn, ss.Size()) 392 tn := make(map[uint64][]string) 393 394 i := 0 395 err := ss.Iter(func(tag uint64, col schema.Column) (stop bool, err error) { 396 encCols[i] = encodeColumn(col) 397 tn[tag] = ss.AllColumnNames(tag) 398 i++ 399 400 return false, nil 401 }) 402 403 if err != nil { 404 return superSchemaData{}, err 405 } 406 407 return superSchemaData{encCols, tn}, nil 408 } 409 410 func (ssd superSchemaData) decodeSuperSchema() (*schema.SuperSchema, error) { 411 numCols := len(ssd.Columns) 412 cols := make([]schema.Column, numCols) 413 414 for i, col := range ssd.Columns { 415 c, err := col.decodeColumn() 416 if err != nil { 417 return nil, err 418 } 419 cols[i] = c 420 } 421 422 colColl := schema.NewColCollection(cols...) 423 424 if ssd.TagNames == nil { 425 ssd.TagNames = make(map[uint64][]string) 426 } 427 428 return schema.UnmarshalSuperSchema(colColl, ssd.TagNames), nil 429 } 430 431 // MarshalSuperSchemaAsNomsValue creates a Noms value from a SuperSchema to be written to a RootValue. 432 func MarshalSuperSchemaAsNomsValue(ctx context.Context, vrw types.ValueReadWriter, ss *schema.SuperSchema) (types.Value, error) { 433 ssd, err := toSuperSchemaData(ss) 434 435 if err != nil { 436 return types.EmptyStruct(vrw.Format()), err 437 } 438 439 val, err := marshal.Marshal(ctx, vrw, ssd) 440 441 if err != nil { 442 return types.EmptyStruct(vrw.Format()), err 443 } 444 445 if _, ok := val.(types.Struct); ok { 446 return val, nil 447 } 448 449 return types.EmptyStruct(vrw.Format()), errors.New("Table Super Schema could not be converted to types.Struct") 450 } 451 452 // UnmarshalSuperSchemaNomsValue takes a Noms value read from a RootValue and constructs a SuperSchema from it. 453 func UnmarshalSuperSchemaNomsValue(ctx context.Context, nbf *types.NomsBinFormat, ssVal types.Value) (*schema.SuperSchema, error) { 454 var ssd superSchemaData 455 err := marshal.Unmarshal(ctx, nbf, ssVal, &ssd) 456 457 if err != nil { 458 return nil, err 459 } 460 461 return ssd.decodeSuperSchema() 462 }