github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/schema/encoding/schema_marshaling.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package encoding
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"sync"
    21  
    22  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    23  	"github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo"
    24  	"github.com/dolthub/dolt/go/store/hash"
    25  	"github.com/dolthub/dolt/go/store/marshal"
    26  	"github.com/dolthub/dolt/go/store/types"
    27  )
    28  
// encodedColumn is the serialized form of a schema.Column. It is produced by
// encodeColumn and turned back into a column by decodeColumn.
//
// Correct marshalling and unmarshalling is essential to compatibility across Dolt versions.
// Any changes to the fields of Schema or other persisted objects must be append only; no
// fields can ever be removed without breaking compatibility.
//
// The marshalling annotations of new fields must have the "omitempty" option to allow newer
// versions of Dolt to read objects serialized by older Dolt versions where the field did not
// yet exist. However, all fields must always be written.
type encodedColumn struct {
	// Tag is the column's tag, copied from schema.Column.Tag.
	Tag uint64 `noms:"tag" json:"tag"`

	// Name is the name of the field
	Name string `noms:"name" json:"name"`

	// Kind is the type of the field.  See types/noms_kind.go in the liquidata fork for valid values.
	// decodeColumn only consults Kind when TypeInfo.Type is empty.
	Kind string `noms:"kind" json:"kind"`

	// IsPartOfPK is copied from schema.Column.IsPartOfPK.
	IsPartOfPK bool `noms:"is_part_of_pk" json:"is_part_of_pk"`

	// TypeInfo is the encoded type information of the column. When its Type is
	// non-empty, decodeColumn uses it in preference to Kind.
	TypeInfo encodedTypeInfo `noms:"typeinfo,omitempty" json:"typeinfo,omitempty"`

	// Default is copied from schema.Column.Default.
	Default string `noms:"default,omitempty" json:"default,omitempty"`

	// AutoIncrement is copied from schema.Column.AutoIncrement.
	AutoIncrement bool `noms:"auto_increment,omitempty" json:"auto_increment,omitempty"`

	// Comment is copied from schema.Column.Comment.
	Comment string `noms:"comment,omitempty" json:"comment,omitempty"`

	// Constraints holds the encoded form of each of the column's constraints.
	Constraints []encodedConstraint `noms:"col_constraints" json:"col_constraints"`

	// NB: all new fields must have the 'omitempty' annotation. See comment above
}
    59  
    60  func encodeAllColConstraints(constraints []schema.ColConstraint) []encodedConstraint {
    61  	nomsConstraints := make([]encodedConstraint, len(constraints))
    62  
    63  	for i, c := range constraints {
    64  		nomsConstraints[i] = encodeColConstraint(c)
    65  	}
    66  
    67  	return nomsConstraints
    68  }
    69  
    70  func decodeAllColConstraint(encConstraints []encodedConstraint) []schema.ColConstraint {
    71  	if len(encConstraints) == 0 {
    72  		return nil
    73  	}
    74  
    75  	constraints := make([]schema.ColConstraint, len(encConstraints))
    76  
    77  	for i, nc := range encConstraints {
    78  		c := nc.decodeColConstraint()
    79  		constraints[i] = c
    80  	}
    81  
    82  	return constraints
    83  }
    84  
    85  func encodeColumn(col schema.Column) encodedColumn {
    86  	return encodedColumn{
    87  		Tag:           col.Tag,
    88  		Name:          col.Name,
    89  		Kind:          col.KindString(),
    90  		IsPartOfPK:    col.IsPartOfPK,
    91  		TypeInfo:      encodeTypeInfo(col.TypeInfo),
    92  		Default:       col.Default,
    93  		AutoIncrement: col.AutoIncrement,
    94  		Comment:       col.Comment,
    95  		Constraints:   encodeAllColConstraints(col.Constraints),
    96  	}
    97  }
    98  
    99  func (nfd encodedColumn) decodeColumn() (schema.Column, error) {
   100  	var typeInfo typeinfo.TypeInfo
   101  	var err error
   102  	if nfd.TypeInfo.Type != "" {
   103  		typeInfo, err = nfd.TypeInfo.decodeTypeInfo()
   104  		if err != nil {
   105  			return schema.Column{}, err
   106  		}
   107  	} else if nfd.Kind != "" {
   108  		typeInfo = typeinfo.FromKind(schema.LwrStrToKind[nfd.Kind])
   109  	} else {
   110  		return schema.Column{}, errors.New("cannot decode column due to unknown schema format")
   111  	}
   112  	colConstraints := decodeAllColConstraint(nfd.Constraints)
   113  	return schema.NewColumnWithTypeInfo(nfd.Name, nfd.Tag, typeInfo, nfd.IsPartOfPK, nfd.Default, nfd.AutoIncrement, nfd.Comment, colConstraints...)
   114  }
   115  
// encodedConstraint is the serialized form of a schema.ColConstraint. Type
// holds the value returned by the constraint's GetConstraintType and Params
// the value returned by its GetConstraintParams; together they are passed to
// schema.ColConstraintFromTypeAndParams when decoding.
type encodedConstraint struct {
	Type   string            `noms:"constraint_type" json:"constraint_type"`
	Params map[string]string `noms:"params" json:"params"`
}
   120  
   121  func encodeColConstraint(constraint schema.ColConstraint) encodedConstraint {
   122  	return encodedConstraint{constraint.GetConstraintType(), constraint.GetConstraintParams()}
   123  }
   124  
   125  func (encCnst encodedConstraint) decodeColConstraint() schema.ColConstraint {
   126  	return schema.ColConstraintFromTypeAndParams(encCnst.Type, encCnst.Params)
   127  }
   128  
// encodedTypeInfo is the serialized form of a typeinfo.TypeInfo. Type holds
// the string form of the type identifier and Params the type's parameters;
// decodeTypeInfo parses Type back into an identifier and combines it with
// Params.
type encodedTypeInfo struct {
	Type   string            `noms:"type" json:"type"`
	Params map[string]string `noms:"params" json:"params"`
}
   133  
   134  func encodeTypeInfo(ti typeinfo.TypeInfo) encodedTypeInfo {
   135  	return encodedTypeInfo{ti.GetTypeIdentifier().String(), ti.GetTypeParams()}
   136  }
   137  
   138  func (enc encodedTypeInfo) decodeTypeInfo() (typeinfo.TypeInfo, error) {
   139  	id := typeinfo.ParseIdentifier(enc.Type)
   140  	return typeinfo.FromTypeParams(id, enc.Params)
   141  }
   142  
// encodedIndex is the serialized form of a schema index: its name, the tags
// of the indexed columns, its comment, whether it is unique, and whether it
// is system defined (stored as the negation of the index's IsUserDefined).
type encodedIndex struct {
	Name            string   `noms:"name" json:"name"`
	Tags            []uint64 `noms:"tags" json:"tags"`
	Comment         string   `noms:"comment" json:"comment"`
	Unique          bool     `noms:"unique" json:"unique"`
	IsSystemDefined bool     `noms:"hidden,omitempty" json:"hidden,omitempty"` // Was previously named Hidden, do not change noms name
}
   150  
// encodedCheck is the serialized form of a check constraint: its name, its
// expression, and whether it is enforced.
type encodedCheck struct {
	Name       string `noms:"name" json:"name"`
	Expression string `noms:"expression" json:"expression"`
	Enforced   bool   `noms:"enforced" json:"enforced"`
}
   156  
// schemaData is the serialized form of a schema.Schema: its columns, its
// indexes and its check constraints. IndexCollection and CheckConstraints are
// marked omitempty in their marshalling annotations.
type schemaData struct {
	Columns          []encodedColumn `noms:"columns" json:"columns"`
	IndexCollection  []encodedIndex  `noms:"idxColl,omitempty" json:"idxColl,omitempty"`
	CheckConstraints []encodedCheck  `noms:"checks,omitempty" json:"checks,omitempty"`
}
   162  
   163  func (sd *schemaData) Copy() *schemaData {
   164  	var columns []encodedColumn
   165  	if sd.Columns != nil {
   166  		columns = make([]encodedColumn, len(sd.Columns))
   167  		for i, column := range sd.Columns {
   168  			columns[i] = column
   169  		}
   170  	}
   171  
   172  	var idxCol []encodedIndex
   173  	if sd.IndexCollection != nil {
   174  		idxCol = make([]encodedIndex, len(sd.IndexCollection))
   175  		for i, idx := range sd.IndexCollection {
   176  			idxCol[i] = idx
   177  			idxCol[i].Tags = make([]uint64, len(idx.Tags))
   178  			for j, tag := range idx.Tags {
   179  				idxCol[i].Tags[j] = tag
   180  			}
   181  		}
   182  	}
   183  
   184  	var checks []encodedCheck
   185  	if sd.CheckConstraints != nil {
   186  		checks = make([]encodedCheck, len(sd.CheckConstraints))
   187  		for i, check := range sd.CheckConstraints {
   188  			checks[i] = check
   189  		}
   190  	}
   191  
   192  	return &schemaData{
   193  		Columns:          columns,
   194  		IndexCollection:  idxCol,
   195  		CheckConstraints: checks,
   196  	}
   197  }
   198  
   199  func toSchemaData(sch schema.Schema) (schemaData, error) {
   200  	allCols := sch.GetAllCols()
   201  	encCols := make([]encodedColumn, allCols.Size())
   202  
   203  	i := 0
   204  	err := allCols.Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
   205  		encCols[i] = encodeColumn(col)
   206  		i++
   207  
   208  		return false, nil
   209  	})
   210  
   211  	if err != nil {
   212  		return schemaData{}, err
   213  	}
   214  
   215  	encodedIndexes := make([]encodedIndex, sch.Indexes().Count())
   216  	for i, index := range sch.Indexes().AllIndexes() {
   217  		encodedIndexes[i] = encodedIndex{
   218  			Name:            index.Name(),
   219  			Tags:            index.IndexedColumnTags(),
   220  			Comment:         index.Comment(),
   221  			Unique:          index.IsUnique(),
   222  			IsSystemDefined: !index.IsUserDefined(),
   223  		}
   224  	}
   225  
   226  	encodedChecks := make([]encodedCheck, sch.Checks().Count())
   227  	checks := sch.Checks()
   228  	for i, check := range checks.AllChecks() {
   229  		encodedChecks[i] = encodedCheck{
   230  			Name:       check.Name(),
   231  			Expression: check.Expression(),
   232  			Enforced:   check.Enforced(),
   233  		}
   234  	}
   235  
   236  	return schemaData{
   237  		Columns:          encCols,
   238  		IndexCollection:  encodedIndexes,
   239  		CheckConstraints: encodedChecks,
   240  	}, nil
   241  }
   242  
   243  func (sd schemaData) decodeSchema() (schema.Schema, error) {
   244  	numCols := len(sd.Columns)
   245  	cols := make([]schema.Column, numCols)
   246  
   247  	var err error
   248  	for i, col := range sd.Columns {
   249  		cols[i], err = col.decodeColumn()
   250  		if err != nil {
   251  			return nil, err
   252  		}
   253  	}
   254  
   255  	colColl := schema.NewColCollection(cols...)
   256  
   257  	sch, err := schema.SchemaFromCols(colColl)
   258  	if err != nil {
   259  		return nil, err
   260  	}
   261  
   262  	err = sd.addChecksAndIndexesToSchema(sch)
   263  	if err != nil {
   264  		return nil, err
   265  	}
   266  
   267  	return sch, nil
   268  }
   269  
   270  func (sd schemaData) addChecksAndIndexesToSchema(sch schema.Schema) error {
   271  	for _, encodedIndex := range sd.IndexCollection {
   272  		_, err := sch.Indexes().UnsafeAddIndexByColTags(
   273  			encodedIndex.Name,
   274  			encodedIndex.Tags,
   275  			schema.IndexProperties{
   276  				IsUnique:      encodedIndex.Unique,
   277  				IsUserDefined: !encodedIndex.IsSystemDefined,
   278  				Comment:       encodedIndex.Comment,
   279  			},
   280  		)
   281  		if err != nil {
   282  			return err
   283  		}
   284  	}
   285  
   286  	for _, encodedCheck := range sd.CheckConstraints {
   287  		_, err := sch.Checks().AddCheck(
   288  			encodedCheck.Name,
   289  			encodedCheck.Expression,
   290  			encodedCheck.Enforced,
   291  		)
   292  		if err != nil {
   293  			return err
   294  		}
   295  	}
   296  	return nil
   297  }
   298  
   299  // MarshalSchemaAsNomsValue takes a Schema and converts it to a types.Value
   300  func MarshalSchemaAsNomsValue(ctx context.Context, vrw types.ValueReadWriter, sch schema.Schema) (types.Value, error) {
   301  	// Anyone calling this is going to serialize this to disk, so it's our last line of defense against defective schemas.
   302  	// Business logic should catch errors before this point, but this is a failsafe.
   303  	err := schema.ValidateForInsert(sch.GetAllCols())
   304  	if err != nil {
   305  		return nil, err
   306  	}
   307  
   308  	sd, err := toSchemaData(sch)
   309  
   310  	if err != nil {
   311  		return types.EmptyStruct(vrw.Format()), err
   312  	}
   313  
   314  	val, err := marshal.Marshal(ctx, vrw, sd)
   315  
   316  	if err != nil {
   317  		return types.EmptyStruct(vrw.Format()), err
   318  	}
   319  
   320  	if _, ok := val.(types.Struct); ok {
   321  		return val, nil
   322  	}
   323  
   324  	return types.EmptyStruct(vrw.Format()), errors.New("Table Schema could not be converted to types.Struct")
   325  }
   326  
// schCacheData is the value stored in the unmarshalled-schema cache. It keeps
// the column collections of a decoded schema (all columns, primary-key
// columns and non-primary-key columns) together with a copy of the schemaData
// it was decoded from, so that a schema can be rebuilt on a cache hit without
// unmarshalling the noms value again.
type schCacheData struct {
	all   *schema.ColCollection
	pk    *schema.ColCollection
	nonPK *schema.ColCollection
	sd    *schemaData
}
   333  
// schemaCacheMu guards reads and writes of unmarshalledSchemaCache.
var schemaCacheMu *sync.Mutex = &sync.Mutex{}

// unmarshalledSchemaCache maps the hash of a serialized schema value to the
// data needed to rebuild its schema. UnmarshalSchemaNomsValue adds entries to
// it; no eviction is visible in this file.
var unmarshalledSchemaCache = map[hash.Hash]schCacheData{}
   336  
   337  // UnmarshalSchemaNomsValue takes a types.Value instance and Unmarshalls it into a Schema.
   338  func UnmarshalSchemaNomsValue(ctx context.Context, nbf *types.NomsBinFormat, schemaVal types.Value) (schema.Schema, error) {
   339  	h, err := schemaVal.Hash(nbf)
   340  	if err != nil {
   341  		return nil, err
   342  	}
   343  
   344  	schemaCacheMu.Lock()
   345  	cachedData, ok := unmarshalledSchemaCache[h]
   346  	schemaCacheMu.Unlock()
   347  
   348  	if ok {
   349  		cachedSch := schema.SchemaFromColCollections(cachedData.all, cachedData.pk, cachedData.nonPK)
   350  		sd := cachedData.sd.Copy()
   351  		err := sd.addChecksAndIndexesToSchema(cachedSch)
   352  		if err != nil {
   353  			return nil, err
   354  		}
   355  
   356  		return cachedSch, nil
   357  	}
   358  
   359  	var sd schemaData
   360  	err = marshal.Unmarshal(ctx, nbf, schemaVal, &sd)
   361  
   362  	if err != nil {
   363  		return nil, err
   364  	}
   365  
   366  	sch, err := sd.decodeSchema()
   367  	if err != nil {
   368  		return nil, err
   369  	}
   370  
   371  	d := schCacheData{
   372  		all:   sch.GetAllCols(),
   373  		pk:    sch.GetPKCols(),
   374  		nonPK: sch.GetNonPKCols(),
   375  		sd:    sd.Copy(),
   376  	}
   377  
   378  	schemaCacheMu.Lock()
   379  	unmarshalledSchemaCache[h] = d
   380  	schemaCacheMu.Unlock()
   381  
   382  	return sch, nil
   383  }
   384  
// superSchemaData is the serialized form of a schema.SuperSchema. Columns
// holds the encoded columns and TagNames maps each column tag to the names
// returned by the super schema's AllColumnNames for that tag.
//
// NOTE(review): TagNames is serialized under the name "col_constraints",
// which does not match the field's meaning. The name is part of the persisted
// format, so presumably it must not be changed — confirm before touching it.
type superSchemaData struct {
	Columns  []encodedColumn     `noms:"columns" json:"columns"`
	TagNames map[uint64][]string `noms:"col_constraints" json:"col_constraints"`
}
   389  
   390  func toSuperSchemaData(ss *schema.SuperSchema) (superSchemaData, error) {
   391  	encCols := make([]encodedColumn, ss.Size())
   392  	tn := make(map[uint64][]string)
   393  
   394  	i := 0
   395  	err := ss.Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
   396  		encCols[i] = encodeColumn(col)
   397  		tn[tag] = ss.AllColumnNames(tag)
   398  		i++
   399  
   400  		return false, nil
   401  	})
   402  
   403  	if err != nil {
   404  		return superSchemaData{}, err
   405  	}
   406  
   407  	return superSchemaData{encCols, tn}, nil
   408  }
   409  
   410  func (ssd superSchemaData) decodeSuperSchema() (*schema.SuperSchema, error) {
   411  	numCols := len(ssd.Columns)
   412  	cols := make([]schema.Column, numCols)
   413  
   414  	for i, col := range ssd.Columns {
   415  		c, err := col.decodeColumn()
   416  		if err != nil {
   417  			return nil, err
   418  		}
   419  		cols[i] = c
   420  	}
   421  
   422  	colColl := schema.NewColCollection(cols...)
   423  
   424  	if ssd.TagNames == nil {
   425  		ssd.TagNames = make(map[uint64][]string)
   426  	}
   427  
   428  	return schema.UnmarshalSuperSchema(colColl, ssd.TagNames), nil
   429  }
   430  
   431  // MarshalSuperSchemaAsNomsValue creates a Noms value from a SuperSchema to be written to a RootValue.
   432  func MarshalSuperSchemaAsNomsValue(ctx context.Context, vrw types.ValueReadWriter, ss *schema.SuperSchema) (types.Value, error) {
   433  	ssd, err := toSuperSchemaData(ss)
   434  
   435  	if err != nil {
   436  		return types.EmptyStruct(vrw.Format()), err
   437  	}
   438  
   439  	val, err := marshal.Marshal(ctx, vrw, ssd)
   440  
   441  	if err != nil {
   442  		return types.EmptyStruct(vrw.Format()), err
   443  	}
   444  
   445  	if _, ok := val.(types.Struct); ok {
   446  		return val, nil
   447  	}
   448  
   449  	return types.EmptyStruct(vrw.Format()), errors.New("Table Super Schema could not be converted to types.Struct")
   450  }
   451  
   452  // UnmarshalSuperSchemaNomsValue takes a Noms value read from a RootValue and constructs a SuperSchema from it.
   453  func UnmarshalSuperSchemaNomsValue(ctx context.Context, nbf *types.NomsBinFormat, ssVal types.Value) (*schema.SuperSchema, error) {
   454  	var ssd superSchemaData
   455  	err := marshal.Unmarshal(ctx, nbf, ssVal, &ssd)
   456  
   457  	if err != nil {
   458  		return nil, err
   459  	}
   460  
   461  	return ssd.decodeSuperSchema()
   462  }