github.com/dolthub/go-mysql-server@v0.18.0/memory/table_data.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package memory
    16  
    17  import (
    18  	"fmt"
    19  	"sort"
    20  	"strconv"
    21  
    22  	"github.com/cespare/xxhash/v2"
    23  
    24  	"github.com/dolthub/go-mysql-server/sql"
    25  	"github.com/dolthub/go-mysql-server/sql/expression"
    26  	"github.com/dolthub/go-mysql-server/sql/transform"
    27  	"github.com/dolthub/go-mysql-server/sql/types"
    28  )
    29  
// TableData holds the schema and the stored rows of a table. Other aspects of a table can change freely as needed
// for different views on a table (column projections, index lookups, filters, etc.) but the storage of underlying
// data lives here.
type TableData struct {
	dbName    string
	tableName string
	comment   string

	// Schema / config data
	schema                  sql.PrimaryKeySchema
	indexes                 map[string]sql.Index
	fkColl                  *ForeignKeyCollection
	checks                  []sql.CheckDefinition
	collation               sql.CollationID
	autoColIdx              int
	primaryKeyIndexes       bool
	fullTextConfigTableName string

	// Data storage. |partitions| is keyed by the string form of the entries in |partitionKeys|; the number of
	// partition keys is the modulus used when hashing a row to a partition. |autoIncVal| is reset by truncate to 1 when
	// the schema has an auto increment column and to 0 otherwise.
	partitions    map[string][]sql.Row
	partitionKeys [][]byte
	autoIncVal    uint64

	// Secondary indexes are stored as a plain slice of rows per index, keyed by index name. The first N elements in
	// each row are the values of the indexed columns, and the final value is the location of the row in the primary
	// storage. The slices are only put in order when sortSecondaryIndexes is called.
	// TODO: we could make these much more performant by using a tree or other ordered collection
	secondaryIndexStorage map[indexName][]sql.Row
}
    58  
// indexName is the key type of TableData.secondaryIndexStorage. It is converted back to a plain string when looking
// up the matching entry in TableData.indexes.
type indexName string
    60  
// primaryRowLocation is a special marker element in index storage rows containing the partition and index of the row
// in the primary storage.
type primaryRowLocation struct {
	// partition names the partition holding the row (presumably a key of TableData.partitions — confirm against the
	// code that builds index rows).
	partition string
	// idx is the position of the row within that partition's row slice.
	idx int
}
    67  
    68  // Table returns a table with this data
    69  func (td TableData) Table(database *BaseDatabase) *Table {
    70  	return &Table{
    71  		db:               database,
    72  		name:             td.tableName,
    73  		data:             &td,
    74  		pkIndexesEnabled: td.primaryKeyIndexes,
    75  	}
    76  }
    77  
    78  func (td TableData) copy() *TableData {
    79  	sch := td.schema.Schema.Copy()
    80  	pkSch := sql.NewPrimaryKeySchema(sch, td.schema.PkOrdinals...)
    81  	td.schema = pkSch
    82  
    83  	parts := make(map[string][]sql.Row, len(td.partitions))
    84  	for k, v := range td.partitions {
    85  		data := make([]sql.Row, len(v))
    86  		copy(data, v)
    87  		parts[k] = data
    88  	}
    89  
    90  	keys := make([][]byte, len(td.partitionKeys))
    91  	for i := range td.partitionKeys {
    92  		keys[i] = make([]byte, len(td.partitionKeys[i]))
    93  		copy(keys[i], td.partitionKeys[i])
    94  	}
    95  
    96  	idxStorage := make(map[indexName][]sql.Row, len(td.secondaryIndexStorage))
    97  	for k, v := range td.secondaryIndexStorage {
    98  		data := make([]sql.Row, len(v))
    99  		copy(data, v)
   100  		idxStorage[k] = data
   101  	}
   102  	td.secondaryIndexStorage = idxStorage
   103  
   104  	td.partitionKeys, td.partitions = keys, parts
   105  
   106  	if td.checks != nil {
   107  		checks := make([]sql.CheckDefinition, len(td.checks))
   108  		copy(checks, td.checks)
   109  		td.checks = checks
   110  	}
   111  
   112  	return &td
   113  }
   114  
   115  // partition returns the partition for the row given. Uses the primary key columns if they exist, or all columns
   116  // otherwise
   117  func (td TableData) partition(row sql.Row) (int, error) {
   118  	var keyColumns []int
   119  	if len(td.schema.PkOrdinals) > 0 {
   120  		keyColumns = td.schema.PkOrdinals
   121  	} else {
   122  		keyColumns = make([]int, len(td.schema.Schema))
   123  		for i := range keyColumns {
   124  			keyColumns[i] = i
   125  		}
   126  	}
   127  
   128  	hash := xxhash.New()
   129  	var err error
   130  	for i := range keyColumns {
   131  		v := row[keyColumns[i]]
   132  		if i > 0 {
   133  			// separate each column with a null byte
   134  			if _, err = hash.Write([]byte{0}); err != nil {
   135  				return 0, err
   136  			}
   137  		}
   138  
   139  		t, isStringType := td.schema.Schema[i].Type.(sql.StringType)
   140  		if isStringType && v != nil {
   141  			v, err = types.ConvertToString(v, t)
   142  			if err == nil {
   143  				err = t.Collation().WriteWeightString(hash, v.(string))
   144  			}
   145  		} else {
   146  			_, err = fmt.Fprintf(hash, "%v", v)
   147  		}
   148  		if err != nil {
   149  			return 0, err
   150  		}
   151  	}
   152  
   153  	sum64 := hash.Sum64()
   154  	return int(sum64 % uint64(len(td.partitionKeys))), nil
   155  }
   156  
   157  func (td *TableData) truncate(schema sql.PrimaryKeySchema) *TableData {
   158  	var keys [][]byte
   159  	var partitions = map[string][]sql.Row{}
   160  	numParts := len(td.partitionKeys)
   161  
   162  	for i := 0; i < numParts; i++ {
   163  		key := strconv.Itoa(i)
   164  		keys = append(keys, []byte(key))
   165  		partitions[key] = []sql.Row{}
   166  	}
   167  
   168  	td.partitionKeys = keys
   169  	td.partitions = partitions
   170  	td.schema = schema
   171  
   172  	td.indexes = rewriteIndexes(td.indexes, schema)
   173  	td.secondaryIndexStorage = make(map[indexName][]sql.Row)
   174  
   175  	td.autoIncVal = 0
   176  	if schema.HasAutoIncrement() {
   177  		td.autoIncVal = 1
   178  	}
   179  
   180  	return td
   181  }
   182  
   183  // rewriteIndexes returns a new set of indexes appropriate for the new schema provided. Index expressions are adjusted
   184  // as necessary, and any indexes for columns that no longer exist are removed from the set.
   185  func rewriteIndexes(indexes map[string]sql.Index, schema sql.PrimaryKeySchema) map[string]sql.Index {
   186  	newIdxes := make(map[string]sql.Index)
   187  	for name, idx := range indexes {
   188  		newIdx := rewriteIndex(idx.(*Index), schema)
   189  		if newIdx != nil {
   190  			newIdxes[name] = newIdx
   191  		}
   192  	}
   193  	return newIdxes
   194  }
   195  
   196  // rewriteIndex returns a new index appropriate for the new schema provided, or nil if no columns remain to be indexed
   197  // in the schema
   198  func rewriteIndex(idx *Index, schema sql.PrimaryKeySchema) *Index {
   199  	var newExprs []sql.Expression
   200  	for _, expr := range idx.Exprs {
   201  		newE, _, _ := transform.Expr(expr, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) {
   202  			if gf, ok := e.(*expression.GetField); ok {
   203  				newIdx := schema.IndexOfColName(gf.Name())
   204  				if newIdx < 0 {
   205  					return nil, transform.SameTree, nil
   206  				}
   207  				return gf.WithIndex(newIdx), transform.NewTree, nil
   208  			}
   209  
   210  			return e, transform.SameTree, nil
   211  		})
   212  		if newE != nil {
   213  			newExprs = append(newExprs, newE)
   214  		}
   215  	}
   216  
   217  	if len(newExprs) == 0 {
   218  		return nil
   219  	}
   220  
   221  	newIdx := *idx
   222  	newIdx.Exprs = newExprs
   223  	return &newIdx
   224  }
   225  
   226  func (td *TableData) columnIndexes(colNames []string) ([]int, error) {
   227  	columns := make([]int, 0, len(colNames))
   228  
   229  	for _, name := range colNames {
   230  		i := td.schema.IndexOf(name, td.tableName)
   231  		if i == -1 {
   232  			return nil, errColumnNotFound.New(name)
   233  		}
   234  
   235  		columns = append(columns, i)
   236  	}
   237  
   238  	return columns, nil
   239  }
   240  
   241  // toStorageRow returns the given row normalized for storage, omitting virtual columns
   242  func (td *TableData) toStorageRow(row sql.Row) sql.Row {
   243  	if !td.schema.HasVirtualColumns() {
   244  		return row
   245  	}
   246  
   247  	storageRow := make(sql.Row, len(td.schema.Schema))
   248  	storageRowIdx := 0
   249  	for i, col := range td.schema.Schema {
   250  		if col.Virtual {
   251  			continue
   252  		}
   253  		storageRow[storageRowIdx] = row[i]
   254  		storageRowIdx++
   255  	}
   256  
   257  	return storageRow[:storageRowIdx]
   258  }
   259  
   260  func (td *TableData) numRows(ctx *sql.Context) (uint64, error) {
   261  	var count uint64
   262  	for _, rows := range td.partitions {
   263  		count += uint64(len(rows))
   264  	}
   265  
   266  	return count, nil
   267  }
   268  
   269  // throws an error if any two or more rows share the same |cols| values.
   270  func (td *TableData) errIfDuplicateEntryExist(cols []string, idxName string) error {
   271  	columnMapping, err := td.columnIndexes(cols)
   272  	if err != nil {
   273  		return err
   274  	}
   275  	unique := make(map[uint64]struct{})
   276  	for _, partition := range td.partitions {
   277  		for _, row := range partition {
   278  			idxPrefixKey := projectOnRow(columnMapping, row)
   279  			if hasNulls(idxPrefixKey) {
   280  				continue
   281  			}
   282  			h, err := sql.HashOf(idxPrefixKey)
   283  			if err != nil {
   284  				return err
   285  			}
   286  			if _, ok := unique[h]; ok {
   287  				return sql.NewUniqueKeyErr(formatRow(row, columnMapping), false, nil)
   288  			}
   289  			unique[h] = struct{}{}
   290  		}
   291  	}
   292  	return nil
   293  }
   294  
   295  func hasNulls(row sql.Row) bool {
   296  	for _, v := range row {
   297  		if v == nil {
   298  			return true
   299  		}
   300  	}
   301  	return false
   302  }
   303  
   304  // getColumnOrdinal returns the index in the schema and column with the name given, if it exists, or -1, nil otherwise.
   305  func (td *TableData) getColumnOrdinal(col string) (int, *sql.Column) {
   306  	i := td.schema.IndexOf(col, td.tableName)
   307  	if i == -1 {
   308  		return -1, nil
   309  	}
   310  
   311  	return i, td.schema.Schema[i]
   312  }
   313  
   314  func (td *TableData) generateCheckName() string {
   315  	i := 1
   316  Top:
   317  	for {
   318  		name := fmt.Sprintf("%s_chk_%d", td.tableName, i)
   319  		for _, check := range td.checks {
   320  			if check.Name == name {
   321  				i++
   322  				continue Top
   323  			}
   324  		}
   325  		return name
   326  	}
   327  }
   328  
   329  func (td *TableData) indexColsForTableEditor() ([][]int, [][]uint16) {
   330  	var uniqIdxCols [][]int
   331  	var prefixLengths [][]uint16
   332  	for _, idx := range td.indexes {
   333  		if !idx.IsUnique() {
   334  			continue
   335  		}
   336  		var colNames []string
   337  		expressions := idx.(*Index).Exprs
   338  		for _, exp := range expressions {
   339  			colNames = append(colNames, exp.(*expression.GetField).Name())
   340  		}
   341  		colIdxs, err := td.columnIndexes(colNames)
   342  		if err != nil {
   343  			// this means that the column names in this index aren't in the schema, which can happen in the case of a
   344  			// table rewrite
   345  			continue
   346  		}
   347  		uniqIdxCols = append(uniqIdxCols, colIdxs)
   348  		prefixLengths = append(prefixLengths, idx.PrefixLengths())
   349  	}
   350  	return uniqIdxCols, prefixLengths
   351  }
   352  
   353  // Sorts the rows in the partitions of the table to be in primary key order.
   354  func (td *TableData) sortRows() {
   355  	var pk []pkfield
   356  	for _, column := range td.schema.Schema {
   357  		if column.PrimaryKey {
   358  			idx, col := td.getColumnOrdinal(column.Name)
   359  			pk = append(pk, pkfield{idx, col})
   360  		}
   361  	}
   362  
   363  	var flattenedRows []partitionRow
   364  	for _, k := range td.partitionKeys {
   365  		p := td.partitions[string(k)]
   366  		for i := 0; i < len(p); i++ {
   367  			flattenedRows = append(flattenedRows, partitionRow{string(k), i})
   368  		}
   369  	}
   370  
   371  	sort.Sort(partitionssort{
   372  		pk:      pk,
   373  		ps:      td.partitions,
   374  		allRows: flattenedRows,
   375  		indexes: td.secondaryIndexStorage,
   376  	})
   377  
   378  	td.sortSecondaryIndexes()
   379  }
   380  
   381  func (td *TableData) sortSecondaryIndexes() {
   382  	for idxName, idxStorage := range td.secondaryIndexStorage {
   383  		idx := td.indexes[string(idxName)].(*Index)
   384  		fieldIndexes := idx.columnIndexes(td.schema.Schema)
   385  		types := make([]sql.Type, len(fieldIndexes))
   386  		for i, idx := range fieldIndexes {
   387  			types[i] = td.schema.Schema[idx].Type
   388  		}
   389  		sort.Slice(idxStorage, func(i, j int) bool {
   390  			for t, typ := range types {
   391  				left := idxStorage[i][t]
   392  				right := idxStorage[j][t]
   393  
   394  				// Compare doesn't handle nil values, so we need to handle that case. Nils sort before other values
   395  				if left == nil {
   396  					if right == nil {
   397  						continue
   398  					} else {
   399  						return true
   400  					}
   401  				} else if right == nil {
   402  					return false
   403  				}
   404  
   405  				compare, err := typ.Compare(left, right)
   406  				if err != nil {
   407  					panic(err)
   408  				}
   409  				if compare != 0 {
   410  					return compare < 0
   411  				}
   412  			}
   413  			return false
   414  		})
   415  	}
   416  }
   417  
   418  func (td TableData) virtualColIndexes() []int {
   419  	var indexes []int
   420  	for i, col := range td.schema.Schema {
   421  		if col.Virtual {
   422  			indexes = append(indexes, i)
   423  		}
   424  	}
   425  	return indexes
   426  }
   427  
   428  func insertValueInRows(ctx *sql.Context, data *TableData, colIdx int, colDefault *sql.ColumnDefaultValue) error {
   429  	for k, p := range data.partitions {
   430  		newP := make([]sql.Row, len(p))
   431  		for i, row := range p {
   432  			var newRow sql.Row
   433  			newRow = append(newRow, row[:colIdx]...)
   434  			newRow = append(newRow, nil)
   435  			newRow = append(newRow, row[colIdx:]...)
   436  			var err error
   437  			if !data.schema.Schema[colIdx].Nullable && colDefault == nil {
   438  				newRow[colIdx] = data.schema.Schema[colIdx].Type.Zero()
   439  			} else {
   440  				newRow[colIdx], err = colDefault.Eval(ctx, newRow)
   441  				if err != nil {
   442  					return err
   443  				}
   444  			}
   445  			newP[i] = newRow
   446  		}
   447  		data.partitions[k] = newP
   448  	}
   449  	return nil
   450  }