github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/table/editor/creation/index.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package creation
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"io"
    22  	"strings"
    23  
    24  	"github.com/dolthub/go-mysql-server/sql"
    25  
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
    31  	"github.com/dolthub/dolt/go/store/prolly"
    32  	"github.com/dolthub/dolt/go/store/prolly/tree"
    33  	"github.com/dolthub/dolt/go/store/types"
    34  	"github.com/dolthub/dolt/go/store/val"
    35  )
    36  
// CreateIndexReturn bundles the values produced by a successful call to
// CreateIndex.
//
// Fields:
//   - NewTable: the table after its schema was updated and the rows of the
//     new index were set on it.
//   - Sch: the schema the new index was added to.
//   - OldIndex: whatever the schema's index collection returned when looking
//     up an existing index over the same column names, before the new index
//     was added. When that index was not user-defined, CreateIndex removed it
//     from the schema and deleted its row data from the table.
//   - NewIndex: the index that was created.
type CreateIndexReturn struct {
	NewTable *doltdb.Table
	Sch      schema.Schema
	OldIndex schema.Index
	NewIndex schema.Index
}
    43  
    44  // CreateIndex creates the given index on the given table with the given schema. Returns the updated table, updated schema, and created index.
    45  func CreateIndex(
    46  	ctx *sql.Context,
    47  	table *doltdb.Table,
    48  	tableName, indexName string,
    49  	columns []string,
    50  	prefixLengths []uint16,
    51  	props schema.IndexProperties,
    52  	opts editor.Options,
    53  ) (*CreateIndexReturn, error) {
    54  	sch, err := table.GetSchema(ctx)
    55  	if err != nil {
    56  		return nil, err
    57  	}
    58  
    59  	// get the real column names as CREATE INDEX columns are case-insensitive
    60  	var realColNames []string
    61  	allTableCols := sch.GetAllCols()
    62  	for _, indexCol := range columns {
    63  		tableCol, ok := allTableCols.GetByNameCaseInsensitive(indexCol)
    64  		if !ok {
    65  			return nil, fmt.Errorf("column `%s` does not exist for the table", indexCol)
    66  		}
    67  		realColNames = append(realColNames, tableCol.Name)
    68  	}
    69  
    70  	if indexName == "" {
    71  		indexName = strings.Join(realColNames, "")
    72  		_, ok := sch.Indexes().GetByNameCaseInsensitive(indexName)
    73  		var i int
    74  		for ok {
    75  			i++
    76  			indexName = fmt.Sprintf("%s_%d", strings.Join(realColNames, ""), i)
    77  			_, ok = sch.Indexes().GetByNameCaseInsensitive(indexName)
    78  		}
    79  	}
    80  	if !doltdb.IsValidIdentifier(indexName) {
    81  		return nil, fmt.Errorf("invalid index name `%s`", indexName)
    82  	}
    83  
    84  	// if an index was already created for the column set but was not generated by the user then we replace it
    85  	existingIndex, ok := sch.Indexes().GetIndexByColumnNames(realColNames...)
    86  	if ok && !existingIndex.IsUserDefined() {
    87  		_, err = sch.Indexes().RemoveIndex(existingIndex.Name())
    88  		if err != nil {
    89  			return nil, err
    90  		}
    91  		table, err = table.DeleteIndexRowData(ctx, existingIndex.Name())
    92  		if err != nil {
    93  			return nil, err
    94  		}
    95  	}
    96  
    97  	// create the index metadata, will error if index names are taken or an index with the same columns in the same order exists
    98  	index, err := sch.Indexes().AddIndexByColNames(
    99  		indexName,
   100  		realColNames,
   101  		prefixLengths,
   102  		props,
   103  	)
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  
   108  	// update the table schema with the new index
   109  	newTable, err := table.UpdateSchema(ctx, sch)
   110  	if err != nil {
   111  		return nil, err
   112  	}
   113  
   114  	// TODO: in the case that we're replacing an implicit index with one the user specified, we could do this more
   115  	//  cheaply in some cases by just renaming it, rather than building it from scratch. But that's harder to get right.
   116  	indexRows, err := BuildSecondaryIndex(ctx, newTable, index, tableName, opts)
   117  	if err != nil {
   118  		return nil, err
   119  	}
   120  
   121  	newTable, err = newTable.SetIndexRows(ctx, index.Name(), indexRows)
   122  	if err != nil {
   123  		return nil, err
   124  	}
   125  
   126  	return &CreateIndexReturn{
   127  		NewTable: newTable,
   128  		Sch:      sch,
   129  		OldIndex: existingIndex,
   130  		NewIndex: index,
   131  	}, nil
   132  }
   133  
   134  func BuildSecondaryIndex(ctx *sql.Context, tbl *doltdb.Table, idx schema.Index, tableName string, opts editor.Options) (durable.Index, error) {
   135  	switch tbl.Format() {
   136  	case types.Format_LD_1:
   137  		m, err := editor.RebuildIndex(ctx, tbl, idx.Name(), opts)
   138  		if err != nil {
   139  			return nil, err
   140  		}
   141  		return durable.IndexFromNomsMap(m, tbl.ValueReadWriter(), tbl.NodeStore()), nil
   142  
   143  	case types.Format_DOLT:
   144  		sch, err := tbl.GetSchema(ctx)
   145  		if err != nil {
   146  			return nil, err
   147  		}
   148  		m, err := tbl.GetRowData(ctx)
   149  		if err != nil {
   150  			return nil, err
   151  		}
   152  		primary := durable.ProllyMapFromIndex(m)
   153  		return BuildSecondaryProllyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch, tableName, idx, primary)
   154  
   155  	default:
   156  		return nil, fmt.Errorf("unknown NomsBinFormat")
   157  	}
   158  }
   159  
   160  // BuildSecondaryProllyIndex builds secondary index data for the given primary
   161  // index row data |primary|. |sch| is the current schema of the table.
   162  func BuildSecondaryProllyIndex(
   163  	ctx *sql.Context,
   164  	vrw types.ValueReadWriter,
   165  	ns tree.NodeStore,
   166  	sch schema.Schema,
   167  	tableName string,
   168  	idx schema.Index,
   169  	primary prolly.Map,
   170  ) (durable.Index, error) {
   171  	var uniqCb DupEntryCb
   172  	if idx.IsUnique() {
   173  		kd := idx.Schema().GetKeyDescriptor()
   174  		uniqCb = func(ctx context.Context, existingKey, newKey val.Tuple) error {
   175  			msg := FormatKeyForUniqKeyErr(newKey, kd)
   176  			return sql.NewUniqueKeyErr(msg, false, nil)
   177  		}
   178  	}
   179  	return BuildProllyIndexExternal(ctx, vrw, ns, sch, tableName, idx, primary, uniqCb)
   180  }
   181  
   182  // FormatKeyForUniqKeyErr formats the given tuple |key| using |d|. The resulting
   183  // string is suitable for use in a sql.UniqueKeyError
   184  // This is copied from the writer package to avoid pulling in that dependency and prevent cycles
   185  func FormatKeyForUniqKeyErr(key val.Tuple, d val.TupleDesc) string {
   186  	var sb strings.Builder
   187  	sb.WriteString("[")
   188  	seenOne := false
   189  	for i := range d.Types {
   190  		if seenOne {
   191  			sb.WriteString(",")
   192  		}
   193  		seenOne = true
   194  		sb.WriteString(d.FormatValue(i, key.GetField(i)))
   195  	}
   196  	sb.WriteString("]")
   197  	return sb.String()
   198  }
   199  
// DupEntryCb receives duplicate unique index entries. |existingKey| is the
// entry already present in the index and |newKey| is the entry that collides
// with it. BuildUniqueProllyIndex stops and returns the error when the
// callback returns one that is non-nil.
type DupEntryCb func(ctx context.Context, existingKey, newKey val.Tuple) error
   202  
   203  // BuildUniqueProllyIndex builds a unique index based on the given |primary| row
   204  // data. If any duplicate entries are found, they are passed to |cb|. If |cb|
   205  // returns a non-nil error then the process is stopped.
   206  func BuildUniqueProllyIndex(
   207  	ctx *sql.Context,
   208  	vrw types.ValueReadWriter,
   209  	ns tree.NodeStore,
   210  	sch schema.Schema,
   211  	tableName string,
   212  	idx schema.Index,
   213  	primary prolly.Map,
   214  	cb DupEntryCb,
   215  ) (durable.Index, error) {
   216  	empty, err := durable.NewEmptyIndex(ctx, vrw, ns, idx.Schema())
   217  	if err != nil {
   218  		return nil, err
   219  	}
   220  	secondary := durable.ProllyMapFromIndex(empty)
   221  	if schema.IsKeyless(sch) {
   222  		secondary = prolly.ConvertToSecondaryKeylessIndex(secondary)
   223  	}
   224  
   225  	iter, err := primary.IterAll(ctx)
   226  	if err != nil {
   227  		return nil, err
   228  	}
   229  	p := primary.Pool()
   230  
   231  	prefixDesc := secondary.KeyDesc().PrefixDesc(idx.Count())
   232  	secondaryBld, err := index.NewSecondaryKeyBuilder(ctx, tableName, sch, idx, secondary.KeyDesc(), p, secondary.NodeStore())
   233  	if err != nil {
   234  		return nil, err
   235  	}
   236  
   237  	mut := secondary.Mutate()
   238  	for {
   239  		var k, v val.Tuple
   240  		k, v, err = iter.Next(ctx)
   241  		if err == io.EOF {
   242  			break
   243  		} else if err != nil {
   244  			return nil, err
   245  		}
   246  
   247  		idxKey, err := secondaryBld.SecondaryKeyFromRow(ctx, k, v)
   248  		if err != nil {
   249  			return nil, err
   250  		}
   251  
   252  		if prefixDesc.HasNulls(idxKey) {
   253  			continue
   254  		}
   255  
   256  		err = mut.GetPrefix(ctx, idxKey, prefixDesc, func(existingKey, _ val.Tuple) error {
   257  			// register a constraint violation if |idxKey| collides with |existingKey|
   258  			if existingKey != nil {
   259  				return cb(ctx, existingKey, idxKey)
   260  			}
   261  			return nil
   262  		})
   263  		if err != nil {
   264  			return nil, err
   265  		}
   266  
   267  		if err = mut.Put(ctx, idxKey, val.EmptyTuple); err != nil {
   268  			return nil, err
   269  		}
   270  	}
   271  
   272  	secondary, err = mut.Map(ctx)
   273  	if err != nil {
   274  		return nil, err
   275  	}
   276  	return durable.IndexFromProllyMap(secondary), nil
   277  }
   278  
// PrefixItr iterates all keys of a given prefix |p| and its descriptor |d| in map |m|.
// Construct one with NewPrefixItr.
//
// Fields:
//   - itr: the underlying iterator obtained for the prefix range of |p|.
//   - p:   the prefix tuple; Next only returns keys whose leading fields are
//     byte-equal to the fields of |p|.
//   - d:   the tuple descriptor that was used to build the prefix range.
//
// todo(andy): move to pkg prolly
type PrefixItr struct {
	itr prolly.MapIter
	p   val.Tuple
	d   val.TupleDesc
}
   286  
   287  func NewPrefixItr(ctx context.Context, p val.Tuple, d val.TupleDesc, m rangeIterator) (PrefixItr, error) {
   288  	rng := prolly.PrefixRange(p, d)
   289  	itr, err := m.IterRange(ctx, rng)
   290  	if err != nil {
   291  		return PrefixItr{}, err
   292  	}
   293  	return PrefixItr{p: p, d: d, itr: itr}, nil
   294  }
   295  
   296  func (itr PrefixItr) Next(ctx context.Context) (k, v val.Tuple, err error) {
   297  OUTER:
   298  	for {
   299  		k, v, err = itr.itr.Next(ctx)
   300  		if err != nil {
   301  			return nil, nil, err
   302  		}
   303  
   304  		// check if p is a prefix of k
   305  		// range iteration currently can return keys not in the range
   306  		for i := 0; i < itr.p.Count(); i++ {
   307  			f1 := itr.p.GetField(i)
   308  			f2 := k.GetField(i)
   309  			if bytes.Compare(f1, f2) != 0 {
   310  				// if a field in the prefix does not match |k|, go to the next row
   311  				continue OUTER
   312  			}
   313  		}
   314  
   315  		return k, v, nil
   316  	}
   317  }
   318  
// rangeIterator is the single capability NewPrefixItr requires of the map it
// iterates: opening an iterator for a prolly.Range.
type rangeIterator interface {
	// IterRange returns an iterator for |rng|, or an error if it cannot be opened.
	IterRange(ctx context.Context, rng prolly.Range) (prolly.MapIter, error)
}
   322  
// Compile-time check that *prollyUniqueKeyErr satisfies the error interface.
var _ error = (*prollyUniqueKeyErr)(nil)

// prollyUniqueKeyErr is an error that is returned when a unique constraint has been violated. It contains the index key
// (which is the full row).
//
// Fields:
//   - k:         the offending key tuple.
//   - kd:        the descriptor used to format |k| in the error message.
//   - IndexName: the name of the index; it is not included in the message
//     produced by Error.
type prollyUniqueKeyErr struct {
	k         val.Tuple
	kd        val.TupleDesc
	IndexName string
}
   332  
   333  // Error implements the error interface.
   334  func (u *prollyUniqueKeyErr) Error() string {
   335  	keyStr, _ := formatKey(u.k, u.kd)
   336  	return fmt.Sprintf("duplicate unique key given: %s", keyStr)
   337  }
   338  
   339  // formatKey returns a comma-separated string representation of the key given
   340  // that matches the output of the old format.
   341  func formatKey(key val.Tuple, td val.TupleDesc) (string, error) {
   342  	vals := make([]string, td.Count())
   343  	for i := 0; i < td.Count(); i++ {
   344  		vals[i] = td.FormatValue(i, key.GetField(i))
   345  	}
   346  
   347  	return fmt.Sprintf("[%s]", strings.Join(vals, ",")), nil
   348  }