github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/doltdb/durable/index.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package durable
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"io"
    22  	"strings"
    23  
    24  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    25  	"github.com/dolthub/dolt/go/store/hash"
    26  	"github.com/dolthub/dolt/go/store/prolly"
    27  	"github.com/dolthub/dolt/go/store/prolly/shim"
    28  	"github.com/dolthub/dolt/go/store/prolly/tree"
    29  	"github.com/dolthub/dolt/go/store/types"
    30  	"github.com/dolthub/dolt/go/store/val"
    31  )
    32  
// Index represents a Table index.
//
// Two implementations are declared in this file: nomsIndex, which wraps a
// types.Map, and prollyIndex, which wraps a prolly.Map. Because the interface
// contains the unexported method bytes, it can only be implemented inside
// this package.
type Index interface {
	// HashOf returns the hash.Hash of this index.
	HashOf() (hash.Hash, error)

	// Count returns the cardinality of the index.
	Count() (uint64, error)

	// Empty returns true if the index is empty.
	Empty() (bool, error)

	// Format returns the types.NomsBinFormat for this index.
	Format() *types.NomsBinFormat

	// AddColumnToRows adds the column given to the rows data and returns the resulting rows.
	// The |newCol| is present in |newSchema|.
	AddColumnToRows(ctx context.Context, newCol string, newSchema schema.Schema) (Index, error)

	// bytes returns the serialized bytes of the (top of the) index.
	// Non-public. Used for flatbuffers Table persistence.
	bytes() ([]byte, error)

	// DebugString returns a textual rendering of the index for debugging.
	// In this file the prollyIndex implementation writes every tree node
	// through tree.OutputProllyNode, while the nomsIndex implementation
	// panics with "Not implemented".
	DebugString(ctx context.Context, ns tree.NodeStore, schema schema.Schema) string
}
    57  
// IndexSet stores a collection of secondary Indexes, keyed by index name.
//
// Every mutating method returns a new IndexSet value rather than modifying
// the receiver.
type IndexSet interface {
	// HashOf returns the hash.Hash of this index set.
	HashOf() (hash.Hash, error)

	// GetIndex gets an index from the set.
	// |sch| is the table schema; the implementations in this file look up the
	// index definition named |name| in sch.Indexes() and use that definition's
	// schema when loading the index.
	GetIndex(ctx context.Context, sch schema.Schema, name string) (Index, error)

	// HasIndex returns true if an index with the specified name exists in the set.
	HasIndex(ctx context.Context, name string) (bool, error)

	// PutIndex puts an index into the set.
	PutIndex(ctx context.Context, name string, idx Index) (IndexSet, error)

	// PutNomsIndex puts a noms index into the set.
	// todo(andy): this is a temporary stop-gap while abstracting types.Map
	PutNomsIndex(ctx context.Context, name string, idx types.Map) (IndexSet, error)

	// DropIndex removes an index from the set.
	DropIndex(ctx context.Context, name string) (IndexSet, error)

	// RenameIndex renames index |oldName| to |newName|.
	RenameIndex(ctx context.Context, oldName, newName string) (IndexSet, error)
}
    82  
    83  // RefFromIndex persists the Index and returns a types.Ref to it.
    84  func RefFromIndex(ctx context.Context, vrw types.ValueReadWriter, idx Index) (types.Ref, error) {
    85  	switch idx.Format() {
    86  	case types.Format_LD_1:
    87  		return refFromNomsValue(ctx, vrw, idx.(nomsIndex).index)
    88  
    89  	case types.Format_DOLT:
    90  		b := shim.ValueFromMap(idx.(prollyIndex).index)
    91  		return refFromNomsValue(ctx, vrw, b)
    92  
    93  	default:
    94  		return types.Ref{}, errNbfUnknown
    95  	}
    96  }
    97  
    98  // indexFromRef reads the types.Ref from storage and returns the Index it points to.
    99  func indexFromRef(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, sch schema.Schema, r types.Ref) (Index, error) {
   100  	return indexFromAddr(ctx, vrw, ns, sch, r.TargetHash())
   101  }
   102  
   103  func indexFromAddr(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, sch schema.Schema, addr hash.Hash) (Index, error) {
   104  	v, err := vrw.ReadValue(ctx, addr)
   105  	if err != nil {
   106  		return nil, err
   107  	}
   108  
   109  	switch vrw.Format() {
   110  	case types.Format_LD_1:
   111  		return IndexFromNomsMap(v.(types.Map), vrw, ns), nil
   112  
   113  	case types.Format_DOLT:
   114  		pm, err := shim.MapFromValue(v, sch, ns)
   115  		if err != nil {
   116  			return nil, err
   117  		}
   118  		return IndexFromProllyMap(pm), nil
   119  
   120  	default:
   121  		return nil, errNbfUnknown
   122  	}
   123  }
   124  
   125  // NewEmptyIndex returns an index with no rows.
   126  func NewEmptyIndex(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, sch schema.Schema) (Index, error) {
   127  	switch vrw.Format() {
   128  	case types.Format_LD_1:
   129  		m, err := types.NewMap(ctx, vrw)
   130  		if err != nil {
   131  			return nil, err
   132  		}
   133  		return IndexFromNomsMap(m, vrw, ns), nil
   134  
   135  	case types.Format_DOLT:
   136  		kd, vd := sch.GetMapDescriptors()
   137  		m, err := prolly.NewMapFromTuples(ctx, ns, kd, vd)
   138  		if err != nil {
   139  			return nil, err
   140  		}
   141  		return IndexFromProllyMap(m), nil
   142  
   143  	default:
   144  		return nil, errNbfUnknown
   145  	}
   146  }
   147  
// nomsIndex is the Index implementation backed by a types.Map.
type nomsIndex struct {
	// index is the wrapped noms map holding the index data.
	index types.Map
	// vrw supplies the NomsBinFormat reported by Format and used when hashing
	// and encoding the map.
	vrw types.ValueReadWriter
	// ns is stored at construction time; none of the nomsIndex methods in this
	// file read it.
	ns tree.NodeStore
}

// Compile-time check that nomsIndex implements Index.
var _ Index = nomsIndex{}
   155  
   156  func IterAllIndexes(
   157  	ctx context.Context,
   158  	sch schema.Schema,
   159  	set IndexSet,
   160  	cb func(name string, idx Index) error,
   161  ) error {
   162  	for _, def := range sch.Indexes().AllIndexes() {
   163  		idx, err := set.GetIndex(ctx, sch, def.Name())
   164  		if err != nil {
   165  			return err
   166  		}
   167  		if err = cb(def.Name(), idx); err != nil {
   168  			return err
   169  		}
   170  	}
   171  	return nil
   172  }
   173  
   174  // NomsMapFromIndex unwraps the Index and returns the underlying types.Map.
   175  func NomsMapFromIndex(i Index) types.Map {
   176  	return i.(nomsIndex).index
   177  }
   178  
   179  // IndexFromNomsMap wraps a types.Map and returns it as an Index.
   180  func IndexFromNomsMap(m types.Map, vrw types.ValueReadWriter, ns tree.NodeStore) Index {
   181  	return nomsIndex{
   182  		index: m,
   183  		vrw:   vrw,
   184  		ns:    ns,
   185  	}
   186  }
   187  
// Compile-time check that nomsIndex implements Index.
// NOTE(review): the same assertion already appears directly after the
// nomsIndex struct declaration earlier in this file, so one of the two is
// redundant.
var _ Index = nomsIndex{}
   189  
   190  // HashOf implements Index.
   191  func (i nomsIndex) HashOf() (hash.Hash, error) {
   192  	return i.index.Hash(i.vrw.Format())
   193  }
   194  
   195  // Count implements Index.
   196  func (i nomsIndex) Count() (uint64, error) {
   197  	return i.index.Len(), nil
   198  }
   199  
   200  // Empty implements Index.
   201  func (i nomsIndex) Empty() (bool, error) {
   202  	return i.index.Len() == 0, nil
   203  }
   204  
   205  // Format implements Index.
   206  func (i nomsIndex) Format() *types.NomsBinFormat {
   207  	return i.vrw.Format()
   208  }
   209  
   210  // bytes implements Index.
   211  func (i nomsIndex) bytes() ([]byte, error) {
   212  	rowschunk, err := types.EncodeValue(i.index, i.vrw.Format())
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  	return rowschunk.Data(), nil
   217  }
   218  
   219  func (i nomsIndex) AddColumnToRows(ctx context.Context, newCol string, newSchema schema.Schema) (Index, error) {
   220  	// no-op for noms indexes because of tag-based mapping
   221  	return i, nil
   222  }
   223  
   224  func (i nomsIndex) DebugString(ctx context.Context, ns tree.NodeStore, schema schema.Schema) string {
   225  	panic("Not implemented")
   226  }
   227  
// prollyIndex is the Index implementation backed by a prolly.Map.
type prollyIndex struct {
	// index is the wrapped prolly map holding the index data.
	index prolly.Map
}
   231  
   232  // ProllyMapFromIndex unwraps the Index and returns the underlying prolly.Map.
   233  func ProllyMapFromIndex(i Index) prolly.Map {
   234  	return i.(prollyIndex).index
   235  }
   236  
   237  // IndexFromProllyMap wraps a prolly.Map and returns it as an Index.
   238  func IndexFromProllyMap(m prolly.Map) Index {
   239  	return prollyIndex{index: m}
   240  }
   241  
// Compile-time check that prollyIndex implements Index.
var _ Index = prollyIndex{}
   243  
   244  // HashOf implements Index.
   245  func (i prollyIndex) HashOf() (hash.Hash, error) {
   246  	return i.index.HashOf(), nil
   247  }
   248  
   249  // Count implements Index.
   250  func (i prollyIndex) Count() (uint64, error) {
   251  	c, err := i.index.Count()
   252  	return uint64(c), err
   253  }
   254  
   255  // Empty implements Index.
   256  func (i prollyIndex) Empty() (bool, error) {
   257  	c, err := i.index.Count()
   258  	if err != nil {
   259  		return false, err
   260  	}
   261  	return c == 0, nil
   262  }
   263  
   264  // Format implements Index.
   265  func (i prollyIndex) Format() *types.NomsBinFormat {
   266  	return i.index.Format()
   267  }
   268  
   269  // bytes implements Index.
   270  func (i prollyIndex) bytes() ([]byte, error) {
   271  	return []byte(shim.ValueFromMap(i.index).(types.SerialMessage)), nil
   272  }
   273  
// Compile-time check that prollyIndex implements Index.
// NOTE(review): the same assertion already appears earlier in this file,
// directly after IndexFromProllyMap, so one of the two is redundant.
var _ Index = prollyIndex{}
   275  
   276  func (i prollyIndex) AddColumnToRows(ctx context.Context, newCol string, newSchema schema.Schema) (Index, error) {
   277  	var last bool
   278  	colIdx, iCol := 0, 0
   279  	newSchema.GetNonPKCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
   280  		last = false
   281  		if strings.ToLower(col.Name) == strings.ToLower(newCol) {
   282  			last = true
   283  			colIdx = iCol
   284  		}
   285  		iCol++
   286  		return false, nil
   287  	})
   288  
   289  	// If the column we added was last among non-primary key columns we can skip this step
   290  	if last {
   291  		return i, nil
   292  	}
   293  
   294  	// If not, then we have to iterate over this table's rows and update all the offsets for the new column
   295  	rowMap := ProllyMapFromIndex(i)
   296  	mutator := rowMap.Mutate()
   297  
   298  	iter, err := mutator.IterAll(ctx)
   299  	if err != nil {
   300  		return nil, err
   301  	}
   302  
   303  	// Re-write all the rows, inserting a zero-byte field in every value tuple
   304  	_, valDesc := rowMap.Descriptors()
   305  	b := val.NewTupleBuilder(valDesc)
   306  	for {
   307  		k, v, err := iter.Next(ctx)
   308  		if err == io.EOF {
   309  			b.Recycle()
   310  			break
   311  		} else if err != nil {
   312  			return nil, err
   313  		}
   314  
   315  		for i := 0; i < colIdx; i++ {
   316  			b.PutRaw(i, v.GetField(i))
   317  		}
   318  		b.PutRaw(colIdx, nil)
   319  		for i := colIdx; i < v.Count(); i++ {
   320  			b.PutRaw(i+1, v.GetField(i))
   321  		}
   322  
   323  		err = mutator.Put(ctx, k, b.BuildPermissive(sharePool))
   324  		if err != nil {
   325  			return nil, err
   326  		}
   327  
   328  		b.Recycle()
   329  	}
   330  
   331  	newMap, err := mutator.Map(ctx)
   332  	if err != nil {
   333  		return nil, err
   334  	}
   335  
   336  	return IndexFromProllyMap(newMap), nil
   337  }
   338  
   339  func (i prollyIndex) DebugString(ctx context.Context, ns tree.NodeStore, schema schema.Schema) string {
   340  	var b bytes.Buffer
   341  	i.index.WalkNodes(ctx, func(ctx context.Context, nd tree.Node) error {
   342  		return tree.OutputProllyNode(ctx, &b, nd, ns, schema)
   343  	})
   344  	return b.String()
   345  }
   346  
   347  // NewIndexSet returns an empty IndexSet.
   348  func NewIndexSet(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore) (IndexSet, error) {
   349  	if vrw.Format().UsesFlatbuffers() {
   350  		emptyam, err := prolly.NewEmptyAddressMap(ns)
   351  		if err != nil {
   352  			return nil, err
   353  		}
   354  		return doltDevIndexSet{vrw, ns, emptyam}, nil
   355  	}
   356  
   357  	empty, err := types.NewMap(ctx, vrw)
   358  	if err != nil {
   359  		return nil, err
   360  	}
   361  	return nomsIndexSet{
   362  		indexes: empty,
   363  		vrw:     vrw,
   364  	}, nil
   365  }
   366  
   367  func NewIndexSetWithEmptyIndexes(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, sch schema.Schema) (IndexSet, error) {
   368  	s, err := NewIndexSet(ctx, vrw, ns)
   369  	if err != nil {
   370  		return nil, err
   371  	}
   372  	for _, index := range sch.Indexes().AllIndexes() {
   373  		empty, err := NewEmptyIndex(ctx, vrw, ns, index.Schema())
   374  		if err != nil {
   375  			return nil, err
   376  		}
   377  		s, err = s.PutIndex(ctx, index.Name(), empty)
   378  		if err != nil {
   379  			return nil, err
   380  		}
   381  	}
   382  	return s, nil
   383  }
   384  
// nomsIndexSet is the IndexSet implementation backed by a types.Map.
type nomsIndexSet struct {
	// indexes maps an index name (types.String) to the value stored for that
	// index; GetIndex expects that value to be a types.Ref.
	indexes types.Map
	// vrw is used to hash the set and to read and persist individual indexes.
	vrw types.ValueReadWriter
	// ns is passed along when indexes are loaded from or wrapped for this set.
	ns tree.NodeStore
}

// Compile-time check that nomsIndexSet implements IndexSet.
var _ IndexSet = nomsIndexSet{}
   392  
   393  // HashOf implements IndexSet.
   394  func (s nomsIndexSet) HashOf() (hash.Hash, error) {
   395  	return s.indexes.Hash(s.vrw.Format())
   396  }
   397  
   398  // HasIndex implements IndexSet.
   399  func (s nomsIndexSet) HasIndex(ctx context.Context, name string) (bool, error) {
   400  	_, ok, err := s.indexes.MaybeGet(ctx, types.String(name))
   401  	if err != nil {
   402  		return false, err
   403  	}
   404  	return ok, nil
   405  }
   406  
   407  // GetIndex implements IndexSet.
   408  func (s nomsIndexSet) GetIndex(ctx context.Context, sch schema.Schema, name string) (Index, error) {
   409  	v, ok, err := s.indexes.MaybeGet(ctx, types.String(name))
   410  	if !ok {
   411  		err = fmt.Errorf("index %s not found in IndexSet", name)
   412  	}
   413  	if err != nil {
   414  		return nil, err
   415  	}
   416  
   417  	idx := sch.Indexes().GetByName(name)
   418  	if idx == nil {
   419  		return nil, fmt.Errorf("index not found: %s", name)
   420  	}
   421  
   422  	return indexFromRef(ctx, s.vrw, s.ns, idx.Schema(), v.(types.Ref))
   423  }
   424  
   425  // PutNomsIndex implements IndexSet.
   426  func (s nomsIndexSet) PutNomsIndex(ctx context.Context, name string, idx types.Map) (IndexSet, error) {
   427  	return s.PutIndex(ctx, name, IndexFromNomsMap(idx, s.vrw, s.ns))
   428  }
   429  
   430  // PutIndex implements IndexSet.
   431  func (s nomsIndexSet) PutIndex(ctx context.Context, name string, idx Index) (IndexSet, error) {
   432  	ref, err := RefFromIndex(ctx, s.vrw, idx)
   433  	if err != nil {
   434  		return nil, err
   435  	}
   436  
   437  	im, err := s.indexes.Edit().Set(types.String(name), ref).Map(ctx)
   438  	if err != nil {
   439  		return nil, err
   440  	}
   441  
   442  	return nomsIndexSet{indexes: im, vrw: s.vrw, ns: s.ns}, nil
   443  }
   444  
   445  // DropIndex implements IndexSet.
   446  func (s nomsIndexSet) DropIndex(ctx context.Context, name string) (IndexSet, error) {
   447  	im, err := s.indexes.Edit().Remove(types.String(name)).Map(ctx)
   448  	if err != nil {
   449  		return nil, err
   450  	}
   451  
   452  	return nomsIndexSet{indexes: im, vrw: s.vrw, ns: s.ns}, nil
   453  }
   454  
   455  func (s nomsIndexSet) RenameIndex(ctx context.Context, oldName, newName string) (IndexSet, error) {
   456  	v, ok, err := s.indexes.MaybeGet(ctx, types.String(oldName))
   457  	if !ok {
   458  		err = fmt.Errorf("index %s not found in IndexSet", oldName)
   459  	}
   460  	if err != nil {
   461  		return nil, err
   462  	}
   463  
   464  	edit := s.indexes.Edit()
   465  	im, err := edit.Set(types.String(newName), v).Remove(types.String(oldName)).Map(ctx)
   466  	if err != nil {
   467  		return nil, err
   468  	}
   469  
   470  	return nomsIndexSet{indexes: im, vrw: s.vrw, ns: s.ns}, nil
   471  }
   472  
   473  func mapFromIndexSet(ic IndexSet) types.Map {
   474  	return ic.(nomsIndexSet).indexes
   475  }
   476  
// doltDevIndexSet is the IndexSet implementation backed by a
// prolly.AddressMap. NewIndexSet returns it when the ValueReadWriter's format
// uses flatbuffers.
type doltDevIndexSet struct {
	// vrw is used to read and persist individual indexes.
	vrw types.ValueReadWriter
	// ns is passed along when indexes are loaded from or wrapped for this set.
	ns tree.NodeStore
	// am maps an index name to the address (hash) of the stored index.
	am prolly.AddressMap
}

// Compile-time check that doltDevIndexSet implements IndexSet.
var _ IndexSet = doltDevIndexSet{}
   484  
   485  func (is doltDevIndexSet) HashOf() (hash.Hash, error) {
   486  	return is.am.HashOf(), nil
   487  }
   488  
   489  func (is doltDevIndexSet) HasIndex(ctx context.Context, name string) (bool, error) {
   490  	addr, err := is.am.Get(ctx, name)
   491  	if err != nil {
   492  		return false, err
   493  	}
   494  	if addr.IsEmpty() {
   495  		return false, nil
   496  	}
   497  	return true, nil
   498  }
   499  
   500  func (is doltDevIndexSet) GetIndex(ctx context.Context, sch schema.Schema, name string) (Index, error) {
   501  	addr, err := is.am.Get(ctx, name)
   502  	if err != nil {
   503  		return nil, err
   504  	}
   505  	if addr.IsEmpty() {
   506  		return nil, fmt.Errorf("index %s not found in IndexSet", name)
   507  	}
   508  	idx := sch.Indexes().GetByName(name)
   509  	if idx == nil {
   510  		return nil, fmt.Errorf("index schema not found: %s", name)
   511  	}
   512  	return indexFromAddr(ctx, is.vrw, is.ns, idx.Schema(), addr)
   513  }
   514  
   515  func (is doltDevIndexSet) PutIndex(ctx context.Context, name string, idx Index) (IndexSet, error) {
   516  	ref, err := RefFromIndex(ctx, is.vrw, idx)
   517  	if err != nil {
   518  		return nil, err
   519  	}
   520  
   521  	ae := is.am.Editor()
   522  	err = ae.Update(ctx, name, ref.TargetHash())
   523  	if err != nil {
   524  		return nil, err
   525  	}
   526  	am, err := ae.Flush(ctx)
   527  	if err != nil {
   528  		return nil, err
   529  	}
   530  
   531  	return doltDevIndexSet{is.vrw, is.ns, am}, nil
   532  }
   533  
   534  func (is doltDevIndexSet) PutNomsIndex(ctx context.Context, name string, idx types.Map) (IndexSet, error) {
   535  	return is.PutIndex(ctx, name, IndexFromNomsMap(idx, is.vrw, is.ns))
   536  }
   537  
   538  func (is doltDevIndexSet) DropIndex(ctx context.Context, name string) (IndexSet, error) {
   539  	ae := is.am.Editor()
   540  	err := ae.Delete(ctx, name)
   541  	if err != nil {
   542  		return nil, err
   543  	}
   544  	am, err := ae.Flush(ctx)
   545  	if err != nil {
   546  		return nil, err
   547  	}
   548  	return doltDevIndexSet{is.vrw, is.ns, am}, nil
   549  }
   550  
   551  func (is doltDevIndexSet) RenameIndex(ctx context.Context, oldName, newName string) (IndexSet, error) {
   552  	addr, err := is.am.Get(ctx, oldName)
   553  	if err != nil {
   554  		return nil, err
   555  	}
   556  	if addr.IsEmpty() {
   557  		return nil, fmt.Errorf("index %s not found in IndexSet", oldName)
   558  	}
   559  	newaddr, err := is.am.Get(ctx, newName)
   560  	if err != nil {
   561  		return nil, err
   562  	}
   563  	if !newaddr.IsEmpty() {
   564  		return nil, fmt.Errorf("index %s found in IndexSet when attempting to rename index", newName)
   565  	}
   566  
   567  	ae := is.am.Editor()
   568  	err = ae.Update(ctx, newName, addr)
   569  	if err != nil {
   570  		return nil, err
   571  	}
   572  	err = ae.Delete(ctx, oldName)
   573  	if err != nil {
   574  		return nil, err
   575  	}
   576  
   577  	am, err := ae.Flush(ctx)
   578  	if err != nil {
   579  		return nil, err
   580  	}
   581  
   582  	return doltDevIndexSet{is.vrw, is.ns, am}, nil
   583  }