github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/doltdb/durable/index.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package durable 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "io" 22 "strings" 23 24 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 25 "github.com/dolthub/dolt/go/store/hash" 26 "github.com/dolthub/dolt/go/store/prolly" 27 "github.com/dolthub/dolt/go/store/prolly/shim" 28 "github.com/dolthub/dolt/go/store/prolly/tree" 29 "github.com/dolthub/dolt/go/store/types" 30 "github.com/dolthub/dolt/go/store/val" 31 ) 32 33 // Index represents a Table index. 34 type Index interface { 35 // HashOf returns the hash.Hash of this table. 36 HashOf() (hash.Hash, error) 37 38 // Count returns the cardinality of the index. 39 Count() (uint64, error) 40 41 // Empty returns true if the index is empty. 42 Empty() (bool, error) 43 44 // Format returns the types.NomsBinFormat for this index. 45 Format() *types.NomsBinFormat 46 47 // AddColumnToRows adds the column given to the rows data and returns the resulting rows. 48 // The |newCol| is present in |newSchema|. 49 AddColumnToRows(ctx context.Context, newCol string, newSchema schema.Schema) (Index, error) 50 51 // Returns the serialized bytes of the (top of the) index. 52 // Non-public. Used for flatbuffers Table persistence. 53 bytes() ([]byte, error) 54 55 DebugString(ctx context.Context, ns tree.NodeStore, schema schema.Schema) string 56 } 57 58 // IndexSet stores a collection secondary Indexes. 59 type IndexSet interface { 60 // HashOf returns the hash.Hash of this table. 61 HashOf() (hash.Hash, error) 62 63 // GetIndex gets an index from the set. 64 GetIndex(ctx context.Context, sch schema.Schema, name string) (Index, error) 65 66 // HasIndex returns true if an index with the specified name exists in the set. 67 HasIndex(ctx context.Context, name string) (bool, error) 68 69 // PutIndex puts an index into the set. 70 PutIndex(ctx context.Context, name string, idx Index) (IndexSet, error) 71 72 // PutNomsIndex puts a noms index into the set. 73 // todo(andy): this is a temporary stop-gap while abstracting types.Map 74 PutNomsIndex(ctx context.Context, name string, idx types.Map) (IndexSet, error) 75 76 // DropIndex removes an index from the set. 77 DropIndex(ctx context.Context, name string) (IndexSet, error) 78 79 // RenameIndex renames index |oldName| to |newName|. 80 RenameIndex(ctx context.Context, oldName, newName string) (IndexSet, error) 81 } 82 83 // RefFromIndex persists the Index and returns a types.Ref to it. 84 func RefFromIndex(ctx context.Context, vrw types.ValueReadWriter, idx Index) (types.Ref, error) { 85 switch idx.Format() { 86 case types.Format_LD_1: 87 return refFromNomsValue(ctx, vrw, idx.(nomsIndex).index) 88 89 case types.Format_DOLT: 90 b := shim.ValueFromMap(idx.(prollyIndex).index) 91 return refFromNomsValue(ctx, vrw, b) 92 93 default: 94 return types.Ref{}, errNbfUnknown 95 } 96 } 97 98 // indexFromRef reads the types.Ref from storage and returns the Index it points to. 99 func indexFromRef(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, sch schema.Schema, r types.Ref) (Index, error) { 100 return indexFromAddr(ctx, vrw, ns, sch, r.TargetHash()) 101 } 102 103 func indexFromAddr(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, sch schema.Schema, addr hash.Hash) (Index, error) { 104 v, err := vrw.ReadValue(ctx, addr) 105 if err != nil { 106 return nil, err 107 } 108 109 switch vrw.Format() { 110 case types.Format_LD_1: 111 return IndexFromNomsMap(v.(types.Map), vrw, ns), nil 112 113 case types.Format_DOLT: 114 pm, err := shim.MapFromValue(v, sch, ns) 115 if err != nil { 116 return nil, err 117 } 118 return IndexFromProllyMap(pm), nil 119 120 default: 121 return nil, errNbfUnknown 122 } 123 } 124 125 // NewEmptyIndex returns an index with no rows. 126 func NewEmptyIndex(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, sch schema.Schema) (Index, error) { 127 switch vrw.Format() { 128 case types.Format_LD_1: 129 m, err := types.NewMap(ctx, vrw) 130 if err != nil { 131 return nil, err 132 } 133 return IndexFromNomsMap(m, vrw, ns), nil 134 135 case types.Format_DOLT: 136 kd, vd := sch.GetMapDescriptors() 137 m, err := prolly.NewMapFromTuples(ctx, ns, kd, vd) 138 if err != nil { 139 return nil, err 140 } 141 return IndexFromProllyMap(m), nil 142 143 default: 144 return nil, errNbfUnknown 145 } 146 } 147 148 type nomsIndex struct { 149 index types.Map 150 vrw types.ValueReadWriter 151 ns tree.NodeStore 152 } 153 154 var _ Index = nomsIndex{} 155 156 func IterAllIndexes( 157 ctx context.Context, 158 sch schema.Schema, 159 set IndexSet, 160 cb func(name string, idx Index) error, 161 ) error { 162 for _, def := range sch.Indexes().AllIndexes() { 163 idx, err := set.GetIndex(ctx, sch, def.Name()) 164 if err != nil { 165 return err 166 } 167 if err = cb(def.Name(), idx); err != nil { 168 return err 169 } 170 } 171 return nil 172 } 173 174 // NomsMapFromIndex unwraps the Index and returns the underlying types.Map. 175 func NomsMapFromIndex(i Index) types.Map { 176 return i.(nomsIndex).index 177 } 178 179 // IndexFromNomsMap wraps a types.Map and returns it as an Index. 180 func IndexFromNomsMap(m types.Map, vrw types.ValueReadWriter, ns tree.NodeStore) Index { 181 return nomsIndex{ 182 index: m, 183 vrw: vrw, 184 ns: ns, 185 } 186 } 187 188 var _ Index = nomsIndex{} 189 190 // HashOf implements Index. 191 func (i nomsIndex) HashOf() (hash.Hash, error) { 192 return i.index.Hash(i.vrw.Format()) 193 } 194 195 // Count implements Index. 196 func (i nomsIndex) Count() (uint64, error) { 197 return i.index.Len(), nil 198 } 199 200 // Empty implements Index. 201 func (i nomsIndex) Empty() (bool, error) { 202 return i.index.Len() == 0, nil 203 } 204 205 // Format implements Index. 206 func (i nomsIndex) Format() *types.NomsBinFormat { 207 return i.vrw.Format() 208 } 209 210 // bytes implements Index. 211 func (i nomsIndex) bytes() ([]byte, error) { 212 rowschunk, err := types.EncodeValue(i.index, i.vrw.Format()) 213 if err != nil { 214 return nil, err 215 } 216 return rowschunk.Data(), nil 217 } 218 219 func (i nomsIndex) AddColumnToRows(ctx context.Context, newCol string, newSchema schema.Schema) (Index, error) { 220 // no-op for noms indexes because of tag-based mapping 221 return i, nil 222 } 223 224 func (i nomsIndex) DebugString(ctx context.Context, ns tree.NodeStore, schema schema.Schema) string { 225 panic("Not implemented") 226 } 227 228 type prollyIndex struct { 229 index prolly.Map 230 } 231 232 // ProllyMapFromIndex unwraps the Index and returns the underlying prolly.Map. 233 func ProllyMapFromIndex(i Index) prolly.Map { 234 return i.(prollyIndex).index 235 } 236 237 // IndexFromProllyMap wraps a prolly.Map and returns it as an Index. 238 func IndexFromProllyMap(m prolly.Map) Index { 239 return prollyIndex{index: m} 240 } 241 242 var _ Index = prollyIndex{} 243 244 // HashOf implements Index. 245 func (i prollyIndex) HashOf() (hash.Hash, error) { 246 return i.index.HashOf(), nil 247 } 248 249 // Count implements Index. 250 func (i prollyIndex) Count() (uint64, error) { 251 c, err := i.index.Count() 252 return uint64(c), err 253 } 254 255 // Empty implements Index. 256 func (i prollyIndex) Empty() (bool, error) { 257 c, err := i.index.Count() 258 if err != nil { 259 return false, err 260 } 261 return c == 0, nil 262 } 263 264 // Format implements Index. 265 func (i prollyIndex) Format() *types.NomsBinFormat { 266 return i.index.Format() 267 } 268 269 // bytes implements Index. 270 func (i prollyIndex) bytes() ([]byte, error) { 271 return []byte(shim.ValueFromMap(i.index).(types.SerialMessage)), nil 272 } 273 274 var _ Index = prollyIndex{} 275 276 func (i prollyIndex) AddColumnToRows(ctx context.Context, newCol string, newSchema schema.Schema) (Index, error) { 277 var last bool 278 colIdx, iCol := 0, 0 279 newSchema.GetNonPKCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { 280 last = false 281 if strings.ToLower(col.Name) == strings.ToLower(newCol) { 282 last = true 283 colIdx = iCol 284 } 285 iCol++ 286 return false, nil 287 }) 288 289 // If the column we added was last among non-primary key columns we can skip this step 290 if last { 291 return i, nil 292 } 293 294 // If not, then we have to iterate over this table's rows and update all the offsets for the new column 295 rowMap := ProllyMapFromIndex(i) 296 mutator := rowMap.Mutate() 297 298 iter, err := mutator.IterAll(ctx) 299 if err != nil { 300 return nil, err 301 } 302 303 // Re-write all the rows, inserting a zero-byte field in every value tuple 304 _, valDesc := rowMap.Descriptors() 305 b := val.NewTupleBuilder(valDesc) 306 for { 307 k, v, err := iter.Next(ctx) 308 if err == io.EOF { 309 b.Recycle() 310 break 311 } else if err != nil { 312 return nil, err 313 } 314 315 for i := 0; i < colIdx; i++ { 316 b.PutRaw(i, v.GetField(i)) 317 } 318 b.PutRaw(colIdx, nil) 319 for i := colIdx; i < v.Count(); i++ { 320 b.PutRaw(i+1, v.GetField(i)) 321 } 322 323 err = mutator.Put(ctx, k, b.BuildPermissive(sharePool)) 324 if err != nil { 325 return nil, err 326 } 327 328 b.Recycle() 329 } 330 331 newMap, err := mutator.Map(ctx) 332 if err != nil { 333 return nil, err 334 } 335 336 return IndexFromProllyMap(newMap), nil 337 } 338 339 func (i prollyIndex) DebugString(ctx context.Context, ns tree.NodeStore, schema schema.Schema) string { 340 var b bytes.Buffer 341 i.index.WalkNodes(ctx, func(ctx context.Context, nd tree.Node) error { 342 return tree.OutputProllyNode(ctx, &b, nd, ns, schema) 343 }) 344 return b.String() 345 } 346 347 // NewIndexSet returns an empty IndexSet. 348 func NewIndexSet(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore) (IndexSet, error) { 349 if vrw.Format().UsesFlatbuffers() { 350 emptyam, err := prolly.NewEmptyAddressMap(ns) 351 if err != nil { 352 return nil, err 353 } 354 return doltDevIndexSet{vrw, ns, emptyam}, nil 355 } 356 357 empty, err := types.NewMap(ctx, vrw) 358 if err != nil { 359 return nil, err 360 } 361 return nomsIndexSet{ 362 indexes: empty, 363 vrw: vrw, 364 }, nil 365 } 366 367 func NewIndexSetWithEmptyIndexes(ctx context.Context, vrw types.ValueReadWriter, ns tree.NodeStore, sch schema.Schema) (IndexSet, error) { 368 s, err := NewIndexSet(ctx, vrw, ns) 369 if err != nil { 370 return nil, err 371 } 372 for _, index := range sch.Indexes().AllIndexes() { 373 empty, err := NewEmptyIndex(ctx, vrw, ns, index.Schema()) 374 if err != nil { 375 return nil, err 376 } 377 s, err = s.PutIndex(ctx, index.Name(), empty) 378 if err != nil { 379 return nil, err 380 } 381 } 382 return s, nil 383 } 384 385 type nomsIndexSet struct { 386 indexes types.Map 387 vrw types.ValueReadWriter 388 ns tree.NodeStore 389 } 390 391 var _ IndexSet = nomsIndexSet{} 392 393 // HashOf implements IndexSet. 394 func (s nomsIndexSet) HashOf() (hash.Hash, error) { 395 return s.indexes.Hash(s.vrw.Format()) 396 } 397 398 // HasIndex implements IndexSet. 399 func (s nomsIndexSet) HasIndex(ctx context.Context, name string) (bool, error) { 400 _, ok, err := s.indexes.MaybeGet(ctx, types.String(name)) 401 if err != nil { 402 return false, err 403 } 404 return ok, nil 405 } 406 407 // GetIndex implements IndexSet. 408 func (s nomsIndexSet) GetIndex(ctx context.Context, sch schema.Schema, name string) (Index, error) { 409 v, ok, err := s.indexes.MaybeGet(ctx, types.String(name)) 410 if !ok { 411 err = fmt.Errorf("index %s not found in IndexSet", name) 412 } 413 if err != nil { 414 return nil, err 415 } 416 417 idx := sch.Indexes().GetByName(name) 418 if idx == nil { 419 return nil, fmt.Errorf("index not found: %s", name) 420 } 421 422 return indexFromRef(ctx, s.vrw, s.ns, idx.Schema(), v.(types.Ref)) 423 } 424 425 // PutNomsIndex implements IndexSet. 426 func (s nomsIndexSet) PutNomsIndex(ctx context.Context, name string, idx types.Map) (IndexSet, error) { 427 return s.PutIndex(ctx, name, IndexFromNomsMap(idx, s.vrw, s.ns)) 428 } 429 430 // PutIndex implements IndexSet. 431 func (s nomsIndexSet) PutIndex(ctx context.Context, name string, idx Index) (IndexSet, error) { 432 ref, err := RefFromIndex(ctx, s.vrw, idx) 433 if err != nil { 434 return nil, err 435 } 436 437 im, err := s.indexes.Edit().Set(types.String(name), ref).Map(ctx) 438 if err != nil { 439 return nil, err 440 } 441 442 return nomsIndexSet{indexes: im, vrw: s.vrw, ns: s.ns}, nil 443 } 444 445 // DropIndex implements IndexSet. 446 func (s nomsIndexSet) DropIndex(ctx context.Context, name string) (IndexSet, error) { 447 im, err := s.indexes.Edit().Remove(types.String(name)).Map(ctx) 448 if err != nil { 449 return nil, err 450 } 451 452 return nomsIndexSet{indexes: im, vrw: s.vrw, ns: s.ns}, nil 453 } 454 455 func (s nomsIndexSet) RenameIndex(ctx context.Context, oldName, newName string) (IndexSet, error) { 456 v, ok, err := s.indexes.MaybeGet(ctx, types.String(oldName)) 457 if !ok { 458 err = fmt.Errorf("index %s not found in IndexSet", oldName) 459 } 460 if err != nil { 461 return nil, err 462 } 463 464 edit := s.indexes.Edit() 465 im, err := edit.Set(types.String(newName), v).Remove(types.String(oldName)).Map(ctx) 466 if err != nil { 467 return nil, err 468 } 469 470 return nomsIndexSet{indexes: im, vrw: s.vrw, ns: s.ns}, nil 471 } 472 473 func mapFromIndexSet(ic IndexSet) types.Map { 474 return ic.(nomsIndexSet).indexes 475 } 476 477 type doltDevIndexSet struct { 478 vrw types.ValueReadWriter 479 ns tree.NodeStore 480 am prolly.AddressMap 481 } 482 483 var _ IndexSet = doltDevIndexSet{} 484 485 func (is doltDevIndexSet) HashOf() (hash.Hash, error) { 486 return is.am.HashOf(), nil 487 } 488 489 func (is doltDevIndexSet) HasIndex(ctx context.Context, name string) (bool, error) { 490 addr, err := is.am.Get(ctx, name) 491 if err != nil { 492 return false, err 493 } 494 if addr.IsEmpty() { 495 return false, nil 496 } 497 return true, nil 498 } 499 500 func (is doltDevIndexSet) GetIndex(ctx context.Context, sch schema.Schema, name string) (Index, error) { 501 addr, err := is.am.Get(ctx, name) 502 if err != nil { 503 return nil, err 504 } 505 if addr.IsEmpty() { 506 return nil, fmt.Errorf("index %s not found in IndexSet", name) 507 } 508 idx := sch.Indexes().GetByName(name) 509 if idx == nil { 510 return nil, fmt.Errorf("index schema not found: %s", name) 511 } 512 return indexFromAddr(ctx, is.vrw, is.ns, idx.Schema(), addr) 513 } 514 515 func (is doltDevIndexSet) PutIndex(ctx context.Context, name string, idx Index) (IndexSet, error) { 516 ref, err := RefFromIndex(ctx, is.vrw, idx) 517 if err != nil { 518 return nil, err 519 } 520 521 ae := is.am.Editor() 522 err = ae.Update(ctx, name, ref.TargetHash()) 523 if err != nil { 524 return nil, err 525 } 526 am, err := ae.Flush(ctx) 527 if err != nil { 528 return nil, err 529 } 530 531 return doltDevIndexSet{is.vrw, is.ns, am}, nil 532 } 533 534 func (is doltDevIndexSet) PutNomsIndex(ctx context.Context, name string, idx types.Map) (IndexSet, error) { 535 return is.PutIndex(ctx, name, IndexFromNomsMap(idx, is.vrw, is.ns)) 536 } 537 538 func (is doltDevIndexSet) DropIndex(ctx context.Context, name string) (IndexSet, error) { 539 ae := is.am.Editor() 540 err := ae.Delete(ctx, name) 541 if err != nil { 542 return nil, err 543 } 544 am, err := ae.Flush(ctx) 545 if err != nil { 546 return nil, err 547 } 548 return doltDevIndexSet{is.vrw, is.ns, am}, nil 549 } 550 551 func (is doltDevIndexSet) RenameIndex(ctx context.Context, oldName, newName string) (IndexSet, error) { 552 addr, err := is.am.Get(ctx, oldName) 553 if err != nil { 554 return nil, err 555 } 556 if addr.IsEmpty() { 557 return nil, fmt.Errorf("index %s not found in IndexSet", oldName) 558 } 559 newaddr, err := is.am.Get(ctx, newName) 560 if err != nil { 561 return nil, err 562 } 563 if !newaddr.IsEmpty() { 564 return nil, fmt.Errorf("index %s found in IndexSet when attempting to rename index", newName) 565 } 566 567 ae := is.am.Editor() 568 err = ae.Update(ctx, newName, addr) 569 if err != nil { 570 return nil, err 571 } 572 err = ae.Delete(ctx, oldName) 573 if err != nil { 574 return nil, err 575 } 576 577 am, err := ae.Flush(ctx) 578 if err != nil { 579 return nil, err 580 } 581 582 return doltDevIndexSet{is.vrw, is.ns, am}, nil 583 }