github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/history_table.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sqle 16 17 import ( 18 "context" 19 "fmt" 20 "io" 21 "strings" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 "github.com/dolthub/go-mysql-server/sql/expression" 25 "github.com/dolthub/go-mysql-server/sql/transform" 26 "github.com/dolthub/go-mysql-server/sql/types" 27 "github.com/dolthub/vitess/go/sqltypes" 28 29 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 30 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 31 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" 32 "github.com/dolthub/dolt/go/store/datas" 33 "github.com/dolthub/dolt/go/store/hash" 34 ) 35 36 const ( 37 // CommitHashCol is the name of the column containing the commit hash in the result set 38 CommitHashCol = "commit_hash" 39 40 // CommitterCol is the name of the column containing the committer in the result set 41 CommitterCol = "committer" 42 43 // CommitDateCol is the name of the column containing the commit date in the result set 44 CommitDateCol = "commit_date" 45 ) 46 47 var ( 48 // CommitHashColType is the sql type of the commit hash column 49 CommitHashColType = types.MustCreateString(sqltypes.Char, 32, sql.Collation_ascii_bin) 50 51 // CommitterColType is the sql type of the committer column 52 CommitterColType = types.MustCreateString(sqltypes.VarChar, 1024, sql.Collation_ascii_bin) 53 ) 54 55 var _ sql.Table = (*HistoryTable)(nil) 56 var _ sql.IndexAddressableTable = (*HistoryTable)(nil) 57 var _ sql.IndexedTable = (*HistoryTable)(nil) 58 var _ sql.PrimaryKeyTable = (*HistoryTable)(nil) 59 60 // HistoryTable is a system table that shows the history of rows over time 61 type HistoryTable struct { 62 doltTable *DoltTable 63 commitFilters []sql.Expression 64 cmItr doltdb.CommitItr 65 commitCheck doltdb.CommitFilter 66 indexLookup sql.IndexLookup 67 projectedCols []uint64 68 conversionWarningsByColumn map[string]struct{} 69 } 70 71 func (ht *HistoryTable) PrimaryKeySchema() sql.PrimaryKeySchema { 72 tableName := ht.Name() 73 basePkSch := ht.doltTable.PrimaryKeySchema() 74 newSch := sql.PrimaryKeySchema{ 75 Schema: make(sql.Schema, len(basePkSch.Schema), len(basePkSch.Schema)+3), 76 PkOrdinals: basePkSch.PkOrdinals, 77 } 78 79 // Returning a schema from a single table with multiple table names can confuse parts of the analyzer 80 for i, col := range basePkSch.Schema.Copy() { 81 col.Source = tableName 82 newSch.Schema[i] = col 83 } 84 85 newSch.Schema = append(newSch.Schema, 86 &sql.Column{ 87 Name: CommitHashCol, 88 Source: tableName, 89 Type: CommitHashColType, 90 }, 91 &sql.Column{ 92 Name: CommitterCol, 93 Source: tableName, 94 Type: CommitterColType, 95 }, 96 &sql.Column{ 97 Name: CommitDateCol, 98 Source: tableName, 99 Type: types.Datetime, 100 }, 101 ) 102 return newSch 103 } 104 105 func (ht *HistoryTable) GetIndexes(ctx *sql.Context) ([]sql.Index, error) { 106 tbl, err := ht.doltTable.DoltTable(ctx) 107 if err != nil { 108 return nil, err 109 } 110 111 // For index pushdown to work, we need to represent the indexes from the underlying table as belonging to this one 112 // Our results will also not be ordered, so we need to declare them as such 113 return index.DoltHistoryIndexesFromTable(ctx, ht.doltTable.db.Name(), ht.Name(), tbl, ht.doltTable.db.DbData().Ddb) 114 } 115 116 func (ht *HistoryTable) IndexedAccess(lookup sql.IndexLookup) sql.IndexedTable { 117 ret := *ht 118 return &ret 119 } 120 121 func (ht *HistoryTable) PreciseMatch() bool { 122 return false 123 } 124 125 func (ht *HistoryTable) LookupPartitions(ctx *sql.Context, lookup sql.IndexLookup) (sql.PartitionIter, error) { 126 if lookup.Index.ID() == index.CommitHashIndexId { 127 hs, ok := index.LookupToPointSelectStr(lookup) 128 if !ok { 129 return nil, fmt.Errorf("failed to parse commit hash lookup: %s", sql.DebugString(lookup.Ranges)) 130 } 131 132 var hashes []hash.Hash 133 var commits []*doltdb.Commit 134 var metas []*datas.CommitMeta 135 for _, hs := range hs { 136 if hs == doltdb.Working { 137 138 } 139 h, ok := hash.MaybeParse(hs) 140 if !ok { 141 continue 142 } 143 hashes = append(hashes, h) 144 145 cm, err := doltdb.HashToCommit(ctx, ht.doltTable.db.DbData().Ddb.ValueReadWriter(), ht.doltTable.db.DbData().Ddb.NodeStore(), h) 146 if err != nil { 147 return nil, err 148 } 149 commits = append(commits, cm) 150 151 meta, err := cm.GetCommitMeta(ctx) 152 if err != nil { 153 return nil, err 154 } 155 metas = append(metas, meta) 156 } 157 if len(hashes) == 0 { 158 return sql.PartitionsToPartitionIter(), nil 159 } 160 161 iter, err := ht.filterIter(ctx, doltdb.NewCommitSliceIter(commits, hashes)) 162 if err != nil { 163 return nil, err 164 } 165 return &commitPartitioner{cmItr: iter}, nil 166 167 } 168 ht.indexLookup = lookup 169 return ht.Partitions(ctx) 170 } 171 172 // NewHistoryTable creates a history table 173 func NewHistoryTable(table *DoltTable, ddb *doltdb.DoltDB, head *doltdb.Commit) sql.Table { 174 cmItr := doltdb.CommitItrForRoots(ddb, head) 175 176 // System tables don't currently use overridden schemas, so if one is set on |table|, 177 // clear it out to make sure we use the correct schema that matches the data being used. 178 if table.overriddenSchema != nil { 179 table.overriddenSchema = nil 180 } 181 182 h := &HistoryTable{ 183 doltTable: table, 184 cmItr: cmItr, 185 conversionWarningsByColumn: make(map[string]struct{}), 186 } 187 return h 188 } 189 190 // History table schema returns the corresponding history table schema for the base table given, which consists of 191 // the table's schema with 3 additional columns 192 func historyTableSchema(tableName string, table *DoltTable) sql.Schema { 193 baseSch := table.Schema().Copy() 194 newSch := make(sql.Schema, len(baseSch), len(baseSch)+3) 195 196 for i, col := range baseSch { 197 // Returning a schema from a single table with multiple table names can confuse parts of the analyzer 198 col.Source = tableName 199 newSch[i] = col 200 } 201 202 newSch = append(newSch, 203 &sql.Column{ 204 Name: CommitHashCol, 205 Source: tableName, 206 Type: CommitHashColType, 207 }, 208 &sql.Column{ 209 Name: CommitterCol, 210 Source: tableName, 211 Type: CommitterColType, 212 }, 213 &sql.Column{ 214 Name: CommitDateCol, 215 Source: tableName, 216 Type: types.Datetime, 217 }, 218 ) 219 return newSch 220 } 221 222 func (ht *HistoryTable) filterIter(ctx *sql.Context, iter doltdb.CommitItr) (doltdb.CommitItr, error) { 223 if len(ht.commitFilters) > 0 { 224 r, err := ht.doltTable.db.GetRoot(ctx) 225 if err != nil { 226 return doltdb.FilteringCommitItr{}, err 227 } 228 h, err := r.HashOf() 229 if err != nil { 230 return doltdb.FilteringCommitItr{}, err 231 } 232 filters := substituteWorkingHash(h, ht.commitFilters) 233 check, err := commitFilterForExprs(ctx, filters) 234 if err != nil { 235 return doltdb.FilteringCommitItr{}, err 236 } 237 238 return doltdb.NewFilteringCommitItr(iter, check), nil 239 } 240 return iter, nil 241 } 242 243 func substituteWorkingHash(h hash.Hash, f []sql.Expression) []sql.Expression { 244 ret := make([]sql.Expression, len(f)) 245 for i, e := range f { 246 ret[i], _, _ = transform.Expr(e, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) { 247 switch e := e.(type) { 248 case *expression.Literal: 249 if e.Value() == doltdb.Working { 250 return expression.NewLiteral(h.String(), e.Type()), transform.NewTree, nil 251 } 252 default: 253 } 254 return e, transform.SameTree, nil 255 }) 256 } 257 return ret 258 } 259 260 func commitFilterForExprs(ctx *sql.Context, filters []sql.Expression) (doltdb.CommitFilter, error) { 261 filters = transformFilters(ctx, filters...) 262 263 return func(ctx context.Context, h hash.Hash, optCmt *doltdb.OptionalCommit) (filterOut bool, err error) { 264 cm, ok := optCmt.ToCommit() 265 if !ok { 266 return false, nil // NM4 TEST. 267 } 268 269 meta, err := cm.GetCommitMeta(ctx) 270 271 if err != nil { 272 return false, err 273 } 274 275 sc := sql.NewContext(ctx) 276 r := sql.Row{h.String(), meta.Name, meta.Time()} 277 278 for _, filter := range filters { 279 res, err := filter.Eval(sc, r) 280 if err != nil { 281 return false, err 282 } 283 b, ok := res.(bool) 284 if ok && !b { 285 return true, nil 286 } 287 } 288 289 return false, err 290 }, nil 291 } 292 293 func transformFilters(ctx *sql.Context, filters ...sql.Expression) []sql.Expression { 294 for i := range filters { 295 filters[i], _, _ = transform.Expr(filters[i], func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) { 296 gf, ok := e.(*expression.GetField) 297 if !ok { 298 return e, transform.SameTree, nil 299 } 300 switch gf.Name() { 301 case CommitHashCol: 302 return gf.WithIndex(0), transform.NewTree, nil 303 case CommitterCol: 304 return gf.WithIndex(1), transform.NewTree, nil 305 case CommitDateCol: 306 return gf.WithIndex(2), transform.NewTree, nil 307 default: 308 return gf, transform.SameTree, nil 309 } 310 }) 311 } 312 return filters 313 } 314 315 func (ht *HistoryTable) WithProjections(colNames []string) sql.Table { 316 nt := *ht 317 nt.projectedCols = make([]uint64, len(colNames)) 318 nonHistoryCols := make([]string, 0) 319 cols := ht.doltTable.sch.GetAllCols() 320 for i := range colNames { 321 col, ok := cols.LowerNameToCol[strings.ToLower(colNames[i])] 322 if !ok { 323 switch colNames[i] { 324 case CommitHashCol: 325 nt.projectedCols[i] = schema.HistoryCommitHashTag 326 case CommitterCol: 327 nt.projectedCols[i] = schema.HistoryCommitterTag 328 case CommitDateCol: 329 nt.projectedCols[i] = schema.HistoryCommitDateTag 330 default: 331 } 332 } else { 333 nt.projectedCols[i] = col.Tag 334 nonHistoryCols = append(nonHistoryCols, col.Name) 335 } 336 } 337 projectedTable := ht.doltTable.WithProjections(nonHistoryCols) 338 nt.doltTable = projectedTable.(*DoltTable) 339 return &nt 340 } 341 342 func (ht *HistoryTable) Projections() []string { 343 // The semantics of nil v. zero length is important when displaying explain plans 344 if ht.projectedCols == nil { 345 return nil 346 } 347 348 names := make([]string, len(ht.projectedCols)) 349 cols := ht.doltTable.sch.GetAllCols() 350 for i := range ht.projectedCols { 351 if col, ok := cols.TagToCol[ht.projectedCols[i]]; ok { 352 names[i] = col.Name 353 } else { 354 switch ht.projectedCols[i] { 355 case schema.HistoryCommitHashTag: 356 names[i] = CommitHashCol 357 case schema.HistoryCommitterTag: 358 names[i] = CommitterCol 359 case schema.HistoryCommitDateTag: 360 names[i] = CommitDateCol 361 default: 362 } 363 } 364 } 365 return names 366 } 367 368 func (ht *HistoryTable) ProjectedTags() []uint64 { 369 if ht.projectedCols != nil { 370 return ht.projectedCols 371 } 372 // Otherwise (no projection), return the tags for the underlying table with the extra meta tags appended 373 return append(ht.doltTable.ProjectedTags(), schema.HistoryCommitHashTag, schema.HistoryCommitterTag, schema.HistoryCommitDateTag) 374 } 375 376 // Name returns the name of the history table 377 func (ht *HistoryTable) Name() string { 378 return doltdb.DoltHistoryTablePrefix + ht.doltTable.Name() 379 } 380 381 // String returns the name of the history table 382 func (ht *HistoryTable) String() string { 383 return doltdb.DoltHistoryTablePrefix + ht.doltTable.Name() 384 } 385 386 // Schema returns the schema for the history table 387 func (ht *HistoryTable) Schema() sql.Schema { 388 sch := historyTableSchema(ht.Name(), ht.doltTable) 389 if ht.projectedCols == nil { 390 return sch 391 } 392 393 projectedSch := make(sql.Schema, len(ht.projectedCols)) 394 allCols := ht.doltTable.sch.GetAllCols() 395 for i, t := range ht.projectedCols { 396 if col, ok := allCols.TagToCol[t]; ok { 397 idx := sch.IndexOfColName(col.Name) 398 projectedSch[i] = sch[idx] 399 } else if t == schema.HistoryCommitterTag { 400 projectedSch[i] = &sql.Column{ 401 Name: CommitterCol, 402 Source: ht.Name(), 403 Type: CommitterColType, 404 } 405 } else if t == schema.HistoryCommitHashTag { 406 projectedSch[i] = &sql.Column{ 407 Name: CommitHashCol, 408 Source: ht.Name(), 409 Type: CommitHashColType, 410 } 411 } else if t == schema.HistoryCommitDateTag { 412 projectedSch[i] = &sql.Column{ 413 Name: CommitDateCol, 414 Source: ht.Name(), 415 Type: types.Datetime, 416 } 417 } else { 418 panic("column not found") 419 } 420 } 421 return projectedSch 422 } 423 424 // Collation implements the sql.Table interface. 425 func (ht *HistoryTable) Collation() sql.CollationID { 426 return sql.CollationID(ht.doltTable.sch.GetCollation()) 427 } 428 429 // Partitions returns a PartitionIter which will be used in getting partitions each of which is used to create RowIter. 430 func (ht *HistoryTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) { 431 iter, err := ht.filterIter(ctx, ht.cmItr) 432 if err != nil { 433 return nil, err 434 } 435 return &commitPartitioner{cmItr: iter}, nil 436 } 437 438 // PartitionRows takes a partition and returns a row iterator for that partition 439 func (ht *HistoryTable) PartitionRows(ctx *sql.Context, part sql.Partition) (sql.RowIter, error) { 440 cp := part.(*commitPartition) 441 return ht.newRowItrForTableAtCommit(ctx, ht.doltTable, cp.h, cp.cm, ht.indexLookup, ht.ProjectedTags()) 442 } 443 444 // commitPartition is a single commit 445 type commitPartition struct { 446 h hash.Hash 447 cm *doltdb.Commit 448 } 449 450 // Key returns the hash of the commit for this partition which is used as the partition key 451 func (cp *commitPartition) Key() []byte { 452 return cp.h[:] 453 } 454 455 // commitPartitioner creates partitions from a CommitItr 456 type commitPartitioner struct { 457 cmItr doltdb.CommitItr 458 } 459 460 // Next returns the next partition and nil, io.EOF when complete 461 func (cp commitPartitioner) Next(ctx *sql.Context) (sql.Partition, error) { 462 h, optCmt, err := cp.cmItr.Next(ctx) 463 if err != nil { 464 return nil, err 465 } 466 cm, ok := optCmt.ToCommit() 467 if !ok { 468 return nil, io.EOF 469 } 470 471 return &commitPartition{h, cm}, nil 472 } 473 474 // Close closes the partitioner 475 func (cp commitPartitioner) Close(ctx *sql.Context) error { 476 cp.cmItr.Reset(ctx) 477 return nil 478 } 479 480 type historyIter struct { 481 table sql.Table 482 tablePartitions sql.PartitionIter 483 currPart sql.RowIter 484 rowConverter func(row sql.Row) sql.Row 485 nonExistentTable bool 486 } 487 488 func (ht *HistoryTable) newRowItrForTableAtCommit(ctx *sql.Context, table *DoltTable, h hash.Hash, cm *doltdb.Commit, lookup sql.IndexLookup, projections []uint64) (*historyIter, error) { 489 targetSchema := table.Schema() 490 491 root, err := cm.GetRootValue(ctx) 492 if err != nil { 493 return nil, err 494 } 495 496 meta, err := cm.GetCommitMeta(ctx) 497 if err != nil { 498 return nil, err 499 } 500 501 _, _, ok, err := doltdb.GetTableInsensitive(ctx, root, table.Name()) 502 if err != nil { 503 return nil, err 504 } 505 if !ok { 506 return &historyIter{nonExistentTable: true}, nil 507 } 508 509 lockedTable, err := table.LockedToRoot(ctx, root) 510 if err != nil { 511 return nil, err 512 } 513 514 var partIter sql.PartitionIter 515 var histTable sql.Table 516 if !lookup.IsEmpty() { 517 indexes, err := lockedTable.GetIndexes(ctx) 518 if err != nil { 519 return nil, err 520 } 521 for _, idx := range indexes { 522 if idx.ID() == lookup.Index.ID() { 523 histTable = lockedTable.IndexedAccess(lookup) 524 if err != nil { 525 return nil, err 526 } 527 528 if histTable != nil { 529 newLookup := sql.IndexLookup{Index: idx, Ranges: lookup.Ranges} 530 partIter, err = histTable.(sql.IndexedTable).LookupPartitions(ctx, newLookup) 531 if err != nil { 532 return nil, err 533 } 534 break 535 } 536 } 537 } 538 } 539 if histTable == nil { 540 histTable = lockedTable 541 partIter, err = lockedTable.Partitions(ctx) 542 if err != nil { 543 return nil, err 544 } 545 } 546 547 converter := ht.rowConverter(ctx, lockedTable.Schema(), targetSchema, h, meta, projections) 548 return &historyIter{ 549 table: histTable, 550 tablePartitions: partIter, 551 rowConverter: converter, 552 }, nil 553 } 554 555 // Next retrieves the next row. It will return io.EOF if it's the last row. After retrieving the last row, Close 556 // will be automatically closed. 557 func (i *historyIter) Next(ctx *sql.Context) (sql.Row, error) { 558 if i.nonExistentTable { 559 return nil, io.EOF 560 } 561 562 if i.currPart == nil { 563 nextPart, err := i.tablePartitions.Next(ctx) 564 if err != nil { 565 return nil, err 566 } 567 568 rowIter, err := i.table.PartitionRows(ctx, nextPart) 569 if err != nil { 570 return nil, err 571 } 572 573 i.currPart = rowIter 574 return i.Next(ctx) 575 } 576 577 r, err := i.currPart.Next(ctx) 578 if err == io.EOF { 579 i.currPart = nil 580 return i.Next(ctx) 581 } else if err != nil { 582 return nil, err 583 } 584 585 return i.rowConverter(r), nil 586 } 587 588 func (i *historyIter) Close(ctx *sql.Context) error { 589 return nil 590 } 591 592 // rowConverter returns a function that converts a row to another schema for the dolt_history system tables. |srcSchema| 593 // describes the incoming row, |targetSchema| describes the desired row schema, and |projections| controls which fields 594 // are including the returned row. The hash |h| and commit metadata |meta| are used to augment the row with custom 595 // fields for the dolt_history table to return commit metadata. 596 func (ht *HistoryTable) rowConverter(ctx *sql.Context, srcSchema, targetSchema sql.Schema, h hash.Hash, meta *datas.CommitMeta, projections []uint64) func(row sql.Row) sql.Row { 597 srcToTarget := make(map[int]int) 598 for i, col := range targetSchema { 599 srcIdx := srcSchema.IndexOfColName(col.Name) 600 if srcIdx >= 0 { 601 // only add a conversion if the type is the same 602 // TODO: we could do a projection to convert between types in some cases 603 if srcSchema[srcIdx].Type.Equals(targetSchema[i].Type) { 604 srcToTarget[srcIdx] = i 605 } else { 606 if _, alreadyWarned := ht.conversionWarningsByColumn[col.Name]; !alreadyWarned { 607 ctx.Warn(1246, "Unable to convert field %s in historical rows because its type (%s) doesn't match "+ 608 "current schema's type (%s)", col.Name, col.Type.String(), srcSchema[srcIdx].Type.String()) 609 ht.conversionWarningsByColumn[col.Name] = struct{}{} 610 } 611 } 612 } 613 } 614 615 return func(row sql.Row) sql.Row { 616 r := make(sql.Row, len(projections)) 617 for i, t := range projections { 618 switch t { 619 case schema.HistoryCommitterTag: 620 r[i] = meta.Name 621 case schema.HistoryCommitDateTag: 622 r[i] = meta.Time() 623 case schema.HistoryCommitHashTag: 624 r[i] = h.String() 625 default: 626 if j, ok := srcToTarget[i]; ok { 627 r[j] = row[i] 628 } 629 } 630 } 631 return r 632 } 633 }