github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/sqle/dtables/diff_table.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package dtables 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 22 "github.com/dolthub/go-mysql-server/sql" 23 "github.com/dolthub/go-mysql-server/sql/parse" 24 25 "github.com/dolthub/dolt/go/libraries/doltcore/diff" 26 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 27 "github.com/dolthub/dolt/go/libraries/doltcore/row" 28 "github.com/dolthub/dolt/go/libraries/doltcore/rowconv" 29 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 30 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/expreval" 31 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil" 32 "github.com/dolthub/dolt/go/libraries/utils/set" 33 "github.com/dolthub/dolt/go/store/hash" 34 "github.com/dolthub/dolt/go/store/types" 35 ) 36 37 const ( 38 toCommit = "to_commit" 39 fromCommit = "from_commit" 40 toCommitDate = "to_commit_date" 41 fromCommitDate = "from_commit_date" 42 43 diffTypeColName = "diff_type" 44 diffTypeAdded = "added" 45 diffTypeModified = "modified" 46 diffTypeRemoved = "removed" 47 ) 48 49 func toNamer(name string) string { 50 return diff.To + "_" + name 51 } 52 53 func fromNamer(name string) string { 54 return diff.From + "_" + name 55 } 56 57 var _ sql.Table = (*DiffTable)(nil) 58 var _ sql.FilteredTable = (*DiffTable)(nil) 59 60 type DiffTable struct { 61 name string 62 ddb *doltdb.DoltDB 63 workingRoot *doltdb.RootValue 64 head *doltdb.Commit 65 66 ss *schema.SuperSchema 67 joiner *rowconv.Joiner 68 sqlSch sql.Schema 69 partitionFilters []sql.Expression 70 rowFilters []sql.Expression 71 } 72 73 func NewDiffTable(ctx *sql.Context, tblName string, ddb *doltdb.DoltDB, root *doltdb.RootValue, head *doltdb.Commit) (sql.Table, error) { 74 diffTblName := doltdb.DoltDiffTablePrefix + tblName 75 76 ss, err := calcSuperSchema(ctx, root, tblName) 77 if err != nil { 78 return nil, err 79 } 80 81 _ = ss.AddColumn(schema.NewColumn("commit", schema.DiffCommitTag, types.StringKind, false)) 82 _ = ss.AddColumn(schema.NewColumn("commit_date", schema.DiffCommitDateTag, types.TimestampKind, false)) 83 84 sch, err := ss.GenerateSchema() 85 86 if err != nil { 87 return nil, err 88 } 89 90 if sch.GetAllCols().Size() <= 1 { 91 return nil, sql.ErrTableNotFound.New(diffTblName) 92 } 93 94 j, err := rowconv.NewJoiner( 95 []rowconv.NamedSchema{{Name: diff.To, Sch: sch}, {Name: diff.From, Sch: sch}}, 96 map[string]rowconv.ColNamingFunc{ 97 diff.To: toNamer, 98 diff.From: fromNamer, 99 }) 100 101 if err != nil { 102 return nil, err 103 } 104 105 sqlSch, err := sqlutil.FromDoltSchema(diffTblName, j.GetSchema()) 106 107 if err != nil { 108 return nil, err 109 } 110 111 // parses to literal, no need to pass through analyzer 112 defaultVal, err := parse.StringToColumnDefaultValue(ctx, fmt.Sprintf(`"%s"`, diffTypeModified)) 113 if err != nil { 114 return nil, err 115 } 116 117 sqlSch = append(sqlSch, &sql.Column{ 118 Name: diffTypeColName, 119 Type: sql.Text, 120 Default: defaultVal, 121 Nullable: false, 122 Source: diffTblName, 123 }) 124 125 return &DiffTable{ 126 name: tblName, 127 ddb: ddb, 128 workingRoot: root, 129 head: head, 130 ss: ss, 131 joiner: j, 132 sqlSch: sqlSch, 133 partitionFilters: nil, 134 rowFilters: nil, 135 }, nil 136 } 137 138 func (dt *DiffTable) Name() string { 139 return doltdb.DoltDiffTablePrefix + dt.name 140 } 141 142 func (dt *DiffTable) String() string { 143 return doltdb.DoltDiffTablePrefix + dt.name 144 } 145 146 func (dt *DiffTable) Schema() sql.Schema { 147 return dt.sqlSch 148 } 149 150 func (dt *DiffTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) { 151 cmItr := doltdb.CommitItrForRoots(dt.ddb, dt.head) 152 153 sf, err := selectFuncForFilters(dt.ddb.Format(), dt.partitionFilters) 154 155 if err != nil { 156 return nil, err 157 } 158 159 return newDiffPartitions(ctx, cmItr, dt.workingRoot, dt.name, sf) 160 } 161 162 var partitionFilterCols = set.NewStrSet([]string{toCommit, fromCommit, toCommitDate, fromCommitDate}) 163 164 func splitPartitionFilters(filters []sql.Expression) (commitFilters, rowFilters []sql.Expression) { 165 return splitFilters(filters, getColumnFilterCheck(partitionFilterCols)) 166 } 167 168 // HandledFilters returns the list of filters that will be handled by the table itself 169 func (dt *DiffTable) HandledFilters(filters []sql.Expression) []sql.Expression { 170 dt.partitionFilters, dt.rowFilters = splitPartitionFilters(filters) 171 return dt.partitionFilters 172 } 173 174 // Filters returns the list of filters that are applied to this table. 175 func (dt *DiffTable) Filters() []sql.Expression { 176 return dt.partitionFilters 177 } 178 179 // WithFilters returns a new sql.Table instance with the filters applied 180 func (dt *DiffTable) WithFilters(ctx *sql.Context, filters []sql.Expression) sql.Table { 181 if dt.partitionFilters == nil { 182 dt.partitionFilters, dt.rowFilters = splitPartitionFilters(filters) 183 } 184 185 return dt 186 } 187 188 func (dt *DiffTable) PartitionRows(ctx *sql.Context, part sql.Partition) (sql.RowIter, error) { 189 dp := part.(diffPartition) 190 return dp.getRowIter(ctx, dt.ddb, dt.ss, dt.joiner) 191 } 192 193 func tableData(ctx *sql.Context, tbl *doltdb.Table, ddb *doltdb.DoltDB) (types.Map, schema.Schema, error) { 194 var data types.Map 195 var err error 196 if tbl == nil { 197 data, err = types.NewMap(ctx, ddb.ValueReadWriter()) 198 if err != nil { 199 return types.EmptyMap, nil, err 200 } 201 } else { 202 data, err = tbl.GetRowData(ctx) 203 if err != nil { 204 return types.EmptyMap, nil, err 205 } 206 } 207 208 var sch schema.Schema 209 if tbl == nil { 210 sch = schema.EmptySchema 211 } else { 212 sch, err = tbl.GetSchema(ctx) 213 214 if err != nil { 215 return types.EmptyMap, nil, err 216 } 217 } 218 219 return data, sch, nil 220 } 221 222 var _ sql.RowIter = (*diffRowItr)(nil) 223 224 type diffRowItr struct { 225 ad diff.RowDiffer 226 diffSrc *diff.RowDiffSource 227 joiner *rowconv.Joiner 228 sch schema.Schema 229 fromCommitInfo commitInfo 230 toCommitInfo commitInfo 231 } 232 233 type commitInfo struct { 234 name types.String 235 date *types.Timestamp 236 nameTag uint64 237 dateTag uint64 238 } 239 240 // Next returns the next row 241 func (itr *diffRowItr) Next() (sql.Row, error) { 242 r, _, err := itr.diffSrc.NextDiff() 243 244 if err != nil { 245 return nil, err 246 } 247 248 toAndFromRows, err := itr.joiner.Split(r) 249 if err != nil { 250 return nil, err 251 } 252 _, hasTo := toAndFromRows[diff.To] 253 _, hasFrom := toAndFromRows[diff.From] 254 255 r, err = r.SetColVal(itr.toCommitInfo.nameTag, types.String(itr.toCommitInfo.name), itr.sch) 256 if err != nil { 257 return nil, err 258 } 259 260 r, err = r.SetColVal(itr.fromCommitInfo.nameTag, types.String(itr.fromCommitInfo.name), itr.sch) 261 262 if err != nil { 263 return nil, err 264 } 265 266 if itr.toCommitInfo.date != nil { 267 r, err = r.SetColVal(itr.toCommitInfo.dateTag, *itr.toCommitInfo.date, itr.sch) 268 269 if err != nil { 270 return nil, err 271 } 272 } 273 274 if itr.fromCommitInfo.date != nil { 275 r, err = r.SetColVal(itr.fromCommitInfo.dateTag, *itr.fromCommitInfo.date, itr.sch) 276 277 if err != nil { 278 return nil, err 279 } 280 } 281 282 sqlRow, err := sqlutil.DoltRowToSqlRow(r, itr.sch) 283 284 if err != nil { 285 return nil, err 286 } 287 288 if hasTo && hasFrom { 289 sqlRow = append(sqlRow, diffTypeModified) 290 } else if hasTo && !hasFrom { 291 sqlRow = append(sqlRow, diffTypeAdded) 292 } else { 293 sqlRow = append(sqlRow, diffTypeRemoved) 294 } 295 296 return sqlRow, nil 297 } 298 299 // Close closes the iterator 300 func (itr *diffRowItr) Close(*sql.Context) (err error) { 301 defer itr.ad.Close() 302 defer func() { 303 closeErr := itr.diffSrc.Close() 304 305 if err == nil { 306 err = closeErr 307 } 308 }() 309 310 return nil 311 } 312 313 type tblInfoAtCommit struct { 314 name string 315 date *types.Timestamp 316 tbl *doltdb.Table 317 tblHash hash.Hash 318 } 319 320 // data partitioned into pairs of table states which get compared 321 type diffPartition struct { 322 to *doltdb.Table 323 from *doltdb.Table 324 toName string 325 fromName string 326 toDate *types.Timestamp 327 fromDate *types.Timestamp 328 } 329 330 func (dp diffPartition) Key() []byte { 331 return []byte(dp.toName + dp.fromName) 332 } 333 334 func (dp diffPartition) getRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, ss *schema.SuperSchema, joiner *rowconv.Joiner) (sql.RowIter, error) { 335 fromData, fromSch, err := tableData(ctx, dp.from, ddb) 336 337 if err != nil { 338 return nil, err 339 } 340 341 toData, toSch, err := tableData(ctx, dp.to, ddb) 342 343 if err != nil { 344 return nil, err 345 } 346 347 vrw := types.NewMemoryValueStore() // We're displaying here, so all values that require a VRW will use an internal one 348 349 fromConv, err := rowConvForSchema(ctx, vrw, ss, fromSch) 350 351 if err != nil { 352 return nil, err 353 } 354 355 toConv, err := rowConvForSchema(ctx, vrw, ss, toSch) 356 357 if err != nil { 358 return nil, err 359 } 360 361 sch := joiner.GetSchema() 362 toCol, _ := sch.GetAllCols().GetByName(toCommit) 363 fromCol, _ := sch.GetAllCols().GetByName(fromCommit) 364 toDateCol, _ := sch.GetAllCols().GetByName(toCommitDate) 365 fromDateCol, _ := sch.GetAllCols().GetByName(fromCommitDate) 366 367 fromCmInfo := commitInfo{types.String(dp.fromName), dp.fromDate, fromCol.Tag, fromDateCol.Tag} 368 toCmInfo := commitInfo{types.String(dp.toName), dp.toDate, toCol.Tag, toDateCol.Tag} 369 370 rd := diff.NewRowDiffer(ctx, fromSch, toSch, 1024) 371 rd.Start(ctx, fromData, toData) 372 373 src := diff.NewRowDiffSource(rd, joiner) 374 src.AddInputRowConversion(fromConv, toConv) 375 376 return &diffRowItr{ 377 ad: rd, 378 diffSrc: src, 379 joiner: joiner, 380 sch: joiner.GetSchema(), 381 fromCommitInfo: fromCmInfo, 382 toCommitInfo: toCmInfo, 383 }, nil 384 } 385 386 type partitionSelectFunc func(*sql.Context, diffPartition) (bool, error) 387 388 func selectFuncForFilters(nbf *types.NomsBinFormat, filters []sql.Expression) (partitionSelectFunc, error) { 389 const ( 390 toCommitTag uint64 = iota 391 fromCommitTag 392 toCommitDateTag 393 fromCommitDateTag 394 ) 395 396 colColl := schema.NewColCollection( 397 schema.NewColumn(toCommit, toCommitTag, types.StringKind, false), 398 schema.NewColumn(fromCommit, fromCommitTag, types.StringKind, false), 399 schema.NewColumn(toCommitDate, toCommitDateTag, types.TimestampKind, false), 400 schema.NewColumn(fromCommitDate, fromCommitDateTag, types.TimestampKind, false), 401 ) 402 403 expFunc, err := expreval.ExpressionFuncFromSQLExpressions(nbf, schema.UnkeyedSchemaFromCols(colColl), filters) 404 405 if err != nil { 406 return nil, err 407 } 408 409 return func(ctx *sql.Context, partition diffPartition) (bool, error) { 410 vals := row.TaggedValues{ 411 toCommitTag: types.String(partition.toName), 412 fromCommitTag: types.String(partition.fromName), 413 } 414 415 if partition.toDate != nil { 416 vals[toCommitDateTag] = *partition.toDate 417 } 418 419 if partition.fromDate != nil { 420 vals[fromCommitDateTag] = *partition.fromDate 421 } 422 423 return expFunc(ctx, vals) 424 }, nil 425 } 426 427 var _ sql.PartitionIter = &diffPartitions{} 428 429 // collection of paratitions. Implements PartitionItr 430 type diffPartitions struct { 431 // TODO change the sql.PartitionIterator interface so that Next receives the context rather than caching it. 432 ctx *sql.Context 433 tblName string 434 cmItr doltdb.CommitItr 435 cmHashToTblInfo map[hash.Hash]tblInfoAtCommit 436 selectFunc partitionSelectFunc 437 } 438 439 func newDiffPartitions(ctx *sql.Context, cmItr doltdb.CommitItr, wr *doltdb.RootValue, tblName string, selectFunc partitionSelectFunc) (*diffPartitions, error) { 440 t, exactName, ok, err := wr.GetTableInsensitive(ctx, tblName) 441 442 if err != nil { 443 return nil, err 444 } 445 446 if !ok { 447 return nil, errors.New(fmt.Sprintf("table: %s does not exist", tblName)) 448 } 449 450 wrTblHash, _, err := wr.GetTableHash(ctx, exactName) 451 452 if err != nil { 453 return nil, err 454 } 455 456 cmHash, _, err := cmItr.Next(ctx) 457 458 if err != nil { 459 return nil, err 460 } 461 462 cmHashToTblInfo := make(map[hash.Hash]tblInfoAtCommit) 463 cmHashToTblInfo[cmHash] = tblInfoAtCommit{"WORKING", nil, t, wrTblHash} 464 465 err = cmItr.Reset(ctx) 466 467 if err != nil { 468 return nil, err 469 } 470 471 return &diffPartitions{ 472 ctx: ctx, 473 tblName: tblName, 474 cmItr: cmItr, 475 cmHashToTblInfo: cmHashToTblInfo, 476 selectFunc: selectFunc, 477 }, nil 478 } 479 480 // called in a commit iteration loop. Adds partitions when it finds a commit and it's parent that have different values 481 // for the hash of the table being looked at. 482 func (dp *diffPartitions) processCommit(ctx *sql.Context, cmHash hash.Hash, cm *doltdb.Commit, root *doltdb.RootValue, tbl *doltdb.Table) (*diffPartition, error) { 483 tblHash, _, err := root.GetTableHash(ctx, dp.tblName) 484 485 if err != nil { 486 return nil, err 487 } 488 489 toInfoForCommit := dp.cmHashToTblInfo[cmHash] 490 cmHashStr := cmHash.String() 491 meta, err := cm.GetCommitMeta() 492 493 if err != nil { 494 return nil, err 495 } 496 497 ts := types.Timestamp(meta.Time()) 498 499 var nextPartition *diffPartition 500 if tblHash != toInfoForCommit.tblHash { 501 partition := diffPartition{toInfoForCommit.tbl, tbl, toInfoForCommit.name, cmHashStr, toInfoForCommit.date, &ts} 502 selected, err := dp.selectFunc(ctx, partition) 503 504 if err != nil { 505 return nil, err 506 } 507 508 if selected { 509 nextPartition = &partition 510 } 511 } 512 513 newInfo := tblInfoAtCommit{cmHashStr, &ts, tbl, tblHash} 514 parentHashes, err := cm.ParentHashes(ctx) 515 516 if err != nil { 517 return nil, err 518 } 519 520 for _, h := range parentHashes { 521 dp.cmHashToTblInfo[h] = newInfo 522 } 523 524 return nextPartition, nil 525 } 526 527 func (dp *diffPartitions) Next() (sql.Partition, error) { 528 for { 529 cmHash, cm, err := dp.cmItr.Next(dp.ctx) 530 531 if err != nil { 532 return nil, err 533 } 534 535 root, err := cm.GetRootValue() 536 537 if err != nil { 538 return nil, err 539 } 540 541 tbl, _, _, err := root.GetTableInsensitive(dp.ctx, dp.tblName) 542 543 if err != nil { 544 return nil, err 545 } 546 547 next, err := dp.processCommit(dp.ctx, cmHash, cm, root, tbl) 548 549 if err != nil { 550 return nil, err 551 } 552 553 if next != nil { 554 return *next, nil 555 } 556 } 557 } 558 559 func (dp *diffPartitions) Close(*sql.Context) error { 560 return nil 561 } 562 563 // creates a RowConverter for transforming rows with the the given schema to this super schema. 564 func rowConvForSchema(ctx context.Context, vrw types.ValueReadWriter, ss *schema.SuperSchema, sch schema.Schema) (*rowconv.RowConverter, error) { 565 if schema.SchemasAreEqual(sch, schema.EmptySchema) { 566 return rowconv.IdentityConverter, nil 567 } 568 569 inNameToOutName, err := ss.NameMapForSchema(sch) 570 571 if err != nil { 572 return nil, err 573 } 574 575 ssch, err := ss.GenerateSchema() 576 577 if err != nil { 578 return nil, err 579 } 580 581 fm, err := rowconv.NameMapping(sch, ssch, inNameToOutName) 582 583 if err != nil { 584 return nil, err 585 } 586 587 return rowconv.NewRowConverter(ctx, vrw, fm) 588 }