github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/dtables/diff_iter.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package dtables 16 17 import ( 18 "context" 19 "io" 20 "time" 21 22 "github.com/dolthub/go-mysql-server/sql" 23 24 "github.com/dolthub/dolt/go/libraries/doltcore/diff" 25 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 26 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" 27 "github.com/dolthub/dolt/go/libraries/doltcore/rowconv" 28 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 29 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" 30 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil" 31 "github.com/dolthub/dolt/go/store/prolly" 32 "github.com/dolthub/dolt/go/store/prolly/tree" 33 "github.com/dolthub/dolt/go/store/types" 34 "github.com/dolthub/dolt/go/store/val" 35 ) 36 37 type diffRowItr struct { 38 ad diff.RowDiffer 39 diffSrc *diff.RowDiffSource 40 joiner *rowconv.Joiner 41 sch schema.Schema 42 fromCommitInfo commitInfo 43 toCommitInfo commitInfo 44 } 45 46 var _ sql.RowIter = &diffRowItr{} 47 48 type commitInfo struct { 49 name types.String 50 date *types.Timestamp 51 nameTag uint64 52 dateTag uint64 53 } 54 55 func newNomsDiffIter(ctx *sql.Context, ddb *doltdb.DoltDB, joiner *rowconv.Joiner, dp DiffPartition, lookup sql.IndexLookup) (*diffRowItr, error) { 56 fromData, fromSch, err := tableData(ctx, dp.from, ddb) 57 58 if err != nil { 59 return nil, err 60 } 61 62 toData, toSch, err := tableData(ctx, dp.to, ddb) 63 64 if err != nil { 65 return nil, err 66 } 67 68 fromConv, err := dp.rowConvForSchema(ctx, ddb.ValueReadWriter(), dp.fromSch, fromSch) 69 70 if err != nil { 71 return nil, err 72 } 73 74 toConv, err := dp.rowConvForSchema(ctx, ddb.ValueReadWriter(), dp.toSch, toSch) 75 76 if err != nil { 77 return nil, err 78 } 79 80 sch := joiner.GetSchema() 81 toCol, _ := sch.GetAllCols().GetByName(toCommit) 82 fromCol, _ := sch.GetAllCols().GetByName(fromCommit) 83 toDateCol, _ := sch.GetAllCols().GetByName(toCommitDate) 84 fromDateCol, _ := sch.GetAllCols().GetByName(fromCommitDate) 85 86 fromCmInfo := commitInfo{types.String(dp.fromName), dp.fromDate, fromCol.Tag, fromDateCol.Tag} 87 toCmInfo := commitInfo{types.String(dp.toName), dp.toDate, toCol.Tag, toDateCol.Tag} 88 89 rd := diff.NewRowDiffer(ctx, ddb.Format(), fromSch, toSch, 1024) 90 // TODO (dhruv) don't cast to noms map 91 // Use index lookup if it exists 92 if lookup.IsEmpty() { 93 rd.Start(ctx, durable.NomsMapFromIndex(fromData), durable.NomsMapFromIndex(toData)) 94 } else { 95 ranges, err := index.NomsRangesFromIndexLookup(ctx, lookup) // TODO: this is a testing method 96 if err != nil { 97 return nil, err 98 } 99 // TODO: maybe just use Check 100 rangeFunc := func(ctx context.Context, val types.Value) (bool, bool, error) { 101 v, ok := val.(types.Tuple) 102 if !ok { 103 return false, false, nil 104 } 105 return ranges[0].Check.Check(ctx, ddb.ValueReadWriter(), v) 106 } 107 rd.StartWithRange(ctx, durable.NomsMapFromIndex(fromData), durable.NomsMapFromIndex(toData), ranges[0].Start, rangeFunc) 108 } 109 110 src := diff.NewRowDiffSource(rd, joiner, ctx.Warn) 111 src.AddInputRowConversion(fromConv, toConv) 112 113 return &diffRowItr{ 114 ad: rd, 115 diffSrc: src, 116 joiner: joiner, 117 sch: joiner.GetSchema(), 118 fromCommitInfo: fromCmInfo, 119 toCommitInfo: toCmInfo, 120 }, nil 121 } 122 123 // Next returns the next row 124 func (itr *diffRowItr) Next(ctx *sql.Context) (sql.Row, error) { 125 r, err := itr.diffSrc.NextDiff() 126 127 if err != nil { 128 return nil, err 129 } 130 131 toAndFromRows, err := itr.joiner.Split(r) 132 if err != nil { 133 return nil, err 134 } 135 _, hasTo := toAndFromRows[diff.To] 136 _, hasFrom := toAndFromRows[diff.From] 137 138 r, err = r.SetColVal(itr.toCommitInfo.nameTag, types.String(itr.toCommitInfo.name), itr.sch) 139 if err != nil { 140 return nil, err 141 } 142 143 r, err = r.SetColVal(itr.fromCommitInfo.nameTag, types.String(itr.fromCommitInfo.name), itr.sch) 144 145 if err != nil { 146 return nil, err 147 } 148 149 if itr.toCommitInfo.date != nil { 150 r, err = r.SetColVal(itr.toCommitInfo.dateTag, *itr.toCommitInfo.date, itr.sch) 151 152 if err != nil { 153 return nil, err 154 } 155 } 156 157 if itr.fromCommitInfo.date != nil { 158 r, err = r.SetColVal(itr.fromCommitInfo.dateTag, *itr.fromCommitInfo.date, itr.sch) 159 160 if err != nil { 161 return nil, err 162 } 163 } 164 165 sqlRow, err := sqlutil.DoltRowToSqlRow(r, itr.sch) 166 167 if err != nil { 168 return nil, err 169 } 170 171 if hasTo && hasFrom { 172 sqlRow = append(sqlRow, diffTypeModified) 173 } else if hasTo && !hasFrom { 174 sqlRow = append(sqlRow, diffTypeAdded) 175 } else { 176 sqlRow = append(sqlRow, diffTypeRemoved) 177 } 178 179 return sqlRow, nil 180 } 181 182 // Close closes the iterator 183 func (itr *diffRowItr) Close(*sql.Context) (err error) { 184 defer itr.ad.Close() 185 defer func() { 186 closeErr := itr.diffSrc.Close() 187 188 if err == nil { 189 err = closeErr 190 } 191 }() 192 193 return nil 194 } 195 196 type commitInfo2 struct { 197 name string 198 ts *time.Time 199 } 200 201 type prollyDiffIter struct { 202 from, to prolly.Map 203 fromSch, toSch schema.Schema 204 targetFromSch, targetToSch schema.Schema 205 fromConverter, toConverter ProllyRowConverter 206 fromVD, toVD val.TupleDesc 207 keyless bool 208 209 fromCm commitInfo2 210 toCm commitInfo2 211 212 rows chan sql.Row 213 errChan chan error 214 cancel context.CancelFunc 215 } 216 217 var _ sql.RowIter = prollyDiffIter{} 218 219 // newProllyDiffIter produces dolt_diff system table and dolt_diff table 220 // function rows. The rows first have the "to" columns on the left and the 221 // "from" columns on the right. After the "to" and "from" columns, a commit 222 // name, and commit date is also present. The final column is the diff_type 223 // column. 224 // 225 // An example: to_pk, to_col1, to_commit, to_commit_date, from_pk, from_col1, from_commit, from_commit_date, diff_type 226 // 227 // |targetFromSchema| and |targetToSchema| defines what the schema should be for 228 // the row data on the "from" or "to" side. In the above example, both schemas are 229 // identical with two columns "pk" and "col1". The dolt diff table function for 230 // example can provide two different schemas. 231 // 232 // The |from| and |to| tables in the DiffPartition may have different schemas 233 // than |targetFromSchema| or |targetToSchema|. We convert the rows from the 234 // schema of |from| to |targetFromSchema| and the schema of |to| to 235 // |targetToSchema|. See the tablediff_prolly package. 236 func newProllyDiffIter(ctx *sql.Context, dp DiffPartition, targetFromSchema, targetToSchema schema.Schema) (prollyDiffIter, error) { 237 fromCm := commitInfo2{ 238 name: dp.fromName, 239 ts: (*time.Time)(dp.fromDate), 240 } 241 toCm := commitInfo2{ 242 name: dp.toName, 243 ts: (*time.Time)(dp.toDate), 244 } 245 var from, to prolly.Map 246 247 var fsch schema.Schema = schema.EmptySchema 248 if dp.from != nil { 249 idx, err := dp.from.GetRowData(ctx) 250 if err != nil { 251 return prollyDiffIter{}, err 252 } 253 from = durable.ProllyMapFromIndex(idx) 254 if fsch, err = dp.from.GetSchema(ctx); err != nil { 255 return prollyDiffIter{}, err 256 } 257 } 258 259 var tsch schema.Schema = schema.EmptySchema 260 if dp.to != nil { 261 idx, err := dp.to.GetRowData(ctx) 262 if err != nil { 263 return prollyDiffIter{}, err 264 } 265 to = durable.ProllyMapFromIndex(idx) 266 if tsch, err = dp.to.GetSchema(ctx); err != nil { 267 return prollyDiffIter{}, err 268 } 269 } 270 271 var nodeStore tree.NodeStore 272 if dp.to != nil { 273 nodeStore = dp.to.NodeStore() 274 } else { 275 nodeStore = dp.from.NodeStore() 276 } 277 278 fromConverter, err := NewProllyRowConverter(fsch, targetFromSchema, ctx.Warn, nodeStore) 279 if err != nil { 280 return prollyDiffIter{}, err 281 } 282 283 toConverter, err := NewProllyRowConverter(tsch, targetToSchema, ctx.Warn, nodeStore) 284 if err != nil { 285 return prollyDiffIter{}, err 286 } 287 288 fromVD := fsch.GetValueDescriptor() 289 toVD := tsch.GetValueDescriptor() 290 keyless := schema.IsKeyless(targetFromSchema) && schema.IsKeyless(targetToSchema) 291 child, cancel := context.WithCancel(ctx) 292 iter := prollyDiffIter{ 293 from: from, 294 to: to, 295 fromSch: fsch, 296 toSch: tsch, 297 targetFromSch: targetFromSchema, 298 targetToSch: targetToSchema, 299 fromConverter: fromConverter, 300 toConverter: toConverter, 301 fromVD: fromVD, 302 toVD: toVD, 303 keyless: keyless, 304 fromCm: fromCm, 305 toCm: toCm, 306 rows: make(chan sql.Row, 64), 307 errChan: make(chan error), 308 cancel: cancel, 309 } 310 311 go func() { 312 iter.queueRows(child) 313 }() 314 315 return iter, nil 316 } 317 318 func (itr prollyDiffIter) Next(ctx *sql.Context) (sql.Row, error) { 319 select { 320 case <-ctx.Done(): 321 return nil, ctx.Err() 322 case err := <-itr.errChan: 323 return nil, err 324 case row, ok := <-itr.rows: 325 if !ok { 326 return nil, io.EOF 327 } 328 return row, nil 329 } 330 } 331 332 func (itr prollyDiffIter) Close(ctx *sql.Context) error { 333 itr.cancel() 334 return nil 335 } 336 337 func (itr prollyDiffIter) queueRows(ctx context.Context) { 338 // TODO: Determine whether or not the schema has changed. If it has, then all rows should count as modifications in the diff. 339 considerAllRowsModified := false 340 err := prolly.DiffMaps(ctx, itr.from, itr.to, considerAllRowsModified, func(ctx context.Context, d tree.Diff) error { 341 dItr, err := itr.makeDiffRowItr(ctx, d) 342 if err != nil { 343 return err 344 } 345 for { 346 r, err := dItr.Next(ctx) 347 if err == io.EOF { 348 return nil 349 } 350 if err != nil { 351 return err 352 } 353 select { 354 case <-ctx.Done(): 355 return ctx.Err() 356 case itr.rows <- r: 357 continue 358 } 359 } 360 }) 361 if err != nil && err != io.EOF { 362 select { 363 case <-ctx.Done(): 364 case itr.errChan <- err: 365 } 366 return 367 } 368 // we need to drain itr.rows before returning io.EOF 369 close(itr.rows) 370 } 371 372 // todo(andy): copy string fields 373 func (itr prollyDiffIter) makeDiffRowItr(ctx context.Context, d tree.Diff) (*repeatingRowIter, error) { 374 if !itr.keyless { 375 r, err := itr.getDiffRow(ctx, d) 376 if err != nil { 377 return nil, err 378 } 379 return &repeatingRowIter{row: r, n: 1}, nil 380 } 381 382 r, n, err := itr.getDiffRowAndCardinality(ctx, d) 383 if err != nil { 384 return nil, err 385 } 386 return &repeatingRowIter{row: r, n: n}, nil 387 } 388 389 func (itr prollyDiffIter) getDiffRowAndCardinality(ctx context.Context, d tree.Diff) (r sql.Row, n uint64, err error) { 390 switch d.Type { 391 case tree.AddedDiff: 392 n = val.ReadKeylessCardinality(val.Tuple(d.To)) 393 case tree.RemovedDiff: 394 n = val.ReadKeylessCardinality(val.Tuple(d.From)) 395 case tree.ModifiedDiff: 396 fN := val.ReadKeylessCardinality(val.Tuple(d.From)) 397 tN := val.ReadKeylessCardinality(val.Tuple(d.To)) 398 if fN < tN { 399 n = tN - fN 400 d.Type = tree.AddedDiff 401 } else { 402 n = fN - tN 403 d.Type = tree.RemovedDiff 404 } 405 } 406 407 r, err = itr.getDiffRow(ctx, d) 408 if err != nil { 409 return nil, 0, err 410 } 411 412 return r, n, nil 413 } 414 415 func (itr prollyDiffIter) getDiffRow(ctx context.Context, dif tree.Diff) (row sql.Row, err error) { 416 tLen := schemaSize(itr.targetToSch) 417 fLen := schemaSize(itr.targetFromSch) 418 419 if fLen == 0 && dif.Type == tree.AddedDiff { 420 fLen = tLen 421 } else if tLen == 0 && dif.Type == tree.RemovedDiff { 422 tLen = fLen 423 } 424 // 2 commit names, 2 commit dates, 1 diff_type 425 row = make(sql.Row, fLen+tLen+5) 426 427 // todo (dhruv): implement warnings for row column value coercions. 428 429 if dif.Type != tree.RemovedDiff { 430 err = itr.toConverter.PutConverted(ctx, val.Tuple(dif.Key), val.Tuple(dif.To), row[0:tLen]) 431 if err != nil { 432 return nil, err 433 } 434 } 435 436 idx := tLen 437 row[idx] = itr.toCm.name 438 row[idx+1] = maybeTime(itr.toCm.ts) 439 440 if dif.Type != tree.AddedDiff { 441 err = itr.fromConverter.PutConverted(ctx, val.Tuple(dif.Key), val.Tuple(dif.From), row[tLen+2:tLen+2+fLen]) 442 if err != nil { 443 return nil, err 444 } 445 } 446 447 idx = fLen + 2 + tLen 448 row[idx] = itr.fromCm.name 449 row[idx+1] = maybeTime(itr.fromCm.ts) 450 row[idx+2] = diffTypeString(dif) 451 452 return row, nil 453 } 454 455 type repeatingRowIter struct { 456 row sql.Row 457 n uint64 458 } 459 460 func (r *repeatingRowIter) Next(ctx context.Context) (sql.Row, error) { 461 if r.n == 0 { 462 return nil, io.EOF 463 } 464 r.n-- 465 c := make(sql.Row, len(r.row)) 466 copy(c, r.row) 467 return c, nil 468 } 469 470 func schemaSize(sch schema.Schema) int { 471 if sch == nil { 472 return 0 473 } 474 return sch.GetAllCols().Size() 475 } 476 477 func diffTypeString(d tree.Diff) (s string) { 478 switch d.Type { 479 case tree.AddedDiff: 480 s = diffTypeAdded 481 case tree.ModifiedDiff: 482 s = diffTypeModified 483 case tree.RemovedDiff: 484 s = diffTypeRemoved 485 } 486 return 487 } 488 489 func maybeTime(t *time.Time) interface{} { 490 if t != nil { 491 return *t 492 } 493 return nil 494 } 495 496 //------------------------------------ 497 // diffPartitionRowIter 498 //------------------------------------ 499 500 var _ sql.RowIter = (*diffPartitionRowIter)(nil) 501 502 type diffPartitionRowIter struct { 503 diffPartitions *DiffPartitions 504 ddb *doltdb.DoltDB 505 joiner *rowconv.Joiner 506 currentPartition *sql.Partition 507 currentRowIter *sql.RowIter 508 } 509 510 func NewDiffPartitionRowIter(partition sql.Partition, ddb *doltdb.DoltDB, joiner *rowconv.Joiner) *diffPartitionRowIter { 511 return &diffPartitionRowIter{ 512 currentPartition: &partition, 513 ddb: ddb, 514 joiner: joiner, 515 } 516 } 517 518 func (itr *diffPartitionRowIter) Next(ctx *sql.Context) (sql.Row, error) { 519 for { 520 if itr.currentPartition == nil { 521 nextPartition, err := itr.diffPartitions.Next(ctx) 522 if err != nil { 523 return nil, err 524 } 525 itr.currentPartition = &nextPartition 526 } 527 528 if itr.currentRowIter == nil { 529 dp := (*itr.currentPartition).(DiffPartition) 530 rowIter, err := dp.GetRowIter(ctx, itr.ddb, itr.joiner, sql.IndexLookup{}) 531 if err != nil { 532 return nil, err 533 } 534 itr.currentRowIter = &rowIter 535 } 536 537 row, err := (*itr.currentRowIter).Next(ctx) 538 if err == io.EOF { 539 itr.currentPartition = nil 540 itr.currentRowIter = nil 541 542 if itr.diffPartitions == nil { 543 return nil, err 544 } 545 546 continue 547 } else if err != nil { 548 return nil, err 549 } else { 550 return row, nil 551 } 552 } 553 } 554 555 func (itr *diffPartitionRowIter) Close(_ *sql.Context) error { 556 return nil 557 }