github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/dolt_diff_table_function.go (about) 1 // Copyright 2020-2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sqle 16 17 import ( 18 "fmt" 19 "strings" 20 21 "github.com/dolthub/go-mysql-server/sql" 22 gmstypes "github.com/dolthub/go-mysql-server/sql/types" 23 "gopkg.in/src-d/go-errors.v1" 24 25 "github.com/dolthub/dolt/go/libraries/doltcore/diff" 26 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 27 "github.com/dolthub/dolt/go/libraries/doltcore/merge" 28 "github.com/dolthub/dolt/go/libraries/doltcore/ref" 29 "github.com/dolthub/dolt/go/libraries/doltcore/rowconv" 30 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 31 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" 32 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dtables" 33 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil" 34 "github.com/dolthub/dolt/go/store/types" 35 ) 36 37 const diffTableDefaultRowCount = 1000 38 39 var ErrInvalidNonLiteralArgument = errors.NewKind("Invalid argument to %s: %s – only literal values supported") 40 41 var _ sql.TableFunction = (*DiffTableFunction)(nil) 42 var _ sql.ExecSourceRel = (*DiffTableFunction)(nil) 43 44 type DiffTableFunction struct { 45 ctx *sql.Context 46 fromCommitExpr sql.Expression 47 toCommitExpr sql.Expression 48 dotCommitExpr sql.Expression 49 tableNameExpr sql.Expression 50 database sql.Database 51 sqlSch sql.Schema 52 joiner *rowconv.Joiner 53 54 tableDelta diff.TableDelta 55 fromDate *types.Timestamp 56 toDate *types.Timestamp 57 } 58 59 // NewInstance creates a new instance of TableFunction interface 60 func (dtf *DiffTableFunction) NewInstance(ctx *sql.Context, database sql.Database, expressions []sql.Expression) (sql.Node, error) { 61 newInstance := &DiffTableFunction{ 62 ctx: ctx, 63 database: database, 64 } 65 66 node, err := newInstance.WithExpressions(expressions...) 67 if err != nil { 68 return nil, err 69 } 70 71 return node, nil 72 } 73 74 func (dtf *DiffTableFunction) DataLength(ctx *sql.Context) (uint64, error) { 75 numBytesPerRow := schema.SchemaAvgLength(dtf.Schema()) 76 numRows, _, err := dtf.RowCount(ctx) 77 if err != nil { 78 return 0, err 79 } 80 return numBytesPerRow * numRows, nil 81 } 82 83 func (dtf *DiffTableFunction) RowCount(_ *sql.Context) (uint64, bool, error) { 84 return diffTableDefaultRowCount, false, nil 85 } 86 87 // Database implements the sql.Databaser interface 88 func (dtf *DiffTableFunction) Database() sql.Database { 89 return dtf.database 90 } 91 92 // WithDatabase implements the sql.Databaser interface 93 func (dtf *DiffTableFunction) WithDatabase(database sql.Database) (sql.Node, error) { 94 ndtf := *dtf 95 ndtf.database = database 96 return &ndtf, nil 97 } 98 99 // Expressions implements the sql.Expressioner interface 100 func (dtf *DiffTableFunction) Expressions() []sql.Expression { 101 if dtf.dotCommitExpr != nil { 102 return []sql.Expression{ 103 dtf.dotCommitExpr, dtf.tableNameExpr, 104 } 105 } 106 return []sql.Expression{ 107 dtf.fromCommitExpr, dtf.toCommitExpr, dtf.tableNameExpr, 108 } 109 } 110 111 // WithExpressions implements the sql.Expressioner interface 112 func (dtf *DiffTableFunction) WithExpressions(expression ...sql.Expression) (sql.Node, error) { 113 if len(expression) < 2 { 114 return nil, sql.ErrInvalidArgumentNumber.New(dtf.Name(), "2 to 3", len(expression)) 115 } 116 117 // TODO: For now, we will only support literal / fully-resolved arguments to the 118 // DiffTableFunction to avoid issues where the schema is needed in the analyzer 119 // before the arguments could be resolved. 120 for _, expr := range expression { 121 if !expr.Resolved() { 122 return nil, ErrInvalidNonLiteralArgument.New(dtf.Name(), expr.String()) 123 } 124 // prepared statements resolve functions beforehand, so above check fails 125 if _, ok := expr.(sql.FunctionExpression); ok { 126 return nil, ErrInvalidNonLiteralArgument.New(dtf.Name(), expr.String()) 127 } 128 } 129 130 newDtf := *dtf 131 if strings.Contains(expression[0].String(), "..") { 132 if len(expression) != 2 { 133 return nil, sql.ErrInvalidArgumentNumber.New(fmt.Sprintf("%v with .. or ...", newDtf.Name()), 2, len(expression)) 134 } 135 newDtf.dotCommitExpr = expression[0] 136 newDtf.tableNameExpr = expression[1] 137 } else { 138 if len(expression) != 3 { 139 return nil, sql.ErrInvalidArgumentNumber.New(newDtf.Name(), 3, len(expression)) 140 } 141 newDtf.fromCommitExpr = expression[0] 142 newDtf.toCommitExpr = expression[1] 143 newDtf.tableNameExpr = expression[2] 144 } 145 146 fromCommitVal, toCommitVal, dotCommitVal, tableName, err := newDtf.evaluateArguments() 147 if err != nil { 148 return nil, err 149 } 150 151 err = newDtf.generateSchema(newDtf.ctx, fromCommitVal, toCommitVal, dotCommitVal, tableName) 152 if err != nil { 153 return nil, err 154 } 155 156 return &newDtf, nil 157 } 158 159 // Children implements the sql.Node interface 160 func (dtf *DiffTableFunction) Children() []sql.Node { 161 return nil 162 } 163 164 // RowIter implements the sql.Node interface 165 func (dtf *DiffTableFunction) RowIter(ctx *sql.Context, _ sql.Row) (sql.RowIter, error) { 166 // Everything we need to start iterating was cached when we previously determined the schema of the result 167 // TODO: When we add support for joining on table functions, we'll need to evaluate this against the 168 // specified row. That row is what has the left_table context in a join query. 169 // This will expand the test cases we need to cover significantly. 170 fromCommitVal, toCommitVal, dotCommitVal, _, err := dtf.evaluateArguments() 171 if err != nil { 172 return nil, err 173 } 174 175 sqledb, ok := dtf.database.(dsess.SqlDatabase) 176 if !ok { 177 return nil, fmt.Errorf("unable to get dolt database") 178 } 179 180 fromCommitStr, toCommitStr, err := loadCommitStrings(ctx, fromCommitVal, toCommitVal, dotCommitVal, sqledb) 181 if err != nil { 182 return nil, err 183 } 184 185 ddb := sqledb.DbData().Ddb 186 dp := dtables.NewDiffPartition(dtf.tableDelta.ToTable, dtf.tableDelta.FromTable, toCommitStr, fromCommitStr, dtf.toDate, dtf.fromDate, dtf.tableDelta.ToSch, dtf.tableDelta.FromSch) 187 188 return dtables.NewDiffPartitionRowIter(*dp, ddb, dtf.joiner), nil 189 } 190 191 // findMatchingDelta returns the best matching table delta for the table name 192 // given, taking renames into account 193 func findMatchingDelta(deltas []diff.TableDelta, tableName string) diff.TableDelta { 194 tableName = strings.ToLower(tableName) 195 for _, d := range deltas { 196 if strings.ToLower(d.ToName) == tableName { 197 return d 198 } 199 } 200 201 for _, d := range deltas { 202 if strings.ToLower(d.FromName) == tableName { 203 return d 204 } 205 } 206 207 // no delta means no diff, or the table doesn't exist 208 return diff.TableDelta{} 209 } 210 211 type refDetails struct { 212 root doltdb.RootValue 213 hashStr string 214 commitTime *types.Timestamp 215 } 216 217 // loadDetailsForRef loads the root, hash, and timestamp for the specified from 218 // and to ref values 219 func loadDetailsForRefs(ctx *sql.Context, fromRef, toRef, dotRef interface{}, db dsess.SqlDatabase) (*refDetails, *refDetails, error) { 220 fromCommitStr, toCommitStr, err := loadCommitStrings(ctx, fromRef, toRef, dotRef, db) 221 if err != nil { 222 return nil, nil, err 223 } 224 225 sess := dsess.DSessFromSess(ctx.Session) 226 dbName := db.Name() 227 228 fromRoot, fromCommitTime, fromHashStr, err := sess.ResolveRootForRef(ctx, dbName, fromCommitStr) 229 if err != nil { 230 return nil, nil, err 231 } 232 fromDetails := &refDetails{fromRoot, fromHashStr, fromCommitTime} 233 234 toRoot, toCommitTime, toHashStr, err := sess.ResolveRootForRef(ctx, dbName, toCommitStr) 235 if err != nil { 236 return nil, nil, err 237 } 238 toDetails := &refDetails{toRoot, toHashStr, toCommitTime} 239 240 return fromDetails, toDetails, nil 241 } 242 243 func resolveCommitStrings(ctx *sql.Context, fromRef, toRef, dotRef interface{}, db dsess.SqlDatabase) (string, string, error) { 244 if dotRef != nil { 245 dotStr, err := interfaceToString(dotRef) 246 if err != nil { 247 return "", "", err 248 } 249 250 sess := dsess.DSessFromSess(ctx.Session) 251 252 if strings.Contains(dotStr, "...") { 253 refs := strings.Split(dotStr, "...") 254 255 headRef, err := sess.CWBHeadRef(ctx, db.Name()) 256 if err != nil { 257 return "", "", err 258 } 259 260 rightCm, err := resolveCommit(ctx, db.DbData().Ddb, headRef, refs[0]) 261 if err != nil { 262 return "", "", err 263 } 264 265 leftCm, err := resolveCommit(ctx, db.DbData().Ddb, headRef, refs[1]) 266 if err != nil { 267 return "", "", err 268 } 269 270 mergeBase, err := merge.MergeBase(ctx, rightCm, leftCm) 271 if err != nil { 272 return "", "", err 273 } 274 275 return mergeBase.String(), refs[1], nil 276 } else { 277 refs := strings.Split(dotStr, "..") 278 return refs[0], refs[1], nil 279 } 280 } 281 282 fromStr, err := interfaceToString(fromRef) 283 if err != nil { 284 return "", "", err 285 } 286 287 toStr, err := interfaceToString(toRef) 288 if err != nil { 289 return "", "", err 290 } 291 292 return fromStr, toStr, nil 293 } 294 295 // loadCommitStrings gets the to and from commit strings, using the common 296 // ancestor as the from commit string for three dot diff 297 func loadCommitStrings(ctx *sql.Context, fromRef, toRef, dotRef interface{}, db dsess.SqlDatabase) (string, string, error) { 298 fromStr, toStr, err := resolveCommitStrings(ctx, fromRef, toRef, dotRef, db) 299 if err != nil { 300 return "", "", err 301 } 302 303 if len(fromStr) == 0 || len(toStr) == 0 { 304 return "", "", fmt.Errorf("expected strings for from and to revisions, got: %v, %v", fromStr, toStr) 305 } 306 307 return fromStr, toStr, nil 308 } 309 310 // interfaceToString converts an interface to a string 311 func interfaceToString(r interface{}) (string, error) { 312 str, ok := r.(string) 313 if !ok { 314 return "", fmt.Errorf("received '%v' when expecting commit hash string", str) 315 } 316 return str, nil 317 } 318 319 func resolveRoot(ctx *sql.Context, sess *dsess.DoltSession, dbName, hashStr string) (*refDetails, error) { 320 root, commitTime, _, err := sess.ResolveRootForRef(ctx, dbName, hashStr) 321 if err != nil { 322 return nil, err 323 } 324 325 return &refDetails{root, hashStr, commitTime}, nil 326 } 327 328 func resolveCommit(ctx *sql.Context, ddb *doltdb.DoltDB, headRef ref.DoltRef, cSpecStr string) (*doltdb.Commit, error) { 329 cs, err := doltdb.NewCommitSpec(cSpecStr) 330 if err != nil { 331 return nil, err 332 } 333 334 optCmt, err := ddb.Resolve(ctx, cs, headRef) 335 if err != nil { 336 return nil, err 337 } 338 cm, ok := optCmt.ToCommit() 339 if !ok { 340 return nil, doltdb.ErrGhostCommitEncountered 341 } 342 343 return cm, nil 344 } 345 346 // WithChildren implements the sql.Node interface 347 func (dtf *DiffTableFunction) WithChildren(node ...sql.Node) (sql.Node, error) { 348 if len(node) != 0 { 349 return nil, fmt.Errorf("unexpected children") 350 } 351 return dtf, nil 352 } 353 354 // CheckPrivileges implements the sql.Node interface 355 func (dtf *DiffTableFunction) CheckPrivileges(ctx *sql.Context, opChecker sql.PrivilegedOperationChecker) bool { 356 _, _, _, tableName, err := dtf.evaluateArguments() 357 if err != nil { 358 return false 359 } 360 361 subject := sql.PrivilegeCheckSubject{Database: dtf.database.Name(), Table: tableName} 362 // TODO: Add tests for privilege checking 363 return opChecker.UserHasPrivileges(ctx, 364 sql.NewPrivilegedOperation(subject, sql.PrivilegeType_Select)) 365 } 366 367 // evaluateArguments evaluates the argument expressions to turn them into 368 // values this DiffTableFunction can use. Note that this method only evals 369 // the expressions, and doesn't validate the values. 370 // TODO: evaluating expression arguments during binding is incompatible 371 // with prepared statement support. 372 func (dtf *DiffTableFunction) evaluateArguments() (interface{}, interface{}, interface{}, string, error) { 373 if !dtf.Resolved() { 374 return nil, nil, nil, "", nil 375 } 376 377 if !gmstypes.IsText(dtf.tableNameExpr.Type()) { 378 return nil, nil, nil, "", sql.ErrInvalidArgumentDetails.New(dtf.Name(), dtf.tableNameExpr.String()) 379 } 380 381 tableNameVal, err := dtf.tableNameExpr.Eval(dtf.ctx, nil) 382 if err != nil { 383 return nil, nil, nil, "", err 384 } 385 386 tableName, ok := tableNameVal.(string) 387 if !ok { 388 return nil, nil, nil, "", ErrInvalidTableName.New(dtf.tableNameExpr.String()) 389 } 390 391 if dtf.dotCommitExpr != nil { 392 if !gmstypes.IsText(dtf.dotCommitExpr.Type()) { 393 return nil, nil, nil, "", sql.ErrInvalidArgumentDetails.New(dtf.Name(), dtf.dotCommitExpr.String()) 394 } 395 396 dotCommitVal, err := dtf.dotCommitExpr.Eval(dtf.ctx, nil) 397 if err != nil { 398 return nil, nil, nil, "", err 399 } 400 401 return nil, nil, dotCommitVal, tableName, nil 402 } 403 404 if !gmstypes.IsText(dtf.fromCommitExpr.Type()) { 405 return nil, nil, nil, "", sql.ErrInvalidArgumentDetails.New(dtf.Name(), dtf.fromCommitExpr.String()) 406 } 407 if !gmstypes.IsText(dtf.toCommitExpr.Type()) { 408 return nil, nil, nil, "", sql.ErrInvalidArgumentDetails.New(dtf.Name(), dtf.toCommitExpr.String()) 409 } 410 411 fromCommitVal, err := dtf.fromCommitExpr.Eval(dtf.ctx, nil) 412 if err != nil { 413 return nil, nil, nil, "", err 414 } 415 416 toCommitVal, err := dtf.toCommitExpr.Eval(dtf.ctx, nil) 417 if err != nil { 418 return nil, nil, nil, "", err 419 } 420 return fromCommitVal, toCommitVal, nil, tableName, nil 421 } 422 423 func (dtf *DiffTableFunction) generateSchema(ctx *sql.Context, fromCommitVal, toCommitVal, dotCommitVal interface{}, tableName string) error { 424 if !dtf.Resolved() { 425 return nil 426 } 427 428 sqledb, ok := dtf.database.(dsess.SqlDatabase) 429 if !ok { 430 return fmt.Errorf("unexpected database type: %T", dtf.database) 431 } 432 433 delta, err := dtf.cacheTableDelta(ctx, fromCommitVal, toCommitVal, dotCommitVal, tableName, sqledb) 434 if err != nil { 435 return err 436 } 437 438 fromTable, fromTableExists := delta.FromTable, delta.FromTable != nil 439 toTable, toTableExists := delta.ToTable, delta.ToTable != nil 440 441 if !toTableExists && !fromTableExists { 442 return sql.ErrTableNotFound.New(tableName) 443 } 444 445 var toSchema, fromSchema schema.Schema 446 var format *types.NomsBinFormat 447 448 if fromTableExists { 449 fromSchema = delta.FromSch 450 format = fromTable.Format() 451 } 452 453 if toTableExists { 454 toSchema = delta.ToSch 455 format = toTable.Format() 456 } 457 458 diffTableSch, j, err := dtables.GetDiffTableSchemaAndJoiner(format, fromSchema, toSchema) 459 if err != nil { 460 return err 461 } 462 dtf.joiner = j 463 464 // TODO: sql.Columns include a Source that indicates the table it came from, but we don't have a real table 465 // when the column comes from a table function, so we omit the table name when we create these columns. 466 // This allows column projections to work correctly with table functions, but we will need to add a 467 // unique id (e.g. hash generated from method arguments) when we add support for aliasing and joining 468 // table functions in order for the analyzer to determine which table function result a column comes from. 469 sqlSchema, err := sqlutil.FromDoltSchema("", "", diffTableSch) 470 if err != nil { 471 return err 472 } 473 474 dtf.sqlSch = sqlSchema.Schema 475 476 return nil 477 } 478 479 // cacheTableDelta caches and returns an appropriate table delta for the table name given, taking renames into 480 // consideration. Returns a sql.ErrTableNotFound if the given table name cannot be found in either revision. 481 func (dtf *DiffTableFunction) cacheTableDelta(ctx *sql.Context, fromCommitVal, toCommitVal, dotCommitVal interface{}, tableName string, db dsess.SqlDatabase) (diff.TableDelta, error) { 482 fromRefDetails, toRefDetails, err := loadDetailsForRefs(ctx, fromCommitVal, toCommitVal, dotCommitVal, db) 483 if err != nil { 484 return diff.TableDelta{}, err 485 } 486 487 fromTable, _, fromTableExists, err := doltdb.GetTableInsensitive(ctx, fromRefDetails.root, tableName) 488 if err != nil { 489 return diff.TableDelta{}, err 490 } 491 492 toTable, _, toTableExists, err := doltdb.GetTableInsensitive(ctx, toRefDetails.root, tableName) 493 if err != nil { 494 return diff.TableDelta{}, err 495 } 496 497 if !fromTableExists && !toTableExists { 498 return diff.TableDelta{}, sql.ErrTableNotFound.New(tableName) 499 } 500 501 // TODO: it would be nice to limit this to just the table under consideration, not all tables with a diff 502 deltas, err := diff.GetTableDeltas(ctx, fromRefDetails.root, toRefDetails.root) 503 if err != nil { 504 return diff.TableDelta{}, err 505 } 506 507 dtf.fromDate = fromRefDetails.commitTime 508 dtf.toDate = toRefDetails.commitTime 509 510 delta := findMatchingDelta(deltas, tableName) 511 512 // We only get a delta if there's a diff. When there isn't one, construct a delta here with table and schema info 513 if delta.FromTable == nil && delta.ToTable == nil { 514 delta.FromName = tableName 515 delta.ToName = tableName 516 delta.FromTable = fromTable 517 delta.ToTable = toTable 518 519 if fromTable != nil { 520 sch, err := fromTable.GetSchema(ctx) 521 if err != nil { 522 return diff.TableDelta{}, err 523 } 524 delta.FromSch = sch 525 } 526 527 if toTable != nil { 528 sch, err := toTable.GetSchema(ctx) 529 if err != nil { 530 return diff.TableDelta{}, err 531 } 532 delta.ToSch = sch 533 } 534 535 // TODO: There are other fields we could set here that we don't 536 } 537 538 dtf.tableDelta = delta 539 540 return delta, nil 541 } 542 543 // Schema implements the sql.Node interface 544 func (dtf *DiffTableFunction) Schema() sql.Schema { 545 if !dtf.Resolved() { 546 return nil 547 } 548 549 if dtf.sqlSch == nil { 550 panic("schema hasn't been generated yet") 551 } 552 553 return dtf.sqlSch 554 } 555 556 // Resolved implements the sql.Resolvable interface 557 func (dtf *DiffTableFunction) Resolved() bool { 558 if dtf.dotCommitExpr != nil { 559 return dtf.tableNameExpr.Resolved() && dtf.dotCommitExpr.Resolved() 560 } 561 return dtf.tableNameExpr.Resolved() && dtf.fromCommitExpr.Resolved() && dtf.toCommitExpr.Resolved() 562 } 563 564 func (dtf *DiffTableFunction) IsReadOnly() bool { 565 return true 566 } 567 568 // String implements the Stringer interface 569 func (dtf *DiffTableFunction) String() string { 570 if dtf.dotCommitExpr != nil { 571 return fmt.Sprintf("DOLT_DIFF(%s, %s)", 572 dtf.dotCommitExpr.String(), 573 dtf.tableNameExpr.String()) 574 } 575 return fmt.Sprintf("DOLT_DIFF(%s, %s, %s)", 576 dtf.fromCommitExpr.String(), 577 dtf.toCommitExpr.String(), 578 dtf.tableNameExpr.String()) 579 } 580 581 // Name implements the sql.TableFunction interface 582 func (dtf *DiffTableFunction) Name() string { 583 return "dolt_diff" 584 }