github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/props/func_dep.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package props 12 13 import ( 14 "fmt" 15 "strings" 16 17 "github.com/cockroachdb/cockroach/pkg/sql/opt" 18 "github.com/cockroachdb/cockroach/pkg/util/log" 19 "github.com/cockroachdb/errors" 20 ) 21 22 // FuncDepSet is a set of functional dependencies (FDs) that encode useful 23 // relationships between columns in a base or derived relation. Given two sets 24 // of columns A and B, a functional dependency A-->B holds if A fully determines 25 // B. In other words, if two different rows have equal values for columns in A, 26 // then those two rows will also have equal values for columns in B. For 27 // example, where columns (a1, a2) are in set A, and column (b1) is in set B: 28 // 29 // a1 a2 b1 30 // -------- 31 // 1 2 5 32 // 1 2 5 33 // 3 4 6 34 // 3 4 6 35 // 36 // The left side of a functional dependency is called the "determinant", and 37 // the right side is called the "dependant". Each side can contain zero or more 38 // columns, though the FuncDepSet library will fold away certain combinations 39 // that don't provide useful information, like A-->A and A-->(), since every 40 // column trivially determines itself, as well as the empty set. 
41 // 42 // When a dependant contains multiple columns, it is equivalent to splitting 43 // the single FD into multiple FDs, each with a single column dependant: 44 // 45 // (a)-->(b,c) 46 // 47 // is equivalent to these two FDs: 48 // 49 // (a)-->(b) 50 // (a)-->(c) 51 // 52 // When a determinant contains zero columns, as in ()-->A, then A is fully 53 // determined without reference to any other columns. An equivalent statement is 54 // that any arbitrary combination of determinant columns trivially determines A. 55 // And both of these statements are just another way of saying that columns in A 56 // are constant: 57 // 58 // a1 a2 b1 c1 59 // ---------------- 60 // 1 NULL 3 3 61 // 1 NULL 3 NULL 62 // 1 NULL 4 NULL 63 // 64 // When a determinant contains multiple columns, then the functional dependency 65 // holds for the *composite* value of those columns. For example: 66 // 67 // a1 a2 b1 68 // -------- 69 // 1 2 5 70 // 1 2 5 71 // 1 3 4 72 // 73 // These are valid values, even though a1 has the same values for all three 74 // rows, because it is only the combination of (a1,a2) that determines (b1). 75 // 76 // Multiple FDs can be transitively applied in order to compute the "closure" of 77 // a set of input columns. The closure includes the input columns plus all 78 // columns that are functionally dependent on those columns, either directly or 79 // indirectly. Consider this set of FD's: 80 // 81 // (a)-->(b,c,d) 82 // (b,c,e)-->(f) 83 // (d)-->(e) 84 // 85 // The transitive closure of (a) is (a,b,c,d,e,f). To start, (a) determines 86 // (b,c,d). From there, (d) transitively determines (e). And now that (b,c,e) 87 // have been determined, they in turn determine (f). Because (a) determines all 88 // other columns, if two rows have the same value for (a), then the rows will be 89 // duplicates, since all other columns will be equal. And if there are no 90 // duplicate rows, then (a) is a key for the relation. 
91 // 92 // Deriving FD Sets 93 // 94 // Base table primary keys can be trivially mapped into an FD set, since the 95 // primary key always uniquely determines the other columns: 96 // 97 // CREATE TABLE t (a INT PRIMARY KEY, b INT, c INT) 98 // (a)-->(b,c) 99 // 100 // Each SQL relational operator derives its own FD set from the FD sets of its 101 // inputs. For example, the Select operator augments the FD set of its input, 102 // based on its filter condition: 103 // 104 // SELECT * FROM t WHERE a=1 105 // 106 // Equating a column to a constant value constructs a new FD with an empty 107 // determinant, so that the augmented FD set becomes: 108 // 109 // (a)-->(b,c) 110 // ()-->(a) 111 // 112 // Since the value of column "a" is always the same, and since "a" functionally 113 // determines "b" and "c", the values of all columns are constants. Furthermore, 114 // because "a" is known to be a key, the result set can have at most one row. 115 // 116 // This is but one example of how FDs can assist the optimizer in proving useful 117 // properties about query results. This information powers many optimizations, 118 // including eliminating unnecessary DISTINCT operators, simplifying ORDER BY 119 // columns, removing Max1Row operators, and mapping semi-joins to inner-joins. 120 // 121 // NULL Values 122 // 123 // FDs become more complex when the possibility of NULL values is introduced. 124 // SQL semantics often treat a NULL value as an "unknown" value that is not 125 // equal to any other value, including another NULL value. For example, SQL 126 // unique indexes exhibit this behavior: 127 // 128 // CREATE TABLE t (a INT PRIMARY KEY, b INT, c INT, UNIQUE (b)) 129 // 130 // Here, "b" column values are unique...except for the case of multiple NULL 131 // values, which are allowed because each NULL is treated as if it was a 132 // different value. Contrast this with the different NULL handling rules used 133 // by SQL's GROUP BY and DISTINCT operators. 
// Those operators treat multiple NULL
// values as duplicates, because each NULL is treated as if it was the same
// value.
//
// The functional dependencies described up until now always use the "NULLs are
// equal" semantics (denoted as NULL= hereafter) in order to answer the question
// "are these two columns equal". The semantics are identical to what this SQL
// expression returns:
//
//   ((c1 = c2) OR (c1 IS NULL AND c2 IS NULL)) IS True
//
// And here are some examples:
//
//   c1    c2    NULL=
//   -----------------
//   1     1     true
//   NULL  NULL  true
//   1     2     false
//   1     NULL  false
//   NULL  1     false
//
// So now for the definition of A-->B that incorporates NULL values:
//
//   for any two rows r1 and r2 in the relation:
//   A(r1) NULL= A(r2) ==> B(r1) NULL= B(r2)
//
// Intuitively, if two different rows have equal values for A using "NULLs are
// equal" semantics, then those rows will also have equal values for B using
// those same semantics. As an example, the following sets of rows would be
// valid for the dependency (b)-->(c):
//
//   b     c
//   ----------
//   1     NULL
//   1     NULL
//   NULL  1
//   NULL  1
//   2     3
//   2     3
//
//   b     c
//   ----------
//   NULL  NULL
//   NULL  NULL
//
// but these sets of rows would be invalid:
//
//   b     c
//   ----------
//   NULL  1
//   NULL  NULL
//
//   b     c
//   ----------
//   NULL  1
//   NULL  2
//
// Unique constraints allow the latter cases, however, and therefore it is
// desirable to somehow encode these weaker dependencies as FDs, because they
// can be strengthened later on if NULL values are filtered from determinant
// columns (more on that below).
//
// The solution is to store an extra "strict" bit on each FD. If true, then
// the FD is a "strict" dependency, and behaves as described above.
However, if 197 // false, then the FD is a "lax" dependency. Lax dependencies use "squiggly" 198 // arrow notation to differentiate them from the strict variant: 199 // 200 // A~~>B 201 // 202 // In contrast to strict dependencies, lax dependencies treat NULLs on 203 // determinant columns as distinct from one another, with equality semantics 204 // identical to this SQL expression: 205 // 206 // (c1 = c2) IS True 207 // 208 // In other words, if either c1 or c2 is NULL, or both are NULL, then c1 is 209 // considered not equal to c2. The definition for A~~>B follows from that: 210 // 211 // for any two rows r1 and r2 in the relation: 212 // (A(r1) = A(r2)) IS True ==> B(r1) NULL= B(r2) 213 // 214 // In other words, if two different non-NULL rows have equal values for A, then 215 // those rows will also have equal values for B using NULL= semantics. Note that 216 // both strict and lax equality definitions collapse to the same semantics when 217 // the columns of A are not-NULL. The example row sets shown above that were 218 // invalid for a strict dependency are valid for a lax dependency: 219 // 220 // b c 221 // ---------- 222 // NULL 1 223 // NULL NULL 224 // 225 // b c 226 // ---------- 227 // NULL 1 228 // NULL 2 229 // 230 // To continue the CREATE TABLE example shown above, another FD can now be 231 // derived from that statement, in addition to the primary key FD: 232 // 233 // (a)-->(b,c) 234 // (b)~~>(a,c) 235 // 236 // Lax dependencies are *not* transitive, and have limited usefulness as-is. 237 // However, some operators (like Select) can "reject" NULL values, which means 238 // that they filter out rows containing the troublesome NULL values. 
// That makes
// it possible for the operator to "upgrade" a lax dependency to a strict
// dependency (recall that both have identical semantics when NULLs are not
// present), as in this example:
//
//   SELECT * FROM t WHERE b>5
//
// The ">" operator rejects NULL values, which means that the Select operator
// can convert the lax dependency to a strict dependency:
//
//   (a)-->(b,c)
//   (b)-->(a,c)
//
// Now, either the "a" or "b" column determines the values of all other columns,
// and both are keys for the relation.
//
// Another thing to note is that a lax dependency with an empty determinant is
// the same as the corresponding strict dependency:
//
//   ()~~>(a,b)
//   ()-->(a,b)
//
// As described above, a strict dependency differs from a lax dependency only in
// terms of what values are allowed in determinant columns. Since the
// determinant has no columns in these cases, the semantics will be identical.
// For that reason, this library automatically maps lax constant dependencies to
// strict constant dependencies.
//
// Keys
//
// A key is a set of columns that have a unique composite value for every row in
// the relation. There are two kinds of keys, strict and lax, that parallel the
// two kinds of functional dependencies. Strict keys treat NULL values in key
// columns as equal to one another:
//
//   b     c
//   --------
//   1     10
//   2     20
//   NULL  30
//
// Here, "b" is a key for the relation, even though it contains a NULL value,
// because there is only one such value. Multiple NULL values would violate the
// strict key, because they would compare as equal, and therefore would be
// considered duplicates. The SQL GROUP BY operator uses the same semantics for
// grouping (it's no coincidence that the definition for strict keys follows
// that lead).
285 // 286 // By contrast, lax keys treat NULL values in key columns as distinct from one 287 // another, and so considers column "b" as unique in the following example: 288 // 289 // b c 290 // -------- 291 // 1 10 292 // 2 20 293 // NULL 30 294 // NULL 40 295 // 296 // Note that both strict and lax keys treat non-NULL values identically; values 297 // from two different rows must never compare equal to one another. In addition, 298 // the presence of a strict or lax key always implies a functional dependency 299 // with the key as determinant and all other columns in the relation as 300 // dependants. Here is an example assuming a table with columns (a,b,c,d): 301 // 302 // lax-key(a,b) => (a,b)~~>(c,d) 303 // strict-key(a,b) => (a,b)-->(c,d) 304 // 305 // The "empty key" is a special key that has zero columns. It is used when the 306 // relation is guaranteed to have at most one row. In this special case, every 307 // column is constant. Every combination of columns is a trivial key for the 308 // relation and determines every other column. Because the lax and strict key 309 // designations are equivalent when there is a single row, empty keys are always 310 // normalized to be strict for convenience. 311 // 312 // FuncDepSet tracks whether at least one key (whether it be strict or lax) 313 // exists for the relation. If this is true, then all possible keys for the 314 // relation can be enumerated using the FD set. This is because any subset of 315 // columns forms a key if its FD closure contains every column in the relation. 316 // Therefore, all keys can be brute force enumerated by checking the closure of 317 // each combination in the power set. Again, this is only possible when the 318 // relation is known to have a key; otherwise, knowing the closure contains all 319 // columns is not a sufficient condition to identify a key, because of the 320 // possibility of duplicate rows. 
//
// In practice, it is never necessary to enumerate all possible keys (fortunate,
// since there can be O(2**N) of them), since the vast majority of them turn out
// to have redundant columns that can be functionally determined from other
// columns in the key. Of more value is the set of "candidate keys", which are
// keys that contain no redundant columns. Removing any column from such a key
// causes it to no longer be a key. It is possible to enumerate the set of
// candidate keys in polynomial rather than exponential time (see Wikipedia
// "Candidate key" entry).
//
// However, since even polynomial time can be problematic, this library tries to
// avoid enumerating keys by storing and maintaining a single candidate key for
// the relation. And while it is not always successful, the library tries to
// keep the candidate key that has the fewest number of columns. In most cases,
// this single key is enough to satisfy the requirements of the optimizer. But
// when it is not enough, or the existing key is no longer valid, then a new
// candidate key can always be generated.
//
// It turns out that the most common key-related question that must be answered
// is not "what is the list of keys for this relation?", but instead, "does
// this set of columns contain a key for the relation?". The latter question can
// be easily answered by computing the closure of the columns, and checking
// whether the closure contains the key maintained by FuncDepSet. And when a
// relatively short key is needed (e.g. during decorrelation), FuncDepSet has
// one ready to go.
//
// Equivalent Columns
//
// FD sets encode "equivalent columns", which are pairs of columns that always
// have equal values using the SQL equality operator with NULL= semantics.
Two 351 // columns a and b are equivalent if the following expression returns true: 352 // 353 // ((a = b) OR (a IS NULL AND b IS NULL)) IS True 354 // 355 // Equivalent columns are typically derived from a Select filter condition, and 356 // are represented as two FDs with each column acting as both determinant and 357 // dependant: 358 // 359 // SELECT * FROM t WHERE b=c 360 // (a)-->(b,c) 361 // (b)~~>(a,c) 362 // (b)==(c) 363 // (c)==(b) 364 // 365 // In the common case shown above, the WHERE clause rejects NULL values, so the 366 // equivalency will always be strict, which means it retains all the same 367 // properties of a strict dependency. While lax equivalencies are theoretically 368 // possible, the library currently maps them into regular lax dependencies to 369 // simplify implementation. 370 // 371 // Theory to Practice 372 // 373 // For a more rigorous examination of functional dependencies and their 374 // interaction with various SQL operators, see the following Master's Thesis: 375 // 376 // Norman Paulley, Glenn. (2000). 377 // Exploiting Functional Dependence in Query Optimization. 378 // https://cs.uwaterloo.ca/research/tr/2000/11/CS-2000-11.thesis.pdf 379 // 380 // While this paper served as the inspiration for this library, a number of 381 // details differ, including (but not limited to): 382 // 383 // 1. Most importantly, the definition of "lax" used in the paper differs from 384 // the definition used by this library. For a lax dependency A~~>B, the 385 // paper allows this set of rows: 386 // 387 // a b 388 // ------- 389 // 1 1 390 // 1 NULL 391 // 392 // This library disallows that, since it requires that if the determinant 393 // of a lax dependency is not-null, then it is equivalent to a strict 394 // dependency. This alternate definition is briefly covered in section 395 // 2.5.3.2 of the paper (see definition 2.19). 
The reason for this change 396 // is to allow a lax dependency to be upgraded to a strict dependency more 397 // readily, needing only the determinant columns to be not-null rather than 398 // both determinant and dependant columns. 399 // 400 // 2. The paper simplifies FD sets so that dependants never contain more than 401 // one column. This library allows multiple dependent columns, since they 402 // can be so efficiently stored and processed as ColSets. 403 // 404 // 3. The paper deliberately avoids all simplifications when a SQL operator 405 // adds new FDs to an existing FD set, in order to avoid unneeded work and 406 // expensive reductions. This library performs quite a few simplifications 407 // in order to keep the FD set more manageable and understandable. 408 // 409 // 4. The paper "colors" columns black when they are no longer part of a 410 // derived relation. Rather than marking removed columns, this library 411 // actually removes them from the FD set. 412 // 413 // 5. In order to ensure a unique key for every relation, the paper uses a 414 // special "tuple identifier" that acts like a virtual column and can be 415 // both a determinant and a dependant. If the transitive closure of any set 416 // of columns includes the tuple identifier column, then that set of 417 // columns is a super key for the relation. As described in the Keys 418 // section above, this library takes a simplified approach so that it 419 // doesn't need to allocate virtual columns in property derivation code. 420 // 421 type FuncDepSet struct { 422 // deps contains the functional dependencies that have a non-trivial 423 // determinant and dependant (i.e. not empty, with no overlapping columns): 424 // 425 // (a)-->(b,c) 426 // (b,c)~~>(a,d) 427 // (d)==(e) 428 // (e)==(d) 429 // 430 // See the above comments for more details. 431 // 432 // This slice is owned by this FuncDepSet and shouldn't be shared unless 433 // all referencing sets are treated as immutable. 
434 deps []funcDep 435 436 // hasKey is: 437 // - strictKey if the relation has no duplicate rows, which means at least 438 // one subset of its columns form a key (all columns, if no other subset). 439 // The key field contains one such key. See the "Keys" section above for 440 // more details. A strict key can be empty. 441 // - laxKey if there is a at least one subset of columns that form a lax key. 442 // The key field contains one such key. A lax key cannot be empty. 443 // 444 // See the "Keys" section above for more details. 445 hasKey keyType 446 447 // key contains a set of columns that form a key or a lax key for the 448 // relation, depending on hasKey; empty if hasKey is noKey. 449 // 450 // There is no guarantee that the key has the minimum possible number of 451 // columns, but a best effort is made to keep it as short as possible. 452 // 453 // See the "Keys" section above for more details. 454 // 455 // This set is immutable; to update it, replace it with a different set 456 // containing the desired columns. 457 key opt.ColSet 458 } 459 460 type keyType int8 461 462 const ( 463 noKey keyType = iota 464 laxKey 465 strictKey 466 ) 467 468 // funcDep stores a single functional dependency. See the comment for FuncDepSet 469 // for more details. 470 type funcDep struct { 471 // from is the determinant of the functional dependency (easier to read the 472 // code when "from" is used rather than "determinant"). 473 // 474 // This set is immutable; to update it, replace it with a different set 475 // containing the desired columns. 476 from opt.ColSet 477 478 // to is the dependant of the functional dependency (easier to read the code 479 // when "to" is used rather than "dependant"). 480 // 481 // This set is immutable; to update it, replace it with a different set 482 // containing the desired columns. 483 to opt.ColSet 484 485 // strict is true if NULL values in the determinant are treated as if they are 486 // equal to other NULL values. 
Every NULL determinant must therefore map to 487 // the same dependant value. If strict is false, then two NULL determinants 488 // can map to different dependant values. See the NULL Values section in the 489 // FuncDeps comment for more details. 490 strict bool 491 492 // equiv is true if the value of the determinant equals the value of each of 493 // the dependant columns, and false if there's no known equality relationship. 494 // If equiv is true, the determinant may only consist of a single column. 495 equiv bool 496 } 497 498 // StrictKey returns a strict key for the relation, if there is one. 499 // A best effort is made to return a candidate key that has the fewest columns. 500 func (f *FuncDepSet) StrictKey() (_ opt.ColSet, ok bool) { 501 if f.hasKey == strictKey { 502 return f.key, true 503 } 504 return opt.ColSet{}, false 505 } 506 507 // LaxKey returns a lax key for the relation, if there is one. 508 // Note that strict keys are implicitly also lax keys, so if the relation has a 509 // strict key, this returns the same key as StrictKey(). 510 // A best effort is made to return a lax key that has the fewest columns. 511 func (f *FuncDepSet) LaxKey() (_ opt.ColSet, ok bool) { 512 if f.hasKey != noKey { 513 return f.key, true 514 } 515 return opt.ColSet{}, false 516 } 517 518 // Empty is true if the set contains no FDs and no key. 519 func (f *FuncDepSet) Empty() bool { 520 return len(f.deps) == 0 && f.hasKey == noKey 521 } 522 523 // ColSet returns all columns referenced by the FD set. 524 func (f *FuncDepSet) ColSet() opt.ColSet { 525 var cols opt.ColSet 526 for i := 0; i < len(f.deps); i++ { 527 fd := &f.deps[i] 528 cols.UnionWith(fd.from) 529 cols.UnionWith(fd.to) 530 } 531 if f.hasKey != noKey { 532 // There are cases where key columns don't show up in FDs. For example: 533 // lax-key(2,3); ()-->(1) 534 cols.UnionWith(f.key) 535 } 536 return cols 537 } 538 539 // HasMax1Row returns true if the relation has zero or one rows. 
540 func (f *FuncDepSet) HasMax1Row() bool { 541 return f.hasKey == strictKey && f.key.Empty() 542 } 543 544 // CopyFrom copies the given FD into this FD, replacing any existing data. 545 func (f *FuncDepSet) CopyFrom(fdset *FuncDepSet) { 546 // Make certain to copy FDs to the slice owned by this set. 547 f.deps = f.deps[:0] 548 f.deps = append(f.deps, fdset.deps...) 549 f.key = fdset.key 550 f.hasKey = fdset.hasKey 551 } 552 553 // ColsAreStrictKey returns true if the given columns contain a strict key for the 554 // relation. This means that any two rows in the relation will never have the 555 // same values for this set of columns. If the columns are nullable, then at 556 // most one row could have NULL values for all of the columns. For example, 557 // (a,b) is a strict key for the following relation, but (a) is not (because 558 // there are multiple rows where a=1 and a=NULL): 559 // 560 // a b c 561 // ---------------- 562 // NULL NULL NULL 563 // NULL 1 1 564 // 1 NULL 1 565 // 1 1 1 566 // 567 func (f *FuncDepSet) ColsAreStrictKey(cols opt.ColSet) bool { 568 return f.colsAreKey(cols, strictKey) 569 } 570 571 // ColsAreLaxKey returns true if the given columns contain a lax key for the 572 // relation. This means that any two rows in the relation will never have the 573 // same values for this set of columns, except potentially in the case where at 574 // least one of the columns is NULL. For example, (a,b) is a lax key for the 575 // following relation, but (a) is not (because there are multiple rows where 576 // a=1): 577 // 578 // a b c 579 // ---------------- 580 // NULL NULL NULL 581 // NULL NULL 1 582 // NULL NULL 2 583 // NULL 1 1 584 // NULL 1 2 585 // 1 NULL 1 586 // 1 NULL 2 587 // 1 1 1 588 // 589 func (f *FuncDepSet) ColsAreLaxKey(cols opt.ColSet) bool { 590 return f.colsAreKey(cols, laxKey) 591 } 592 593 // ConstantCols returns the set of columns that will always have the same value 594 // for all rows in the relation. 
595 func (f *FuncDepSet) ConstantCols() opt.ColSet { 596 if len(f.deps) > 0 && f.deps[0].isConstant() { 597 return f.deps[0].to 598 } 599 return opt.ColSet{} 600 } 601 602 // ReduceCols removes redundant columns from the given set. Redundant columns 603 // can be functionally determined from the remaining columns. If the columns 604 // contain a key for the relation, then the reduced columns will form a 605 // candidate key for the relation. 606 // 607 // The reduction algorithm removes one column at a time (in an arbitrary order), 608 // and then tests to see if the closure still includes the removed column. If 609 // so, then the column is redundant. This algorithm has decent running time, but 610 // will not necessarily find the candidate key with the fewest columns. 611 func (f *FuncDepSet) ReduceCols(cols opt.ColSet) opt.ColSet { 612 var removed opt.ColSet 613 cols = cols.Copy() 614 for i, ok := cols.Next(0); ok; i, ok = cols.Next(i + 1) { 615 cols.Remove(i) 616 removed.Add(i) 617 if !f.inClosureOf(removed, cols, true /* strict */) { 618 // The column is not functionally determined by the other columns, so 619 // retain it in the set. 620 cols.Add(i) 621 } 622 removed.Remove(i) 623 } 624 return cols 625 } 626 627 // InClosureOf returns true if the given columns are functionally determined by 628 // the "in" column set. 629 func (f *FuncDepSet) InClosureOf(cols, in opt.ColSet) bool { 630 return f.inClosureOf(cols, in, true /* strict */) 631 } 632 633 // ComputeClosure returns the strict closure of the given columns. The closure 634 // includes the input columns plus all columns that are functionally dependent 635 // on those columns, either directly or indirectly. Consider this set of FD's: 636 // 637 // (a)-->(b,c,d) 638 // (b,c,e)-->(f) 639 // (d)-->(e) 640 // 641 // The strict closure of (a) is (a,b,c,d,e,f), because (a) determines all other 642 // columns. 
Therefore, if two rows have the same value for (a), then the rows 643 // will be duplicates, since all other columns will be equal. 644 func (f *FuncDepSet) ComputeClosure(cols opt.ColSet) opt.ColSet { 645 cols = cols.Copy() 646 for i := 0; i < len(f.deps); i++ { 647 fd := &f.deps[i] 648 649 if fd.strict && fd.from.SubsetOf(cols) && !fd.to.SubsetOf(cols) { 650 cols.UnionWith(fd.to) 651 652 // Restart iteration to get transitive closure. 653 i = -1 654 } 655 } 656 return cols 657 } 658 659 // ComputeEquivClosure returns the equivalence closure of the given columns. The 660 // closure includes the input columns plus all columns that are equivalent to 661 // any of these columns, either directly or indirectly. For example: 662 // 663 // (a)==(b) 664 // (b)==(c) 665 // (a)==(d) 666 // 667 // The equivalence closure for (a) is (a,b,c,d) because (a) is transitively 668 // equivalent to all other columns. Therefore, all columns must have equal 669 // non-NULL values, or else all must be NULL (see definition for NULL= in the 670 // comment for FuncDepSet). 671 func (f *FuncDepSet) ComputeEquivClosure(cols opt.ColSet) opt.ColSet { 672 // Don't need to get transitive closure, because equivalence closures are 673 // already maintained for every column. 674 cols = cols.Copy() 675 for i := 0; i < len(f.deps); i++ { 676 fd := &f.deps[i] 677 if fd.equiv && fd.from.SubsetOf(cols) && !fd.to.SubsetOf(cols) { 678 cols.UnionWith(fd.to) 679 } 680 } 681 return cols 682 } 683 684 // AddStrictKey adds an FD for a new key. The given key columns are reduced to a 685 // candidate key, and that becomes the determinant for the allCols column set. 686 // The resulting FD is strict, meaning that a NULL key value always maps to the 687 // same set of values in the rest of the relation's columns. 
// For key columns
// (a,b) and relation columns (a,b,c,d), an FD like this is created:
//
//   (a,b)-->(c,d)
//
// If the resulting candidate key has fewer columns than the current key, then
// the new key is adopted in its place.
//
// AddStrictKey panics if keyCols is not a subset of allCols.
func (f *FuncDepSet) AddStrictKey(keyCols, allCols opt.ColSet) {
	if !keyCols.SubsetOf(allCols) {
		panic(errors.AssertionFailedf("allCols does not include keyCols"))
	}

	// Ensure we have candidate key (i.e. has no columns that are functionally
	// determined by other columns).
	keyCols = f.ReduceCols(keyCols)
	f.addDependency(keyCols, allCols, true /* strict */, false /* equiv */)

	// Try to use the new FD to reduce any existing key first.
	f.tryToReduceKey(opt.ColSet{} /* notNullCols */)

	// Adopt the new key when the set has no strict key yet (no key at all, or
	// only a lax key), or when the new key has fewer columns than the existing
	// strict key.
	if f.hasKey != strictKey || keyCols.Len() < f.key.Len() {
		f.setKey(keyCols, strictKey)
	}
}

// AddLaxKey is similar to AddStrictKey, except that it creates a lax FD rather
// than a strict FD. This means that two rows with NULL key values might not
// have the same values in other non-key columns. For key columns (a,b) and
// relation columns (a,b,c,d), an FD like this is created:
//
//   (a,b)~~>(c,d)
//
// AddLaxKey panics if keyCols is empty or is not a subset of allCols. The new
// key is adopted only when the set has no key, or has a lax key with more
// columns; an existing strict key is never replaced by a lax key.
func (f *FuncDepSet) AddLaxKey(keyCols, allCols opt.ColSet) {
	if !keyCols.SubsetOf(allCols) {
		panic(errors.AssertionFailedf("allCols does not include keyCols"))
	}
	if keyCols.Empty() {
		panic(errors.AssertionFailedf("lax key cannot be empty"))
	}

	// Record the lax FD. Unlike AddStrictKey, keyCols is used exactly as given:
	// it is not reduced to a candidate key first (see the TODO below).
	f.addDependency(keyCols, allCols, false /* strict */, false /* equiv */)

	// TODO(radu): without null column information, we cannot reduce lax keys (see
	// tryToReduceKey). Consider passing that information (or storing it with the
	// FDs to begin with). In that case we would need to reduce both the given key
	// and the existing key, similar to AddStrictKey.

	if f.hasKey == noKey || (f.hasKey == laxKey && keyCols.Len() < f.key.Len()) {
		f.setKey(keyCols, laxKey)
	}
}

// MakeMax1Row initializes the FD set for a relation containing either zero or
// one rows, and with the given columns. In this special case, the value of
// every column is trivially considered a constant, and the key is the empty
// set, because no columns are required to ensure uniqueness of rows. This
// special case may seem trivial, but it is quite important to detect during
// optimization. For a relation with columns (a, b), the following FD is
// created in the set:
//
//   ()-->(a,b)
//
// Any FDs previously held by the set are discarded.
func (f *FuncDepSet) MakeMax1Row(cols opt.ColSet) {
	// Drop all existing FDs; re-slicing to zero length keeps the backing array.
	f.deps = f.deps[:0]
	// With no columns there is nothing to declare constant, so the set ends up
	// holding only the empty strict key.
	if !cols.Empty() {
		f.deps = append(f.deps, funcDep{to: cols, strict: true})
	}
	f.setKey(opt.ColSet{}, strictKey)
}

// MakeNotNull modifies the FD set based on which columns cannot contain NULL
// values. This often allows upgrading lax dependencies to strict dependencies,
// and lax keys to strict keys.
//
// Note: this function should be called with all known not-null columns; it
// won't do as good of a job if it's called multiple times with different
// subsets.
func (f *FuncDepSet) MakeNotNull(notNullCols opt.ColSet) {
	// We have to collect all the FDs that can be made strict. We avoid allocation
	// for the case where there is at most one such FD.
	//
	// The candidates are gathered first and only applied after the scan, because
	// addDependency rewrites f.deps.
	var firstLaxFD *funcDep
	var otherLaxFDs []funcDep
	for i := range f.deps {
		fd := &f.deps[i]
		if fd.strict {
			continue
		}

		// FD can be made strict if all determinant columns are not null.
		if fd.from.SubsetOf(notNullCols) {
			if firstLaxFD == nil {
				// First candidate: remember it by pointer (no allocation).
				firstLaxFD = fd
			} else {
				// Further candidates are copied out of f.deps.
				otherLaxFDs = append(otherLaxFDs, *fd)
			}
		}
	}

	if firstLaxFD != nil {
		// Re-add each collected lax FD as a strict FD.
		f.addDependency(firstLaxFD.from, firstLaxFD.to, true /* strict */, false /* equiv */)
		for i := range otherLaxFDs {
			f.addDependency(otherLaxFDs[i].from, otherLaxFDs[i].to, true /* strict */, false /* equiv */)
		}
	}

	f.tryToReduceKey(notNullCols)
}

// AddEquivalency adds two FDs to the set that establish a strict equivalence
// between the given columns. Either "a" equals "b" according to SQL equality
// semantics, or else "a" is NULL and "b" is NULL. The following FDs are
// created in the set:
//
//   (a)==(b)
//   (b)==(a)
//
// It is a no-op when a and b are the same column.
func (f *FuncDepSet) AddEquivalency(a, b opt.ColumnID) {
	if a == b {
		return
	}

	var equiv opt.ColSet
	equiv.Add(a)
	equiv.Add(b)
	f.addEquivalency(equiv)
}

// AddConstants adds a strict FD to the set that declares the given columns as
// having the same constant value for all rows. If a column is nullable, then
// its value may be NULL, but then the column must be NULL for all rows. For
// column "a", the FD looks like this:
//
//   ()-->(a)
//
// Since it is a constant, any set of determinant columns (including the empty
// set) trivially determines the value of "a".
//
// It is a no-op when cols is empty.
func (f *FuncDepSet) AddConstants(cols opt.ColSet) {
	if cols.Empty() {
		return
	}

	// Determine complete set of constants by computing closure.
	cols = f.ComputeClosure(cols)

	// Ensure that first FD in the set is a constant FD and make sure the
	// constants are part of it.
	if len(f.deps) == 0 || !f.deps[0].isConstant() {
		// No constant FD yet: build a new slice with the constant FD in slot 0
		// and the existing FDs shifted up by one.
		deps := make([]funcDep, len(f.deps)+1)
		deps[0] = funcDep{to: cols, strict: true}
		copy(deps[1:], f.deps)
		f.deps = deps
	} else {
		// Update existing constant FD to include all constant columns in the set.
		f.deps[0].to = cols
	}

	// Remove any other FDs made redundant by adding the constants. Surviving FDs
	// are compacted in place; n is the next write position (slot 0 is the
	// constant FD and is always kept).
	n := 1
	for i := 1; i < len(f.deps); i++ {
		fd := &f.deps[i]

		// Always retain equivalency information, even for constants.
		if !fd.equiv {
			if fd.strict {
				// Constant columns can be removed from the determinant of a strict
				// FD. If all determinant columns are constant, then the entire FD
				// can be removed, since this means that the dependant columns must
				// also be constant (and were part of constant closure added to the
				// constant FD above).
				//
				// NOTE(review): removeFromCols presumably reports false when no
				// determinant columns remain; its definition is outside this view.
				if !fd.removeFromCols(cols) {
					continue
				}
			}

			// Dependant constants are redundant, so remove them.
			if !fd.removeToCols(cols) {
				continue
			}
		}

		if n != i {
			f.deps[n] = f.deps[i]
		}
		n++
	}
	f.deps = f.deps[:n]

	f.tryToReduceKey(opt.ColSet{} /* notNullCols */)
}

// AddSynthesizedCol adds an FD to the set that is derived from a synthesized
// column in a projection list.
// The synthesized column is often derived from
// other columns, in which case AddSynthesizedCol creates a new FD like this:
//
//   (a,b)-->(c)
//
// Or it may be a constant column, like this:
//
//   ()-->(c)
//
// AddSynthesizedCol panics if "from" contains the synthesized column itself.
func (f *FuncDepSet) AddSynthesizedCol(from opt.ColSet, col opt.ColumnID) {
	if from.Contains(col) {
		panic(errors.AssertionFailedf("synthesized column cannot depend upon itself"))
	}

	var colSet opt.ColSet
	colSet.Add(col)
	// An empty "from" set makes this a constant FD; addDependency delegates that
	// case to AddConstants.
	f.addDependency(from, colSet, true /* strict */, false /* equiv */)

	f.tryToReduceKey(opt.ColSet{} /* notNullCols */)
}

// ProjectCols removes all columns that are not in the given set. It does this
// by replacing any un-projected dependants by their closures, and then removing
// all FDs containing un-projected columns. While this algorithm may cause some
// loss of information in edge cases, it does a good job of preserving the most
// important dependency information.
func (f *FuncDepSet) ProjectCols(cols opt.ColSet) {
	// Ensure that any existing key contains only projected columns. Do this
	// before removing any FDs from the set, in order to take advantage of all
	// existing transitive relationships.
	if f.hasKey != noKey && !f.key.SubsetOf(cols) {
		// Derive new candidate key (or key is no longer possible).
		if f.hasKey == strictKey && f.ColsAreStrictKey(cols) {
			f.setKey(cols, strictKey)
			f.tryToReduceKey(opt.ColSet{} /* notNullCols */)
		} else if f.ColsAreLaxKey(cols) {
			f.setKey(cols, laxKey)
			f.tryToReduceKey(opt.ColSet{} /* notNullCols */)
		} else {
			f.clearKey()
		}
	}

	// Special case of <= 1 row.
	if f.hasKey == strictKey && f.key.Empty() {
		f.MakeMax1Row(cols)
		return
	}

	// During first pass, add closures of un-projected columns in dependants.
	// This will ensure that transitive relationships between remaining columns
	// won't be lost. Also, track list of un-projected columns that are part of
	// non-equivalent determinants. It's possible these can be mapped to
	// equivalent columns.
	var constCols, detCols, equivCols opt.ColSet
	for i := range f.deps {
		fd := &f.deps[i]

		// Remember constant columns. This is a plain assignment rather than a
		// union; Verify requires any constant FD to be the first FD in the set,
		// so at most one is expected.
		if fd.isConstant() {
			constCols = fd.to
		}

		// Add closures to dependants containing un-projected columns.
		if !fd.to.SubsetOf(cols) {
			// Equivalence dependencies already maintain closure, so skip them.
			if !fd.equiv {
				fd.to = f.ComputeClosure(fd.to)
			}
		}

		// Track list of un-projected columns that can possibly be mapped to
		// equivalent columns.
		if !fd.equiv && !fd.from.SubsetOf(cols) {
			detCols.UnionWith(fd.from)
			detCols.DifferenceWith(cols)
		}

		// Track all columns that have equivalent alternates that are part of the
		// projection.
		if fd.equiv && fd.to.Intersects(cols) {
			equivCols.UnionWith(fd.from)
		}
	}

	// Construct equivalence map that supports substitution of an equivalent
	// column in place of a removed column. Only removed determinant columns that
	// actually have a projected equivalent are passed in.
	detCols.IntersectionWith(equivCols)
	equivMap := f.makeEquivMap(detCols, cols)

	// If constants were found, then normalize constants to preserve FDs in a
	// case like this where (2) is removed:
	//
	//   ()-->(2), (2,3)-->(4)
	//
	// Rather than removing both FDs, the second FD should be preserved in this
	// form:
	//
	//   (3)-->(4)
	//
	if !constCols.Empty() {
		f.AddConstants(constCols)
	}

	// During second pass, remove all dependencies with un-projected columns.
	// Surviving FDs are compacted in place (n is the write position); remapped
	// FDs are collected in newFDs and added once the compaction is finished.
	var newFDs []funcDep
	n := 0
	for i := range f.deps {
		fd := &f.deps[i]

		// Subtract out un-projected columns from dependants. Also subtract strict
		// constant columns from dependants for nicer presentation.
		if !fd.to.SubsetOf(cols) {
			fd.to = fd.to.Intersection(cols)
			if !fd.isConstant() {
				fd.to.DifferenceWith(constCols)
			}
			// The closure added in the first pass may have pulled determinant
			// columns into the dependants; strip them again.
			if !fd.removeToCols(fd.from) {
				continue
			}
		}

		// Try to substitute equivalent columns for removed determinant columns.
		if !fd.from.SubsetOf(cols) {
			if fd.equiv {
				// Always discard equivalency with removed determinant, since other
				// equivalencies will already include this column.
				continue
			}

			// Start with "before" list of columns that need to be mapped, and try
			// to find an "after" list containing equivalent columns.
			var afterCols opt.ColSet
			beforeCols := fd.from.Difference(cols)
			foundAll := true
			for c, ok := beforeCols.Next(0); ok; c, ok = beforeCols.Next(c + 1) {
				var id opt.ColumnID
				// Give up on this FD as soon as one removed column has no
				// projected equivalent.
				if id, foundAll = equivMap[c]; !foundAll {
					break
				}
				afterCols.Add(id)
			}
			if foundAll {
				// Dependency can be remapped using equivalencies. fd.equiv is
				// always false here, since equivalencies were discarded above.
				from := fd.from.Union(afterCols)
				from.DifferenceWith(beforeCols)
				newFDs = append(newFDs, funcDep{from: from, to: fd.to, strict: fd.strict, equiv: fd.equiv})
			}
			// The original FD still references removed columns, so drop it
			// whether or not a remapped replacement was produced.
			continue
		}

		if n != i {
			f.deps[n] = f.deps[i]
		}
		n++
	}
	f.deps = f.deps[:n]

	for i := range newFDs {
		fd := &newFDs[i]
		f.addDependency(fd.from, fd.to, fd.strict, fd.equiv)
	}

	// Ensure that key still determines all other columns.
	f.ensureKeyClosure(cols)
}

// AddFrom merges two FD sets by adding each FD from the given set to this set.
// While this requires O(N**2) time, it's useful when the two FD sets may
// overlap one another and substantial simplifications are possible (as with
// IndexJoin). It is up to the caller to ensure that the two FD sets are
// "compatible", meaning that they operate on the same relations, with the same
// keys, same columns, etc.
1052 func (f *FuncDepSet) AddFrom(fdset *FuncDepSet) { 1053 for i := range fdset.deps { 1054 fd := &fdset.deps[i] 1055 f.addDependency(fd.from, fd.to, fd.strict, fd.equiv) 1056 } 1057 } 1058 1059 // AddEquivFrom is similar to AddFrom, except that it only adds equivalence 1060 // dependencies from the given set to this set. 1061 func (f *FuncDepSet) AddEquivFrom(fdset *FuncDepSet) { 1062 for i := range fdset.deps { 1063 fd := &fdset.deps[i] 1064 if fd.equiv { 1065 f.addDependency(fd.from, fd.to, fd.strict, fd.equiv) 1066 } 1067 } 1068 } 1069 1070 // MakeProduct modifies the FD set to reflect the impact of a cartesian product 1071 // operation between this set and the given set. The result is a union of the 1072 // FDs from each set, as well as a union of their keys. The two FD sets are 1073 // expected to operate on disjoint columns, so the FDs from each are simply 1074 // concatenated, rather than simplified via calls to addDependency (except for 1075 // case of constant columns). 1076 func (f *FuncDepSet) MakeProduct(inner *FuncDepSet) { 1077 for i := range inner.deps { 1078 fd := &inner.deps[i] 1079 if fd.isConstant() { 1080 f.addDependency(fd.from, fd.to, fd.strict, fd.equiv) 1081 } else { 1082 f.deps = append(f.deps, *fd) 1083 } 1084 } 1085 1086 if f.hasKey != noKey && inner.hasKey != noKey { 1087 // If both sides have a strict key, the union of keys is a strict key. 1088 // If one side has a lax key and the other has a lax or strict key, the 1089 // union is a lax key. 1090 typ := laxKey 1091 if f.hasKey == strictKey && inner.hasKey == strictKey { 1092 typ = strictKey 1093 } 1094 f.setKey(f.key.Union(inner.key), typ) 1095 } else { 1096 f.clearKey() 1097 } 1098 } 1099 1100 // MakeApply modifies the FD set to reflect the impact of an apply join. This 1101 // FD set reflects the properties of the outer query, and the given FD set 1102 // reflects the properties of the inner query. 
// Constant FDs from inner set no
// longer hold and some other dependencies need to be augmented in order to be
// valid for the apply join operator. Consider this example:
//
//   SELECT *
//   FROM a
//   INNER JOIN LATERAL (SELECT * FROM b WHERE b.y=a.y)
//   ON True
//
// 1. The constant dependency created from the outer column reference b.y=a.y
//    does not hold for the Apply operator, since b.y is no longer constant at
//    this level. In general, constant dependencies cannot be retained, because
//    they may have been generated from outer column equivalencies.
// 2. If a strict dependency (b.x,b.y)-->(b.z) held, it would have been reduced
//    to (b.x)-->(b.z) because (b.y) is constant in the inner query. However,
//    (b.x)-->(b.z) does not hold for the Apply operator, since (b.y) is not
//    constant in that case. However, the dependency *does* hold as long as its
//    determinant is augmented by the left input's key columns (only done when
//    the left input has a strict key; otherwise the dependency is dropped).
// 3. Lax dependencies follow the same rules as #2.
// 4. Equivalence dependencies in the inner query still hold for the Apply
//    operator.
// 5. If both the outer and inner inputs of the apply join have strict keys,
//    then the concatenation of those keys is a key on the apply join result.
//    In every other case the result has no key.
//
func (f *FuncDepSet) MakeApply(inner *FuncDepSet) {
	for i := range inner.deps {
		fd := &inner.deps[i]
		if fd.equiv {
			f.addDependency(fd.from, fd.to, fd.strict, fd.equiv)
		} else if !fd.isConstant() && f.hasKey == strictKey {
			f.addDependency(f.key.Union(fd.from), fd.to, fd.strict, fd.equiv)
		}
		// TODO(radu): can we use a laxKey here?
	}

	if f.hasKey == strictKey && inner.hasKey == strictKey {
		f.setKey(f.key.Union(inner.key), strictKey)
		f.ensureKeyClosure(inner.ColSet())
	} else {
		// TODO(radu): can we use a laxKey here?
		f.clearKey()
	}
}

// MakeLeftOuter modifies the cartesian product FD set to reflect the impact of
// adding NULL-extended rows to the results of an inner join. An inner join can
// be modeled as a cartesian product + ON filtering, and an outer join is
// modeled as an inner join + union of NULL-extended rows. MakeLeftOuter enacts
// the filtering and null-extension of the cartesian product. If it is possible
// to prove that there is a key over the join result that consists only of
// columns from the left input, that key will be used.
//
// The receiver is expected to already hold the FDs of the cartesian product.
// leftFDs supplies the left input's strict key (if any), filtersFDs holds the
// FDs implied by the join filters, leftCols and rightCols are the columns of
// the two sides, and notNullInputCols are the input columns known to be
// not-null.
//
// This same logic applies for right joins as well (by reversing sides).
//
// See the "Left outer join" section on page 84 of the Master's Thesis for the
// impact of outer joins on FDs.
func (f *FuncDepSet) MakeLeftOuter(
	leftFDs, filtersFDs *FuncDepSet, leftCols, rightCols, notNullInputCols opt.ColSet,
) {
	// The columns from the left input form a key over the result of the LeftJoin
	// if the following conditions are met:
	//
	//   1. The left input has a strict key.
	//
	//   2. The left columns form a strict key over the filtered cartesian product.
	//      (In other words, the left columns would form a key over an inner join
	//      with the same filters).
	//
	// The above conditions are sufficient because a strict key (over the filtered
	// cartesian product) that only contains columns from the left side implies
	// that no left rows were duplicated. This is because even a single duplicated
	// row would prohibit a strict key containing only those columns. And if there
	// was already a strict key in the original left input, adding back filtered
	// left rows will not create any duplicates. This means that the LeftJoin will
	// not duplicate any of the left rows. Therefore, a key over the left input
	// must also be a key over the result of the join.
	//
	// If the conditions are not met, a key over the unfiltered cartesian product
	// (if one exists) is used. Why is this key valid to use?
	//
	//   * A left join can filter rows and null-extend rows from the cartesian
	//     product.
	//   * Filtering rows does not affect the presence of a key.
	//   * Null-extending rows does not affect the presence of a key because the
	//     cartesian product could only have a key if the left and right inputs
	//     also had keys (see FuncDepSet.MakeProduct). Therefore, null-extended
	//     left rows that are added back by the left join must be unique.
	//
	// As an example, consider this data and this query:
	//
	//   a      b
	//   -      -
	//   1      1
	//   2      2
	//   3
	//   4
	//
	//   SELECT * FROM a_tab LEFT JOIN b_tab ON a < 3
	//
	// Both tables a and b have a strict key. If we take their cartesian product,
	// we get something like this:
	//
	//   a  b
	//   ----
	//   1  1
	//   1  2
	//   2  1
	//   2  2
	//   3  1
	//   3  2
	//   4  1
	//   4  2
	//
	// Now, columns a and b together form a strict key over the cartesian product.
	// If either a or b had duplicate rows to begin with, a key over the cartesian
	// product would not be possible. Now, the left join's "a < 3" on condition is
	// applied:
	//
	//   a  b
	//   ----
	//   1  1
	//   1  2
	//   2  1
	//   2  2
	//
	// Finally, the left join adds back the rows of a, null-extending b:
	//
	//   a  b
	//   ----
	//   1  1
	//   1  2
	//   2  1
	//   2  2
	//   3  NULL
	//   4  NULL
	//
	// Since a had a key to begin with, the "3" and "4" rows that are added back
	// are unique. Therefore, a and b are a strict key for the left join.
	//
	// TODO(drewk): support for lax keys/dependencies from the right input can be
	// added if it turns out to be useful.

	// Save a strict key from the left input, if one exists.
	leftKey, leftHasKey := leftFDs.StrictKey()

	// Save a key from the unfiltered cartesian product, if one exists.
	oldKey := f.key
	oldKeyType := f.hasKey

	// If the left input has a key, add the FDs from the join filters to a copy of
	// the cartesian product FD set. Next, check whether the columns of the left
	// input form a strict key over the result of applying the join filters to the
	// cartesian product.
	//
	// We have to apply the filters to a copy because filter FDs are often not
	// valid after null-extended rows are added. For example:
	//
	//   a  b  c     d     e
	//   ----------------------
	//   1  1  1     NULL  1
	//   1  2  NULL  NULL  NULL
	//   2  1  NULL  NULL  NULL
	//
	// Let's say this table is the result of a join between 'ab' and 'cde'. The
	// join condition might have included e = 1, but it would not be correct to
	// add the corresponding constant FD to the final join FD set because the e
	// column has been null extended, and therefore the condition doesn't hold for
	// the final outer join result.
	leftColsAreInnerJoinKey := false
	if leftHasKey {
		c := FuncDepSet{}
		c.CopyFrom(f)
		c.AddFrom(filtersFDs)
		leftColsAreInnerJoinKey = c.ColsAreStrictKey(leftCols)
	}

	// Modify the cartesian product FD set to reflect the impact of adding
	// NULL-extended rows to the results of the filtered cartesian product (or, in
	// other words, the results of an inner join).
	f.nullExtendRightRows(rightCols, notNullInputCols)

	// If the conditions have been met, use the key from the left side. Otherwise,
	// use the key from the cartesian product.
	if leftHasKey && leftColsAreInnerJoinKey {
		f.setKey(leftKey, strictKey)
	} else {
		// See the comment at the top of the function for why it is valid to use the
		// key from the cartesian product.
		f.setKey(oldKey, oldKeyType)
		// Call tryToReduceKey with only the left columns from notNullInputCols
		// because the right columns may have been null-extended.
		f.tryToReduceKey(leftCols.Intersection(notNullInputCols))
	}
	// ensureKeyClosure must be called when oldKey is used as well as the new
	// leftKey because nullExtendRightRows can remove FDs, such that the closure
	// of oldKey ends up missing some columns from the right.
	f.ensureKeyClosure(leftCols.Union(rightCols))
}

// MakeFullOuter modifies the cartesian product FD set to reflect the impact of
// adding NULL-extended rows to the results of an inner join. An inner join can
// be modeled as a cartesian product + ON filtering, and an outer join is
// modeled as an inner join + union of NULL-extended rows. MakeFullOuter
// performs the final step for a full join, given the set of columns on each
// side, as well as the set of input columns from both sides of the join that
// are not null.
func (f *FuncDepSet) MakeFullOuter(leftCols, rightCols, notNullInputCols opt.ColSet) {
	if f.hasKey == strictKey {
		if f.key.Empty() {
			// The cartesian product has an empty key when both sides have an empty key;
			// but the outer join can have two rows so the empty key doesn't hold.
			f.hasKey = noKey
			f.key = opt.ColSet{}
		} else if !f.key.Intersects(notNullInputCols) {
			// If the cartesian product has a strict key, the key holds on the full
			// outer result only if one of the key columns is known to be not-null in
			// the input. Otherwise, a row where all the key columns are NULL can
			// "conflict" with a row where these columns are NULL because of
			// null-extension. For example:
			//   -- t1 and t2 each have one row containing NULL for column x.
			//   SELECT * FROM t1 FULL JOIN t2 ON t1.x=t2.x
			//
			//   t1.x  t2.x
			//   ----------
			//   NULL  NULL
			//   NULL  NULL
			//
			// The key columns are kept, but the key is downgraded to a lax key.
			f.hasKey = laxKey
		}
	}
	// In a full join both sides can be null-extended, so apply the
	// null-extension rules once per side.
	f.nullExtendRightRows(leftCols, notNullInputCols)
	f.nullExtendRightRows(rightCols, notNullInputCols)
	f.ensureKeyClosure(leftCols.Union(rightCols))
}

// nullExtendRightRows is used by MakeLeftOuter and MakeFullOuter to modify the
// cartesian product FD set to reflect the impact of adding NULL-extended rows
// to the results of an inner join. See the MakeLeftOuter comment for more
// information.
//
// rightCols are the columns of the null-supplying side; notNullInputCols are
// the input columns known to be not-null.
func (f *FuncDepSet) nullExtendRightRows(rightCols, notNullInputCols opt.ColSet) {
	// Weakened (lax) replacements are collected here and added after the
	// in-place compaction of f.deps is finished.
	var newFDs []funcDep

	n := 0
	for i := range f.deps {
		fd := &f.deps[i]

		if fd.isConstant() {
			// Null-extended constant columns are no longer constant, because they
			// now may contain NULL values.
			if fd.to.Intersects(rightCols) {
				constCols := fd.to.Intersection(rightCols)
				if !fd.removeToCols(constCols) {
					continue
				}
			}
		} else {
			// The next several rules depend on whether the dependency's determinant
			// and dependants are on the null-supplying or row-supplying sides of
			// the join (or both). The rules will use the following join and set of
			// result rows to give examples:
			//
			//   CREATE TABLE ab (a INT, b INT, PRIMARY KEY(a, b))
			//   CREATE TABLE cde (c INT PRIMARY KEY, d INT, e INT)
			//   SELECT * FROM ab LEFT OUTER JOIN cde ON a=c AND b=1
			//
			//   a  b  c     d     e
			//   ----------------------
			//   1  1  1     NULL  1
			//   1  2  NULL  NULL  NULL
			//   2  1  NULL  NULL  NULL
			//
			// Here are the rules:
			//
			// 1. A strict dependency with determinant on the null-supplying side of
			//    the join becomes lax for any dependants on the row-supplying side
			//    of the join. In the example above, null-extending the (c) column
			//    violates the (a)==(c) equivalence dependency. Even the strict
			//    (a)-->(c) and (c)-->(a) dependencies no longer hold. The only
			//    dependency that still holds is (c)~~>(a), and even that is only
			//    one way, since (a)~~>(c) is not valid.
			//
			// 2. A strict dependency with both determinant and dependants on the
			//    null-supplying side of join becomes lax if all determinant columns
			//    are nullable. In the example above, null-extending the (c,d,e)
			//    columns violates a strict (d)-->(e) dependency, because the NULL
			//    "d" value now maps to both 1 and NULL. So it must be weakened to
			//    a lax dependency. But if at least one non-NULL column is part of
			//    the determinant, such as (c,d)-->(e), then the (NULL,NULL)
			//    determinant will be unique, thus preserving a strict FD.
			//
			// 3. A dependency with determinant columns drawn from both sides of
			//    the join is discarded, unless the determinant is a key for the
			//    relation. Null-extending one side of the join does not disturb
			//    the relation's keys, and keys always determine all other columns.
			//
			if fd.from.Intersects(rightCols) {
				if !fd.from.SubsetOf(rightCols) {
					// Rule #3, described above.
					if !f.ColsAreStrictKey(fd.from) {
						continue
					}
				} else {
					// Rule #1, described above (determinant is on null-supplying side).
					if !fd.to.SubsetOf(rightCols) {
						// Split the dependants by which side of the join they're on.
						// The new FD leaves strict and equiv at their zero value
						// (false), i.e. it is a lax, non-equivalence FD.
						laxCols := fd.to.Difference(rightCols)
						newFDs = append(newFDs, funcDep{from: fd.from, to: laxCols})
						if !fd.removeToCols(laxCols) {
							continue
						}
					}

					// Rule #2, described above. Note that this rule does not apply to
					// equivalence FDs, which remain valid.
					if fd.strict && !fd.equiv && !fd.from.Intersects(notNullInputCols) {
						// Replace the strict FD by a lax FD over the same columns.
						newFDs = append(newFDs, funcDep{from: fd.from, to: fd.to})
						continue
					}
				}
			} else {
				// Rule #1, described above (determinant is on row-supplying side).
				if !fd.removeToCols(rightCols) {
					continue
				}
			}
		}

		if n != i {
			f.deps[n] = f.deps[i]
		}
		n++
	}
	f.deps = f.deps[:n]

	for i := range newFDs {
		fd := &newFDs[i]
		f.addDependency(fd.from, fd.to, fd.strict, fd.equiv)
	}
}

// EquivReps returns one "representative" column from each equivalency group in
// the FD set. ComputeEquivGroup can be called to obtain the remaining columns
// from each equivalency group.
func (f *FuncDepSet) EquivReps() opt.ColSet {
	var reps opt.ColSet

	// Equivalence closures are already maintained for every column, so an
	// equivalence FD whose dependants already include a chosen representative
	// belongs to a group that is already covered.
	for i := 0; i < len(f.deps); i++ {
		fd := &f.deps[i]
		if fd.equiv && !fd.to.Intersects(reps) {
			reps.UnionWith(fd.from)
		}
	}
	return reps
}

// ComputeEquivGroup returns the group of columns that are equivalent to the
// given column. See ComputeEquivClosure for more details.
func (f *FuncDepSet) ComputeEquivGroup(rep opt.ColumnID) opt.ColSet {
	return f.ComputeEquivClosure(opt.MakeColSet(rep))
}

// ensureKeyClosure checks whether the closure for this FD set's key (if there
// is one) includes the given columns. If not, then it adds a dependency so that
// the key determines the columns. It does nothing when the set has no key.
func (f *FuncDepSet) ensureKeyClosure(cols opt.ColSet) {
	if f.hasKey != noKey {
		closure := f.ComputeClosure(f.key)
		if !cols.SubsetOf(closure) {
			// Only the columns missing from the closure need a new dependency.
			cols = cols.Difference(closure)

			// If we have a strict key, we add a strict dependency; otherwise we add a
			// lax dependency.
			strict := f.hasKey == strictKey
			f.addDependency(f.key, cols, strict, false /* equiv */)
		}
	}
}

// Verify runs consistency checks against the FD set, in order to ensure that it
// conforms to several invariants, and panics with an assertion error if one of
// them is violated:
//
//   1. An FD determinant should not intersect its dependants.
//   2. If a constant FD is present, it's the first FD in the set.
//   3. A constant FD must be strict.
//   4. An equivalence must be strict (lax equivalencies should be reduced to
//      lax dependencies).
//   5. Equivalence determinant should be exactly one column.
//   6. The dependants of an equivalence is always its closure.
//   7. If FD set has a strict key, it should be a candidate key (already
//      reduced).
//   8. Closure of a strict key should include all known columns in the FD set.
//   9. A lax key should not be empty.
//   10. If FD set has no key then key columns should be empty.
//
func (f *FuncDepSet) Verify() {
	for i := range f.deps {
		fd := &f.deps[i]

		if fd.from.Intersects(fd.to) {
			panic(errors.AssertionFailedf("expected FD determinant and dependants to be disjoint: %s (%d)", log.Safe(f), log.Safe(i)))
		}

		if fd.isConstant() {
			if i != 0 {
				panic(errors.AssertionFailedf("expected constant FD to be first FD in set: %s (%d)", log.Safe(f), log.Safe(i)))
			}
			if !fd.strict {
				panic(errors.AssertionFailedf("expected constant FD to be strict: %s", log.Safe(f)))
			}
		}

		if fd.equiv {
			if !fd.strict {
				panic(errors.AssertionFailedf("expected equivalency to be strict: %s (%d)", f, i))
			}

			if fd.from.Len() != 1 {
				panic(errors.AssertionFailedf("expected equivalence determinant to be single col: %s (%d)", log.Safe(f), log.Safe(i)))
			}

			if !f.ComputeEquivClosure(fd.from).Equals(fd.from.Union(fd.to)) {
				panic(errors.AssertionFailedf("expected equivalence dependants to be its closure: %s (%d)", log.Safe(f), log.Safe(i)))
			}
		}
	}

	if f.hasKey != noKey {
		// The candidate-key and closure checks are only applied to strict keys.
		if f.hasKey == strictKey {
			if reduced := f.ReduceCols(f.key); !reduced.Equals(f.key) {
				panic(errors.AssertionFailedf("expected FD to have candidate key %s: %s", reduced, f))
			}

			allCols := f.ColSet()
			allCols.UnionWith(f.key)
			if !f.ComputeClosure(f.key).Equals(allCols) {
				panic(errors.AssertionFailedf("expected closure of FD key to include all known cols: %s", log.Safe(f)))
			}
		}

		if f.hasKey == laxKey && f.key.Empty() {
			panic(errors.AssertionFailedf("expected lax key to be not empty"))
		}
	} else {
		if !f.key.Empty() {
			panic(errors.AssertionFailedf("expected empty key columns since no key: %s", f))
		}
	}
}

// StringOnlyFDs returns a string representation of the FDs (without the key
// information).
func (f FuncDepSet) StringOnlyFDs() string {
	var b strings.Builder
	f.formatFDs(&b)
	return b.String()
}

// String returns a string representation of the FD set: the key (if any),
// followed by the FDs. The key and the FDs are separated by "; " when both are
// present.
func (f FuncDepSet) String() string {
	var b strings.Builder

	if f.hasKey != noKey {
		// The key shows up as key(1,2) or lax-key(1,2).
		if f.hasKey == laxKey {
			b.WriteString("lax-")
		}
		fmt.Fprintf(&b, "key%s", f.key)
		if len(f.deps) > 0 {
			b.WriteString("; ")
		}
	}

	f.formatFDs(&b)
	return b.String()
}

// formatFDs writes the FDs of the set to the given builder, separated by ", ".
func (f FuncDepSet) formatFDs(b *strings.Builder) {
	for i := range f.deps {
		if i != 0 {
			b.WriteString(", ")
		}
		f.deps[i].format(b)
	}
}

// colsAreKey returns true if the given columns contain a strict or lax key for
// the relation. The typ argument selects which kind of key is being asked for.
func (f *FuncDepSet) colsAreKey(cols opt.ColSet, typ keyType) bool {
	switch f.hasKey {
	case strictKey:
		// Determine whether the key is in the closure of the given columns. The
		// closure is necessary in the general case since it's possible that the
		// columns form a different key. For example:
		//
		//   f.key = (a)
		//   cols  = (b,c)
		//
		// and yet both column sets form keys for the relation.
		return f.inClosureOf(f.key, cols, typ == strictKey)

	case laxKey:
		if typ == strictKey {
			// We have a lax key but we need a strict key.
			return false
		}

		// For a lax key, we cannot use the strict closure, because the columns we
		// bring in from the closure might be null. For example, say that
		//   - column a is constant but (always) null: ()-->(a)
		//   - (a,b) is the known lax key.
		// The strict closure of (b) is the lax key (a,b), but (b) is not a lax
		// key.
		//
		// We can however use the equivalent closure, because those columns are null
		// only if one of the initial cols is null.
		//
		// Note: if we had information, we could use just the not-null columns from
		// the strict closure.
		return f.key.SubsetOf(f.ComputeEquivClosure(cols))

	default:
		return false
	}
}

// inClosureOf computes the strict or lax closure of the "in" column set, and
// returns true if the "cols" columns are all contained in the resulting
// closure.
func (f *FuncDepSet) inClosureOf(cols, in opt.ColSet, strict bool) bool {
	// Short-circuit if the "in" set already contains all the columns.
	if cols.SubsetOf(in) {
		return true
	}

	// Work on a copy, since the set is extended below.
	in = in.Copy()

	// Lax dependencies are not transitive (see figure 2.1 in the paper for
	// properties that hold for lax dependencies), so only include them if they
	// are reachable in a single lax dependency step from the input set.
	if !strict {
		// Keep track of all columns reached through a lax or strict dependency.
		laxIn := in.Copy()
		for i := 0; i < len(f.deps); i++ {
			fd := &f.deps[i]
			if fd.from.SubsetOf(in) && !fd.to.SubsetOf(in) {
				laxIn.UnionWith(fd.to)

				// Equivalencies are always transitive.
				if fd.equiv {
					in.UnionWith(fd.to)

					// Restart iteration to get transitive closure.
					i = -1
				}

				// Short-circuit if the "laxIn" set now contains all the columns.
				if cols.SubsetOf(laxIn) {
					return true
				}
			}
		}

		// Use the set that includes columns reached via lax dependencies.
		in = laxIn
	}

	// Now continue with full transitive closure of strict dependencies.
	for i := 0; i < len(f.deps); i++ {
		fd := &f.deps[i]

		if fd.strict && fd.from.SubsetOf(in) && !fd.to.SubsetOf(in) {
			in.UnionWith(fd.to)

			// Short-circuit if the "in" set now contains all the columns.
			if cols.SubsetOf(in) {
				return true
			}

			// Restart iteration to get transitive closure.
			i = -1
		}
	}
	return false
}

// addDependency adds a new dependency into the set. If another FD implies the
// new FD, then it's not added. If it can be merged with an existing FD, that is
// done. Otherwise, a brand new FD is added to the set.
//
// Equivalence dependencies are delegated to addEquivalency and constant
// dependencies (empty "from" set) to AddConstants. It panics if asked to add a
// lax constant dependency.
func (f *FuncDepSet) addDependency(from, to opt.ColSet, strict, equiv bool) {
	// Fast-path for trivial no-op dependency.
	if to.SubsetOf(from) {
		return
	}

	// Delegate equivalence dependency.
	if equiv {
		f.addEquivalency(from.Union(to))
		return
	}

	// Delegate constant dependency.
	if from.Empty() {
		if !strict {
			panic(errors.AssertionFailedf("expected constant FD to be strict: %s", log.Safe(f)))
		}
		f.AddConstants(to)
		return
	}

	// Any column in the "from" set is already an implied "to" column, so no
	// need to include it.
	if to.Intersects(from) {
		to = to.Difference(from)
	}

	newFD := funcDep{from: from, to: to, strict: strict, equiv: equiv}

	// Merge the new dependency into the existing set. Existing FDs that are kept
	// are compacted in place; n is the next write position.
	n := 0
	added := false
	for i := range f.deps {
		fd := &f.deps[i]

		if newFD.implies(fd) {
			// The new FD is >= the existing FD, so can replace it.
			if added {
				// New FD is already part of the set, so discard this existing FD.
				continue
			}

			// Update the existing FD.
			fd.from = from
			fd.to = to
			fd.strict = strict
			fd.equiv = equiv

			// Keep searching, in case there's another implied FD.
			added = true
		} else if !added {
			if fd.implies(&newFD) {
				// The new FD does not add any additional information.
				added = true
			} else if fd.strict == strict && fd.equiv == equiv && fd.from.Equals(from) {
				// Same determinant and same kind of FD: the new FD can at least add
				// its dependants to the existing FD.
				fd.to = fd.to.Union(to)
				added = true
			}
		}

		if n != i {
			f.deps[n] = f.deps[i]
		}
		n++
	}

	f.deps = f.deps[:n]

	if !added {
		// Add a new FD.
		f.deps = append(f.deps, newFD)
	}
}

func (f *FuncDepSet) addEquivalency(equiv opt.ColSet) {
	var addConst bool
	var found opt.ColSet

	// Start by finding complete set of all columns that are equivalent to the
	// given set.
	equiv = f.ComputeEquivClosure(equiv)

	n := 0
	for i := 0; i < len(f.deps); i++ {
		fd := &f.deps[i]

		if fd.isConstant() {
			// If any equivalent column is a constant, then all are constants.
			if fd.to.Intersects(equiv) && !equiv.SubsetOf(fd.to) {
				addConst = true
			}
		} else if fd.from.SubsetOf(equiv) {
			// All determinant columns are equivalent to one another.
			if fd.equiv {
				// Ensure that each equivalent column directly maps to all other
				// columns in the group.
1783 fd.to = fd.to.Union(equiv) 1784 fd.to.DifferenceWith(fd.from) 1785 found.UnionWith(fd.from) 1786 } else { 1787 // Remove dependant columns that are equivalent, because equivalence 1788 // is a stronger relationship than a strict or lax dependency. 1789 if !fd.removeToCols(equiv) { 1790 continue 1791 } 1792 } 1793 } 1794 1795 if n != i { 1796 f.deps[n] = f.deps[i] 1797 } 1798 n++ 1799 } 1800 f.deps = f.deps[:n] 1801 1802 if addConst { 1803 // Ensure that all equivalent columns are marked as constant. 1804 f.AddConstants(equiv) 1805 } 1806 1807 if !equiv.SubsetOf(found) { 1808 add := equiv.Difference(found) 1809 deps := make([]funcDep, 0, len(f.deps)+add.Len()) 1810 deps = append(deps, f.deps...) 1811 1812 for id, ok := add.Next(0); ok; id, ok = add.Next(id + 1) { 1813 fd := funcDep{strict: true, equiv: true} 1814 fd.from.Add(id) 1815 fd.to = equiv.Copy() 1816 fd.to.Remove(id) 1817 deps = append(deps, fd) 1818 } 1819 f.deps = deps 1820 } 1821 1822 f.tryToReduceKey(opt.ColSet{} /* notNullCols */) 1823 } 1824 1825 // setKey updates the key that the set is currently maintaining. 1826 func (f *FuncDepSet) setKey(key opt.ColSet, typ keyType) { 1827 f.hasKey = typ 1828 f.key = key 1829 if f.hasKey == laxKey && f.key.Empty() { 1830 // An empty lax key is by definition equivalent to an empty strict key; we 1831 // normalize it to be strict. 1832 f.hasKey = strictKey 1833 } 1834 } 1835 1836 // clearKey removes any strict or lax key. 1837 func (f *FuncDepSet) clearKey() { 1838 f.setKey(opt.ColSet{}, noKey) 1839 } 1840 1841 // tryToReduceKey tries to reduce any set key, used after new FDs are added. 1842 func (f *FuncDepSet) tryToReduceKey(notNullCols opt.ColSet) { 1843 switch f.hasKey { 1844 case laxKey: 1845 if !notNullCols.Empty() { 1846 // We can only remove columns from a lax key if we know they are 1847 // not null; other columns must be retained. 
1848 nullableKeyCols := f.key.Difference(notNullCols) 1849 if nullableKeyCols.Empty() { 1850 // All key columns are not-null; we can upgrade the key to strict. 1851 f.AddStrictKey(f.key, f.ColSet()) 1852 } else { 1853 reduced := f.ReduceCols(f.key) 1854 reduced.UnionWith(nullableKeyCols) 1855 f.key = reduced 1856 } 1857 } 1858 1859 case strictKey: 1860 f.key = f.ReduceCols(f.key) 1861 } 1862 } 1863 1864 // makeEquivMap constructs a map with an entry for each column in the "from" set 1865 // that is equivalent to a column in the "to" set. When there are multiple 1866 // equivalent columns, then makeEquivMap arbitrarily chooses one of the 1867 // alternatives. Note that some from columns may not have a mapping. If none of 1868 // them do, then makeEquivMap returns nil. 1869 func (f *FuncDepSet) makeEquivMap(from, to opt.ColSet) map[opt.ColumnID]opt.ColumnID { 1870 var equivMap map[opt.ColumnID]opt.ColumnID 1871 for i, ok := from.Next(0); ok; i, ok = from.Next(i + 1) { 1872 var oneCol opt.ColSet 1873 oneCol.Add(i) 1874 closure := f.ComputeEquivClosure(oneCol) 1875 closure.IntersectionWith(to) 1876 if !closure.Empty() { 1877 if equivMap == nil { 1878 equivMap = make(map[opt.ColumnID]opt.ColumnID) 1879 } 1880 id, _ := closure.Next(0) 1881 equivMap[i] = id 1882 } 1883 } 1884 return equivMap 1885 } 1886 1887 // isConstant returns true if this FD contains the set of constant columns. If 1888 // it exists, it must always be the first FD in the set. 1889 func (f *funcDep) isConstant() bool { 1890 return f.from.Empty() 1891 } 1892 1893 // implies returns true if this FD is at least as strong as the given FD. 
This 1894 // is true when: 1895 // - the determinant is a subset of the given FD's determinant 1896 // - the dependant is a superset of the given FD's dependant 1897 // - the FD is at least as strict and equivalent as the given FD 1898 func (f *funcDep) implies(fd *funcDep) bool { 1899 if f.from.SubsetOf(fd.from) && fd.to.SubsetOf(f.to) { 1900 if (f.strict || !fd.strict) && (f.equiv || !fd.equiv) { 1901 return true 1902 } 1903 } 1904 return false 1905 } 1906 1907 // removeFromCols removes columns in the given set from this FD's determinant. 1908 // If removing columns results in an empty determinant, then removeFromCols 1909 // returns false. 1910 func (f *funcDep) removeFromCols(remove opt.ColSet) bool { 1911 if f.from.Intersects(remove) { 1912 f.from = f.from.Difference(remove) 1913 } 1914 return !f.isConstant() 1915 1916 } 1917 1918 // removeToCols removes columns in the given set from this FD's dependant set. 1919 // If removing columns results in an empty dependant set, then removeToCols 1920 // returns false. 1921 func (f *funcDep) removeToCols(remove opt.ColSet) bool { 1922 if f.to.Intersects(remove) { 1923 f.to = f.to.Difference(remove) 1924 } 1925 return !f.to.Empty() 1926 } 1927 1928 func (f *funcDep) format(b *strings.Builder) { 1929 if f.equiv { 1930 if !f.strict { 1931 panic(errors.AssertionFailedf("lax equivalent columns are not supported")) 1932 } 1933 fmt.Fprintf(b, "%s==%s", f.from, f.to) 1934 } else { 1935 if f.strict { 1936 fmt.Fprintf(b, "%s-->%s", f.from, f.to) 1937 } else { 1938 fmt.Fprintf(b, "%s~~>%s", f.from, f.to) 1939 } 1940 } 1941 } 1942 1943 func (f *funcDep) String() string { 1944 var b strings.Builder 1945 f.format(&b) 1946 return b.String() 1947 }