github.com/dolthub/go-mysql-server@v0.18.0/sql/func_deps.go (about) 1 package sql 2 3 import ( 4 "fmt" 5 "strings" 6 ) 7 8 // EquivSets maintains column equivalency sets created 9 // by WHERE a = b filters. 10 type EquivSets struct { 11 sets []ColSet 12 } 13 14 // Add adds a new equivalence set, compacting any intersections 15 // with existing sets. 16 func (e *EquivSets) Add(cols ColSet) { 17 i := 0 18 for i < len(e.sets) { 19 set := e.sets[i] 20 if cols.Intersects(set) { 21 cols = cols.Union(set) 22 e.sets[i] = e.sets[len(e.sets)-1] 23 e.sets = e.sets[:len(e.sets)-1] 24 } else { 25 i++ 26 } 27 } 28 e.sets = append(e.sets, cols) 29 } 30 31 func (e *EquivSets) Len() int { 32 if e == nil { 33 return 0 34 } 35 return len(e.sets) 36 } 37 38 func (e *EquivSets) Sets() []ColSet { 39 if e == nil { 40 return nil 41 } 42 return e.sets 43 } 44 45 func (e *EquivSets) String() string { 46 if e == nil { 47 return "equiv()" 48 } 49 b := strings.Builder{} 50 sep := "" 51 for i, set := range e.sets { 52 b.WriteString(fmt.Sprintf("%sequiv%s", sep, set)) 53 if i == 0 { 54 sep = "; " 55 } 56 } 57 return b.String() 58 } 59 60 // Key maintains a strict or lax dependency 61 type Key struct { 62 strict bool 63 cols ColSet 64 allCols ColSet 65 } 66 67 func (k *Key) Empty() bool { 68 return k.cols.Len() == 0 69 } 70 71 func (k *Key) implies(other Key) bool { 72 if k.cols.SubsetOf(other.cols) { 73 return k.strict || !other.strict 74 } 75 return false 76 } 77 78 // FuncDepSet encodes functional dependencies for a relational 79 // expression. Common uses for functional dependencies: 80 // - Do a set of equality columns comprise a strict key? (lookup joins) 81 // - Is there a strict key for a relation? (decorrelate scopes) 82 // - What are the set of equivalent filters? (join planning) 83 // - Do a set of grouping columns constitute a strict key 84 // (only_full_group_by) 85 // 86 // The docs here provide a summary of how functional dependencies work: 87 // - https://github.com/cockroachdb/cockroach/blob/5a6aa768cd945118e795d1086ba6f6365f6d1284/pkg/sql/opt/props/func_dep.go#L420 88 // 89 // This object expects fields to be set in the following order: 90 // - notNull: what columns are non-nullable? 91 // - consts: what columns are constant? 92 // - equivs: transitive closure of column equivalence 93 // - keys: primary and secondary keys, simplified 94 // 95 // We use an abbreviated form to represent functional dependencies. 96 // Normally, we would encode determinant and dependency sets like 97 // (det)-->(dep). I only keep track of determinant sets, that are 98 // assumed to represent keys into the entire relation. This works 99 // for simple cases where fractional functional dependencies can 100 // be discarded. The limitation is clear when you consider joins, 101 // whose FD sets can include keys that only implicitly determine 102 // a fraction of the total input set. The first key always determines 103 // the entire relation, which seems good enough for many cases. 104 // Maintaining partials sets also requires much less bookkeeping. 105 // 106 // TODO: We used to not track dependency sets and only add keys that 107 // determined the entire relation. One observed downside of that approach 108 // is that left joins fail to convert equivalencies on the null-extended 109 // side to lax functional dependencies. For example, in the query below, 110 // the left join loses (a) == (m) because (m) can now be NULL: 111 // 112 // SELECT * from adbcd LEFT_JOIN mnpq WHERE a = m 113 // 114 // But we could maintain (m)~~>(n), which higher-level null enforcement 115 // (ex: GROUPING) can reclaim as equivalence. Although we now track partial 116 // dependency sets, this may still not be supported. 117 type FuncDepSet struct { 118 // all columns in this relation 119 all ColSet 120 // non-null columns for relation 121 notNull ColSet 122 // tracks in-scope constants 123 consts ColSet 124 // tracks in-scope equivalent closure 125 equivs *EquivSets 126 // keys includes the set of primary and secondary keys 127 // accumulated in the relation. The first key is the best 128 // key we have seen so far, where strict > lax and shorter 129 // is better. 130 keys []Key 131 } 132 133 // StrictKey returns a set of columns that act as a row identifier. 134 // No two rows can have the same identifier, like (b) below. Unique keys 135 // are only strict if all columns are non-nullable. See LaxKey() for 136 // explanation. 137 // 138 // b c 139 // ---- 140 // 1 1 141 // 2 1 142 func (f *FuncDepSet) StrictKey() (ColSet, bool) { 143 if len(f.keys) == 0 || !f.keys[0].strict { 144 return ColSet{}, false 145 } 146 return f.keys[0].cols, true 147 } 148 149 // LaxKey returns a set of columns that act as a null-safe row identifier. 150 // For example, (b) below is a lax-key for (b,c), but not a strict key. 151 // A strict key treats NULLs as equal to one-another. A lax key permits 152 // the general NULL != NULL behavior. Filtering nulls from a relation can 153 // promote a lax key into a strict key. 154 // 155 // b c 156 // ---------- 157 // NULL 1 158 // NULL NULL 159 func (f *FuncDepSet) LaxKey() (ColSet, bool) { 160 if len(f.keys) == 0 || f.keys[0].strict { 161 return ColSet{}, false 162 } 163 return f.keys[0].cols, true 164 } 165 166 func (f *FuncDepSet) All() ColSet { 167 return f.all 168 } 169 170 func (f *FuncDepSet) Empty() bool { 171 return f.all.Empty() 172 } 173 174 func (f *FuncDepSet) NotNull() ColSet { 175 return f.notNull 176 } 177 178 func (f *FuncDepSet) Equiv() *EquivSets { 179 return f.equivs 180 } 181 182 func (f *FuncDepSet) CopyKeys() []Key { 183 ret := make([]Key, len(f.keys)) 184 copy(ret, f.keys) 185 return ret 186 } 187 188 func (f *FuncDepSet) HasMax1Row() bool { 189 if len(f.keys) == 0 { 190 return false 191 } 192 key := f.keys[0] 193 return key.strict && key.Empty() 194 } 195 196 func (f *FuncDepSet) String() string { 197 b := strings.Builder{} 198 sep := "" 199 if len(f.keys) > 0 { 200 key := f.keys[0] 201 lax := "" 202 if !key.strict { 203 lax = "lax-" 204 } 205 b.WriteString(fmt.Sprintf("%skey%s", lax, key.cols)) 206 sep = "; " 207 } 208 if !f.consts.Empty() { 209 b.WriteString(fmt.Sprintf("%sconstant%s", sep, f.consts)) 210 sep = "; " 211 } 212 if f.equivs.Len() > 0 { 213 b.WriteString(fmt.Sprintf("%s%s", sep, f.equivs)) 214 sep = "; " 215 } 216 if len(f.keys) < 2 { 217 return b.String() 218 } 219 for _, k := range f.keys[1:] { 220 var cols string 221 if k.allCols == f.all { 222 cols = k.cols.String() 223 } else { 224 cols = fmt.Sprintf("%s/%s", k.cols, k.allCols) 225 } 226 if k.strict { 227 b.WriteString(fmt.Sprintf("%sfd%s", sep, cols)) 228 } else { 229 b.WriteString(fmt.Sprintf("%slax-fd%s", sep, cols)) 230 } 231 sep = "; " 232 } 233 return b.String() 234 } 235 236 func (f *FuncDepSet) Constants() ColSet { 237 return f.consts 238 } 239 240 func (f *FuncDepSet) EquivalenceClosure(cols ColSet) ColSet { 241 for _, set := range f.equivs.Sets() { 242 if set.Intersects(cols) { 243 cols = cols.Union(set) 244 } 245 } 246 return cols 247 } 248 249 func (f *FuncDepSet) AddNotNullable(cols ColSet) { 250 cols = f.simplifyCols(cols, nil) 251 f.notNull = f.notNull.Union(cols) 252 } 253 254 func (f *FuncDepSet) AddConstants(cols ColSet) { 255 f.consts = f.consts.Union(cols) 256 } 257 258 func (f *FuncDepSet) AddEquiv(i, j ColumnId) { 259 cols := NewColSet(i, j) 260 if f.equivs == nil { 261 f.equivs = &EquivSets{} 262 } 263 f.AddEquivSet(cols) 264 } 265 266 func (f *FuncDepSet) AddEquivSet(cols ColSet) { 267 if f.equivs == nil { 268 f.equivs = &EquivSets{} 269 } 270 f.equivs.Add(cols) 271 for _, set := range f.equivs.Sets() { 272 // if one col in equiv set is constant, rest are too 273 if set.Intersects(f.consts) { 274 f.AddConstants(set) 275 } 276 } 277 } 278 279 func (f *FuncDepSet) AddKey(k Key) { 280 switch k.strict { 281 case true: 282 f.AddStrictKey(k.cols) 283 case false: 284 f.AddLaxKey(k.cols) 285 } 286 } 287 288 func (f *FuncDepSet) AddStrictKey(cols ColSet) { 289 cols = f.simplifyCols(cols, nil) 290 newKey := Key{cols: cols, allCols: f.all, strict: true} 291 for i, key := range f.keys { 292 if key.implies(newKey) { 293 return 294 } 295 if newKey.implies(key) { 296 f.keys[i] = newKey 297 return 298 } 299 } 300 f.keys = append(f.keys, newKey) 301 302 if len(f.keys) > 1 { 303 lead := f.keys[0] 304 lead.cols = f.simplifyCols(lead.cols, nil) 305 if !lead.strict || lead.strict && lead.cols.Len() > cols.Len() { 306 // strict > lax 307 // short > long 308 f.keys[0], f.keys[len(f.keys)-1] = f.keys[len(f.keys)-1], lead 309 } 310 311 } 312 } 313 314 func (f *FuncDepSet) AddLaxKey(cols ColSet) { 315 nullableCols := cols.Difference(f.notNull) 316 if nullableCols.Empty() { 317 f.AddStrictKey(cols) 318 } 319 320 cols = f.simplifyCols(cols, nil) 321 newKey := Key{cols: cols, allCols: f.all, strict: false} 322 for i, key := range f.keys { 323 if key.implies(newKey) { 324 return 325 } 326 if newKey.implies(key) { 327 f.keys[i] = newKey 328 return 329 } 330 } 331 f.keys = append(f.keys, newKey) 332 if len(f.keys) > 1 && !f.keys[0].strict { 333 // only try to improve if lax key 334 lead := f.keys[0] 335 lead.cols = f.simplifyCols(lead.cols, nil) 336 if lead.cols.Len() > cols.Len() { 337 f.keys[0], f.keys[len(f.keys)-1] = f.keys[len(f.keys)-1], lead 338 } 339 } 340 } 341 342 // simplifyCols uses equivalence and constant sets to minimize 343 // a key set 344 func (f *FuncDepSet) simplifyCols(key ColSet, subKeys []Key) ColSet { 345 if key.Empty() { 346 return key 347 } 348 // for each column, attempt to remove and verify 349 // the remaining set does not determine it 350 // i.e. check if removedCol is in closure of rest of set 351 ret := key.Copy() 352 var plucked ColSet 353 for i, ok := key.Next(1); ok; i, ok = key.Next(i + 1) { 354 ret.Remove(i) 355 plucked.Add(i) 356 notConst := f.consts.Intersection(plucked).Empty() 357 if notConst && !f.inClosureOf(plucked, ret, subKeys) { 358 // plucked is novel 359 ret.Add(i) 360 } 361 plucked.Remove(i) 362 } 363 return ret 364 } 365 366 // ColsAreStrictKey returns true if the set of columns acts 367 // as a primary key into a relation. 368 func (f *FuncDepSet) ColsAreStrictKey(cols ColSet) bool { 369 if len(f.keys) == 0 { 370 return false 371 } 372 return f.inClosureOf(f.keys[0].cols, cols, nil) 373 } 374 375 // inClosureOf returns whether all the columns in `candidate` are uniquely determined by the columns in `source`. 376 // It computes this by checking whether `candidate` is contained within the transitive closure of `source`, over 377 // both equivalence rules (which state two columns must have the same value) and the functional dependencies 378 // specified by `fdsKeys` (each of which states that one or more columns determine one of more other columns.) 379 // Note that callers that don't want to consider functional dependencies (such as outer joins) can pass a nil value 380 // to `fdsKeys`. 381 func (f *FuncDepSet) inClosureOf(candidate, source ColSet, fdsKeys []Key) bool { 382 if candidate.SubsetOf(source) { 383 return true 384 } 385 var oldClosure ColSet 386 newClosure := source.Copy() 387 for !oldClosure.Equals(newClosure) { 388 oldClosure = newClosure.Copy() 389 for _, set := range f.equivs.Sets() { 390 if set.Intersects(newClosure) { 391 newClosure = newClosure.Union(set) 392 } 393 } 394 for _, key := range fdsKeys { 395 if key.cols.SubsetOf(newClosure) { 396 newClosure = newClosure.Union(key.allCols) 397 } 398 } 399 } 400 if candidate.SubsetOf(newClosure) { 401 return true 402 } 403 return false 404 } 405 406 func NewTablescanFDs(all ColSet, strict []ColSet, lax []ColSet, notNull ColSet) *FuncDepSet { 407 ret := &FuncDepSet{all: all} 408 ret.AddNotNullable(notNull) 409 for _, key := range strict { 410 ret.AddStrictKey(key) 411 } 412 for _, key := range lax { 413 ret.AddLaxKey(key) 414 } 415 return ret 416 } 417 418 // NewCrossJoinFDs makes functional dependencies for a cross join 419 // between two relations. 420 func NewCrossJoinFDs(left, right *FuncDepSet) *FuncDepSet { 421 ret := &FuncDepSet{all: left.all.Union(right.all)} 422 ret.AddNotNullable(left.notNull) 423 ret.AddNotNullable(right.notNull) 424 ret.AddConstants(left.consts) 425 ret.AddConstants(right.consts) 426 for _, set := range left.equivs.Sets() { 427 ret.AddEquivSet(set) 428 } 429 for _, set := range right.equivs.Sets() { 430 ret.AddEquivSet(set) 431 } 432 // concatenate lead key, append others 433 var lKey, rKey Key 434 if len(left.keys) > 0 { 435 lKey = left.keys[0] 436 } 437 if len(right.keys) > 0 { 438 rKey = right.keys[0] 439 } 440 var jKey Key 441 if lKey.Empty() && rKey.Empty() { 442 return ret 443 } else if lKey.Empty() { 444 jKey = rKey 445 } else if rKey.Empty() { 446 jKey = lKey 447 } else { 448 jKey.cols = lKey.cols.Union(rKey.cols) 449 jKey.allCols = ret.all 450 jKey.strict = lKey.strict && rKey.strict 451 } 452 ret.keys = append(ret.keys, jKey) 453 ret.keys = append(ret.keys, left.keys...) 454 ret.keys = append(ret.keys, right.keys...) 455 return ret 456 } 457 458 // NewInnerJoinFDs makes functional dependencies for an inner join 459 // between two relations. 460 func NewInnerJoinFDs(left, right *FuncDepSet, filters [][2]ColumnId) *FuncDepSet { 461 ret := &FuncDepSet{all: left.all.Union(right.all)} 462 ret.AddNotNullable(left.notNull) 463 ret.AddNotNullable(right.notNull) 464 if left.HasMax1Row() { 465 ret.AddConstants(left.all) 466 } else { 467 ret.AddConstants(left.consts) 468 } 469 if right.HasMax1Row() { 470 ret.AddConstants(right.all) 471 } else { 472 ret.AddConstants(right.consts) 473 } 474 for _, set := range left.Equiv().Sets() { 475 ret.AddEquivSet(set) 476 } 477 for _, set := range right.Equiv().Sets() { 478 ret.AddEquivSet(set) 479 } 480 for _, f := range filters { 481 ret.AddEquiv(f[0], f[1]) 482 } 483 leftKeys := left.CopyKeys() 484 rightKeys := right.CopyKeys() 485 // concatenate lead key, append others 486 var lKey, rKey Key 487 if len(leftKeys) > 0 { 488 lKey = leftKeys[0] 489 } 490 if len(rightKeys) > 0 { 491 rKey = rightKeys[0] 492 } 493 var jKey Key 494 if lKey.Empty() && rKey.Empty() { 495 ret.AddKey(lKey) 496 return ret 497 } else if lKey.Empty() { 498 jKey = rKey 499 } else if rKey.Empty() { 500 jKey = lKey 501 } else { 502 var subKeys []Key 503 subKeys = append(subKeys, leftKeys...) 504 subKeys = append(subKeys, rightKeys...) 505 jKey.cols = ret.simplifyCols(lKey.cols.Union(rKey.cols), subKeys) 506 jKey.allCols = ret.all 507 jKey.strict = lKey.strict && rKey.strict 508 } 509 ret.AddKey(jKey) 510 for _, k := range leftKeys { 511 ret.keys = append(ret.keys, k) 512 } 513 for _, k := range rightKeys { 514 ret.keys = append(ret.keys, k) 515 } 516 517 return ret 518 } 519 520 func NewFilterFDs(fds *FuncDepSet, notNull ColSet, constant ColSet, equiv [][2]ColumnId) *FuncDepSet { 521 ret := &FuncDepSet{all: fds.All()} 522 ret.AddNotNullable(fds.notNull.Union(notNull)) 523 ret.AddConstants(fds.Constants().Union(constant)) 524 for _, e := range fds.equivs.Sets() { 525 ret.AddEquivSet(e) 526 } 527 for _, e := range equiv { 528 ret.AddEquiv(e[0], e[1]) 529 } 530 for _, k := range fds.keys { 531 ret.AddKey(k) 532 } 533 return ret 534 } 535 536 func NewLookupFDs(fds *FuncDepSet, idxCols ColSet, notNull ColSet, constant ColSet, equiv *EquivSets) *FuncDepSet { 537 ret := &FuncDepSet{all: fds.All()} 538 ret.AddNotNullable(fds.notNull.Union(notNull)) 539 ret.AddConstants(fds.Constants().Union(constant)) 540 for _, e := range fds.equivs.Sets() { 541 ret.AddEquivSet(e) 542 } 543 for _, set := range equiv.Sets() { 544 ret.AddEquivSet(set) 545 } 546 ret.AddLaxKey(idxCols) 547 return ret 548 } 549 550 // NewProjectFDs returns a new functional dependency set projecting 551 // a subset of cols. 552 func NewProjectFDs(fds *FuncDepSet, cols ColSet, distinct bool) *FuncDepSet { 553 ret := &FuncDepSet{all: cols} 554 ret.AddNotNullable(fds.notNull.Intersection(cols)) 555 556 if keptConst := fds.consts.Intersection(cols); !keptConst.Empty() { 557 ret.AddConstants(keptConst) 558 } 559 560 if distinct { 561 ret.AddStrictKey(cols) 562 } 563 564 // mapping deleted->equiv helps us keep keys whose removed cols 565 // have projected equivalents 566 equivMapping := make(map[ColumnId]ColumnId) 567 for _, set := range fds.equivs.Sets() { 568 if set.SubsetOf(cols) { 569 if ret.equivs == nil { 570 ret.equivs = &EquivSets{} 571 } 572 ret.AddEquivSet(set) 573 } else { 574 toKeep := set.Intersection(cols) 575 if toKeep.Empty() { 576 continue 577 } 578 if toRemove := set.Difference(cols); !toRemove.Empty() { 579 for i, ok := toRemove.Next(1); ok; i, ok = toRemove.Next(i + 1) { 580 equivMapping[i], _ = toKeep.Next(1) 581 } 582 } 583 if toKeep.Len() > 1 { 584 ret.AddEquivSet(toKeep) 585 } 586 } 587 } 588 589 for _, key := range fds.keys { 590 if key.cols.SubsetOf(cols) { 591 ret.AddKey(key) 592 continue 593 } 594 toRemove := key.cols.Difference(cols) 595 newKey := key.cols.Intersection(cols) 596 allOk := true 597 var replace ColumnId 598 for i, ok := toRemove.Next(1); ok; i, ok = toRemove.Next(i + 1) { 599 replace, allOk = equivMapping[i] 600 if !allOk { 601 break 602 } 603 newKey.Add(replace) 604 } 605 if allOk { 606 ret.AddKey(Key{strict: key.strict, allCols: ret.all, cols: newKey}) 607 } 608 } 609 610 return ret 611 } 612 613 func NewMax1RowFDs(cols, notNull ColSet) *FuncDepSet { 614 ret := &FuncDepSet{all: cols, consts: cols, notNull: notNull} 615 ret.AddStrictKey(ColSet{}) 616 return ret 617 } 618 619 func NewLeftJoinFDs(left, right *FuncDepSet, filters [][2]ColumnId) *FuncDepSet { 620 leftKey, leftStrict := left.StrictKey() 621 leftColsAreInnerJoinKey := false 622 if leftStrict { 623 // leftcols are strict key 624 j := NewInnerJoinFDs(left, right, filters) 625 leftColsAreInnerJoinKey = j.inClosureOf(j.keys[0].cols, left.all, nil) 626 } 627 628 leftKeys := left.CopyKeys() 629 rightKeys := right.CopyKeys() 630 var lKey, rKey Key 631 if len(leftKeys) > 0 { 632 lKey = leftKeys[0] 633 leftKeys = leftKeys[1:] 634 } 635 if len(rightKeys) > 0 { 636 rKey = rightKeys[0] 637 rightKeys = rightKeys[1:] 638 } 639 var jKey Key 640 if lKey.Empty() && rKey.Empty() { 641 jKey = lKey 642 } else if lKey.Empty() { 643 jKey = rKey 644 } else if rKey.Empty() { 645 jKey = lKey 646 } else { 647 jKey.cols = lKey.cols.Union(rKey.cols) 648 jKey.strict = lKey.strict && rKey.strict 649 } 650 651 ret := &FuncDepSet{all: left.all.Union(right.all)} 652 // left constants and equiv are safe 653 ret.AddNotNullable(left.notNull) 654 ret.AddConstants(left.consts) 655 if left.HasMax1Row() { 656 var leftConst ColSet 657 // leftCols in filter are constant 658 for i := range filters { 659 col := filters[i][0] 660 leftConst.Add(col) 661 } 662 ret.AddConstants(leftConst) 663 } 664 // only left equiv holds 665 for _, equiv := range left.equivs.Sets() { 666 ret.AddEquivSet(equiv) 667 } 668 669 if leftStrict && leftColsAreInnerJoinKey { 670 strictKey := Key{strict: true, allCols: ret.all, cols: leftKey} 671 ret.keys = append(ret.keys, strictKey) 672 if !strictKey.implies(rKey) { 673 ret.keys = append(ret.keys, rKey) 674 } 675 } else { 676 ret.keys = append(ret.keys, jKey) 677 } 678 679 // no filter equivs are valid 680 // TODO if right columns are non-nullable in ON filter, equivs hold 681 // technically we could do (r)~~>(l), but is this useful? 682 683 // right-side keys become lax unless all non-nullable in original 684 for _, key := range rightKeys { 685 if !key.cols.SubsetOf(right.notNull) { 686 key.strict = false 687 } 688 if !ret.keys[0].implies(key) { 689 ret.keys = append(ret.keys, key) 690 } 691 } 692 for _, key := range leftKeys { 693 if !ret.keys[0].implies(key) { 694 ret.keys = append(ret.keys, key) 695 } 696 } 697 // key w cols from both sides discarded unless strict key for whole rel 698 // TODO max1Row condition 699 return ret 700 }