github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/sqle/setalgebra/union.go (about) 1 // Copyright 2020 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package setalgebra 16 17 import ( 18 "github.com/dolthub/dolt/go/store/hash" 19 "github.com/dolthub/dolt/go/store/types" 20 ) 21 22 // finiteSetUnion adds all points from both sets to a new FiniteSet 23 func finiteSetUnion(fs1, fs2 FiniteSet) (FiniteSet, error) { 24 hashToVal := make(map[hash.Hash]types.Value, len(fs1.HashToVal)+len(fs2.HashToVal)) 25 for h, v := range fs1.HashToVal { 26 hashToVal[h] = v 27 } 28 29 for h, v := range fs2.HashToVal { 30 hashToVal[h] = v 31 } 32 33 return FiniteSet{HashToVal: hashToVal}, nil 34 } 35 36 // copyIntervalEndpoint makes a copy of an interval endpoint 37 func copyIntervalEndpoint(ep *IntervalEndpoint) *IntervalEndpoint { 38 if ep == nil { 39 return nil 40 } 41 42 copyOf := *ep 43 44 return ©Of 45 } 46 47 // finiteSetIntervalUnion will check all the points of the FiniteSet and see which ones are not in the given interval. 48 // if all points are in the interval then the resulting set is the interval itself, otherwise a CompositeSet containing 49 // the missing points as a new FiniteSet and the interval is returned. 50 func finiteSetIntervalUnion(fs FiniteSet, in Interval) (Set, error) { 51 inStart := copyIntervalEndpoint(in.Start) 52 inEnd := copyIntervalEndpoint(in.End) 53 54 hashToVal := make(map[hash.Hash]types.Value, len(fs.HashToVal)) 55 for h, v := range fs.HashToVal { 56 inRange, err := in.Contains(v) 57 58 if err != nil { 59 return nil, err 60 } 61 62 if !inRange { 63 if inStart != nil && !inStart.Inclusive { 64 if inStart.Val.Equals(v) { 65 inStart.Inclusive = true 66 continue 67 } 68 } 69 70 if in.End != nil && !in.End.Inclusive { 71 if in.End.Val.Equals(v) { 72 inEnd.Inclusive = true 73 continue 74 } 75 } 76 77 hashToVal[h] = v 78 } 79 } 80 81 resInterval := Interval{in.nbf, inStart, inEnd} 82 if len(hashToVal) > 0 { 83 newSet := FiniteSet{HashToVal: hashToVal} 84 return CompositeSet{newSet, []Interval{resInterval}}, nil 85 } else { 86 return resInterval, nil 87 } 88 } 89 90 // finiteSetCompositeSetUnion checks all the points in a FiniteSet against the CompositeSet to find all points not 91 // represented in the CompositeSet (So not in any of it's intervals and not in it's existing FiniteSet), and adds those 92 // points to the compositeSet 93 func finiteSetCompositeSetUnion(fs FiniteSet, cs CompositeSet) (Set, error) { 94 hashToVal := make(map[hash.Hash]types.Value, len(fs.HashToVal)) 95 for h, v := range cs.Set.HashToVal { 96 hashToVal[h] = v 97 } 98 99 for h, v := range fs.HashToVal { 100 var inRange bool 101 var err error 102 for _, r := range cs.Intervals { 103 inRange, err = r.Contains(v) 104 105 if err != nil { 106 return nil, err 107 } 108 109 if inRange { 110 break 111 } 112 } 113 114 if !inRange { 115 hashToVal[h] = v 116 } 117 } 118 119 return CompositeSet{FiniteSet{hashToVal}, cs.Intervals}, nil 120 } 121 122 // intervalUnion takes two Interval objects and compares them then returns their union 123 func intervalUnion(in1, in2 Interval) (Set, error) { 124 intComparison, err := compareIntervals(in1, in2) 125 126 if err != nil { 127 return nil, err 128 } 129 130 return intervalUnionWithComparison(in1, in2, intComparison) 131 } 132 133 // intervalUnionWithComparison takes two Interval objects and their comparison and returns a new interval that 134 // represents all the points in both intervals where possible, and returns a CompositeInterval when the two intervals 135 // are non-overlapping. 136 func intervalUnionWithComparison(in1, in2 Interval, intComparison intervalComparison) (Set, error) { 137 var resIntervToReduce Interval 138 if intComparison == noOverlapLess { 139 if in1.End != nil && in2.Start != nil && (in1.End.Inclusive || in2.Start.Inclusive) && in1.End.Val.Equals(in2.Start.Val) { 140 // in the case where you have intervals X and Y defined as A < X < B and B <= Y < C the comparison of the 141 // end of X and the start of Y will be -1. But X includes all the points less than B, Y includes B and the 142 // points up until C. So the resulting interval Z would be A < Z < C. 143 resIntervToReduce = Interval{in1.nbf, in1.Start, in2.End} 144 } else { 145 // Non overlapping intervals. Create CompositeSet with intervals in sorted order. 146 return CompositeSet{FiniteSet{make(map[hash.Hash]types.Value)}, []Interval{in1, in2}}, nil 147 } 148 } else if intComparison == noOverlapGreater { 149 if in2.End != nil && in1.Start != nil && (in2.End.Inclusive || in1.Start.Inclusive) && in2.End.Val.Equals(in1.Start.Val) { 150 // see above for info no this case 151 resIntervToReduce = Interval{in1.nbf, in2.Start, in1.End} 152 } else { 153 // Non overlapping intervals. Create CompositeSet with intervals in sorted order. 154 return CompositeSet{FiniteSet{make(map[hash.Hash]types.Value)}, []Interval{in2, in1}}, nil 155 } 156 } else if intComparison[start1start2] <= 0 { 157 if intComparison[end1end2] >= 0 { 158 // the first interval wholly contains the second. Return the first. 159 return in1, nil 160 } else { 161 // return an interval with the smallest start point and largest end point 162 resIntervToReduce = Interval{in1.nbf, in1.Start, in2.End} 163 } 164 } else { 165 if intComparison[end1end2] <= 0 { 166 // the second interval wholly contains the first. Return the second. 167 return in2, nil 168 } else { 169 // return an interval with the smallest start point and largest end point 170 resIntervToReduce = Interval{in1.nbf, in2.Start, in1.End} 171 } 172 } 173 174 return simplifyInterval(resIntervToReduce) 175 } 176 177 // intervalCompositeSetUnion will check the CompositeSet's FiniteSet for points that the new Interval contains 178 // and exclude those from the resulting composite and then union the Interval with its existing intervals. 179 func intervalCompositeSetUnion(in Interval, cs CompositeSet) (Set, error) { 180 hashToVal := make(map[hash.Hash]types.Value) 181 for h, v := range cs.Set.HashToVal { 182 contained, err := in.Contains(v) 183 184 if err != nil { 185 return nil, err 186 } 187 188 if !contained { 189 hashToVal[h] = v 190 } 191 } 192 193 intervals, err := unionWithMultipleIntervals(in, cs.Intervals) 194 195 if err != nil { 196 return nil, err 197 } 198 199 if len(hashToVal) == 0 && len(intervals) == 1 { 200 // could possibly be universal set 201 return simplifyInterval(intervals[0]) 202 } else { 203 return CompositeSet{FiniteSet{hashToVal}, intervals}, nil 204 } 205 } 206 207 // unionWithMultipleIntervals takes an interval and a slice of intervals and returns a slice of intervals containing 208 // the minimum number of intervals required to represent the union. The src []Interval argument must be in sorted 209 // order and only contain non-overlapping intervals. 210 func unionWithMultipleIntervals(in Interval, src []Interval) ([]Interval, error) { 211 dest := make([]Interval, 0, len(src)+1) 212 213 // iterate in sorted order 214 for i, curr := range src { 215 intComparison, err := compareIntervals(in, curr) 216 217 if err != nil { 218 return nil, err 219 } 220 221 if intComparison == noOverlapLess { 222 // new interval is wholly less than the curr Interval. Check to see if we a case where we can combine them 223 // into a single interval (described in intervalUnionWithComparison) 224 if in.End != nil && curr.Start != nil && (in.End.Inclusive || curr.Start.Inclusive) && in.End.Val.Equals(curr.Start.Val) { 225 // modify the input Interval object to include the curr interval 226 in = Interval{in.nbf, in.Start, curr.End} 227 continue 228 } 229 230 // current interval is before all remaining intervals. Add it and then add all the remaining intervals 231 dest = append(dest, in) 232 in = Interval{} 233 dest = append(dest, src[i:]...) 234 break 235 } else if intComparison == noOverlapGreater { 236 // new interval is wholly greater than the curr Interval. Check to see if we a case where we can combine them 237 // into a single interval (described in intervalUnionWithComparison) 238 if curr.End != nil && in.Start != nil && (curr.End.Inclusive || in.Start.Inclusive) && curr.End.Val.Equals(in.Start.Val) { 239 // modify the input Interval object to include the curr interval 240 in = Interval{in.nbf, curr.Start, in.End} 241 continue 242 } 243 244 // add the current interval, and leave the input Interval object unchanged 245 dest = append(dest, curr) 246 } else { 247 // input interval overlaps with the curr interval. update the input Interval object to be the 248 // entire interval 249 un, err := intervalUnionWithComparison(in, curr, intComparison) 250 251 if err != nil { 252 return nil, err 253 } 254 255 switch typedVal := un.(type) { 256 case UniversalSet: 257 return []Interval{{in.nbf, nil, nil}}, nil 258 case Interval: 259 in = typedVal 260 default: 261 panic("Should not be possible.") 262 } 263 } 264 } 265 266 if in.nbf != nil { 267 dest = append(dest, in) 268 } 269 270 return dest, nil 271 } 272 273 // addIfNotInIntervals adds a value to the provided hashToValue map if a value in the FiniteSet passed in is not in any 274 // of the intervals. 275 func addIfNotInIntervals(hashToValue map[hash.Hash]types.Value, fs FiniteSet, intervals []Interval) error { 276 var err error 277 for h, v := range fs.HashToVal { 278 var found bool 279 for _, in := range intervals { 280 found, err = in.Contains(v) 281 if err != nil { 282 return err 283 } 284 285 if found { 286 break 287 } 288 } 289 290 if !found { 291 hashToValue[h] = v 292 } 293 } 294 295 return nil 296 } 297 298 // findUniqueFiniteSetForComposites takes the values from cs1.Set and adds the ones not contained in cs2.Intervals, 299 // and then takes the values from cs2.Set and adds the ones not contained in cs1.Intervals then returns the 300 // resulting FiniteSet 301 func findUniqueFiniteSetForComposites(cs1, cs2 CompositeSet) (FiniteSet, error) { 302 hashToVal := make(map[hash.Hash]types.Value) 303 err := addIfNotInIntervals(hashToVal, cs1.Set, cs2.Intervals) 304 305 if err != nil { 306 return FiniteSet{}, err 307 } 308 309 err = addIfNotInIntervals(hashToVal, cs2.Set, cs1.Intervals) 310 311 if err != nil { 312 return FiniteSet{}, err 313 } 314 315 return FiniteSet{hashToVal}, nil 316 } 317 318 // compositeUnion returns the union of 2 CompositeSets 319 func compositeUnion(cs1, cs2 CompositeSet) (Set, error) { 320 fs, err := findUniqueFiniteSetForComposites(cs1, cs2) 321 322 if err != nil { 323 return nil, err 324 } 325 326 intervals := cs1.Intervals 327 for _, currInterval := range cs2.Intervals { 328 intervals, err = unionWithMultipleIntervals(currInterval, intervals) 329 330 if err != nil { 331 return nil, err 332 } else if len(intervals) == 1 && intervals[0].Start == nil && intervals[0].End == nil { 333 return UniversalSet{}, nil 334 } 335 } 336 337 if len(intervals) == 1 { 338 return intervals[0], nil 339 } 340 341 return CompositeSet{fs, intervals}, nil 342 }