github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/sqle/setalgebra/union.go (about)

     1  // Copyright 2020 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package setalgebra
    16  
    17  import (
    18  	"github.com/dolthub/dolt/go/store/hash"
    19  	"github.com/dolthub/dolt/go/store/types"
    20  )
    21  
    22  // finiteSetUnion adds all points from both sets to a new FiniteSet
    23  func finiteSetUnion(fs1, fs2 FiniteSet) (FiniteSet, error) {
    24  	hashToVal := make(map[hash.Hash]types.Value, len(fs1.HashToVal)+len(fs2.HashToVal))
    25  	for h, v := range fs1.HashToVal {
    26  		hashToVal[h] = v
    27  	}
    28  
    29  	for h, v := range fs2.HashToVal {
    30  		hashToVal[h] = v
    31  	}
    32  
    33  	return FiniteSet{HashToVal: hashToVal}, nil
    34  }
    35  
    36  // copyIntervalEndpoint makes a copy of an interval endpoint
    37  func copyIntervalEndpoint(ep *IntervalEndpoint) *IntervalEndpoint {
    38  	if ep == nil {
    39  		return nil
    40  	}
    41  
    42  	copyOf := *ep
    43  
    44  	return &copyOf
    45  }
    46  
    47  // finiteSetIntervalUnion will check all the points of the FiniteSet and see which ones are not in the given interval.
    48  // if all points are in the interval then the resulting set is the interval itself, otherwise a CompositeSet containing
    49  // the missing points as a new FiniteSet and the interval is returned.
    50  func finiteSetIntervalUnion(fs FiniteSet, in Interval) (Set, error) {
    51  	inStart := copyIntervalEndpoint(in.Start)
    52  	inEnd := copyIntervalEndpoint(in.End)
    53  
    54  	hashToVal := make(map[hash.Hash]types.Value, len(fs.HashToVal))
    55  	for h, v := range fs.HashToVal {
    56  		inRange, err := in.Contains(v)
    57  
    58  		if err != nil {
    59  			return nil, err
    60  		}
    61  
    62  		if !inRange {
    63  			if inStart != nil && !inStart.Inclusive {
    64  				if inStart.Val.Equals(v) {
    65  					inStart.Inclusive = true
    66  					continue
    67  				}
    68  			}
    69  
    70  			if in.End != nil && !in.End.Inclusive {
    71  				if in.End.Val.Equals(v) {
    72  					inEnd.Inclusive = true
    73  					continue
    74  				}
    75  			}
    76  
    77  			hashToVal[h] = v
    78  		}
    79  	}
    80  
    81  	resInterval := Interval{in.nbf, inStart, inEnd}
    82  	if len(hashToVal) > 0 {
    83  		newSet := FiniteSet{HashToVal: hashToVal}
    84  		return CompositeSet{newSet, []Interval{resInterval}}, nil
    85  	} else {
    86  		return resInterval, nil
    87  	}
    88  }
    89  
    90  // finiteSetCompositeSetUnion checks all the points in a FiniteSet against the CompositeSet to find all points not
    91  // represented in the CompositeSet (So not in any of it's intervals and not in it's existing FiniteSet), and adds those
    92  // points to the compositeSet
    93  func finiteSetCompositeSetUnion(fs FiniteSet, cs CompositeSet) (Set, error) {
    94  	hashToVal := make(map[hash.Hash]types.Value, len(fs.HashToVal))
    95  	for h, v := range cs.Set.HashToVal {
    96  		hashToVal[h] = v
    97  	}
    98  
    99  	for h, v := range fs.HashToVal {
   100  		var inRange bool
   101  		var err error
   102  		for _, r := range cs.Intervals {
   103  			inRange, err = r.Contains(v)
   104  
   105  			if err != nil {
   106  				return nil, err
   107  			}
   108  
   109  			if inRange {
   110  				break
   111  			}
   112  		}
   113  
   114  		if !inRange {
   115  			hashToVal[h] = v
   116  		}
   117  	}
   118  
   119  	return CompositeSet{FiniteSet{hashToVal}, cs.Intervals}, nil
   120  }
   121  
   122  // intervalUnion takes two Interval objects and compares them then returns their union
   123  func intervalUnion(in1, in2 Interval) (Set, error) {
   124  	intComparison, err := compareIntervals(in1, in2)
   125  
   126  	if err != nil {
   127  		return nil, err
   128  	}
   129  
   130  	return intervalUnionWithComparison(in1, in2, intComparison)
   131  }
   132  
   133  // intervalUnionWithComparison takes two Interval objects and their comparison and returns a new interval that
   134  // represents all the points in both intervals where possible, and returns a CompositeInterval when the two intervals
   135  // are non-overlapping.
   136  func intervalUnionWithComparison(in1, in2 Interval, intComparison intervalComparison) (Set, error) {
   137  	var resIntervToReduce Interval
   138  	if intComparison == noOverlapLess {
   139  		if in1.End != nil && in2.Start != nil && (in1.End.Inclusive || in2.Start.Inclusive) && in1.End.Val.Equals(in2.Start.Val) {
   140  			// in the case where you have intervals X and Y defined as A < X < B and B <= Y < C the comparison of the
   141  			// end of X and the start of Y will be -1.  But X includes all the points less than B, Y includes B and the
   142  			// points up until C.  So the resulting interval Z would be A < Z < C.
   143  			resIntervToReduce = Interval{in1.nbf, in1.Start, in2.End}
   144  		} else {
   145  			// Non overlapping intervals. Create CompositeSet with intervals in sorted order.
   146  			return CompositeSet{FiniteSet{make(map[hash.Hash]types.Value)}, []Interval{in1, in2}}, nil
   147  		}
   148  	} else if intComparison == noOverlapGreater {
   149  		if in2.End != nil && in1.Start != nil && (in2.End.Inclusive || in1.Start.Inclusive) && in2.End.Val.Equals(in1.Start.Val) {
   150  			// see above for info no this case
   151  			resIntervToReduce = Interval{in1.nbf, in2.Start, in1.End}
   152  		} else {
   153  			// Non overlapping intervals. Create CompositeSet with intervals in sorted order.
   154  			return CompositeSet{FiniteSet{make(map[hash.Hash]types.Value)}, []Interval{in2, in1}}, nil
   155  		}
   156  	} else if intComparison[start1start2] <= 0 {
   157  		if intComparison[end1end2] >= 0 {
   158  			// the first interval wholly contains the second. Return the first.
   159  			return in1, nil
   160  		} else {
   161  			// return an interval with the smallest start point and largest end point
   162  			resIntervToReduce = Interval{in1.nbf, in1.Start, in2.End}
   163  		}
   164  	} else {
   165  		if intComparison[end1end2] <= 0 {
   166  			// the second interval wholly contains the first. Return the second.
   167  			return in2, nil
   168  		} else {
   169  			// return an interval with the smallest start point and largest end point
   170  			resIntervToReduce = Interval{in1.nbf, in2.Start, in1.End}
   171  		}
   172  	}
   173  
   174  	return simplifyInterval(resIntervToReduce)
   175  }
   176  
   177  // intervalCompositeSetUnion will check the CompositeSet's FiniteSet for points that the new Interval contains
   178  // and exclude those from the resulting composite and then union the Interval with its existing intervals.
   179  func intervalCompositeSetUnion(in Interval, cs CompositeSet) (Set, error) {
   180  	hashToVal := make(map[hash.Hash]types.Value)
   181  	for h, v := range cs.Set.HashToVal {
   182  		contained, err := in.Contains(v)
   183  
   184  		if err != nil {
   185  			return nil, err
   186  		}
   187  
   188  		if !contained {
   189  			hashToVal[h] = v
   190  		}
   191  	}
   192  
   193  	intervals, err := unionWithMultipleIntervals(in, cs.Intervals)
   194  
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  
   199  	if len(hashToVal) == 0 && len(intervals) == 1 {
   200  		// could possibly be universal set
   201  		return simplifyInterval(intervals[0])
   202  	} else {
   203  		return CompositeSet{FiniteSet{hashToVal}, intervals}, nil
   204  	}
   205  }
   206  
   207  // unionWithMultipleIntervals takes an interval and a slice of intervals and returns a slice of intervals containing
   208  // the minimum number of intervals required to represent the union.  The src []Interval argument must be in sorted
   209  // order and only contain non-overlapping intervals.
   210  func unionWithMultipleIntervals(in Interval, src []Interval) ([]Interval, error) {
   211  	dest := make([]Interval, 0, len(src)+1)
   212  
   213  	// iterate in sorted order
   214  	for i, curr := range src {
   215  		intComparison, err := compareIntervals(in, curr)
   216  
   217  		if err != nil {
   218  			return nil, err
   219  		}
   220  
   221  		if intComparison == noOverlapLess {
   222  			// new interval is wholly less than the curr Interval. Check to see if we a case where we can combine them
   223  			// into a single interval (described in intervalUnionWithComparison)
   224  			if in.End != nil && curr.Start != nil && (in.End.Inclusive || curr.Start.Inclusive) && in.End.Val.Equals(curr.Start.Val) {
   225  				// modify the input Interval object to include the curr interval
   226  				in = Interval{in.nbf, in.Start, curr.End}
   227  				continue
   228  			}
   229  
   230  			// current interval is before all remaining intervals.  Add it and then add all the remaining intervals
   231  			dest = append(dest, in)
   232  			in = Interval{}
   233  			dest = append(dest, src[i:]...)
   234  			break
   235  		} else if intComparison == noOverlapGreater {
   236  			// new interval is wholly greater than the curr Interval. Check to see if we a case where we can combine them
   237  			// into a single interval (described in intervalUnionWithComparison)
   238  			if curr.End != nil && in.Start != nil && (curr.End.Inclusive || in.Start.Inclusive) && curr.End.Val.Equals(in.Start.Val) {
   239  				// modify the input Interval object to include the curr interval
   240  				in = Interval{in.nbf, curr.Start, in.End}
   241  				continue
   242  			}
   243  
   244  			// add the current interval, and leave the input Interval object unchanged
   245  			dest = append(dest, curr)
   246  		} else {
   247  			// input interval overlaps with the curr interval.  update the input Interval object to be the
   248  			// entire interval
   249  			un, err := intervalUnionWithComparison(in, curr, intComparison)
   250  
   251  			if err != nil {
   252  				return nil, err
   253  			}
   254  
   255  			switch typedVal := un.(type) {
   256  			case UniversalSet:
   257  				return []Interval{{in.nbf, nil, nil}}, nil
   258  			case Interval:
   259  				in = typedVal
   260  			default:
   261  				panic("Should not be possible.")
   262  			}
   263  		}
   264  	}
   265  
   266  	if in.nbf != nil {
   267  		dest = append(dest, in)
   268  	}
   269  
   270  	return dest, nil
   271  }
   272  
   273  // addIfNotInIntervals adds a value to the provided hashToValue map if a value in the FiniteSet passed in is not in any
   274  // of the intervals.
   275  func addIfNotInIntervals(hashToValue map[hash.Hash]types.Value, fs FiniteSet, intervals []Interval) error {
   276  	var err error
   277  	for h, v := range fs.HashToVal {
   278  		var found bool
   279  		for _, in := range intervals {
   280  			found, err = in.Contains(v)
   281  			if err != nil {
   282  				return err
   283  			}
   284  
   285  			if found {
   286  				break
   287  			}
   288  		}
   289  
   290  		if !found {
   291  			hashToValue[h] = v
   292  		}
   293  	}
   294  
   295  	return nil
   296  }
   297  
   298  // findUniqueFiniteSetForComposites takes the values from cs1.Set and adds the ones not contained in cs2.Intervals,
   299  // and then takes the values from cs2.Set and adds the ones not contained in cs1.Intervals then returns the
   300  // resulting FiniteSet
   301  func findUniqueFiniteSetForComposites(cs1, cs2 CompositeSet) (FiniteSet, error) {
   302  	hashToVal := make(map[hash.Hash]types.Value)
   303  	err := addIfNotInIntervals(hashToVal, cs1.Set, cs2.Intervals)
   304  
   305  	if err != nil {
   306  		return FiniteSet{}, err
   307  	}
   308  
   309  	err = addIfNotInIntervals(hashToVal, cs2.Set, cs1.Intervals)
   310  
   311  	if err != nil {
   312  		return FiniteSet{}, err
   313  	}
   314  
   315  	return FiniteSet{hashToVal}, nil
   316  }
   317  
   318  // compositeUnion returns the union of 2 CompositeSets
   319  func compositeUnion(cs1, cs2 CompositeSet) (Set, error) {
   320  	fs, err := findUniqueFiniteSetForComposites(cs1, cs2)
   321  
   322  	if err != nil {
   323  		return nil, err
   324  	}
   325  
   326  	intervals := cs1.Intervals
   327  	for _, currInterval := range cs2.Intervals {
   328  		intervals, err = unionWithMultipleIntervals(currInterval, intervals)
   329  
   330  		if err != nil {
   331  			return nil, err
   332  		} else if len(intervals) == 1 && intervals[0].Start == nil && intervals[0].End == nil {
   333  			return UniversalSet{}, nil
   334  		}
   335  	}
   336  
   337  	if len(intervals) == 1 {
   338  		return intervals[0], nil
   339  	}
   340  
   341  	return CompositeSet{fs, intervals}, nil
   342  }