github.com/dolthub/go-mysql-server@v0.18.0/sql/range.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sql
    16  
    17  import (
    18  	"fmt"
    19  	"sort"
    20  	"strings"
    21  )
    22  
// RangeCollection is a collection of ranges that represent different (non-overlapping) filter expressions.
// The ranges that belong to one collection are treated as a union of each other (see RangeCollection.Intersect).
type RangeCollection []Range
    25  
// Range is a collection of RangeColumnExprs that are ordered by the column expressions as returned by their parent
// index, i.e. the element at position i belongs to the i-th expression of the index (see ExpressionByColumnName).
// A single range represents a set of values intended for iteration by an integrator's index.
type Range []RangeColumnExpr
    29  
    30  // Equals returns whether the given RangeCollection matches the calling RangeCollection. The order of each Range is
    31  // important, therefore it is recommended to sort two collections beforehand.
    32  func (ranges RangeCollection) Equals(otherCollection RangeCollection) (bool, error) {
    33  	if len(ranges) != len(otherCollection) {
    34  		return false, nil
    35  	}
    36  	for i := range ranges {
    37  		if ok, err := ranges[i].Equals(otherCollection[i]); err != nil || !ok {
    38  			return ok, err
    39  		}
    40  	}
    41  	return true, nil
    42  }
    43  
    44  // Intersect attempts to intersect the given RangeCollection with the calling RangeCollection. This ensures that each
    45  // Range belonging to the same collection is treated as a union with respect to that same collection, rather than
    46  // attempting to intersect ranges that are a part of the same collection.
    47  func (ranges RangeCollection) Intersect(otherRanges RangeCollection) (RangeCollection, error) {
    48  	var newRanges RangeCollection
    49  	for _, rang := range ranges {
    50  		for _, otherRange := range otherRanges {
    51  			newRange, err := rang.Intersect(otherRange)
    52  			if err != nil {
    53  				return nil, err
    54  			}
    55  			if len(newRange) > 0 {
    56  				newRanges = append(newRanges, newRange)
    57  			}
    58  		}
    59  	}
    60  	newRanges, err := RemoveOverlappingRanges(newRanges...)
    61  	if err != nil {
    62  		return nil, err
    63  	}
    64  	if len(newRanges) == 0 {
    65  		return nil, nil
    66  	}
    67  	return newRanges, nil
    68  }
    69  
    70  // String returns this RangeCollection as a string for display purposes.
    71  func (ranges RangeCollection) String() string {
    72  	sb := strings.Builder{}
    73  	sb.WriteByte('[')
    74  	for i, rang := range ranges {
    75  		if i != 0 {
    76  			sb.WriteString(", ")
    77  		}
    78  		sb.WriteString(rang.String())
    79  	}
    80  	sb.WriteByte(']')
    81  	return sb.String()
    82  }
    83  
    84  // DebugString returns this RangeCollection as a string for debugging purposes.
    85  func (ranges RangeCollection) DebugString() string {
    86  	sb := strings.Builder{}
    87  	sb.WriteByte('[')
    88  	for i, rang := range ranges {
    89  		if i != 0 {
    90  			sb.WriteString(", ")
    91  		}
    92  		sb.WriteString(rang.DebugString())
    93  	}
    94  	sb.WriteByte(']')
    95  	return sb.String()
    96  }
    97  
    98  // AsEmpty returns a Range full of empty RangeColumns with the same types as the calling Range.
    99  func (rang Range) AsEmpty() Range {
   100  	emptyRange := make(Range, len(rang))
   101  	for i := range rang {
   102  		emptyRange[i] = EmptyRangeColumnExpr(rang[i].Typ)
   103  	}
   104  	return emptyRange
   105  }
   106  
   107  func (rang Range) IsEmpty() (bool, error) {
   108  	if len(rang) == 0 {
   109  		return true, nil
   110  	}
   111  	for i := range rang {
   112  		res, err := rang[i].IsEmpty()
   113  		if err != nil {
   114  			return false, err
   115  		}
   116  		if res {
   117  			return true, nil
   118  		}
   119  	}
   120  	return false, nil
   121  }
   122  
   123  // Copy returns a duplicate of this Range.
   124  func (rang Range) Copy() Range {
   125  	newRange := make(Range, len(rang))
   126  	copy(newRange, rang)
   127  	return newRange
   128  }
   129  
   130  // ExpressionByColumnName returns the RangeColumnExpr that belongs to the given column expression. If an index does not
   131  // contain the column expression then false is returned.
   132  func (rang Range) ExpressionByColumnName(idx Index, colExpr string) (RangeColumnExpr, bool) {
   133  	for i, idxColExpr := range idx.Expressions() {
   134  		if idxColExpr == colExpr {
   135  			if i < len(rang) {
   136  				return rang[i], true
   137  			}
   138  			break
   139  		}
   140  	}
   141  	return RangeColumnExpr{}, false
   142  }
   143  
   144  // Equals evaluates whether the calling Range is equivalent to the given Range.
   145  func (rang Range) Equals(otherRange Range) (bool, error) {
   146  	if len(rang) != len(otherRange) {
   147  		return false, nil
   148  	}
   149  	for i := range rang {
   150  		if ok, err := rang[i].Equals(otherRange[i]); err != nil || !ok {
   151  			return false, err
   152  		}
   153  	}
   154  	return true, nil
   155  }
   156  
   157  // Compare returns an integer stating the relative position of the calling Range to the given Range.
   158  func (rang Range) Compare(otherRange Range) (int, error) {
   159  	if len(rang) != len(otherRange) {
   160  		return 0, fmt.Errorf("compared ranges must have matching lengths")
   161  	}
   162  	for i := range rang {
   163  		cmp, err := rang[i].LowerBound.Compare(otherRange[i].LowerBound, rang[i].Typ)
   164  		if err != nil || cmp != 0 {
   165  			return cmp, err
   166  		}
   167  		cmp, err = rang[i].UpperBound.Compare(otherRange[i].UpperBound, rang[i].Typ)
   168  		if err != nil || cmp != 0 {
   169  			return cmp, err
   170  		}
   171  	}
   172  	return 0, nil
   173  }
   174  
   175  // Intersect intersects the given Range with the calling Range.
   176  func (rang Range) Intersect(otherRange Range) (Range, error) {
   177  	if len(rang) != len(otherRange) {
   178  		return nil, nil
   179  	}
   180  	newRangeCollection := make(Range, len(rang))
   181  	for i := range rang {
   182  		intersectedRange, ok, err := rang[i].TryIntersect(otherRange[i])
   183  		if err != nil {
   184  			return nil, err
   185  		}
   186  		if !ok {
   187  			return rang.AsEmpty(), nil
   188  		}
   189  		newRangeCollection[i] = intersectedRange
   190  	}
   191  	return newRangeCollection, nil
   192  }
   193  
   194  // TryMerge attempts to merge the given Range with the calling Range. This can only do a merge if one Range is a subset
   195  // of the other, or if all columns except for one are equivalent, upon which a union is attempted on that column.
   196  // Returns true if the merge was successful.
   197  func (rang Range) TryMerge(otherRange Range) (Range, bool, error) {
   198  	if len(rang) != len(otherRange) {
   199  		return nil, false, nil
   200  	}
   201  	if ok, err := rang.IsSupersetOf(otherRange); err != nil {
   202  		return nil, false, err
   203  	} else if ok {
   204  		return rang, true, nil
   205  	}
   206  	if ok, err := otherRange.IsSupersetOf(rang); err != nil {
   207  		return nil, false, err
   208  	} else if ok {
   209  		return otherRange, true, nil
   210  	}
   211  
   212  	indexToMerge := -1
   213  	// The superset checks will cover if every column expr is equivalent
   214  	for i := 0; i < len(rang); i++ {
   215  		if ok, err := rang[i].Equals(otherRange[i]); err != nil {
   216  			return nil, false, err
   217  		} else if !ok {
   218  			// Only one column may not equal another
   219  			if indexToMerge == -1 {
   220  				indexToMerge = i
   221  			} else {
   222  				return nil, false, nil
   223  			}
   224  		}
   225  	}
   226  	mergedLastExpr, ok, err := rang[indexToMerge].TryUnion(otherRange[indexToMerge])
   227  	if err != nil || !ok {
   228  		return nil, false, err
   229  	}
   230  	mergedRange := rang.Copy()
   231  	mergedRange[indexToMerge] = mergedLastExpr
   232  	return mergedRange, true, nil
   233  }
   234  
   235  // IsSubsetOf evaluates whether the calling Range is fully encompassed by the given Range.
   236  func (rang Range) IsSubsetOf(otherRange Range) (bool, error) {
   237  	if len(rang) != len(otherRange) {
   238  		return false, nil
   239  	}
   240  	for i := range rang {
   241  		ok, err := rang[i].IsSubsetOf(otherRange[i])
   242  		if err != nil || !ok {
   243  			return false, err
   244  		}
   245  	}
   246  	return true, nil
   247  }
   248  
   249  // IsSupersetOf evaluates whether the calling Range fully encompasses the given Range.
   250  func (rang Range) IsSupersetOf(otherRange Range) (bool, error) {
   251  	return otherRange.IsSubsetOf(rang)
   252  }
   253  
   254  // IsConnected returns whether the calling Range and given Range have overlapping values, which would result in the same
   255  // values being returned from some subset of both ranges.
   256  func (rang Range) IsConnected(otherRange Range) (bool, error) {
   257  	if len(rang) != len(otherRange) {
   258  		return false, nil
   259  	}
   260  	for i := range rang {
   261  		_, ok, err := rang[i].Overlaps(otherRange[i])
   262  		if err != nil || !ok {
   263  			return false, err
   264  		}
   265  	}
   266  	return true, nil
   267  }
   268  
   269  // Overlaps returns whether the calling Range and given Range have overlapping values, which would result in the same
   270  // values being returned from some subset of both ranges.
   271  func (rang Range) Overlaps(otherRange Range) (bool, error) {
   272  	if len(rang) != len(otherRange) {
   273  		return false, nil
   274  	}
   275  	for i := range rang {
   276  		_, ok, err := rang[i].Overlaps(otherRange[i])
   277  		if err != nil || !ok {
   278  			return false, err
   279  		}
   280  	}
   281  	return true, nil
   282  }
   283  
   284  // RemoveOverlap removes any overlap that the given Range may have with the calling Range. If the two ranges do not
   285  // overlap and are not mergeable then they're both returned. If one is a subset of the other or is mergeable then only
   286  // one Range is returned. Otherwise, this returns a collection of ranges that do not overlap with each other, and covers
   287  // the entirety of the original ranges (and nothing more). If the two ranges do not overlap and are not mergeable then
   288  // false is returned, otherwise returns true.
   289  func (rang Range) RemoveOverlap(otherRange Range) (RangeCollection, bool, error) {
   290  	// An explanation on why overlapping ranges may return more than one range, and why they can't just be merged as-is.
   291  	// Let's start with a Range that has a single RangeColumnExpression (a one-dimensional range). Imagine this as a
   292  	// number line with contiguous sections defined as the range. If you have any two sections that overlap, then you
   293  	// can simply take the lowest and highest bounds between both of those sections to create a single, larger range
   294  	// that fully encompasses both (while not including any elements that were not in the original ranges).
   295  	//
   296  	// Now let's look at a Range that has two RangeColumnExpressions (a two-dimensional range). Imagine this as a sheet
   297  	// of paper on a table (for easier visualization). If these two sheet overlap then we can't just take the lowest
   298  	// and highest bounds of these sheets as that may include areas outside either sheet of paper. Instead, we can cut
   299  	// the sheets so that we get smaller sheets of paper, with one "sub sheet" perfectly overlapping the other. This
   300  	// may be done with two cuts on each sheet, giving us a total of 8 smaller sheets overall. Of course the perfectly
   301  	// overlapping sheets can be combined, so we throw one of them away. From there we're back to our original Range
   302  	// example with only one dimension, as now this overlapping subsheet will differ from the sheets on its edges by
   303  	// only a single dimension (the sheet to the left, for example, will be the same height but extending further left).
   304  	// We can then combine it with its edge-adjacent sheets until we have a collection of sheets that do not overlap
   305  	// and all have different widths and heights.
   306  	//
   307  	// The great thing about this example with two dimensions is that it can be used for N dimensions, where we break
   308  	// down the ranges until we get a perfectly overlapping range, and then merge (a single dimension at a time) all
   309  	// edge-adjacent ranges until we arrive at a set of ranges that do not overlap and cannot be combined.
   310  
   311  	// If the two ranges may be merged then we just do that and return.
   312  	// Also allows us to not have to worry about the case where every column is equivalent.
   313  	if mergedRange, ok, err := rang.TryMerge(otherRange); err != nil {
   314  		return nil, false, err
   315  	} else if ok {
   316  		return []Range{mergedRange}, true, nil
   317  	}
   318  	// We check for overlapping after checking for merge as two ranges may not overlap but may be mergeable.
   319  	// This would occur if all other columns are equivalent except for one column that is overlapping or adjacent.
   320  	if ok, err := rang.Overlaps(otherRange); err != nil || !ok {
   321  		return []Range{rang, otherRange}, false, err
   322  	}
   323  
   324  	var ranges []Range
   325  	for i := range rang {
   326  		if ok, err := rang[i].Equals(otherRange[i]); err != nil {
   327  			return nil, false, err
   328  		} else if ok {
   329  			continue
   330  		}
   331  		// Get the RangeColumnExpr that overlaps both RangeColumnExprs
   332  		overlapExpr, _, err := rang[i].Overlaps(otherRange[i])
   333  		if err != nil {
   334  			return nil, false, err
   335  		}
   336  		// Subtract the overlapping range from each existing range.
   337  		// This will give us a collection of ranges that do not have any overlap.
   338  		range1Subtracted, err := rang[i].Subtract(overlapExpr)
   339  		if err != nil {
   340  			return nil, false, err
   341  		}
   342  		for _, newColExpr := range range1Subtracted {
   343  			ranges = append(ranges, rang.replace(i, newColExpr))
   344  		}
   345  		range2Subtracted, err := otherRange[i].Subtract(overlapExpr)
   346  		if err != nil {
   347  			return nil, false, err
   348  		}
   349  		for _, newColExpr := range range2Subtracted {
   350  			ranges = append(ranges, otherRange.replace(i, newColExpr))
   351  		}
   352  		// Create two ranges that replace each respective RangeColumnExpr with the overlapping one, giving us two
   353  		// ranges that are guaranteed to overlap (and are a subset of the originals). We can then recursively call this
   354  		// function on the new overlapping ranges which will eventually return a set of non-overlapping ranges.
   355  		newRanges, _, err := rang.replace(i, overlapExpr).RemoveOverlap(otherRange.replace(i, overlapExpr))
   356  		if err != nil {
   357  			return nil, false, err
   358  		}
   359  		ranges = append(ranges, newRanges...)
   360  		break
   361  	}
   362  
   363  	return ranges, true, nil
   364  }
   365  
   366  // String returns this Range as a string for display purposes.
   367  func (rang Range) String() string {
   368  	sb := strings.Builder{}
   369  	sb.WriteByte('{')
   370  	for i, colExpr := range rang {
   371  		if i != 0 {
   372  			sb.WriteString(", ")
   373  		}
   374  		sb.WriteString(colExpr.String())
   375  	}
   376  	sb.WriteByte('}')
   377  	return sb.String()
   378  }
   379  
   380  // DebugString returns this Range as a string for debugging purposes.
   381  func (rang Range) DebugString() string {
   382  	sb := strings.Builder{}
   383  	sb.WriteByte('{')
   384  	for i, colExpr := range rang {
   385  		if i != 0 {
   386  			sb.WriteString(", ")
   387  		}
   388  		sb.WriteString(colExpr.DebugString())
   389  	}
   390  	sb.WriteByte('}')
   391  	return sb.String()
   392  }
   393  
   394  // replace returns a new Range with the column at the given index replaced by the given RangeColumnExpr. Does NOT
   395  // perform any validation checks such as the index being within the bounds of the Range or the RangeColumnExpr having
   396  // the same type as the other columns, so use with caution.
   397  func (rang Range) replace(i int, colExpr RangeColumnExpr) Range {
   398  	newRange := rang.Copy()
   399  	newRange[i] = colExpr
   400  	return newRange
   401  }
   402  
   403  // IntersectRanges intersects each Range for each column expression. If a RangeColumnExpr ends up with no valid ranges
   404  // then a nil is returned.
   405  func IntersectRanges(ranges ...Range) Range {
   406  	if len(ranges) == 0 {
   407  		return nil
   408  	}
   409  	var rang Range
   410  	i := 0
   411  	for ; i < len(ranges); i++ {
   412  		rc := ranges[i]
   413  		if len(rc) == 0 {
   414  			continue
   415  		}
   416  		rang = rc
   417  		break
   418  	}
   419  	if len(rang) == 0 {
   420  		return nil
   421  	}
   422  	i++
   423  
   424  	for ; i < len(ranges); i++ {
   425  		rc := ranges[i]
   426  		if len(rc) == 0 {
   427  			continue
   428  		}
   429  		newRange, err := rang.Intersect(rc)
   430  		if err != nil || len(newRange) == 0 {
   431  			return nil
   432  		}
   433  	}
   434  	if len(rang) == 0 {
   435  		return nil
   436  	}
   437  	return rang
   438  }
   439  
   440  // RemoveOverlappingRanges removes all overlap between all ranges.
   441  func RemoveOverlappingRanges(ranges ...Range) (RangeCollection, error) {
   442  	if len(ranges) == 0 {
   443  		return nil, nil
   444  	}
   445  
   446  	colExprTypes := GetColExprTypes(ranges)
   447  	rangeTree, err := NewRangeColumnExprTree(ranges[0], colExprTypes)
   448  	if err != nil {
   449  		return nil, err
   450  	}
   451  	for i := 1; i < len(ranges); i++ {
   452  		rang := ranges[i]
   453  		connectingRanges, err := rangeTree.FindConnections(rang, 0)
   454  		if err != nil {
   455  			return nil, err
   456  		}
   457  		foundOverlap := false
   458  		for _, connectingRange := range connectingRanges {
   459  			if connectingRange != nil {
   460  				newRanges, ok, err := connectingRange.RemoveOverlap(rang)
   461  				if err != nil {
   462  					return nil, err
   463  				}
   464  				if ok {
   465  					foundOverlap = true
   466  					err = rangeTree.Remove(connectingRange)
   467  					if err != nil {
   468  						return nil, err
   469  					}
   470  					// Not the best idea but it works, will change to some other strategy at another time
   471  					ranges = append(ranges, newRanges...)
   472  					break
   473  				}
   474  			}
   475  		}
   476  		if !foundOverlap {
   477  			err = rangeTree.Insert(rang)
   478  			if err != nil {
   479  				return nil, err
   480  			}
   481  		}
   482  	}
   483  
   484  	rangeColl, err := rangeTree.GetRangeCollection()
   485  	if err != nil {
   486  		return nil, err
   487  	}
   488  
   489  	if err = validateRangeCollection(rangeColl); err != nil {
   490  		return nil, err
   491  	}
   492  
   493  	return rangeColl, nil
   494  }
   495  
   496  // SortRanges sorts the given ranges, returning a new slice of ranges.
   497  func SortRanges(ranges ...Range) ([]Range, error) {
   498  	sortedRanges := make([]Range, len(ranges))
   499  	copy(sortedRanges, ranges)
   500  	var err error
   501  	sort.Slice(sortedRanges, func(i, j int) bool {
   502  		cmp, cmpErr := sortedRanges[i].Compare(sortedRanges[j])
   503  		if cmpErr != nil {
   504  			err = cmpErr
   505  		}
   506  		return cmp == -1
   507  	})
   508  	return sortedRanges, err
   509  }
   510  
   511  func validateRangeCollection(rangeColl RangeCollection) error {
   512  	for i := 0; i < len(rangeColl)-1; i++ {
   513  		for j := i + 1; j < len(rangeColl); j++ {
   514  			if ok, err := rangeColl[i].Overlaps(rangeColl[j]); err != nil {
   515  				return err
   516  			} else if ok {
   517  				return fmt.Errorf("overlapping ranges: %s and %s", rangeColl[i].String(), rangeColl[j].String())
   518  			}
   519  		}
   520  	}
   521  	return nil
   522  }