github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/covering/overlap_merge.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package covering
    12  
    13  import (
    14  	"bytes"
    15  	"reflect"
    16  	"sort"
    17  )
    18  
    19  // Range is an interval with a payload.
    20  type Range struct {
    21  	Start   []byte
    22  	End     []byte
    23  	Payload interface{}
    24  }
    25  
    26  // endpoints collections of all start and endpoints
    27  type endpoints [][]byte
    28  
    29  var _ sort.Interface = endpoints{}
    30  
    31  func (e endpoints) Len() int {
    32  	return len(e)
    33  }
    34  
    35  func (e endpoints) Less(i, j int) bool {
    36  	return bytes.Compare(e[i], e[j]) < 0
    37  }
    38  
    39  func (e endpoints) Swap(i, j int) {
    40  	e[i], e[j] = e[j], e[i]
    41  }
    42  
    43  // marker is a wrapper for payload and cover index in order to be able to
    44  // append payloads based on the order among different covers
    45  type marker struct {
    46  	payload       interface{}
    47  	coveringIndex int
    48  }
    49  
    50  type markers []marker
    51  
    52  var _ sort.Interface = markers{}
    53  
    54  func (m markers) Len() int {
    55  	return len(m)
    56  }
    57  
    58  func (m markers) Less(i, j int) bool {
    59  	// sort markers based on the cover index it appears
    60  	return m[i].coveringIndex < m[j].coveringIndex
    61  }
    62  
    63  func (m markers) Swap(i, j int) {
    64  	m[i], m[j] = m[j], m[i]
    65  }
    66  
// Covering represents a non-overlapping, but possibly non-contiguous, set of
// intervals. The non-overlapping property is an expectation placed on the
// caller: OverlapCoveringMerge assumes it and does not verify it.
type Covering []Range
    70  
    71  // OverlapCoveringMerge returns the set of intervals covering every range in the
    72  // input such that no output range crosses an input endpoint. The payloads are
    73  // returned as a `[]interface{}` and in the same order as they are in coverings.
    74  //
    75  // Example:
    76  //  covering 1: [1, 2) -> 'a', [3, 4) -> 'b', [6, 7) -> 'c'
    77  //  covering 2: [1, 5) -> 'd'
    78  //  output: [1, 2) -> 'ad', [2, 3) -> `d`, [3, 4) -> 'bd', [4, 5) -> 'd', [6, 7) -> 'c'
    79  //
    80  // The input is mutated (sorted). It is also assumed (and not checked) to be
    81  // valid (e.g. non-overlapping intervals in each covering).
    82  func OverlapCoveringMerge(coverings []Covering) []Range {
    83  
    84  	// TODO(dan): Verify that the ranges in each covering are non-overlapping.
    85  
    86  	// We would like to flatten all coverings on the the single line. Assume that total
    87  	// amount of coverings is N, then overall there are 2*N endpoints. Flatten coverings
    88  	// on single line and sort them will take O(NlogN) time. Later we pass over endpoints
    89  	// one by one and append payloads.
    90  
    91  	// captures all endpoints (starts and ends from) collected from all coverings
    92  	var totalRange endpoints
    93  	//
    94  	numsMap := map[string]struct{}{}
    95  	// map to store an empty sets, need to distinguish
    96  	// cause in such case start and end are equals, hence
    97  	// need to prevent duplications
    98  	emptySets := map[string]struct{}{}
    99  	// captures all start covering's endpoints with relevant payloads
   100  	startKeys := map[string]markers{}
   101  	// captures all end covering's endpoints with relevant payloads
   102  	endKeys := map[string]markers{}
   103  
   104  	for i, covering := range coverings {
   105  		for _, r := range covering {
   106  			startKeys[string(r.Start)] = append(startKeys[string(r.Start)], marker{
   107  				payload:       r.Payload,
   108  				coveringIndex: i,
   109  			})
   110  			if _, exist := numsMap[string(r.Start)]; !exist {
   111  				totalRange = append(totalRange, r.Start)
   112  				numsMap[string(r.Start)] = struct{}{}
   113  			}
   114  			endKeys[string(r.End)] = append(endKeys[string(r.End)], marker{
   115  				payload:       r.Payload,
   116  				coveringIndex: i,
   117  			})
   118  
   119  			if _, exist := numsMap[string(r.End)]; !exist {
   120  				totalRange = append(totalRange, r.End)
   121  				numsMap[string(r.End)] = struct{}{}
   122  			}
   123  
   124  			// if start and end differs then it's normal interval and
   125  			// we can continue to the next one
   126  			if !bytes.Equal(r.Start, r.End) {
   127  				continue
   128  			}
   129  			// otherwise it is an empty interval and we need to remember it
   130  			if _, exists := emptySets[string(r.Start)]; !exists {
   131  				totalRange = append(totalRange, r.End)
   132  				emptySets[string(r.Start)] = struct{}{}
   133  			}
   134  		}
   135  	}
   136  	sort.Sort(totalRange)
   137  
   138  	var prev []byte
   139  	var payloadsMarkers markers
   140  	var ret []Range
   141  
   142  	for it, next := range totalRange {
   143  		if len(prev) != 0 && len(payloadsMarkers) > 0 {
   144  			var payloads []interface{}
   145  			// make sure we preserve order of covers as we got them
   146  			sort.Sort(payloadsMarkers)
   147  
   148  			for _, marker := range payloadsMarkers {
   149  				payloads = append(payloads, marker.payload)
   150  			}
   151  
   152  			ret = append(ret, Range{
   153  				Start:   prev,
   154  				End:     next,
   155  				Payload: payloads,
   156  			})
   157  		}
   158  
   159  		if removeMarkers, ok := endKeys[string(next)]; ok {
   160  			for _, marker := range removeMarkers {
   161  				var index = -1
   162  				for i, p := range payloadsMarkers {
   163  					if reflect.DeepEqual(p.payload, marker.payload) {
   164  						index = i
   165  						break
   166  					}
   167  				}
   168  				if index != -1 { // if found remove
   169  					payloadsMarkers = append(payloadsMarkers[:index], payloadsMarkers[index+1:]...)
   170  				}
   171  			}
   172  		}
   173  
   174  		// we hit empty set, no need to add anything, since it was
   175  		// already added during previous iteration
   176  		if bytes.Equal(prev, next) && it > 0 {
   177  			// the check for it > 0 needed to take care
   178  			// of the case where start and end of an empty
   179  			// set presented with zero length slice, since
   180  			// this is a value prev and next both are initialized.
   181  			continue
   182  		}
   183  
   184  		if addMarkers, ok := startKeys[string(next)]; ok {
   185  			payloadsMarkers = append(payloadsMarkers, addMarkers...)
   186  		}
   187  
   188  		prev = next
   189  	}
   190  
   191  	return ret
   192  }