github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/pkg/util/overlap_merge.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package util
    15  
    16  import (
    17  	"bytes"
    18  	"sort"
    19  )
    20  
    21  // Range is an interval with a payload.
    22  type Range struct {
    23  	Start   []byte
    24  	End     []byte
    25  	Payload interface{}
    26  }
    27  
    28  // Covering represents a non-overlapping, but possibly non-contiguous, set of
    29  // intervals.
    30  type Covering []Range
    31  
    32  var _ sort.Interface = Covering{}
    33  
    34  func (c Covering) Len() int      { return len(c) }
    35  func (c Covering) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
    36  func (c Covering) Less(i, j int) bool {
    37  	if cmp := bytes.Compare(c[i].Start, c[j].Start); cmp != 0 {
    38  		return cmp < 0
    39  	}
    40  	return bytes.Compare(c[i].End, c[j].End) < 0
    41  }
    42  
    43  // OverlapCoveringMerge returns the set of intervals covering every range in the
    44  // input such that no output range crosses an input endpoint. The payloads are
    45  // returned as a `[]interface{}` and in the same order as they are in coverings.
    46  //
    47  // Example:
    48  //  covering 1: [1, 2) -> 'a', [3, 4) -> 'b', [6, 7) -> 'c'
    49  //  covering 2: [1, 5) -> 'd'
    50  //  output: [1, 2) -> 'ad', [2, 3) -> `d`, [3, 4) -> 'bd', [4, 5) -> 'd', [6, 7) -> 'c'
    51  //
    52  // The input is mutated (sorted). It is also assumed (and not checked) to be
    53  // valid (e.g. non-overlapping intervals in each covering).
    54  func OverlapCoveringMerge(coverings []Covering) []Range {
    55  	for _, covering := range coverings {
    56  		sort.Sort(covering)
    57  	}
    58  	var ret []Range
    59  	var previousEndKey []byte
    60  	for {
    61  		// Find the start key of the next range. It will either be the end key
    62  		// of the range just added to the output or the minimum start key
    63  		// remaining in the coverings (if there is a gap).
    64  		var startKey []byte
    65  		startKeySet := false
    66  		for _, covering := range coverings {
    67  			if len(covering) == 0 {
    68  				continue
    69  			}
    70  			if !startKeySet || bytes.Compare(covering[0].Start, startKey) < 0 {
    71  				startKey = covering[0].Start
    72  				startKeySet = true
    73  			}
    74  		}
    75  		if !startKeySet {
    76  			break
    77  		}
    78  		if bytes.Compare(startKey, previousEndKey) < 0 {
    79  			startKey = previousEndKey
    80  		}
    81  
    82  		// Find the end key of the next range. It's the minimum of all end keys
    83  		// of ranges that intersect the start and all start keys of ranges after
    84  		// the end key of the range just added to the output.
    85  		var endKey []byte
    86  		endKeySet := false
    87  		for _, covering := range coverings {
    88  			if len(covering) == 0 {
    89  				continue
    90  			}
    91  
    92  			if bytes.Compare(covering[0].Start, startKey) > 0 {
    93  				if !endKeySet || bytes.Compare(covering[0].Start, endKey) < 0 {
    94  					endKey = covering[0].Start
    95  					endKeySet = true
    96  				}
    97  			}
    98  			if !endKeySet || bytes.Compare(covering[0].End, endKey) < 0 {
    99  				endKey = covering[0].End
   100  				endKeySet = true
   101  			}
   102  		}
   103  
   104  		// Collect all payloads of ranges that intersect the start and end keys
   105  		// just selected. Also trim any ranges with an end key <= the one just
   106  		// selected, they will not be output after this.
   107  		var payloads []interface{}
   108  		for i := range coverings {
   109  			// Because of how we chose startKey and endKey, we know that
   110  			// coverings[i][0].End >= endKey and that coverings[i][0].Start is
   111  			// either <= startKey or >= endKey.
   112  
   113  			for len(coverings[i]) > 0 {
   114  				if bytes.Compare(coverings[i][0].Start, startKey) > 0 {
   115  					break
   116  				}
   117  				payloads = append(payloads, coverings[i][0].Payload)
   118  				if !bytes.Equal(coverings[i][0].End, endKey) {
   119  					break
   120  				}
   121  				coverings[i] = coverings[i][1:]
   122  			}
   123  		}
   124  
   125  		ret = append(ret, Range{
   126  			Start:   startKey,
   127  			End:     endKey,
   128  			Payload: payloads,
   129  		})
   130  		previousEndKey = endKey
   131  	}
   132  
   133  	return ret
   134  }