vitess.io/vitess@v0.16.2/go/vt/key/key.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package key
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"encoding/hex"
    23  	"errors"
    24  	"fmt"
    25  	"math"
    26  	"regexp"
    27  	"strings"
    28  
    29  	"google.golang.org/protobuf/proto"
    30  
    31  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    32  )
    33  
    34  //
    35  // Uint64Key definitions
    36  //
    37  
    38  // Uint64Key is a uint64 that can be converted into a KeyspaceId.
    39  type Uint64Key uint64
    40  
    41  func (i Uint64Key) String() string {
    42  	return string(i.Bytes())
    43  }
    44  
    45  // Bytes returns the keyspace id (as bytes) associated with a Uint64Key.
    46  func (i Uint64Key) Bytes() []byte {
    47  	buf := make([]byte, 8)
    48  	binary.BigEndian.PutUint64(buf, uint64(i))
    49  	return buf
    50  }
    51  
    52  //
    53  // KeyRange helper methods
    54  //
    55  
    56  // EvenShardsKeyRange returns a key range definition for a shard at index "i",
    57  // assuming range based sharding with "n" equal-width shards in total.
    58  // i starts at 0.
    59  //
    60  // Example: (1, 2) returns the second out of two shards in total i.e. "80-".
    61  //
    62  // This function must not be used in the Vitess code base because Vitess also
    63  // supports shards with different widths. In that case, the output of this
    64  // function would be wrong.
    65  //
    66  // Note: start and end values have trailing zero bytes omitted.
    67  // For example, "80-" has only the first byte (0x80) set.
    68  // We do this to produce the same KeyRange objects as ParseKeyRangeParts() does.
    69  // Because it's using the Go hex methods, it's omitting trailing zero bytes as
    70  // well.
    71  func EvenShardsKeyRange(i, n int) (*topodatapb.KeyRange, error) {
    72  	if n <= 0 {
    73  		return nil, fmt.Errorf("the shard count must be > 0: %v", n)
    74  	}
    75  	if i >= n {
    76  		return nil, fmt.Errorf("the index of the shard must be less than the total number of shards: %v < %v", i, n)
    77  	}
    78  	if n&(n-1) != 0 {
    79  		return nil, fmt.Errorf("the shard count must be a power of two: %v", n)
    80  	}
    81  
    82  	// Determine the number of bytes which are required to represent any
    83  	// KeyRange start or end for the given n.
    84  	// This is required to trim the returned values to the same length e.g.
    85  	// (256, 512) should return 8000-8080 as shard key range.
    86  	minBytes := 0
    87  	for nn := Uint64Key(n - 1); nn > 0; nn >>= 8 {
    88  		minBytes++
    89  	}
    90  
    91  	width := Uint64Key(math.MaxUint64)/Uint64Key(n) + 1
    92  	start := Uint64Key(i) * width
    93  	end := start + width
    94  
    95  	// Note: The byte value is empty if start or end is the min or the max
    96  	// respectively.
    97  	startBytes := start.Bytes()[:minBytes]
    98  	endBytes := end.Bytes()[:minBytes]
    99  	if start == 0 {
   100  		startBytes = []byte{}
   101  	}
   102  	if end == 0 {
   103  		// Always set the end except for the last shard. In that case, the
   104  		// end value (2^64) flows over and is the same as 0.
   105  		endBytes = []byte{}
   106  	}
   107  	return &topodatapb.KeyRange{Start: startBytes, End: endBytes}, nil
   108  }
   109  
   110  // KeyRangeAdd adds two adjacent keyranges into a single value.
   111  // If the values are not adjacent, it returns false.
   112  func KeyRangeAdd(first, second *topodatapb.KeyRange) (*topodatapb.KeyRange, bool) {
   113  	if first == nil || second == nil {
   114  		return nil, false
   115  	}
   116  	if len(first.End) != 0 && bytes.Equal(first.End, second.Start) {
   117  		return &topodatapb.KeyRange{Start: first.Start, End: second.End}, true
   118  	}
   119  	if len(second.End) != 0 && bytes.Equal(second.End, first.Start) {
   120  		return &topodatapb.KeyRange{Start: second.Start, End: first.End}, true
   121  	}
   122  	return nil, false
   123  }
   124  
   125  // KeyRangeContains returns true if the provided id is in the keyrange.
   126  func KeyRangeContains(kr *topodatapb.KeyRange, id []byte) bool {
   127  	if kr == nil {
   128  		return true
   129  	}
   130  	return bytes.Compare(kr.Start, id) <= 0 &&
   131  		(len(kr.End) == 0 || bytes.Compare(id, kr.End) < 0)
   132  }
   133  
   134  // ParseKeyRangeParts parses a start and end hex values and build a proto KeyRange
   135  func ParseKeyRangeParts(start, end string) (*topodatapb.KeyRange, error) {
   136  	s, err := hex.DecodeString(start)
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  	e, err := hex.DecodeString(end)
   141  	if err != nil {
   142  		return nil, err
   143  	}
   144  	return &topodatapb.KeyRange{Start: s, End: e}, nil
   145  }
   146  
   147  // KeyRangeString prints a topodatapb.KeyRange
   148  func KeyRangeString(k *topodatapb.KeyRange) string {
   149  	if k == nil {
   150  		return "-"
   151  	}
   152  	return hex.EncodeToString(k.Start) + "-" + hex.EncodeToString(k.End)
   153  }
   154  
   155  // KeyRangeIsPartial returns true if the KeyRange does not cover the entire space.
   156  func KeyRangeIsPartial(kr *topodatapb.KeyRange) bool {
   157  	if kr == nil {
   158  		return false
   159  	}
   160  	return !(len(kr.Start) == 0 && len(kr.End) == 0)
   161  }
   162  
   163  // KeyRangeEqual returns true if both key ranges cover the same area
   164  func KeyRangeEqual(left, right *topodatapb.KeyRange) bool {
   165  	if left == nil {
   166  		return right == nil || (len(right.Start) == 0 && len(right.End) == 0)
   167  	}
   168  	if right == nil {
   169  		return len(left.Start) == 0 && len(left.End) == 0
   170  	}
   171  	return bytes.Equal(addPadding(left.Start), addPadding(right.Start)) &&
   172  		bytes.Equal(addPadding(left.End), addPadding(right.End))
   173  }
   174  
   175  // addPadding adds padding to make sure keyrange represents an 8 byte integer.
   176  // From Vitess docs:
   177  // A hash vindex produces an 8-byte number.
   178  // This means that all numbers less than 0x8000000000000000 will fall in shard -80.
   179  // Any number with the highest bit set will be >= 0x8000000000000000, and will therefore
   180  // belong to shard 80-.
   181  // This means that from a keyrange perspective -80 == 00-80 == 0000-8000 == 000000-800000
   182  // If we don't add this padding, we could run into issues when transitioning from keyranges
   183  // that use 2 bytes to 4 bytes.
   184  func addPadding(kr []byte) []byte {
   185  	paddedKr := make([]byte, 8)
   186  
   187  	for i := 0; i < len(kr); i++ {
   188  		paddedKr = append(paddedKr, kr[i])
   189  	}
   190  
   191  	for i := len(kr); i < 8; i++ {
   192  		paddedKr = append(paddedKr, 0)
   193  	}
   194  	return paddedKr
   195  }
   196  
   197  // KeyRangeStartSmaller returns true if right's keyrange start is _after_ left's start
   198  func KeyRangeStartSmaller(left, right *topodatapb.KeyRange) bool {
   199  	if left == nil {
   200  		return right != nil
   201  	}
   202  	if right == nil {
   203  		return false
   204  	}
   205  	return bytes.Compare(left.Start, right.Start) < 0
   206  }
   207  
   208  // KeyRangeStartEqual returns true if both key ranges have the same start
   209  func KeyRangeStartEqual(left, right *topodatapb.KeyRange) bool {
   210  	if left == nil {
   211  		return right == nil || len(right.Start) == 0
   212  	}
   213  	if right == nil {
   214  		return len(left.Start) == 0
   215  	}
   216  	return bytes.Equal(addPadding(left.Start), addPadding(right.Start))
   217  }
   218  
   219  // KeyRangeContiguous returns true if the end of the left key range exactly
   220  // matches the start of the right key range (i.e they are contigious)
   221  func KeyRangeContiguous(left, right *topodatapb.KeyRange) bool {
   222  	if left == nil {
   223  		return right == nil || (len(right.Start) == 0 && len(right.End) == 0)
   224  	}
   225  	if right == nil {
   226  		return len(left.Start) == 0 && len(left.End) == 0
   227  	}
   228  	return bytes.Equal(addPadding(left.End), addPadding(right.Start))
   229  }
   230  
   231  // KeyRangeEndEqual returns true if both key ranges have the same end
   232  func KeyRangeEndEqual(left, right *topodatapb.KeyRange) bool {
   233  	if left == nil {
   234  		return right == nil || len(right.End) == 0
   235  	}
   236  	if right == nil {
   237  		return len(left.End) == 0
   238  	}
   239  	return bytes.Equal(addPadding(left.End), addPadding(right.End))
   240  }
   241  
// For more info on the following functions, see:
// http://stackoverflow.com/questions/4879315/what-is-a-tidy-algorithm-to-find-overlapping-intervals
// Given two segments defined as (a,b) and (c,d) (with a<b and c<d):
// intersects = (b > c) && (a < d)
// overlap = min(b, d) - max(c, a)
   247  
   248  // KeyRangesIntersect returns true if some Keyspace values exist in both ranges.
   249  func KeyRangesIntersect(first, second *topodatapb.KeyRange) bool {
   250  	if first == nil || second == nil {
   251  		return true
   252  	}
   253  	return (len(first.End) == 0 || bytes.Compare(second.Start, first.End) < 0) &&
   254  		(len(second.End) == 0 || bytes.Compare(first.Start, second.End) < 0)
   255  }
   256  
   257  // KeyRangesOverlap returns the overlap between two KeyRanges.
   258  // They need to overlap, otherwise an error is returned.
   259  func KeyRangesOverlap(first, second *topodatapb.KeyRange) (*topodatapb.KeyRange, error) {
   260  	if !KeyRangesIntersect(first, second) {
   261  		return nil, fmt.Errorf("KeyRanges %v and %v don't overlap", first, second)
   262  	}
   263  	if first == nil {
   264  		return second, nil
   265  	}
   266  	if second == nil {
   267  		return first, nil
   268  	}
   269  	// compute max(c,a) and min(b,d)
   270  	// start with (a,b)
   271  	result := proto.Clone(first).(*topodatapb.KeyRange)
   272  	// if c > a, then use c
   273  	if bytes.Compare(second.Start, first.Start) > 0 {
   274  		result.Start = second.Start
   275  	}
   276  	// if b is maxed out, or
   277  	// (d is not maxed out and d < b)
   278  	//                           ^ valid test as neither b nor d are max
   279  	// then use d
   280  	if len(first.End) == 0 || (len(second.End) != 0 && bytes.Compare(second.End, first.End) < 0) {
   281  		result.End = second.End
   282  	}
   283  	return result, nil
   284  }
   285  
   286  // KeyRangeIncludes returns true if the first provided KeyRange, big,
   287  // contains the second KeyRange, small. If they intersect, but small
   288  // spills out, this returns false.
   289  func KeyRangeIncludes(big, small *topodatapb.KeyRange) bool {
   290  	if big == nil {
   291  		// The outside one covers everything, we're good.
   292  		return true
   293  	}
   294  	if small == nil {
   295  		// The smaller one covers everything, better have the
   296  		// bigger one also cover everything.
   297  		return len(big.Start) == 0 && len(big.End) == 0
   298  	}
   299  	// Now we check small.Start >= big.Start, and small.End <= big.End
   300  	if len(big.Start) != 0 && bytes.Compare(small.Start, big.Start) < 0 {
   301  		return false
   302  	}
   303  	if len(big.End) != 0 && (len(small.End) == 0 || bytes.Compare(small.End, big.End) > 0) {
   304  		return false
   305  	}
   306  	return true
   307  }
   308  
   309  // ParseShardingSpec parses a string that describes a sharding
   310  // specification. a-b-c-d will be parsed as a-b, b-c, c-d. The empty
   311  // string may serve both as the start and end of the keyspace: -a-b-
   312  // will be parsed as start-a, a-b, b-end.
   313  // "0" is treated as "-", to allow us to not have to special-case
   314  // client code.
   315  func ParseShardingSpec(spec string) ([]*topodatapb.KeyRange, error) {
   316  	parts := strings.Split(spec, "-")
   317  	if len(parts) == 1 {
   318  		if spec == "0" {
   319  			parts = []string{"", ""}
   320  		} else {
   321  			return nil, fmt.Errorf("malformed spec: doesn't define a range: %q", spec)
   322  		}
   323  	}
   324  	old := parts[0]
   325  	ranges := make([]*topodatapb.KeyRange, len(parts)-1)
   326  
   327  	for i, p := range parts[1:] {
   328  		if p == "" && i != (len(parts)-2) {
   329  			return nil, fmt.Errorf("malformed spec: MinKey/MaxKey cannot be in the middle of the spec: %q", spec)
   330  		}
   331  		if p != "" && p <= old {
   332  			return nil, fmt.Errorf("malformed spec: shard limits should be in order: %q", spec)
   333  		}
   334  		s, err := hex.DecodeString(old)
   335  		if err != nil {
   336  			return nil, err
   337  		}
   338  		if len(s) == 0 {
   339  			s = nil
   340  		}
   341  		e, err := hex.DecodeString(p)
   342  		if err != nil {
   343  			return nil, err
   344  		}
   345  		if len(e) == 0 {
   346  			e = nil
   347  		}
   348  		ranges[i] = &topodatapb.KeyRange{Start: s, End: e}
   349  		old = p
   350  	}
   351  	return ranges, nil
   352  }
   353  
   354  var krRegexp = regexp.MustCompile(`^[0-9a-fA-F]*-[0-9a-fA-F]*$`)
   355  
   356  // IsKeyRange returns true if the string represents a keyrange.
   357  func IsKeyRange(kr string) bool {
   358  	return krRegexp.MatchString(kr)
   359  }
   360  
   361  // GenerateShardRanges returns shard ranges assuming a keyspace with N shards.
   362  func GenerateShardRanges(shards int) ([]string, error) {
   363  	var format string
   364  	var maxShards int
   365  
   366  	switch {
   367  	case shards <= 0:
   368  		return nil, errors.New("shards must be greater than zero")
   369  	case shards <= 256:
   370  		format = "%02x"
   371  		maxShards = 256
   372  	case shards <= 65536:
   373  		format = "%04x"
   374  		maxShards = 65536
   375  	default:
   376  		return nil, errors.New("this function does not support more than 65336 shards in a single keyspace")
   377  	}
   378  
   379  	rangeFormatter := func(start, end int) string {
   380  		var (
   381  			startKid string
   382  			endKid   string
   383  		)
   384  
   385  		if start != 0 {
   386  			startKid = fmt.Sprintf(format, start)
   387  		}
   388  
   389  		if end != maxShards {
   390  			endKid = fmt.Sprintf(format, end)
   391  		}
   392  
   393  		return fmt.Sprintf("%s-%s", startKid, endKid)
   394  	}
   395  
   396  	start := 0
   397  	end := 0
   398  
   399  	// If shards does not divide evenly into maxShards, then there is some lossiness,
   400  	// where each shard is smaller than it should technically be (if, for example, size == 25.6).
   401  	// If we choose to keep everything in ints, then we have two choices:
   402  	// 	- Have every shard in #numshards be a uniform size, tack on an additional shard
   403  	//	  at the end of the range to account for the loss. This is bad because if you ask for
   404  	//	  7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards.
   405  	//	  It's also bad because one shard will have much different data distribution than the rest.
   406  	//	- Expand the final shard to include whatever is left in the keyrange. This will give the
   407  	//	  correct number of shards, which is good, but depending on how lossy each individual shard is,
   408  	//	  you could end with that final shard being significantly larger than the rest of the shards,
   409  	//	  so this doesn't solve the data distribution problem.
   410  	//
   411  	// By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense),
   412  	// we can re-truncate the integer end on each iteration, which spreads the lossiness more
   413  	// evenly across the shards.
   414  	//
   415  	// This implementation has no impact on shard numbers that are powers of 2, even at large numbers,
   416  	// which you can see in the tests.
   417  	size := float64(maxShards) / float64(shards)
   418  	realEnd := float64(0)
   419  	shardRanges := make([]string, 0, shards)
   420  
   421  	for i := 1; i <= shards; i++ {
   422  		realEnd = float64(i) * size
   423  
   424  		end = int(realEnd)
   425  		shardRanges = append(shardRanges, rangeFormatter(start, end))
   426  		start = end
   427  	}
   428  
   429  	return shardRanges, nil
   430  }