vitess.io/vitess@v0.16.2/go/vt/key/key.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package key 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "encoding/hex" 23 "errors" 24 "fmt" 25 "math" 26 "regexp" 27 "strings" 28 29 "google.golang.org/protobuf/proto" 30 31 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 32 ) 33 34 // 35 // Uint64Key definitions 36 // 37 38 // Uint64Key is a uint64 that can be converted into a KeyspaceId. 39 type Uint64Key uint64 40 41 func (i Uint64Key) String() string { 42 return string(i.Bytes()) 43 } 44 45 // Bytes returns the keyspace id (as bytes) associated with a Uint64Key. 46 func (i Uint64Key) Bytes() []byte { 47 buf := make([]byte, 8) 48 binary.BigEndian.PutUint64(buf, uint64(i)) 49 return buf 50 } 51 52 // 53 // KeyRange helper methods 54 // 55 56 // EvenShardsKeyRange returns a key range definition for a shard at index "i", 57 // assuming range based sharding with "n" equal-width shards in total. 58 // i starts at 0. 59 // 60 // Example: (1, 2) returns the second out of two shards in total i.e. "80-". 61 // 62 // This function must not be used in the Vitess code base because Vitess also 63 // supports shards with different widths. In that case, the output of this 64 // function would be wrong. 65 // 66 // Note: start and end values have trailing zero bytes omitted. 67 // For example, "80-" has only the first byte (0x80) set. 68 // We do this to produce the same KeyRange objects as ParseKeyRangeParts() does. 69 // Because it's using the Go hex methods, it's omitting trailing zero bytes as 70 // well. 71 func EvenShardsKeyRange(i, n int) (*topodatapb.KeyRange, error) { 72 if n <= 0 { 73 return nil, fmt.Errorf("the shard count must be > 0: %v", n) 74 } 75 if i >= n { 76 return nil, fmt.Errorf("the index of the shard must be less than the total number of shards: %v < %v", i, n) 77 } 78 if n&(n-1) != 0 { 79 return nil, fmt.Errorf("the shard count must be a power of two: %v", n) 80 } 81 82 // Determine the number of bytes which are required to represent any 83 // KeyRange start or end for the given n. 84 // This is required to trim the returned values to the same length e.g. 85 // (256, 512) should return 8000-8080 as shard key range. 86 minBytes := 0 87 for nn := Uint64Key(n - 1); nn > 0; nn >>= 8 { 88 minBytes++ 89 } 90 91 width := Uint64Key(math.MaxUint64)/Uint64Key(n) + 1 92 start := Uint64Key(i) * width 93 end := start + width 94 95 // Note: The byte value is empty if start or end is the min or the max 96 // respectively. 97 startBytes := start.Bytes()[:minBytes] 98 endBytes := end.Bytes()[:minBytes] 99 if start == 0 { 100 startBytes = []byte{} 101 } 102 if end == 0 { 103 // Always set the end except for the last shard. In that case, the 104 // end value (2^64) flows over and is the same as 0. 105 endBytes = []byte{} 106 } 107 return &topodatapb.KeyRange{Start: startBytes, End: endBytes}, nil 108 } 109 110 // KeyRangeAdd adds two adjacent keyranges into a single value. 111 // If the values are not adjacent, it returns false. 112 func KeyRangeAdd(first, second *topodatapb.KeyRange) (*topodatapb.KeyRange, bool) { 113 if first == nil || second == nil { 114 return nil, false 115 } 116 if len(first.End) != 0 && bytes.Equal(first.End, second.Start) { 117 return &topodatapb.KeyRange{Start: first.Start, End: second.End}, true 118 } 119 if len(second.End) != 0 && bytes.Equal(second.End, first.Start) { 120 return &topodatapb.KeyRange{Start: second.Start, End: first.End}, true 121 } 122 return nil, false 123 } 124 125 // KeyRangeContains returns true if the provided id is in the keyrange. 126 func KeyRangeContains(kr *topodatapb.KeyRange, id []byte) bool { 127 if kr == nil { 128 return true 129 } 130 return bytes.Compare(kr.Start, id) <= 0 && 131 (len(kr.End) == 0 || bytes.Compare(id, kr.End) < 0) 132 } 133 134 // ParseKeyRangeParts parses a start and end hex values and build a proto KeyRange 135 func ParseKeyRangeParts(start, end string) (*topodatapb.KeyRange, error) { 136 s, err := hex.DecodeString(start) 137 if err != nil { 138 return nil, err 139 } 140 e, err := hex.DecodeString(end) 141 if err != nil { 142 return nil, err 143 } 144 return &topodatapb.KeyRange{Start: s, End: e}, nil 145 } 146 147 // KeyRangeString prints a topodatapb.KeyRange 148 func KeyRangeString(k *topodatapb.KeyRange) string { 149 if k == nil { 150 return "-" 151 } 152 return hex.EncodeToString(k.Start) + "-" + hex.EncodeToString(k.End) 153 } 154 155 // KeyRangeIsPartial returns true if the KeyRange does not cover the entire space. 156 func KeyRangeIsPartial(kr *topodatapb.KeyRange) bool { 157 if kr == nil { 158 return false 159 } 160 return !(len(kr.Start) == 0 && len(kr.End) == 0) 161 } 162 163 // KeyRangeEqual returns true if both key ranges cover the same area 164 func KeyRangeEqual(left, right *topodatapb.KeyRange) bool { 165 if left == nil { 166 return right == nil || (len(right.Start) == 0 && len(right.End) == 0) 167 } 168 if right == nil { 169 return len(left.Start) == 0 && len(left.End) == 0 170 } 171 return bytes.Equal(addPadding(left.Start), addPadding(right.Start)) && 172 bytes.Equal(addPadding(left.End), addPadding(right.End)) 173 } 174 175 // addPadding adds padding to make sure keyrange represents an 8 byte integer. 176 // From Vitess docs: 177 // A hash vindex produces an 8-byte number. 178 // This means that all numbers less than 0x8000000000000000 will fall in shard -80. 179 // Any number with the highest bit set will be >= 0x8000000000000000, and will therefore 180 // belong to shard 80-. 181 // This means that from a keyrange perspective -80 == 00-80 == 0000-8000 == 000000-800000 182 // If we don't add this padding, we could run into issues when transitioning from keyranges 183 // that use 2 bytes to 4 bytes. 184 func addPadding(kr []byte) []byte { 185 paddedKr := make([]byte, 8) 186 187 for i := 0; i < len(kr); i++ { 188 paddedKr = append(paddedKr, kr[i]) 189 } 190 191 for i := len(kr); i < 8; i++ { 192 paddedKr = append(paddedKr, 0) 193 } 194 return paddedKr 195 } 196 197 // KeyRangeStartSmaller returns true if right's keyrange start is _after_ left's start 198 func KeyRangeStartSmaller(left, right *topodatapb.KeyRange) bool { 199 if left == nil { 200 return right != nil 201 } 202 if right == nil { 203 return false 204 } 205 return bytes.Compare(left.Start, right.Start) < 0 206 } 207 208 // KeyRangeStartEqual returns true if both key ranges have the same start 209 func KeyRangeStartEqual(left, right *topodatapb.KeyRange) bool { 210 if left == nil { 211 return right == nil || len(right.Start) == 0 212 } 213 if right == nil { 214 return len(left.Start) == 0 215 } 216 return bytes.Equal(addPadding(left.Start), addPadding(right.Start)) 217 } 218 219 // KeyRangeContiguous returns true if the end of the left key range exactly 220 // matches the start of the right key range (i.e they are contigious) 221 func KeyRangeContiguous(left, right *topodatapb.KeyRange) bool { 222 if left == nil { 223 return right == nil || (len(right.Start) == 0 && len(right.End) == 0) 224 } 225 if right == nil { 226 return len(left.Start) == 0 && len(left.End) == 0 227 } 228 return bytes.Equal(addPadding(left.End), addPadding(right.Start)) 229 } 230 231 // KeyRangeEndEqual returns true if both key ranges have the same end 232 func KeyRangeEndEqual(left, right *topodatapb.KeyRange) bool { 233 if left == nil { 234 return right == nil || len(right.End) == 0 235 } 236 if right == nil { 237 return len(left.End) == 0 238 } 239 return bytes.Equal(addPadding(left.End), addPadding(right.End)) 240 } 241 242 // For more info on the following functions, see: 243 // See: http://stackoverflow.com/questions/4879315/what-is-a-tidy-algorithm-to-find-overlapping-intervals 244 // two segments defined as (a,b) and (c,d) (with a<b and c<d): 245 // intersects = (b > c) && (a < d) 246 // overlap = min(b, d) - max(c, a) 247 248 // KeyRangesIntersect returns true if some Keyspace values exist in both ranges. 249 func KeyRangesIntersect(first, second *topodatapb.KeyRange) bool { 250 if first == nil || second == nil { 251 return true 252 } 253 return (len(first.End) == 0 || bytes.Compare(second.Start, first.End) < 0) && 254 (len(second.End) == 0 || bytes.Compare(first.Start, second.End) < 0) 255 } 256 257 // KeyRangesOverlap returns the overlap between two KeyRanges. 258 // They need to overlap, otherwise an error is returned. 259 func KeyRangesOverlap(first, second *topodatapb.KeyRange) (*topodatapb.KeyRange, error) { 260 if !KeyRangesIntersect(first, second) { 261 return nil, fmt.Errorf("KeyRanges %v and %v don't overlap", first, second) 262 } 263 if first == nil { 264 return second, nil 265 } 266 if second == nil { 267 return first, nil 268 } 269 // compute max(c,a) and min(b,d) 270 // start with (a,b) 271 result := proto.Clone(first).(*topodatapb.KeyRange) 272 // if c > a, then use c 273 if bytes.Compare(second.Start, first.Start) > 0 { 274 result.Start = second.Start 275 } 276 // if b is maxed out, or 277 // (d is not maxed out and d < b) 278 // ^ valid test as neither b nor d are max 279 // then use d 280 if len(first.End) == 0 || (len(second.End) != 0 && bytes.Compare(second.End, first.End) < 0) { 281 result.End = second.End 282 } 283 return result, nil 284 } 285 286 // KeyRangeIncludes returns true if the first provided KeyRange, big, 287 // contains the second KeyRange, small. If they intersect, but small 288 // spills out, this returns false. 289 func KeyRangeIncludes(big, small *topodatapb.KeyRange) bool { 290 if big == nil { 291 // The outside one covers everything, we're good. 292 return true 293 } 294 if small == nil { 295 // The smaller one covers everything, better have the 296 // bigger one also cover everything. 297 return len(big.Start) == 0 && len(big.End) == 0 298 } 299 // Now we check small.Start >= big.Start, and small.End <= big.End 300 if len(big.Start) != 0 && bytes.Compare(small.Start, big.Start) < 0 { 301 return false 302 } 303 if len(big.End) != 0 && (len(small.End) == 0 || bytes.Compare(small.End, big.End) > 0) { 304 return false 305 } 306 return true 307 } 308 309 // ParseShardingSpec parses a string that describes a sharding 310 // specification. a-b-c-d will be parsed as a-b, b-c, c-d. The empty 311 // string may serve both as the start and end of the keyspace: -a-b- 312 // will be parsed as start-a, a-b, b-end. 313 // "0" is treated as "-", to allow us to not have to special-case 314 // client code. 315 func ParseShardingSpec(spec string) ([]*topodatapb.KeyRange, error) { 316 parts := strings.Split(spec, "-") 317 if len(parts) == 1 { 318 if spec == "0" { 319 parts = []string{"", ""} 320 } else { 321 return nil, fmt.Errorf("malformed spec: doesn't define a range: %q", spec) 322 } 323 } 324 old := parts[0] 325 ranges := make([]*topodatapb.KeyRange, len(parts)-1) 326 327 for i, p := range parts[1:] { 328 if p == "" && i != (len(parts)-2) { 329 return nil, fmt.Errorf("malformed spec: MinKey/MaxKey cannot be in the middle of the spec: %q", spec) 330 } 331 if p != "" && p <= old { 332 return nil, fmt.Errorf("malformed spec: shard limits should be in order: %q", spec) 333 } 334 s, err := hex.DecodeString(old) 335 if err != nil { 336 return nil, err 337 } 338 if len(s) == 0 { 339 s = nil 340 } 341 e, err := hex.DecodeString(p) 342 if err != nil { 343 return nil, err 344 } 345 if len(e) == 0 { 346 e = nil 347 } 348 ranges[i] = &topodatapb.KeyRange{Start: s, End: e} 349 old = p 350 } 351 return ranges, nil 352 } 353 354 var krRegexp = regexp.MustCompile(`^[0-9a-fA-F]*-[0-9a-fA-F]*$`) 355 356 // IsKeyRange returns true if the string represents a keyrange. 357 func IsKeyRange(kr string) bool { 358 return krRegexp.MatchString(kr) 359 } 360 361 // GenerateShardRanges returns shard ranges assuming a keyspace with N shards. 362 func GenerateShardRanges(shards int) ([]string, error) { 363 var format string 364 var maxShards int 365 366 switch { 367 case shards <= 0: 368 return nil, errors.New("shards must be greater than zero") 369 case shards <= 256: 370 format = "%02x" 371 maxShards = 256 372 case shards <= 65536: 373 format = "%04x" 374 maxShards = 65536 375 default: 376 return nil, errors.New("this function does not support more than 65336 shards in a single keyspace") 377 } 378 379 rangeFormatter := func(start, end int) string { 380 var ( 381 startKid string 382 endKid string 383 ) 384 385 if start != 0 { 386 startKid = fmt.Sprintf(format, start) 387 } 388 389 if end != maxShards { 390 endKid = fmt.Sprintf(format, end) 391 } 392 393 return fmt.Sprintf("%s-%s", startKid, endKid) 394 } 395 396 start := 0 397 end := 0 398 399 // If shards does not divide evenly into maxShards, then there is some lossiness, 400 // where each shard is smaller than it should technically be (if, for example, size == 25.6). 401 // If we choose to keep everything in ints, then we have two choices: 402 // - Have every shard in #numshards be a uniform size, tack on an additional shard 403 // at the end of the range to account for the loss. This is bad because if you ask for 404 // 7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards. 405 // It's also bad because one shard will have much different data distribution than the rest. 406 // - Expand the final shard to include whatever is left in the keyrange. This will give the 407 // correct number of shards, which is good, but depending on how lossy each individual shard is, 408 // you could end with that final shard being significantly larger than the rest of the shards, 409 // so this doesn't solve the data distribution problem. 410 // 411 // By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense), 412 // we can re-truncate the integer end on each iteration, which spreads the lossiness more 413 // evenly across the shards. 414 // 415 // This implementation has no impact on shard numbers that are powers of 2, even at large numbers, 416 // which you can see in the tests. 417 size := float64(maxShards) / float64(shards) 418 realEnd := float64(0) 419 shardRanges := make([]string, 0, shards) 420 421 for i := 1; i <= shards; i++ { 422 realEnd = float64(i) * size 423 424 end = int(realEnd) 425 shardRanges = append(shardRanges, rangeFormatter(start, end)) 426 start = end 427 } 428 429 return shardRanges, nil 430 }