github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/roachpb/metadata.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package roachpb 12 13 import ( 14 "bytes" 15 "fmt" 16 "sort" 17 "strconv" 18 "strings" 19 20 "github.com/cockroachdb/cockroach/pkg/util/hlc" 21 "github.com/cockroachdb/cockroach/pkg/util/humanizeutil" 22 "github.com/cockroachdb/errors" 23 ) 24 25 // NodeID is a custom type for a cockroach node ID. (not a raft node ID) 26 // 0 is not a valid NodeID. 27 type NodeID int32 28 29 // String implements the fmt.Stringer interface. 30 // It is used to format the ID for use in Gossip keys. 31 func (n NodeID) String() string { 32 return strconv.FormatInt(int64(n), 10) 33 } 34 35 // StoreID is a custom type for a cockroach store ID. 36 type StoreID int32 37 38 // StoreIDSlice implements sort.Interface. 39 type StoreIDSlice []StoreID 40 41 func (s StoreIDSlice) Len() int { return len(s) } 42 func (s StoreIDSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } 43 func (s StoreIDSlice) Less(i, j int) bool { return s[i] < s[j] } 44 45 // String implements the fmt.Stringer interface. 46 // It is used to format the ID for use in Gossip keys. 47 func (n StoreID) String() string { 48 return strconv.FormatInt(int64(n), 10) 49 } 50 51 // A RangeID is a unique ID associated to a Raft consensus group. 52 type RangeID int64 53 54 // String implements the fmt.Stringer interface. 55 func (r RangeID) String() string { 56 return strconv.FormatInt(int64(r), 10) 57 } 58 59 // RangeIDSlice implements sort.Interface. 60 type RangeIDSlice []RangeID 61 62 func (r RangeIDSlice) Len() int { return len(r) } 63 func (r RangeIDSlice) Swap(i, j int) { r[i], r[j] = r[j], r[i] } 64 func (r RangeIDSlice) Less(i, j int) bool { return r[i] < r[j] } 65 66 // ReplicaID is a custom type for a range replica ID. 67 type ReplicaID int32 68 69 // String implements the fmt.Stringer interface. 70 func (r ReplicaID) String() string { 71 return strconv.FormatInt(int64(r), 10) 72 } 73 74 // Equals returns whether the Attributes lists are equivalent. Attributes lists 75 // are treated as sets, meaning that ordering and duplicates are ignored. 76 func (a Attributes) Equals(b Attributes) bool { 77 // This is O(n^2), but Attribute lists should never be long enough for that 78 // to matter, and allocating memory every time this is called would be worse. 79 if len(a.Attrs) != len(b.Attrs) { 80 return false 81 } 82 for _, aAttr := range a.Attrs { 83 var found bool 84 for _, bAttr := range b.Attrs { 85 if aAttr == bAttr { 86 found = true 87 break 88 } 89 } 90 if !found { 91 return false 92 } 93 } 94 return true 95 } 96 97 // String implements the fmt.Stringer interface. 98 func (a Attributes) String() string { 99 return strings.Join(a.Attrs, ",") 100 } 101 102 // NewRangeDescriptor returns a RangeDescriptor populated from the input. 103 func NewRangeDescriptor( 104 rangeID RangeID, start, end RKey, replicas ReplicaDescriptors, 105 ) *RangeDescriptor { 106 repls := append([]ReplicaDescriptor(nil), replicas.All()...) 107 for i := range repls { 108 repls[i].ReplicaID = ReplicaID(i + 1) 109 } 110 desc := &RangeDescriptor{ 111 RangeID: rangeID, 112 StartKey: start, 113 EndKey: end, 114 NextReplicaID: ReplicaID(len(repls) + 1), 115 } 116 desc.SetReplicas(MakeReplicaDescriptors(repls)) 117 return desc 118 } 119 120 // RSpan returns the RangeDescriptor's resolved span. 121 func (r *RangeDescriptor) RSpan() RSpan { 122 return RSpan{Key: r.StartKey, EndKey: r.EndKey} 123 } 124 125 // ContainsKey returns whether this RangeDescriptor contains the specified key. 126 func (r *RangeDescriptor) ContainsKey(key RKey) bool { 127 return r.RSpan().ContainsKey(key) 128 } 129 130 // ContainsKeyInverted returns whether this RangeDescriptor contains the 131 // specified key using an inverted range. See RSpan.ContainsKeyInverted. 132 func (r *RangeDescriptor) ContainsKeyInverted(key RKey) bool { 133 return r.RSpan().ContainsKeyInverted(key) 134 } 135 136 // ContainsKeyRange returns whether this RangeDescriptor contains the specified 137 // key range from start (inclusive) to end (exclusive). 138 // If end is empty, returns ContainsKey(start). 139 func (r *RangeDescriptor) ContainsKeyRange(start, end RKey) bool { 140 return r.RSpan().ContainsKeyRange(start, end) 141 } 142 143 // Replicas returns the set of nodes/stores on which replicas of this range are 144 // stored. 145 func (r *RangeDescriptor) Replicas() ReplicaDescriptors { 146 return MakeReplicaDescriptors(r.InternalReplicas) 147 } 148 149 // SetReplicas overwrites the set of nodes/stores on which replicas of this 150 // range are stored. 151 func (r *RangeDescriptor) SetReplicas(replicas ReplicaDescriptors) { 152 r.InternalReplicas = replicas.AsProto() 153 } 154 155 // SetReplicaType changes the type of the replica with the given ID to the given 156 // type. Returns zero values if the replica was not found and the updated 157 // descriptor, the previous type, and true, otherwise. 158 func (r *RangeDescriptor) SetReplicaType( 159 nodeID NodeID, storeID StoreID, typ ReplicaType, 160 ) (ReplicaDescriptor, ReplicaType, bool) { 161 for i := range r.InternalReplicas { 162 desc := &r.InternalReplicas[i] 163 if desc.StoreID == storeID && desc.NodeID == nodeID { 164 prevTyp := desc.GetType() 165 if typ != VOTER_FULL { 166 desc.Type = &typ 167 } else { 168 // For 19.1 compatibility. 169 desc.Type = nil 170 } 171 return *desc, prevTyp, true 172 } 173 } 174 return ReplicaDescriptor{}, 0, false 175 } 176 177 // AddReplica adds a replica on the given node and store with the supplied type. 178 // It auto-assigns a ReplicaID and returns the inserted ReplicaDescriptor. 179 func (r *RangeDescriptor) AddReplica( 180 nodeID NodeID, storeID StoreID, typ ReplicaType, 181 ) ReplicaDescriptor { 182 var typPtr *ReplicaType 183 // For 19.1 compatibility, use nil instead of VOTER_FULL. 184 if typ != VOTER_FULL { 185 typPtr = &typ 186 } 187 toAdd := ReplicaDescriptor{ 188 NodeID: nodeID, 189 StoreID: storeID, 190 ReplicaID: r.NextReplicaID, 191 Type: typPtr, 192 } 193 rs := r.Replicas() 194 rs.AddReplica(toAdd) 195 r.SetReplicas(rs) 196 r.NextReplicaID++ 197 return toAdd 198 } 199 200 // RemoveReplica removes the matching replica from this range's set and returns 201 // it. If it wasn't found to remove, false is returned. 202 func (r *RangeDescriptor) RemoveReplica(nodeID NodeID, storeID StoreID) (ReplicaDescriptor, bool) { 203 rs := r.Replicas() 204 removedRepl, ok := rs.RemoveReplica(nodeID, storeID) 205 if ok { 206 r.SetReplicas(rs) 207 } 208 return removedRepl, ok 209 } 210 211 // GetReplicaDescriptor returns the replica which matches the specified store 212 // ID. 213 func (r *RangeDescriptor) GetReplicaDescriptor(storeID StoreID) (ReplicaDescriptor, bool) { 214 for _, repDesc := range r.Replicas().All() { 215 if repDesc.StoreID == storeID { 216 return repDesc, true 217 } 218 } 219 return ReplicaDescriptor{}, false 220 } 221 222 // GetReplicaDescriptorByID returns the replica which matches the specified store 223 // ID. 224 func (r *RangeDescriptor) GetReplicaDescriptorByID(replicaID ReplicaID) (ReplicaDescriptor, bool) { 225 for _, repDesc := range r.Replicas().All() { 226 if repDesc.ReplicaID == replicaID { 227 return repDesc, true 228 } 229 } 230 return ReplicaDescriptor{}, false 231 } 232 233 // IsInitialized returns false if this descriptor represents an 234 // uninitialized range. 235 // TODO(bdarnell): unify this with Validate(). 236 func (r *RangeDescriptor) IsInitialized() bool { 237 return len(r.EndKey) != 0 238 } 239 240 // IncrementGeneration increments the generation of this RangeDescriptor. 241 // This method mutates the receiver; do not call it with shared RangeDescriptors. 242 func (r *RangeDescriptor) IncrementGeneration() { 243 r.Generation++ 244 } 245 246 // GetStickyBit returns the sticky bit of this RangeDescriptor. 247 func (r *RangeDescriptor) GetStickyBit() hlc.Timestamp { 248 if r.StickyBit == nil { 249 return hlc.Timestamp{} 250 } 251 return *r.StickyBit 252 } 253 254 // Validate performs some basic validation of the contents of a range descriptor. 255 func (r *RangeDescriptor) Validate() error { 256 if r.NextReplicaID == 0 { 257 return errors.Errorf("NextReplicaID must be non-zero") 258 } 259 seen := map[ReplicaID]struct{}{} 260 stores := map[StoreID]struct{}{} 261 for i, rep := range r.Replicas().All() { 262 if err := rep.Validate(); err != nil { 263 return errors.Errorf("replica %d is invalid: %s", i, err) 264 } 265 if rep.ReplicaID >= r.NextReplicaID { 266 return errors.Errorf("ReplicaID %d must be less than NextReplicaID %d", 267 rep.ReplicaID, r.NextReplicaID) 268 } 269 270 if _, ok := seen[rep.ReplicaID]; ok { 271 return errors.Errorf("ReplicaID %d was reused", rep.ReplicaID) 272 } 273 seen[rep.ReplicaID] = struct{}{} 274 275 if _, ok := stores[rep.StoreID]; ok { 276 return errors.Errorf("StoreID %d was reused", rep.StoreID) 277 } 278 stores[rep.StoreID] = struct{}{} 279 } 280 return nil 281 } 282 283 func (r RangeDescriptor) String() string { 284 var buf bytes.Buffer 285 fmt.Fprintf(&buf, "r%d:", r.RangeID) 286 287 if !r.IsInitialized() { 288 buf.WriteString("{-}") 289 } else { 290 buf.WriteString(r.RSpan().String()) 291 } 292 buf.WriteString(" [") 293 294 if allReplicas := r.Replicas().All(); len(allReplicas) > 0 { 295 for i, rep := range allReplicas { 296 if i > 0 { 297 buf.WriteString(", ") 298 } 299 buf.WriteString(rep.String()) 300 } 301 } else { 302 buf.WriteString("<no replicas>") 303 } 304 fmt.Fprintf(&buf, ", next=%d, gen=%d", r.NextReplicaID, r.Generation) 305 if s := r.GetStickyBit(); !s.IsEmpty() { 306 fmt.Fprintf(&buf, ", sticky=%s", s) 307 } 308 buf.WriteString("]") 309 310 return buf.String() 311 } 312 313 // SafeMessage implements the SafeMessager interface. 314 // 315 // This method should be kept in sync with the String() method, except for the Start/End keys, which are customer data. 316 func (r RangeDescriptor) SafeMessage() string { 317 var buf bytes.Buffer 318 fmt.Fprintf(&buf, "r%d:", r.RangeID) 319 if !r.IsInitialized() { 320 buf.WriteString("{-}") 321 } 322 buf.WriteString(" [") 323 324 if allReplicas := r.Replicas().All(); len(allReplicas) > 0 { 325 for i, rep := range allReplicas { 326 if i > 0 { 327 buf.WriteString(", ") 328 } 329 buf.WriteString(rep.SafeMessage()) 330 } 331 } else { 332 buf.WriteString("<no replicas>") 333 } 334 fmt.Fprintf(&buf, ", next=%d, gen=%d", r.NextReplicaID, r.Generation) 335 if s := r.GetStickyBit(); !s.IsEmpty() { 336 fmt.Fprintf(&buf, ", sticky=%s", s) 337 } 338 buf.WriteString("]") 339 340 return buf.String() 341 } 342 343 func (r ReplicationTarget) String() string { 344 return fmt.Sprintf("n%d,s%d", r.NodeID, r.StoreID) 345 } 346 347 func (r ReplicaDescriptor) String() string { 348 var buf bytes.Buffer 349 fmt.Fprintf(&buf, "(n%d,s%d):", r.NodeID, r.StoreID) 350 if r.ReplicaID == 0 { 351 buf.WriteString("?") 352 } else { 353 fmt.Fprintf(&buf, "%d", r.ReplicaID) 354 } 355 if typ := r.GetType(); typ != VOTER_FULL { 356 buf.WriteString(typ.String()) 357 } 358 return buf.String() 359 } 360 361 // SafeMessage implements the SafeMessager interface. 362 // 363 // This method should be kept in sync with the String() method, while there is no customer data in the ReplicaDescriptor 364 // today, we maintain this method for future compatibility, since its used from other places 365 // such as RangeDescriptor#SafeMessage() 366 func (r ReplicaDescriptor) SafeMessage() string { 367 var buf bytes.Buffer 368 fmt.Fprintf(&buf, "(n%d,s%d):", r.NodeID, r.StoreID) 369 if r.ReplicaID == 0 { 370 buf.WriteString("?") 371 } else { 372 fmt.Fprintf(&buf, "%d", r.ReplicaID) 373 } 374 if typ := r.GetType(); typ != VOTER_FULL { 375 buf.WriteString(typ.String()) 376 } 377 return buf.String() 378 } 379 380 // Validate performs some basic validation of the contents of a replica descriptor. 381 func (r ReplicaDescriptor) Validate() error { 382 if r.NodeID == 0 { 383 return errors.Errorf("NodeID must not be zero") 384 } 385 if r.StoreID == 0 { 386 return errors.Errorf("StoreID must not be zero") 387 } 388 if r.ReplicaID == 0 { 389 return errors.Errorf("ReplicaID must not be zero") 390 } 391 return nil 392 } 393 394 // GetType returns the type of this ReplicaDescriptor. 395 func (r ReplicaDescriptor) GetType() ReplicaType { 396 if r.Type == nil { 397 return VOTER_FULL 398 } 399 return *r.Type 400 } 401 402 // PercentilesFromData derives percentiles from a slice of data points. 403 // Sorts the input data if it isn't already sorted. 404 func PercentilesFromData(data []float64) Percentiles { 405 sort.Float64s(data) 406 407 return Percentiles{ 408 P10: percentileFromSortedData(data, 10), 409 P25: percentileFromSortedData(data, 25), 410 P50: percentileFromSortedData(data, 50), 411 P75: percentileFromSortedData(data, 75), 412 P90: percentileFromSortedData(data, 90), 413 PMax: percentileFromSortedData(data, 100), 414 } 415 } 416 417 func percentileFromSortedData(data []float64, percent float64) float64 { 418 if len(data) == 0 { 419 return 0 420 } 421 if percent < 0 { 422 percent = 0 423 } 424 if percent >= 100 { 425 return data[len(data)-1] 426 } 427 // TODO(a-robinson): Use go's rounding function once we're using 1.10. 428 idx := int(float64(len(data)) * percent / 100.0) 429 return data[idx] 430 } 431 432 // String returns a string representation of the Percentiles. 433 func (p Percentiles) String() string { 434 return fmt.Sprintf("p10=%.2f p25=%.2f p50=%.2f p75=%.2f p90=%.2f pMax=%.2f", 435 p.P10, p.P25, p.P50, p.P75, p.P90, p.PMax) 436 } 437 438 // String returns a string representation of the StoreCapacity. 439 func (sc StoreCapacity) String() string { 440 return fmt.Sprintf("disk (capacity=%s, available=%s, used=%s, logicalBytes=%s), "+ 441 "ranges=%d, leases=%d, queries=%.2f, writes=%.2f, "+ 442 "bytesPerReplica={%s}, writesPerReplica={%s}", 443 humanizeutil.IBytes(sc.Capacity), humanizeutil.IBytes(sc.Available), 444 humanizeutil.IBytes(sc.Used), humanizeutil.IBytes(sc.LogicalBytes), 445 sc.RangeCount, sc.LeaseCount, sc.QueriesPerSecond, sc.WritesPerSecond, 446 sc.BytesPerReplica, sc.WritesPerReplica) 447 } 448 449 // FractionUsed computes the fraction of storage capacity that is in use. 450 func (sc StoreCapacity) FractionUsed() float64 { 451 if sc.Capacity == 0 { 452 return 0 453 } 454 // Prefer computing the fraction of available disk space used by considering 455 // anything on the disk that isn't in the store's data directory just a sunk 456 // cost, not truly part of the disk's capacity. This means that the disk's 457 // capacity is really just the available space plus cockroach's usage. 458 // 459 // Fall back to a more pessimistic calcuation of disk usage if we don't know 460 // how much space the store's data is taking up. 461 if sc.Used == 0 { 462 return float64(sc.Capacity-sc.Available) / float64(sc.Capacity) 463 } 464 return float64(sc.Used) / float64(sc.Available+sc.Used) 465 } 466 467 // String returns a string representation of the Tier. 468 func (t Tier) String() string { 469 return fmt.Sprintf("%s=%s", t.Key, t.Value) 470 } 471 472 // FromString parses the string representation into the Tier. 473 func (t *Tier) FromString(tier string) error { 474 parts := strings.Split(tier, "=") 475 if len(parts) != 2 || len(parts[0]) == 0 || len(parts[1]) == 0 { 476 return errors.Errorf("tier must be in the form \"key=value\" not %q", tier) 477 } 478 t.Key = parts[0] 479 t.Value = parts[1] 480 return nil 481 } 482 483 // String returns a string representation of all the Tiers. This is part 484 // of pflag's value interface. 485 func (l Locality) String() string { 486 tiers := make([]string, len(l.Tiers)) 487 for i, tier := range l.Tiers { 488 tiers[i] = tier.String() 489 } 490 return strings.Join(tiers, ",") 491 } 492 493 // Type returns the underlying type in string form. This is part of pflag's 494 // value interface. 495 func (Locality) Type() string { 496 return "Locality" 497 } 498 499 // Equals returns whether the two Localities are equivalent. 500 // 501 // Because Locality Tiers are hierarchically ordered, if two Localities contain 502 // the same Tiers in different orders, they are not considered equal. 503 func (l Locality) Equals(r Locality) bool { 504 if len(l.Tiers) != len(r.Tiers) { 505 return false 506 } 507 for i := range l.Tiers { 508 if l.Tiers[i] != r.Tiers[i] { 509 return false 510 } 511 } 512 return true 513 } 514 515 // MaxDiversityScore is the largest possible diversity score, indicating that 516 // two localities are as different from each other as possible. 517 const MaxDiversityScore = 1.0 518 519 // DiversityScore returns a score comparing the two localities which ranges from 520 // 1, meaning completely diverse, to 0 which means not diverse at all (that 521 // their localities match). This function ignores the locality tier key names 522 // and only considers differences in their values. 523 // 524 // All localities are sorted from most global to most local so any localities 525 // after any differing values are irrelevant. 526 // 527 // While we recommend that all nodes have the same locality keys and same 528 // total number of keys, there's nothing wrong with having different locality 529 // keys as long as the immediately next keys are all the same for each value. 530 // For example: 531 // region:USA -> state:NY -> ... 532 // region:USA -> state:WA -> ... 533 // region:EUR -> country:UK -> ... 534 // region:EUR -> country:France -> ... 535 // is perfectly fine. This holds true at each level lower as well. 536 // 537 // There is also a need to consider the cases where the localities have 538 // different lengths. For these cases, we treat the missing key on one side as 539 // different. 540 func (l Locality) DiversityScore(other Locality) float64 { 541 length := len(l.Tiers) 542 if len(other.Tiers) < length { 543 length = len(other.Tiers) 544 } 545 for i := 0; i < length; i++ { 546 if l.Tiers[i].Value != other.Tiers[i].Value { 547 return float64(length-i) / float64(length) 548 } 549 } 550 if len(l.Tiers) != len(other.Tiers) { 551 return MaxDiversityScore / float64(length+1) 552 } 553 return 0 554 } 555 556 // Set sets the value of the Locality. It is the important part of 557 // pflag's value interface. 558 func (l *Locality) Set(value string) error { 559 if len(l.Tiers) > 0 { 560 return errors.New("can't set locality more than once") 561 } 562 if len(value) == 0 { 563 return errors.New("can't have empty locality") 564 } 565 566 tiersStr := strings.Split(value, ",") 567 tiers := make([]Tier, len(tiersStr)) 568 for i, tier := range tiersStr { 569 if err := tiers[i].FromString(tier); err != nil { 570 return err 571 } 572 } 573 l.Tiers = tiers 574 return nil 575 } 576 577 // Find searches the locality's tiers for the input key, returning its value if 578 // present. 579 func (l *Locality) Find(key string) (value string, ok bool) { 580 for i := range l.Tiers { 581 if l.Tiers[i].Key == key { 582 return l.Tiers[i].Value, true 583 } 584 } 585 return "", false 586 } 587 588 // DefaultLocationInformation is used to populate the system.locations 589 // table. The region values here are specific to GCP. 590 var DefaultLocationInformation = []struct { 591 Locality Locality 592 Latitude string 593 Longitude string 594 }{ 595 { 596 Locality: Locality{Tiers: []Tier{{Key: "region", Value: "us-east1"}}}, 597 Latitude: "33.836082", 598 Longitude: "-81.163727", 599 }, 600 { 601 Locality: Locality{Tiers: []Tier{{Key: "region", Value: "us-east4"}}}, 602 Latitude: "37.478397", 603 Longitude: "-76.453077", 604 }, 605 { 606 Locality: Locality{Tiers: []Tier{{Key: "region", Value: "us-central1"}}}, 607 Latitude: "42.032974", 608 Longitude: "-93.581543", 609 }, 610 { 611 Locality: Locality{Tiers: []Tier{{Key: "region", Value: "us-west1"}}}, 612 Latitude: "43.804133", 613 Longitude: "-120.554201", 614 }, 615 { 616 Locality: Locality{Tiers: []Tier{{Key: "region", Value: "europe-west1"}}}, 617 Latitude: "50.44816", 618 Longitude: "3.81886", 619 }, 620 }