github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/reports/reporter.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package reports

import (
	"context"
	"fmt"
	"strings"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/security"
	"github.com/cockroachdb/cockroach/pkg/settings"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/logtags"
)

// ReporterInterval is the interval between two generations of the reports.
// When set to zero, report generation is disabled.
var ReporterInterval = settings.RegisterPublicNonNegativeDurationSetting(
	"kv.replication_reports.interval",
	"the frequency for generating the replication_constraint_stats, replication_stats_report and "+
		"replication_critical_localities reports (set to 0 to disable)",
	time.Minute,
)

// Reporter periodically produces a couple of reports on the cluster's data
// distribution: the system tables replication_constraint_stats,
// replication_stats_report and replication_critical_localities.
type Reporter struct {
	// Contains the list of the stores of the current node.
	localStores *kvserver.Stores
	// The store that is the current meta1 leaseholder, if any.
	meta1LeaseHolder *kvserver.Store
	// Latest zone config.
	latestConfig *config.SystemConfig

	db        *kv.DB
	liveness  *kvserver.NodeLiveness
	settings  *cluster.Settings
	storePool *kvserver.StorePool
	executor  sqlutil.InternalExecutor

	frequencyMu struct {
		syncutil.Mutex
		interval time.Duration
		changeCh chan struct{}
	}
}

// NewReporter creates a Reporter.
func NewReporter(
	db *kv.DB,
	localStores *kvserver.Stores,
	storePool *kvserver.StorePool,
	st *cluster.Settings,
	liveness *kvserver.NodeLiveness,
	executor sqlutil.InternalExecutor,
) *Reporter {
	r := Reporter{
		db:          db,
		localStores: localStores,
		storePool:   storePool,
		settings:    st,
		liveness:    liveness,
		executor:    executor,
	}
	r.frequencyMu.changeCh = make(chan struct{})
	return &r
}

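// exampleStartReporter is an illustrative sketch and not part of the original
// file: it shows how the pieces above are typically wired together at node
// startup. The function name is hypothetical and the arguments are assumed to
// be provided by the server's initialization code.
func exampleStartReporter(
	ctx context.Context,
	db *kv.DB,
	stores *kvserver.Stores,
	storePool *kvserver.StorePool,
	st *cluster.Settings,
	liveness *kvserver.NodeLiveness,
	executor sqlutil.InternalExecutor,
	stopper *stop.Stopper,
) {
	reporter := NewReporter(db, stores, storePool, st, liveness, executor)
	// Start spawns a worker that regenerates the reports every
	// kv.replication_reports.interval for as long as one of this node's stores
	// holds the meta1 lease.
	reporter.Start(ctx, stopper)
}
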
// reportInterval returns the current value of the frequency setting and a
// channel that will get closed when the value is not current anymore.
func (stats *Reporter) reportInterval() (time.Duration, <-chan struct{}) {
	stats.frequencyMu.Lock()
	defer stats.frequencyMu.Unlock()
	return ReporterInterval.Get(&stats.settings.SV), stats.frequencyMu.changeCh
}

// Start the periodic calls to update().
func (stats *Reporter) Start(ctx context.Context, stopper *stop.Stopper) {
	ReporterInterval.SetOnChange(&stats.settings.SV, func() {
		stats.frequencyMu.Lock()
		defer stats.frequencyMu.Unlock()
		// Signal the current waiter (if any), and prepare the channel for future
		// ones.
		ch := stats.frequencyMu.changeCh
		close(ch)
		stats.frequencyMu.changeCh = make(chan struct{})
		stats.frequencyMu.interval = ReporterInterval.Get(&stats.settings.SV)
	})
	stopper.RunWorker(ctx, func(ctx context.Context) {
		var timer timeutil.Timer
		defer timer.Stop()
		ctx = logtags.AddTag(ctx, "replication-reporter", nil /* value */)

		replStatsSaver := makeReplicationStatsReportSaver()
		constraintsSaver := makeReplicationConstraintStatusReportSaver()
		criticalLocSaver := makeReplicationCriticalLocalitiesReportSaver()

		for {
			// Read the interval setting. We'll generate a report and then sleep for
			// that long. We'll also wake up if the setting changes; that's useful for
			// tests which want to lower the setting drastically and expect the report
			// to be regenerated quickly, and also for users increasing the frequency.
			interval, changeCh := stats.reportInterval()

			var timerCh <-chan time.Time
			if interval != 0 {
				// If (some store on) this node is the leaseholder for range 1, do the
				// work.
				stats.meta1LeaseHolder = stats.meta1LeaseHolderStore()
				if stats.meta1LeaseHolder != nil {
					if err := stats.update(
						ctx, &constraintsSaver, &replStatsSaver, &criticalLocSaver,
					); err != nil {
						log.Errorf(ctx, "failed to generate replication reports: %s", err)
					}
				}
				timer.Reset(interval)
				timerCh = timer.C
			}

			// Wait until the timer expires (if there's a timer) or until there's an
			// update to the frequency setting.
			select {
			case <-timerCh:
				timer.Read = true
			case <-changeCh:
			case <-stopper.ShouldQuiesce():
				return
			}
		}
	})
}

// update regenerates all the reports and saves them using the provided savers.
func (stats *Reporter) update(
	ctx context.Context,
	constraintsSaver *replicationConstraintStatsReportSaver,
	replStatsSaver *replicationStatsReportSaver,
	locSaver *replicationCriticalLocalitiesReportSaver,
) error {
	start := timeutil.Now()
	log.VEventf(ctx, 2, "updating replication reports...")
	defer func() {
		log.VEventf(ctx, 2, "updating replication reports... done. Generation took: %s.",
			timeutil.Now().Sub(start))
	}()
	stats.updateLatestConfig()
	if stats.latestConfig == nil {
		return nil
	}

	allStores := stats.storePool.GetStores()
	var getStoresFromGossip StoreResolver = func(
		r *roachpb.RangeDescriptor,
	) []roachpb.StoreDescriptor {
		storeDescs := make([]roachpb.StoreDescriptor, len(r.Replicas().Voters()))
		// We'll return empty descriptors for stores that gossip doesn't have a
		// descriptor for. These stores will be considered to satisfy all
		// constraints.
		// TODO(andrei): note down that some descriptors were missing from gossip
		// somewhere in the report.
		for i, repl := range r.Replicas().Voters() {
			storeDescs[i] = allStores[repl.StoreID]
		}
		return storeDescs
	}

	isLiveMap := stats.liveness.GetIsLiveMap()
	isNodeLive := func(nodeID roachpb.NodeID) bool {
		return isLiveMap[nodeID].IsLive
	}

	nodeLocalities := make(map[roachpb.NodeID]roachpb.Locality, len(allStores))
	for _, storeDesc := range allStores {
		nodeDesc := storeDesc.Node
		// Note: We might overwrite the node's localities here. We assume that all
		// the stores for a node have the same node descriptor.
		nodeLocalities[nodeDesc.NodeID] = nodeDesc.Locality
	}

	// Create the visitors that we're going to pass to visitRanges() below.
	constraintConfVisitor := makeConstraintConformanceVisitor(
		ctx, stats.latestConfig, getStoresFromGossip)
	localityStatsVisitor := makeCriticalLocalitiesVisitor(
		ctx, nodeLocalities, stats.latestConfig,
		getStoresFromGossip, isNodeLive)
	replicationStatsVisitor := makeReplicationStatsVisitor(ctx, stats.latestConfig, isNodeLive)

	// Iterate through all the ranges.
	const descriptorReadBatchSize = 10000
	rangeIter := makeMeta2RangeIter(stats.db, descriptorReadBatchSize)
	if err := visitRanges(
		ctx, &rangeIter, stats.latestConfig,
		&constraintConfVisitor, &localityStatsVisitor, &replicationStatsVisitor,
	); err != nil {
		if errors.HasType(err, (*visitorError)(nil)) {
			log.Errorf(ctx, "some reports have not been generated: %s", err)
		} else {
			return errors.Wrap(err, "failed to compute constraint conformance report")
		}
	}

	if !constraintConfVisitor.failed() {
		if err := constraintsSaver.Save(
			ctx, constraintConfVisitor.report, timeutil.Now() /* reportTS */, stats.db, stats.executor,
		); err != nil {
			return errors.Wrap(err, "failed to save constraint report")
		}
	}
	if !localityStatsVisitor.failed() {
		if err := locSaver.Save(
			ctx, localityStatsVisitor.Report(), timeutil.Now() /* reportTS */, stats.db, stats.executor,
		); err != nil {
			return errors.Wrap(err, "failed to save locality report")
		}
	}
	if !replicationStatsVisitor.failed() {
		if err := replStatsSaver.Save(
			ctx, replicationStatsVisitor.Report(),
			timeutil.Now() /* reportTS */, stats.db, stats.executor,
		); err != nil {
			return errors.Wrap(err, "failed to save range status report")
		}
	}
	return nil
}

// meta1LeaseHolderStore returns the node store that is the leaseholder of the
// Meta1 range, or nil if none of the node's stores are holding the Meta1 lease.
func (stats *Reporter) meta1LeaseHolderStore() *kvserver.Store {
	const meta1RangeID = roachpb.RangeID(1)
	repl, store, err := stats.localStores.GetReplicaForRangeID(meta1RangeID)
	if roachpb.IsRangeNotFoundError(err) {
		return nil
	}
	if err != nil {
		log.Fatalf(context.TODO(), "unexpected error when visiting stores: %s", err)
	}
	if repl.OwnsValidLease(store.Clock().Now()) {
		return store
	}
	return nil
}

func (stats *Reporter) updateLatestConfig() {
	stats.latestConfig = stats.meta1LeaseHolder.Gossip().GetSystemConfig()
}

// nodeChecker checks whether a node is to be considered alive or not.
type nodeChecker func(nodeID roachpb.NodeID) bool

// zoneResolver resolves ranges to their zone configs. It is optimized for the
// case where a range falls in the same zone as the previously-resolved range
// (which is the common case when asked to resolve ranges in key order).
type zoneResolver struct {
	init bool
	// curObjectID is the object (i.e. usually table) of the configured range.
	curObjectID uint32
	// curRootZone is the lowest zone covering the previously resolved range
	// that's not a subzone.
	// This is used to compute the subzone for a range.
	curRootZone *zonepb.ZoneConfig
	// curZoneKey is the zone key for the previously resolved range.
	curZoneKey ZoneKey
}

// resolveRange resolves a range to its zone.
func (c *zoneResolver) resolveRange(
	ctx context.Context, rng *roachpb.RangeDescriptor, cfg *config.SystemConfig,
) (ZoneKey, error) {
	if c.checkSameZone(ctx, rng) {
		return c.curZoneKey, nil
	}
	return c.updateZone(ctx, rng, cfg)
}

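// exampleResolveInKeyOrder is an illustrative sketch and not part of the
// original file: it shows how a zoneResolver amortizes zone lookups when
// ranges are resolved in key order, which is how visitRanges() uses it below.
// The function name and parameters are hypothetical.
func exampleResolveInKeyOrder(
	ctx context.Context, rds []roachpb.RangeDescriptor, cfg *config.SystemConfig,
) ([]ZoneKey, error) {
	var resolver zoneResolver
	zoneKeys := make([]ZoneKey, 0, len(rds))
	for i := range rds {
		// resolveRange returns the cached key when consecutive ranges fall in
		// the same zone; otherwise it re-walks the zone hierarchy via
		// updateZone().
		key, err := resolver.resolveRange(ctx, &rds[i], cfg)
		if err != nil {
			return nil, err
		}
		zoneKeys = append(zoneKeys, key)
	}
	return zoneKeys, nil
}
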
// setZone remembers the passed-in info as the reference for further
// checkSameZone() calls.
// Clients should generally use the higher-level updateZone().
func (c *zoneResolver) setZone(objectID uint32, key ZoneKey, rootZone *zonepb.ZoneConfig) {
	c.init = true
	c.curObjectID = objectID
	c.curRootZone = rootZone
	c.curZoneKey = key
}

// updateZone updates the state of the zoneResolver to the zone of the passed-in
// range descriptor.
func (c *zoneResolver) updateZone(
	ctx context.Context, rd *roachpb.RangeDescriptor, cfg *config.SystemConfig,
) (ZoneKey, error) {
	objectID, _ := config.DecodeKeyIntoZoneIDAndSuffix(rd.StartKey)
	first := true
	var zoneKey ZoneKey
	var rootZone *zonepb.ZoneConfig
	// We're going to walk the zone hierarchy looking for two things:
	// 1) The lowest zone containing rd. We'll use the subzone ID for it.
	// 2) The lowest zone containing rd that's not a subzone.
	// visitZones() walks the zone hierarchy from the bottom upwards.
	found, err := visitZones(
		ctx, rd, cfg, includeSubzonePlaceholders,
		func(_ context.Context, zone *zonepb.ZoneConfig, key ZoneKey) bool {
			if first {
				first = false
				zoneKey = key
			}
			if key.SubzoneID == NoSubzone {
				rootZone = zone
				return true
			}
			return false
		})
	if err != nil {
		return ZoneKey{}, err
	}
	if !found {
		return ZoneKey{}, errors.AssertionFailedf("failed to resolve zone for range: %s", rd)
	}
	c.setZone(objectID, zoneKey, rootZone)
	return zoneKey, nil
}

// checkSameZone returns true if the most specific zone that contains rng is the
// one previously passed to setZone().
//
// NB: This method allows for false negatives (but no false positives). For
// example, if the zoneResolver was previously configured for a range starting
// at /Table/51 and is now queried for /Table/52, it will say that the zones
// don't match even if in fact they do (because neither table defines its own
// zone and they're both inheriting a higher zone).
func (c *zoneResolver) checkSameZone(ctx context.Context, rng *roachpb.RangeDescriptor) bool {
	if !c.init {
		return false
	}

	objectID, keySuffix := config.DecodeKeyIntoZoneIDAndSuffix(rng.StartKey)
	if objectID != c.curObjectID {
		return false
	}
	_, subzoneIdx := c.curRootZone.GetSubzoneForKeySuffix(keySuffix)
	return subzoneIdx == c.curZoneKey.SubzoneID.ToSubzoneIndex()
}

type visitOpt bool

const (
	ignoreSubzonePlaceholders  visitOpt = false
	includeSubzonePlaceholders visitOpt = true
)

// visitZones applies a visitor to the hierarchy of zone configs that apply to
// the given range, starting from the most specific and going up to the default
// zone config.
//
// visitor is called for each zone config until it returns true, or until the
// default zone config is reached. It's passed zone configs and the
// corresponding zoneKeys.
//
// visitZones returns true if the visitor returned true, and false if the zone
// hierarchy was exhausted.
func visitZones(
	ctx context.Context,
	rng *roachpb.RangeDescriptor,
	cfg *config.SystemConfig,
	opt visitOpt,
	visitor func(context.Context, *zonepb.ZoneConfig, ZoneKey) bool,
) (bool, error) {
	id, keySuffix := config.DecodeKeyIntoZoneIDAndSuffix(rng.StartKey)
	zone, err := getZoneByID(id, cfg)
	if err != nil {
		return false, err
	}

	// We've got the zone config (without considering inheritance) for the
	// "object" indicated by our key. Now we need to find where the constraints
	// come from. We'll first look downwards - in subzones (if any). If there are
	// no constraints there, we'll look in the zone config that we got. If not,
	// we'll look upwards (e.g. database zone config, default zone config).

	if zone != nil {
		// Try subzones.
		subzone, subzoneIdx := zone.GetSubzoneForKeySuffix(keySuffix)
		if subzone != nil {
			if visitor(ctx, &subzone.Config, MakeZoneKey(id, base.SubzoneIDFromIndex(int(subzoneIdx)))) {
				return true, nil
			}
		}
		// Try the zone for our object.
		if (opt == includeSubzonePlaceholders) || !zone.IsSubzonePlaceholder() {
			if visitor(ctx, zone, MakeZoneKey(id, 0)) {
				return true, nil
			}
		}
	}

	// Go upwards.
	return visitAncestors(ctx, id, cfg, visitor)
}

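// exampleCollectZoneChain is an illustrative sketch and not part of the
// original file: it uses visitZones() to collect the key of every zone config
// that applies to a range, from the most specific subzone up to the default
// zone. The function name is hypothetical.
func exampleCollectZoneChain(
	ctx context.Context, rd *roachpb.RangeDescriptor, cfg *config.SystemConfig,
) ([]ZoneKey, error) {
	var chain []ZoneKey
	// Returning false from the visitor makes visitZones() keep walking up the
	// hierarchy until the default zone config is reached.
	_, err := visitZones(ctx, rd, cfg, includeSubzonePlaceholders,
		func(_ context.Context, _ *zonepb.ZoneConfig, key ZoneKey) bool {
			chain = append(chain, key)
			return false
		})
	if err != nil {
		return nil, err
	}
	return chain, nil
}
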
// visitAncestors invokes the visitor for all the ancestors of the zone
// corresponding to id. The zone corresponding to id itself is not visited.
func visitAncestors(
	ctx context.Context,
	id uint32,
	cfg *config.SystemConfig,
	visitor func(context.Context, *zonepb.ZoneConfig, ZoneKey) bool,
) (bool, error) {
	// Check to see if it's a table. If so, inherit from the database.
	// For all other cases, inherit from the default.
	descVal := cfg.GetValue(sqlbase.MakeDescMetadataKey(keys.TODOSQLCodec, sqlbase.ID(id)))
	if descVal == nil {
		// Couldn't find a descriptor. This is not expected to happen.
		// Let's just look at the default zone config.
		return visitDefaultZone(ctx, cfg, visitor), nil
	}

	var desc sqlbase.Descriptor
	if err := descVal.GetProto(&desc); err != nil {
		return false, err
	}
	tableDesc := desc.Table(descVal.Timestamp)
	// If it's a database, the parent is the default zone.
	if tableDesc == nil {
		return visitDefaultZone(ctx, cfg, visitor), nil
	}

	// If it's a table, the parent is a database.
	zone, err := getZoneByID(uint32(tableDesc.ParentID), cfg)
	if err != nil {
		return false, err
	}
	if zone != nil {
		if visitor(ctx, zone, MakeZoneKey(uint32(tableDesc.ParentID), NoSubzone)) {
			return true, nil
		}
	}
	// The parent database did not have constraints. Its parent is the default zone.
	return visitDefaultZone(ctx, cfg, visitor), nil
}

func visitDefaultZone(
	ctx context.Context,
	cfg *config.SystemConfig,
	visitor func(context.Context, *zonepb.ZoneConfig, ZoneKey) bool,
) bool {
	zone, err := getZoneByID(keys.RootNamespaceID, cfg)
	if err != nil {
		log.Fatalf(ctx, "failed to get default zone config: %s", err)
	}
	if zone == nil {
		log.Fatal(ctx, "default zone config missing unexpectedly")
	}
	return visitor(ctx, zone, MakeZoneKey(keys.RootNamespaceID, NoSubzone))
}

// getZoneByID returns a zone given its id. Inheritance does not apply.
func getZoneByID(id uint32, cfg *config.SystemConfig) (*zonepb.ZoneConfig, error) {
	zoneVal := cfg.GetValue(config.MakeZoneKey(id))
	if zoneVal == nil {
		return nil, nil
	}
	zone := new(zonepb.ZoneConfig)
	if err := zoneVal.GetProto(zone); err != nil {
		return nil, err
	}
	return zone, nil
}

// StoreResolver is a function resolving a range to a store descriptor for each
// of the replicas. Empty store descriptors are to be returned when there's no
// information available for the store.
type StoreResolver func(*roachpb.RangeDescriptor) []roachpb.StoreDescriptor

// rangeVisitor abstracts the interface for range iteration implemented by all
// report generators.
type rangeVisitor interface {
	// visitNewZone/visitSameZone are called by visitRanges() for each range, in
	// order. The visitor will update its report with the range's info. If an
	// error is returned, the visit methods will not be called again before
	// reset(), and failed() needs to return true until reset() is called.
	//
	// Once visitNewZone() has been called once, visitSameZone() is called for
	// further ranges as long as these ranges are covered by the same zone config.
	// As soon as a range is not covered by it, visitNewZone() is called again.
	// The idea is that visitors can maintain state about the zone that applies
	// to multiple ranges, and so visitSameZone() allows them to efficiently reuse
	// that state (in particular, not unmarshal ZoneConfigs again).
	visitNewZone(context.Context, *roachpb.RangeDescriptor) error
	visitSameZone(context.Context, *roachpb.RangeDescriptor)

	// failed returns true if an error was encountered by the last visit call
	// (and reset() wasn't called since).
	// The idea is that, if failed() returns true, the report that the visitor
	// produces will be considered incomplete and not persisted.
	failed() bool

	// reset resets the visitor's state, preparing it for visit calls starting
	// at the first range. This is called on retriable errors during range
	// iteration.
	reset(ctx context.Context)
}

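// exampleRangeCounter is an illustrative sketch and not part of the original
// file: a minimal rangeVisitor that counts the ranges it is shown and the
// number of zone changes among them. The type and field names are
// hypothetical. It demonstrates the visitNewZone/visitSameZone contract:
// visitNewZone fires when the zone changes, visitSameZone for subsequent
// ranges covered by the same zone, so per-zone state computed in visitNewZone
// can be reused cheaply.
type exampleRangeCounter struct {
	zones       int // incremented on every zone change
	ranges      int // incremented for every range visited
	visitFailed bool
}

var _ rangeVisitor = &exampleRangeCounter{}

func (c *exampleRangeCounter) visitNewZone(
	ctx context.Context, rd *roachpb.RangeDescriptor,
) error {
	c.zones++
	c.ranges++
	return nil
}

func (c *exampleRangeCounter) visitSameZone(ctx context.Context, rd *roachpb.RangeDescriptor) {
	// Same zone as the previous range; no per-zone work to redo.
	c.ranges++
}

func (c *exampleRangeCounter) failed() bool {
	return c.visitFailed
}

func (c *exampleRangeCounter) reset(ctx context.Context) {
	// Called by visitRanges() on retriable iteration errors; start counting
	// from scratch.
	*c = exampleRangeCounter{}
}
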
// visitorError is returned by visitRanges when one or more visitors failed.
type visitorError struct {
	errs []error
}

func (e *visitorError) Error() string {
	s := make([]string, len(e.errs))
	for i, err := range e.errs {
		s[i] = fmt.Sprintf("%d: %s", i, err)
	}
	return fmt.Sprintf("%d visitors encountered errors:\n%s", len(e.errs), strings.Join(s, "\n"))
}

// visitRanges iterates through all the range descriptors in Meta2 and calls the
// supplied visitors.
//
// An error is returned if some descriptors could not be read. Additionally, a
// visitorError is returned if some visitors failed during the iteration. In
// that case, it is expected that the reports produced by those specific
// visitors will not be persisted, but the other reports will.
func visitRanges(
	ctx context.Context, rangeStore RangeIterator, cfg *config.SystemConfig, visitors ...rangeVisitor,
) error {
	origVisitors := make([]rangeVisitor, len(visitors))
	copy(origVisitors, visitors)
	var visitorErrs []error
	var resolver zoneResolver

	var key ZoneKey
	first := true

	// Iterate over all the ranges.
	for {
		rd, err := rangeStore.Next(ctx)
		if err != nil {
			if errIsRetriable(err) {
				visitors = origVisitors
				for _, v := range visitors {
					v.reset(ctx)
				}
				// The iterator has been positioned to the beginning.
				continue
			} else {
				return err
			}
		}
		if rd.RangeID == 0 {
			// We're done.
			break
		}

		newKey, err := resolver.resolveRange(ctx, &rd, cfg)
		if err != nil {
			return err
		}
		sameZoneAsPrevRange := !first && key == newKey
		key = newKey
		first = false

		for i, v := range visitors {
			var err error
			if sameZoneAsPrevRange {
				v.visitSameZone(ctx, &rd)
			} else {
				err = v.visitNewZone(ctx, &rd)
			}

			if err != nil {
				// Sanity check - v.failed() should return true now that the visitor
				// has returned an error.
				if !v.failed() {
					return errors.Errorf("expected visitor %T to have failed() after error: %s", v, err)
				}
				// Remove this visitor; it shouldn't be called any more.
				visitors = append(visitors[:i], visitors[i+1:]...)
				visitorErrs = append(visitorErrs, err)
			}
		}
	}
	if len(visitorErrs) > 0 {
		return &visitorError{errs: visitorErrs}
	}
	return nil
}

// RangeIterator abstracts the interface for reading range descriptors.
type RangeIterator interface {
	// Next returns the next range descriptor (in key order).
	// It returns an empty RangeDescriptor when all the ranges have been
	// exhausted. In that case, the iterator is not to be used any more (except
	// for calling Close(), which will be a no-op).
	//
	// The returned error can be a retriable one (i.e.
	// *roachpb.TransactionRetryWithProtoRefreshError, possibly wrapped). In that
	// case, the iterator is reset automatically; the next Next() call (should
	// there be one) will return the first descriptor.
	// In case of any other error, the iterator is automatically closed. It can't
	// be used any more (except for calling Close(), which will be a no-op).
	Next(context.Context) (roachpb.RangeDescriptor, error)

	// Close destroys the iterator, releasing resources. It does not need to be
	// called after Next() indicates exhaustion by returning an empty descriptor,
	// or after Next() returns non-retriable errors.
	Close(context.Context)
}

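// exampleIterateMeta2 is an illustrative sketch and not part of the original
// file: it shows the consumption pattern documented on RangeIterator,
// mirroring the loop in visitRanges() above. The function name and the way the
// count is used are hypothetical.
func exampleIterateMeta2(ctx context.Context, db *kv.DB, batchSize int) (int, error) {
	iter := makeMeta2RangeIter(db, batchSize)
	defer iter.Close(ctx)
	count := 0
	for {
		rd, err := iter.Next(ctx)
		if err != nil {
			if errIsRetriable(err) {
				// The iterator has repositioned itself at the beginning, so
				// start counting over.
				count = 0
				continue
			}
			return 0, err
		}
		if rd.RangeID == 0 {
			// An empty descriptor signals that meta2 has been exhausted.
			return count, nil
		}
		count++
	}
}
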
// meta2RangeIter is an implementation of RangeIterator that scans meta2 in a
// paginated way.
type meta2RangeIter struct {
	db *kv.DB
	// The size of the batches that descriptors will be read in. 0 for no limit.
	batchSize int

	txn *kv.Txn
	// buffer contains descriptors read in the most recent batch, but not yet
	// returned to the client.
	buffer []kv.KeyValue
	// resumeSpan maintains the point where the meta2 scan stopped.
	resumeSpan *roachpb.Span
	// readingDone is set once we've scanned all of meta2. buffer may still
	// contain descriptors.
	readingDone bool
}

func makeMeta2RangeIter(db *kv.DB, batchSize int) meta2RangeIter {
	return meta2RangeIter{db: db, batchSize: batchSize}
}

var _ RangeIterator = &meta2RangeIter{}

// Next is part of the RangeIterator interface.
func (r *meta2RangeIter) Next(ctx context.Context) (_ roachpb.RangeDescriptor, retErr error) {
	defer func() { r.handleErr(ctx, retErr) }()

	rd, err := r.consumerBuffer()
	if err != nil || rd.RangeID != 0 {
		return rd, err
	}

	if r.readingDone {
		// No more batches to read.
		return roachpb.RangeDescriptor{}, nil
	}

	// Read a batch and consume the first row (if any).
	if err := r.readBatch(ctx); err != nil {
		return roachpb.RangeDescriptor{}, err
	}
	return r.consumerBuffer()
}

func (r *meta2RangeIter) consumerBuffer() (roachpb.RangeDescriptor, error) {
	if len(r.buffer) == 0 {
		return roachpb.RangeDescriptor{}, nil
	}
	first := r.buffer[0]
	var desc roachpb.RangeDescriptor
	if err := first.ValueProto(&desc); err != nil {
		return roachpb.RangeDescriptor{}, errors.NewAssertionErrorWithWrappedErrf(err,
			"%s: unable to unmarshal range descriptor", first.Key)
	}
	r.buffer = r.buffer[1:]
	return desc, nil
}

// Close is part of the RangeIterator interface.
func (r *meta2RangeIter) Close(ctx context.Context) {
	if r.readingDone {
		return
	}
	_ = r.txn.Rollback(ctx)
	r.txn = nil
	r.readingDone = true
}

func (r *meta2RangeIter) readBatch(ctx context.Context) (retErr error) {
	defer func() { r.handleErr(ctx, retErr) }()

	if len(r.buffer) > 0 {
		log.Fatalf(ctx, "buffer not exhausted: %d keys remaining", len(r.buffer))
	}
	if r.txn == nil {
		r.txn = r.db.NewTxn(ctx, "rangeStoreImpl")
	}

	b := r.txn.NewBatch()
	start := keys.Meta2Prefix
	if r.resumeSpan != nil {
		start = r.resumeSpan.Key
	}
	b.Scan(start, keys.MetaMax)
	b.Header.MaxSpanRequestKeys = int64(r.batchSize)
	err := r.txn.Run(ctx, b)
	if err != nil {
		return err
	}
	r.buffer = b.Results[0].Rows
	r.resumeSpan = b.Results[0].ResumeSpan
	if r.resumeSpan == nil {
		if err := r.txn.Commit(ctx); err != nil {
			return err
		}
		r.txn = nil
		r.readingDone = true
	}
	return nil
}

func errIsRetriable(err error) bool {
	return errors.HasType(err, (*roachpb.TransactionRetryWithProtoRefreshError)(nil))
}

// handleErr manipulates the iterator's state in response to an error.
// In case of a retriable error, the iterator is reset such that the next Next()
// call returns the first range. In case of any other error, resources are
// released and the iterator shouldn't be used any more.
// A nil error may be passed, in which case handleErr is a no-op.
//
// handleErr is idempotent.
func (r *meta2RangeIter) handleErr(ctx context.Context, err error) {
	if err == nil {
		return
	}
	if !errIsRetriable(err) {
		if r.txn != nil {
			// On any non-retriable error, rollback.
			r.txn.CleanupOnError(ctx, err)
			r.txn = nil
		}
		r.reset()
		r.readingDone = true
	} else {
		r.reset()
	}
}

// reset the iterator. The next Next() call will return the first range.
func (r *meta2RangeIter) reset() {
	r.buffer = nil
	r.resumeSpan = nil
	r.readingDone = false
}

type reportID int

// getReportGenerationTime returns the time at which a particular report was
// last generated. Returns time.Time{} if the report is not found.
func getReportGenerationTime(
	ctx context.Context, rid reportID, ex sqlutil.InternalExecutor, txn *kv.Txn,
) (time.Time, error) {
	row, err := ex.QueryRowEx(
		ctx,
		"get-previous-timestamp",
		txn,
		sqlbase.InternalExecutorSessionDataOverride{User: security.NodeUser},
		"select generated from system.reports_meta where id = $1",
		rid,
	)
	if err != nil {
		return time.Time{}, err
	}

	if row == nil {
		return time.Time{}, nil
	}

	if len(row) != 1 {
		return time.Time{}, errors.AssertionFailedf(
			"expected 1 column from internal query, got: %d", len(row))
	}
	generated, ok := row[0].(*tree.DTimestampTZ)
	if !ok {
		return time.Time{}, errors.AssertionFailedf("expected to get timestamptz from "+
			"system.reports_meta, got %+v (%T)", row[0], row[0])
	}
	return generated.Time, nil
}