// Source: github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/reports/replication_stats_report.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package reports

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/errors"
)

// replicationStatsReportID is the id of the row in the system.reports_meta
// table corresponding to the replication stats report (i.e. the
// system.replication_stats table).
const replicationStatsReportID reportID = 3

// RangeReport is the in-memory form of the replication stats report. It maps
// every reported zone (identified by its ZoneKey) to the range counters that
// are stored as one row of the system.replication_stats table.
type RangeReport map[ZoneKey]zoneRangeStatus

// zoneRangeStatus is the leaf of the RangeReport: the per-zone counters
// maintained by RangeReport.CountRange.
type zoneRangeStatus struct {
	// numRanges is the number of ranges counted towards the zone.
	numRanges int32
	// unavailable is the number of those ranges that were flagged as
	// unavailable.
	unavailable int32
	// underReplicated is the number of those ranges that were flagged as
	// under-replicated.
	underReplicated int32
	// overReplicated is the number of those ranges that were flagged as
	// over-replicated.
	overReplicated int32
}

// replicationStatsReportSaver deals with saving a RangeReport to the database.
// The idea is for it to be used to save new versions of the report over and
// over. It maintains the previously-saved version of the report in order to
// speed up the saving of the next one.
48 type replicationStatsReportSaver struct { 49 previousVersion RangeReport 50 lastGenerated time.Time 51 lastUpdatedRowCount int 52 } 53 54 // makeReplicationStatsReportSaver creates a new report saver. 55 func makeReplicationStatsReportSaver() replicationStatsReportSaver { 56 return replicationStatsReportSaver{} 57 } 58 59 // LastUpdatedRowCount is the count of the rows that were touched during the last save. 60 func (r *replicationStatsReportSaver) LastUpdatedRowCount() int { 61 return r.lastUpdatedRowCount 62 } 63 64 // EnsureEntry creates an entry for the given key if there is none. 65 func (r RangeReport) EnsureEntry(zKey ZoneKey) { 66 if _, ok := r[zKey]; !ok { 67 r[zKey] = zoneRangeStatus{} 68 } 69 } 70 71 // CountRange adds one range's info to the report. If there's no entry in the 72 // report for the range's zone, a new one is created. 73 func (r RangeReport) CountRange( 74 zKey ZoneKey, unavailable bool, underReplicated bool, overReplicated bool, 75 ) { 76 r.EnsureEntry(zKey) 77 rStat := r[zKey] 78 rStat.numRanges++ 79 if unavailable { 80 rStat.unavailable++ 81 } 82 if underReplicated { 83 rStat.underReplicated++ 84 } 85 if overReplicated { 86 rStat.overReplicated++ 87 } 88 r[zKey] = rStat 89 } 90 91 func (r *replicationStatsReportSaver) loadPreviousVersion( 92 ctx context.Context, ex sqlutil.InternalExecutor, txn *kv.Txn, 93 ) error { 94 // The data for the previous save needs to be loaded if: 95 // - this is the first time that we call this method and lastUpdatedAt has never been set 96 // - in case that the lastUpdatedAt is set but is different than the timestamp in reports_meta 97 // this indicates that some other worker wrote after we did the write. 98 if !r.lastGenerated.IsZero() { 99 generated, err := getReportGenerationTime(ctx, replicationStatsReportID, ex, txn) 100 if err != nil { 101 return err 102 } 103 // If the report is missing, this is the first time we are running and the 104 // reload is needed. 
In that case, generated will be the zero value. 105 if generated == r.lastGenerated { 106 // We have the latest report; reload not needed. 107 return nil 108 } 109 } 110 const prevViolations = "select zone_id, subzone_id, total_ranges, " + 111 "unavailable_ranges, under_replicated_ranges, over_replicated_ranges " + 112 "from system.replication_stats" 113 rows, err := ex.Query( 114 ctx, "get-previous-replication-stats", txn, prevViolations, 115 ) 116 if err != nil { 117 return err 118 } 119 120 r.previousVersion = make(RangeReport, len(rows)) 121 for _, row := range rows { 122 key := ZoneKey{} 123 key.ZoneID = (uint32)(*row[0].(*tree.DInt)) 124 key.SubzoneID = base.SubzoneID(*row[1].(*tree.DInt)) 125 r.previousVersion[key] = zoneRangeStatus{ 126 (int32)(*row[2].(*tree.DInt)), 127 (int32)(*row[3].(*tree.DInt)), 128 (int32)(*row[4].(*tree.DInt)), 129 (int32)(*row[5].(*tree.DInt)), 130 } 131 } 132 133 return nil 134 } 135 136 func (r *replicationStatsReportSaver) updateTimestamp( 137 ctx context.Context, ex sqlutil.InternalExecutor, txn *kv.Txn, reportTS time.Time, 138 ) error { 139 if !r.lastGenerated.IsZero() && reportTS == r.lastGenerated { 140 return errors.Errorf( 141 "The new time %s is the same as the time of the last update %s", 142 reportTS.String(), 143 r.lastGenerated.String(), 144 ) 145 } 146 147 _, err := ex.Exec( 148 ctx, 149 "timestamp-upsert-replication-stats", 150 txn, 151 "upsert into system.reports_meta(id, generated) values($1, $2)", 152 replicationStatsReportID, 153 reportTS, 154 ) 155 return err 156 } 157 158 // Save a report in the database. 159 // 160 // report should not be used by the caller any more after this call; the callee 161 // takes ownership. 162 // reportTS is the time that will be set in the updated_at column for every row. 
163 func (r *replicationStatsReportSaver) Save( 164 ctx context.Context, 165 report RangeReport, 166 reportTS time.Time, 167 db *kv.DB, 168 ex sqlutil.InternalExecutor, 169 ) error { 170 r.lastUpdatedRowCount = 0 171 if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 172 err := r.loadPreviousVersion(ctx, ex, txn) 173 if err != nil { 174 return err 175 } 176 177 err = r.updateTimestamp(ctx, ex, txn, reportTS) 178 if err != nil { 179 return err 180 } 181 182 for key, status := range report { 183 if err := r.upsertStats(ctx, txn, key, status, ex); err != nil { 184 return err 185 } 186 } 187 188 for key := range r.previousVersion { 189 if _, ok := report[key]; !ok { 190 _, err := ex.Exec( 191 ctx, 192 "delete-old-replication-stats", 193 txn, 194 "delete from system.replication_stats "+ 195 "where zone_id = $1 and subzone_id = $2", 196 key.ZoneID, 197 key.SubzoneID, 198 ) 199 200 if err != nil { 201 return err 202 } 203 r.lastUpdatedRowCount++ 204 } 205 } 206 207 return nil 208 }); err != nil { 209 return err 210 } 211 212 r.lastGenerated = reportTS 213 r.previousVersion = report 214 215 return nil 216 } 217 218 // upsertStat upserts a row into system.replication_stats. 219 func (r *replicationStatsReportSaver) upsertStats( 220 ctx context.Context, txn *kv.Txn, key ZoneKey, stats zoneRangeStatus, ex sqlutil.InternalExecutor, 221 ) error { 222 var err error 223 previousStats, hasOldVersion := r.previousVersion[key] 224 if hasOldVersion && previousStats == stats { 225 // No change in the stats so no update. 226 return nil 227 } 228 229 // Updating an old row. 
230 _, err = ex.Exec( 231 ctx, "upsert-replication-stats", txn, 232 "upsert into system.replication_stats(report_id, zone_id, subzone_id, "+ 233 "total_ranges, unavailable_ranges, under_replicated_ranges, "+ 234 "over_replicated_ranges) values($1, $2, $3, $4, $5, $6, $7)", 235 replicationStatsReportID, 236 key.ZoneID, key.SubzoneID, stats.numRanges, stats.unavailable, 237 stats.underReplicated, stats.overReplicated, 238 ) 239 240 if err != nil { 241 return err 242 } 243 244 r.lastUpdatedRowCount++ 245 return nil 246 } 247 248 // replicationStatsVisitor is a visitor that builds a RangeReport. 249 type replicationStatsVisitor struct { 250 cfg *config.SystemConfig 251 nodeChecker nodeChecker 252 253 // report is the output of the visitor. visit*() methods populate it. 254 // After visiting all the ranges, it can be retrieved with Report(). 255 report RangeReport 256 visitErr bool 257 258 // prevZoneKey and prevNumReplicas maintain state from one range to the next. 259 // This state can be reused when a range is covered by the same zone config as 260 // the previous one. Reusing it speeds up the report generation. 261 prevZoneKey ZoneKey 262 prevNumReplicas int 263 } 264 265 var _ rangeVisitor = &replicationStatsVisitor{} 266 267 func makeReplicationStatsVisitor( 268 ctx context.Context, cfg *config.SystemConfig, nodeChecker nodeChecker, 269 ) replicationStatsVisitor { 270 v := replicationStatsVisitor{ 271 cfg: cfg, 272 nodeChecker: nodeChecker, 273 report: make(RangeReport), 274 } 275 v.reset(ctx) 276 return v 277 } 278 279 // failed is part of the rangeVisitor interface. 280 func (v *replicationStatsVisitor) failed() bool { 281 return v.visitErr 282 } 283 284 // Report returns the RangeReport that was populated by previous visit*() calls. 285 func (v *replicationStatsVisitor) Report() RangeReport { 286 return v.report 287 } 288 289 // reset is part of the rangeVisitor interface. 
290 func (v *replicationStatsVisitor) reset(ctx context.Context) { 291 *v = replicationStatsVisitor{ 292 cfg: v.cfg, 293 nodeChecker: v.nodeChecker, 294 prevNumReplicas: -1, 295 report: make(RangeReport, len(v.report)), 296 } 297 298 // Iterate through all the zone configs to create report entries for all the 299 // zones that have constraints. Otherwise, just iterating through the ranges 300 // wouldn't create entries for zones that don't apply to any ranges. 301 maxObjectID, err := v.cfg.GetLargestObjectID(0 /* maxID - return the largest ID in the config */) 302 if err != nil { 303 log.Fatalf(ctx, "unexpected failure to compute max object id: %s", err) 304 } 305 for i := uint32(1); i <= maxObjectID; i++ { 306 zone, err := getZoneByID(i, v.cfg) 307 if err != nil { 308 log.Fatalf(ctx, "unexpected failure to compute max object id: %s", err) 309 } 310 if zone == nil { 311 continue 312 } 313 v.ensureEntries(MakeZoneKey(i, NoSubzone), zone) 314 } 315 } 316 317 func (v *replicationStatsVisitor) ensureEntries(key ZoneKey, zone *zonepb.ZoneConfig) { 318 if zoneChangesReplication(zone) { 319 v.report.EnsureEntry(key) 320 } 321 for i, sz := range zone.Subzones { 322 v.ensureEntries(MakeZoneKey(key.ZoneID, base.SubzoneIDFromIndex(i)), &sz.Config) 323 } 324 } 325 326 // visitNewZone is part of the rangeVisitor interface. 327 func (v *replicationStatsVisitor) visitNewZone( 328 ctx context.Context, r *roachpb.RangeDescriptor, 329 ) (retErr error) { 330 331 defer func() { 332 v.visitErr = retErr != nil 333 }() 334 var zKey ZoneKey 335 var zConfig *zonepb.ZoneConfig 336 var numReplicas int 337 338 // Figure out the zone config for whose report the current range is to be 339 // counted. This is the lowest-level zone config covering the range that 340 // changes replication settings. We also need to figure out the replication 341 // factor this zone is configured with; the replication factor might be 342 // inherited from a higher-level zone config. 
343 found, err := visitZones(ctx, r, v.cfg, ignoreSubzonePlaceholders, 344 func(_ context.Context, zone *zonepb.ZoneConfig, key ZoneKey) bool { 345 if zConfig == nil { 346 if !zoneChangesReplication(zone) { 347 return false 348 } 349 zKey = key 350 zConfig = zone 351 if zone.NumReplicas != nil { 352 numReplicas = int(*zone.NumReplicas) 353 return true 354 } 355 // We need to continue upwards in search for the NumReplicas. 356 return false 357 } 358 // We had already found the zone to report to, but we're haven't found 359 // its NumReplicas yet. 360 if zone.NumReplicas != nil { 361 numReplicas = int(*zone.NumReplicas) 362 return true 363 } 364 return false 365 }) 366 if err != nil { 367 return errors.AssertionFailedf("unexpected error visiting zones for range %s: %s", r, err) 368 } 369 v.prevZoneKey = zKey 370 v.prevNumReplicas = numReplicas 371 if !found { 372 return errors.AssertionFailedf( 373 "no zone config with replication attributes found for range: %s", r) 374 } 375 376 v.countRange(zKey, numReplicas, r) 377 return nil 378 } 379 380 // visitSameZone is part of the rangeVisitor interface. 381 func (v *replicationStatsVisitor) visitSameZone(ctx context.Context, r *roachpb.RangeDescriptor) { 382 v.countRange(v.prevZoneKey, v.prevNumReplicas, r) 383 } 384 385 func (v *replicationStatsVisitor) countRange( 386 key ZoneKey, replicationFactor int, r *roachpb.RangeDescriptor, 387 ) { 388 voters := len(r.Replicas().Voters()) 389 var liveVoters int 390 for _, rep := range r.Replicas().Voters() { 391 if v.nodeChecker(rep.NodeID) { 392 liveVoters++ 393 } 394 } 395 396 // TODO(andrei): This unavailability determination is naive. We need to take 397 // into account two different quorums when the range is in the joint-consensus 398 // state. See #43836. 399 unavailable := liveVoters < (voters/2 + 1) 400 // TODO(andrei): In the joint-consensus state, this under-replication also 401 // needs to consider the number of live replicas in each quorum. 
For example, 402 // with 2 VoterFulls, 1 VoterOutgoing, 1 VoterIncoming, if the outgoing voter 403 // is on a dead node, the range should be considered under-replicated. 404 underReplicated := replicationFactor > liveVoters 405 overReplicated := replicationFactor < voters 406 // Note that a range can be under-replicated and over-replicated at the same 407 // time if it has many replicas, but sufficiently many of them are on dead 408 // nodes. 409 410 v.report.CountRange(key, unavailable, underReplicated, overReplicated) 411 } 412 413 // zoneChangesReplication determines whether a given zone config changes 414 // replication attributes: the replication factor or the replication 415 // constraints. 416 // This is used to determine which zone's report a range counts towards for the 417 // replication_stats and the critical_localities reports : it'll count towards 418 // the lowest ancestor for which this method returns true. 419 func zoneChangesReplication(zone *zonepb.ZoneConfig) bool { 420 return (zone.NumReplicas != nil && *zone.NumReplicas != 0) || 421 zone.Constraints != nil 422 }