// Source: github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/reports/constraint_stats_report.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package reports

import (
	"context"
	"fmt"
	"strings"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/errors"
)

// replicationConstraintsReportID is the id of the row in the
// system.reports_meta table corresponding to the constraints conformance
// report (i.e. the system.replication_constraint_stats table).
const replicationConstraintsReportID reportID = 1

// ConstraintReport contains information about the constraint conformance for
// the cluster's data. It maps each (zone, violation type, constraint) key to
// the status recorded for it.
type ConstraintReport map[ConstraintStatusKey]ConstraintStatus

// replicationConstraintStatsReportSaver deals with saving a ConstraintReport to
// the database. The idea is for it to be used to save new versions of the
// report over and over. It maintains the previously-saved version of the
// report in order to speed up the saving of the next one.
43 type replicationConstraintStatsReportSaver struct { 44 previousVersion ConstraintReport 45 lastGenerated time.Time 46 lastUpdatedRowCount int 47 } 48 49 // makeReplicationConstraintStatusReportSaver creates a new report saver. 50 func makeReplicationConstraintStatusReportSaver() replicationConstraintStatsReportSaver { 51 return replicationConstraintStatsReportSaver{} 52 } 53 54 // LastUpdatedRowCount is the count of the rows that were touched during the last save. 55 func (r *replicationConstraintStatsReportSaver) LastUpdatedRowCount() int { 56 return r.lastUpdatedRowCount 57 } 58 59 // ConstraintStatus is the leaf in the constraintReport. 60 type ConstraintStatus struct { 61 FailRangeCount int 62 } 63 64 // ConstraintType indicates what type of constraint is an entry in the 65 // constraint conformance report talking about. 66 type ConstraintType string 67 68 const ( 69 // Constraint means that the entry refers to a constraint (i.e. a member of 70 // the constraints field in a zone config). 71 Constraint ConstraintType = "constraint" 72 // TODO(andrei): add leaseholder preference 73 ) 74 75 // Less compares two ConstraintTypes. 76 func (t ConstraintType) Less(other ConstraintType) bool { 77 return -1 == strings.Compare(string(t), string(other)) 78 } 79 80 // ConstraintRepr is a string representation of a constraint. 81 type ConstraintRepr string 82 83 // Less compares two ConstraintReprs. 84 func (c ConstraintRepr) Less(other ConstraintRepr) bool { 85 return -1 == strings.Compare(string(c), string(other)) 86 } 87 88 // ConstraintStatusKey represents the key in the ConstraintReport. 89 type ConstraintStatusKey struct { 90 ZoneKey 91 ViolationType ConstraintType 92 Constraint ConstraintRepr 93 } 94 95 func (k ConstraintStatusKey) String() string { 96 return fmt.Sprintf("zone:%s type:%s constraint:%s", k.ZoneKey, k.ViolationType, k.Constraint) 97 } 98 99 // Less compares two ConstraintStatusKeys. 
100 func (k ConstraintStatusKey) Less(other ConstraintStatusKey) bool { 101 if k.ZoneKey.Less(other.ZoneKey) { 102 return true 103 } 104 if other.ZoneKey.Less(k.ZoneKey) { 105 return false 106 } 107 if k.ViolationType.Less(other.ViolationType) { 108 return true 109 } 110 if other.ViolationType.Less(k.ViolationType) { 111 return true 112 } 113 return k.Constraint.Less(other.Constraint) 114 } 115 116 // AddViolation add a constraint that is being violated for a given range. Each call 117 // will increase the number of ranges that failed. 118 func (r ConstraintReport) AddViolation(z ZoneKey, t ConstraintType, c ConstraintRepr) { 119 k := ConstraintStatusKey{ 120 ZoneKey: z, 121 ViolationType: t, 122 Constraint: c, 123 } 124 if _, ok := r[k]; !ok { 125 r[k] = ConstraintStatus{} 126 } 127 cRep := r[k] 128 cRep.FailRangeCount++ 129 r[k] = cRep 130 } 131 132 // ensureEntry us used to add an entry to the report even if there is no violation. 133 func (r ConstraintReport) ensureEntry(z ZoneKey, t ConstraintType, c ConstraintRepr) { 134 k := ConstraintStatusKey{ 135 ZoneKey: z, 136 ViolationType: t, 137 Constraint: c, 138 } 139 if _, ok := r[k]; !ok { 140 r[k] = ConstraintStatus{} 141 } 142 } 143 144 func (r ConstraintReport) ensureEntries(key ZoneKey, zone *zonepb.ZoneConfig) { 145 for _, conjunction := range zone.Constraints { 146 r.ensureEntry(key, Constraint, ConstraintRepr(conjunction.String())) 147 } 148 for i, sz := range zone.Subzones { 149 szKey := ZoneKey{ZoneID: key.ZoneID, SubzoneID: base.SubzoneIDFromIndex(i)} 150 r.ensureEntries(szKey, &sz.Config) 151 } 152 } 153 154 func (r *replicationConstraintStatsReportSaver) loadPreviousVersion( 155 ctx context.Context, ex sqlutil.InternalExecutor, txn *kv.Txn, 156 ) error { 157 // The data for the previous save needs to be loaded if: 158 // - this is the first time that we call this method and lastUpdatedAt has never been set 159 // - in case that the lastUpdatedAt is set but is different than the timestamp in 
reports_meta 160 // this indicates that some other worker wrote after we did the write. 161 if !r.lastGenerated.IsZero() { 162 generated, err := getReportGenerationTime(ctx, replicationConstraintsReportID, ex, txn) 163 if err != nil { 164 return err 165 } 166 // If the report is missing, this is the first time we are running and the 167 // reload is needed. In that case, generated will be the zero value. 168 if generated == r.lastGenerated { 169 // We have the latest report; reload not needed. 170 return nil 171 } 172 } 173 const prevViolations = "select zone_id, subzone_id, type, config, " + 174 "violating_ranges from system.replication_constraint_stats" 175 rows, err := ex.Query( 176 ctx, "get-previous-replication-constraint-stats", txn, prevViolations, 177 ) 178 if err != nil { 179 return err 180 } 181 182 r.previousVersion = make(ConstraintReport, len(rows)) 183 for _, row := range rows { 184 key := ConstraintStatusKey{} 185 key.ZoneID = (uint32)(*row[0].(*tree.DInt)) 186 key.SubzoneID = base.SubzoneID((*row[1].(*tree.DInt))) 187 key.ViolationType = (ConstraintType)(*row[2].(*tree.DString)) 188 key.Constraint = (ConstraintRepr)(*row[3].(*tree.DString)) 189 r.previousVersion[key] = ConstraintStatus{(int)(*row[4].(*tree.DInt))} 190 } 191 192 return nil 193 } 194 195 func (r *replicationConstraintStatsReportSaver) updateTimestamp( 196 ctx context.Context, ex sqlutil.InternalExecutor, txn *kv.Txn, reportTS time.Time, 197 ) error { 198 if !r.lastGenerated.IsZero() && reportTS == r.lastGenerated { 199 return errors.Errorf( 200 "The new time %s is the same as the time of the last update %s", 201 reportTS.String(), 202 r.lastGenerated.String(), 203 ) 204 } 205 206 _, err := ex.Exec( 207 ctx, 208 "timestamp-upsert-replication-constraint-stats", 209 txn, 210 "upsert into system.reports_meta(id, generated) values($1, $2)", 211 replicationConstraintsReportID, 212 reportTS, 213 ) 214 return err 215 } 216 217 // Save the report in the database. 
218 // 219 // report should not be used by the caller any more after this call; the callee 220 // takes ownership. 221 // reportTS is the time that will be set in the updated_at column for every row. 222 func (r *replicationConstraintStatsReportSaver) Save( 223 ctx context.Context, 224 report ConstraintReport, 225 reportTS time.Time, 226 db *kv.DB, 227 ex sqlutil.InternalExecutor, 228 ) error { 229 r.lastUpdatedRowCount = 0 230 if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 231 err := r.loadPreviousVersion(ctx, ex, txn) 232 if err != nil { 233 return err 234 } 235 236 err = r.updateTimestamp(ctx, ex, txn, reportTS) 237 if err != nil { 238 return err 239 } 240 241 for k, zoneCons := range report { 242 if err := r.upsertConstraintStatus( 243 ctx, reportTS, txn, k, zoneCons.FailRangeCount, db, ex, 244 ); err != nil { 245 return err 246 } 247 } 248 249 for key := range r.previousVersion { 250 if _, ok := report[key]; !ok { 251 _, err := ex.Exec( 252 ctx, 253 "delete-old-replication-constraint-stats", 254 txn, 255 "delete from system.replication_constraint_stats "+ 256 "where zone_id = $1 and subzone_id = $2 and type = $3 and config = $4", 257 key.ZoneID, 258 key.SubzoneID, 259 key.ViolationType, 260 key.Constraint, 261 ) 262 263 if err != nil { 264 return err 265 } 266 r.lastUpdatedRowCount++ 267 } 268 } 269 270 return nil 271 }); err != nil { 272 return err 273 } 274 275 r.lastGenerated = reportTS 276 r.previousVersion = report 277 278 return nil 279 } 280 281 // upsertConstraintStatus upserts a row into system.replication_constraint_stats. 282 // 283 // existing is used to decide is this is a new violation. 
284 func (r *replicationConstraintStatsReportSaver) upsertConstraintStatus( 285 ctx context.Context, 286 reportTS time.Time, 287 txn *kv.Txn, 288 key ConstraintStatusKey, 289 violationCount int, 290 db *kv.DB, 291 ex sqlutil.InternalExecutor, 292 ) error { 293 var err error 294 previousStatus, hasOldVersion := r.previousVersion[key] 295 if hasOldVersion && previousStatus.FailRangeCount == violationCount { 296 // No change in the status so no update. 297 return nil 298 } else if violationCount != 0 { 299 if previousStatus.FailRangeCount != 0 { 300 // Updating an old violation. No need to update the start timestamp. 301 _, err = ex.Exec( 302 ctx, "upsert-replication-constraint-stat", txn, 303 "upsert into system.replication_constraint_stats(report_id, zone_id, subzone_id, type, "+ 304 "config, violating_ranges) values($1, $2, $3, $4, $5, $6)", 305 replicationConstraintsReportID, 306 key.ZoneID, key.SubzoneID, key.ViolationType, key.Constraint, violationCount, 307 ) 308 } else if previousStatus.FailRangeCount == 0 { 309 // New violation detected. Need to update the start timestamp. 310 _, err = ex.Exec( 311 ctx, "upsert-replication-constraint-stat", txn, 312 "upsert into system.replication_constraint_stats(report_id, zone_id, subzone_id, type, "+ 313 "config, violating_ranges, violation_start) values($1, $2, $3, $4, $5, $6, $7)", 314 replicationConstraintsReportID, 315 key.ZoneID, key.SubzoneID, key.ViolationType, key.Constraint, violationCount, reportTS, 316 ) 317 } 318 } else { 319 // Need to set the violation start to null as there was an violation that doesn't exist anymore. 
320 _, err = ex.Exec( 321 ctx, "upsert-replication-constraint-stat", txn, 322 "upsert into system.replication_constraint_stats(report_id, zone_id, subzone_id, type, config, "+ 323 "violating_ranges, violation_start) values($1, $2, $3, $4, $5, $6, null)", 324 replicationConstraintsReportID, 325 key.ZoneID, key.SubzoneID, key.ViolationType, key.Constraint, violationCount, 326 ) 327 } 328 329 if err != nil { 330 return err 331 } 332 333 r.lastUpdatedRowCount++ 334 return nil 335 } 336 337 // constraintConformanceVisitor is a visitor that, when passed to visitRanges(), 338 // computes the constraint conformance report (i.e. the 339 // system.replication_constraint_stats table). 340 type constraintConformanceVisitor struct { 341 cfg *config.SystemConfig 342 storeResolver StoreResolver 343 344 // report is the output of the visitor. visit*() methods populate it. 345 // After visiting all the ranges, it can be retrieved with Report(). 346 report ConstraintReport 347 visitErr bool 348 349 // prevZoneKey and prevConstraints maintain state from one range to the next. 350 // This state can be reused when a range is covered by the same zone config as 351 // the previous one. Reusing it speeds up the report generation. 352 prevZoneKey ZoneKey 353 prevConstraints []zonepb.ConstraintsConjunction 354 } 355 356 var _ rangeVisitor = &constraintConformanceVisitor{} 357 358 func makeConstraintConformanceVisitor( 359 ctx context.Context, cfg *config.SystemConfig, storeResolver StoreResolver, 360 ) constraintConformanceVisitor { 361 v := constraintConformanceVisitor{ 362 cfg: cfg, 363 storeResolver: storeResolver, 364 } 365 v.reset(ctx) 366 return v 367 } 368 369 // failed is part of the rangeVisitor interface. 370 func (v *constraintConformanceVisitor) failed() bool { 371 return v.visitErr 372 } 373 374 // Report returns the ConstraintReport that was populated by previous visit*() 375 // calls. 
376 func (v *constraintConformanceVisitor) Report() ConstraintReport { 377 return v.report 378 } 379 380 // reset is part of the rangeVisitor interface. 381 func (v *constraintConformanceVisitor) reset(ctx context.Context) { 382 *v = constraintConformanceVisitor{ 383 cfg: v.cfg, 384 storeResolver: v.storeResolver, 385 report: make(ConstraintReport, len(v.report)), 386 } 387 388 // Iterate through all the zone configs to create report entries for all the 389 // zones that have constraints. Otherwise, just iterating through the ranges 390 // wouldn't create entries for constraints that aren't violated, and 391 // definitely not for zones that don't apply to any ranges. 392 maxObjectID, err := v.cfg.GetLargestObjectID(0 /* maxID - return the largest ID in the config */) 393 if err != nil { 394 log.Fatalf(ctx, "unexpected failure to compute max object id: %s", err) 395 } 396 for i := uint32(1); i <= maxObjectID; i++ { 397 zone, err := getZoneByID(i, v.cfg) 398 if err != nil { 399 log.Fatalf(ctx, "unexpected failure to compute max object id: %s", err) 400 } 401 if zone == nil { 402 continue 403 } 404 v.report.ensureEntries(MakeZoneKey(i, NoSubzone), zone) 405 } 406 } 407 408 // visitNewZone is part of the rangeVisitor interface. 409 func (v *constraintConformanceVisitor) visitNewZone( 410 ctx context.Context, r *roachpb.RangeDescriptor, 411 ) (retErr error) { 412 413 defer func() { 414 v.visitErr = retErr != nil 415 }() 416 417 // Find the applicable constraints, which may be inherited. 
418 var constraints []zonepb.ConstraintsConjunction 419 var zKey ZoneKey 420 _, err := visitZones(ctx, r, v.cfg, ignoreSubzonePlaceholders, 421 func(_ context.Context, zone *zonepb.ZoneConfig, key ZoneKey) bool { 422 if zone.Constraints == nil { 423 return false 424 } 425 constraints = zone.Constraints 426 zKey = key 427 return true 428 }) 429 if err != nil { 430 return errors.Errorf("unexpected error visiting zones: %s", err) 431 } 432 v.prevZoneKey = zKey 433 v.prevConstraints = constraints 434 v.countRange(ctx, r, zKey, constraints) 435 return nil 436 } 437 438 // visitSameZone is part of the rangeVisitor interface. 439 func (v *constraintConformanceVisitor) visitSameZone( 440 ctx context.Context, r *roachpb.RangeDescriptor, 441 ) { 442 v.countRange(ctx, r, v.prevZoneKey, v.prevConstraints) 443 } 444 445 func (v *constraintConformanceVisitor) countRange( 446 ctx context.Context, 447 r *roachpb.RangeDescriptor, 448 key ZoneKey, 449 constraints []zonepb.ConstraintsConjunction, 450 ) { 451 storeDescs := v.storeResolver(r) 452 violated := getViolations(ctx, storeDescs, constraints) 453 for _, c := range violated { 454 v.report.AddViolation(key, Constraint, c) 455 } 456 } 457 458 // getViolations returns the list of constraints violated by a range. The range 459 // is represented by the descriptors of the replicas' stores. 460 func getViolations( 461 ctx context.Context, 462 storeDescs []roachpb.StoreDescriptor, 463 constraintConjunctions []zonepb.ConstraintsConjunction, 464 ) []ConstraintRepr { 465 var res []ConstraintRepr 466 // Evaluate all zone constraints for the stores (i.e. replicas) of the given range. 
467 for _, conjunction := range constraintConjunctions { 468 replicasRequiredToMatch := int(conjunction.NumReplicas) 469 if replicasRequiredToMatch == 0 { 470 replicasRequiredToMatch = len(storeDescs) 471 } 472 for _, c := range conjunction.Constraints { 473 if !constraintSatisfied(c, replicasRequiredToMatch, storeDescs) { 474 res = append(res, ConstraintRepr(conjunction.String())) 475 break 476 } 477 } 478 } 479 return res 480 } 481 482 // constraintSatisfied checks that a range (represented by its replicas' stores) 483 // satisfies a constraint. 484 func constraintSatisfied( 485 c zonepb.Constraint, replicasRequiredToMatch int, storeDescs []roachpb.StoreDescriptor, 486 ) bool { 487 passCount := 0 488 for _, storeDesc := range storeDescs { 489 // Consider stores for which we have no information to pass everything. 490 if storeDesc.StoreID == 0 { 491 passCount++ 492 continue 493 } 494 if zonepb.StoreSatisfiesConstraint(storeDesc, c) { 495 passCount++ 496 } 497 } 498 return replicasRequiredToMatch <= passCount 499 }