github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/reports/critical_localities_report.go

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package reports
    12  
    13  import (
    14  	"context"
    15  	"time"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/base"
    18  	"github.com/cockroachdb/cockroach/pkg/config"
    19  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    20  	"github.com/cockroachdb/cockroach/pkg/kv"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
    24  	"github.com/cockroachdb/errors"
    25  )
    26  
    27  // criticalLocalitiesReportID is the id of the row in the system.reports_meta
    28  // table corresponding to the critical localities report (i.e. the
    29  // system.replication_critical_localities table).
    30  const criticalLocalitiesReportID reportID = 2
    31  
    32  type localityKey struct {
    33  	ZoneKey
    34  	locality LocalityRepr
    35  }
    36  
    37  // LocalityRepr is a representation of a locality.
    38  type LocalityRepr string
    39  
    40  type localityStatus struct {
    41  	atRiskRanges int32
    42  }
    43  
    44  // LocalityReport stores the range status information for each locality and
    45  // applicable zone.
    46  type LocalityReport map[localityKey]localityStatus
    47  
    48  // replicationCriticalLocalitiesReportSaver deals with saving a LocalityReport
    49  // to the database. It is meant to be used to save new versions of the report
    50  // over and over. It maintains the previously-saved version of the report in
    51  // order to speed up the saving of the next one.
    52  type replicationCriticalLocalitiesReportSaver struct {
    53  	previousVersion     LocalityReport
    54  	lastGenerated       time.Time
    55  	lastUpdatedRowCount int
    56  }
    57  
    58  // makeReplicationCriticalLocalitiesReportSaver creates a new report saver.
    59  func makeReplicationCriticalLocalitiesReportSaver() replicationCriticalLocalitiesReportSaver {
    60  	return replicationCriticalLocalitiesReportSaver{}
    61  }
    62  
    63  // LastUpdatedRowCount is the count of the rows that were touched during the last save.
    64  func (r *replicationCriticalLocalitiesReportSaver) LastUpdatedRowCount() int {
    65  	return r.lastUpdatedRowCount
    66  }
    67  
    68  // CountRangeAtRisk increments the number of ranges at-risk for the report entry
    69  // corresponding to the given zone and locality. In other words, the report will
    70  // count the respective locality as critical for one more range in the given
    71  // zone.
    72  func (r LocalityReport) CountRangeAtRisk(zKey ZoneKey, loc LocalityRepr) {
    73  	lKey := localityKey{
    74  		ZoneKey:  zKey,
    75  		locality: loc,
    76  	}
    77  	if _, ok := r[lKey]; !ok {
    78  		r[lKey] = localityStatus{}
    79  	}
    80  	lStat := r[lKey]
    81  	lStat.atRiskRanges++
    82  	r[lKey] = lStat
    83  }
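
// A minimal sketch of how a LocalityReport accrues counts; the zone ID and
// locality string below are hypothetical, and in practice the report is
// populated by the criticalLocalitiesVisitor defined later in this file:
//
//	report := make(LocalityReport)
//	key := ZoneKey{ZoneID: 52, SubzoneID: base.SubzoneID(0)}
//	// Two ranges in zone 52 depend on "region=us-east" for quorum.
//	report.CountRangeAtRisk(key, "region=us-east")
//	report.CountRangeAtRisk(key, "region=us-east")
//	// report[localityKey{ZoneKey: key, locality: "region=us-east"}].atRiskRanges == 2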
    84  
    85  func (r *replicationCriticalLocalitiesReportSaver) loadPreviousVersion(
    86  	ctx context.Context, ex sqlutil.InternalExecutor, txn *kv.Txn,
    87  ) error {
    88  	// The data from the previous save needs to be loaded if:
    89  	// - this is the first time this method is called and lastGenerated has never been set;
    90  	// - lastGenerated is set but differs from the timestamp in reports_meta, which
    91  	//   indicates that some other worker wrote the report after we did.
    92  	if !r.lastGenerated.IsZero() {
    93  		generated, err := getReportGenerationTime(ctx, criticalLocalitiesReportID, ex, txn)
    94  		if err != nil {
    95  			return err
    96  		}
    97  		// If the report row is missing, generated will be the zero value; it won't
    98  		// match lastGenerated and we'll fall through to the reload below.
    99  		if generated == r.lastGenerated {
   100  			// We have the latest report; reload not needed.
   101  			return nil
   102  		}
   103  	}
   104  	const prevViolations = "select zone_id, subzone_id, locality, at_risk_ranges " +
   105  		"from system.replication_critical_localities"
   106  	rows, err := ex.Query(
   107  		ctx, "get-previous-replication-critical-localities", txn, prevViolations,
   108  	)
   109  	if err != nil {
   110  		return err
   111  	}
   112  
   113  	r.previousVersion = make(LocalityReport, len(rows))
   114  	for _, row := range rows {
   115  		key := localityKey{}
   116  		key.ZoneID = (uint32)(*row[0].(*tree.DInt))
   117  		key.SubzoneID = base.SubzoneID(*row[1].(*tree.DInt))
   118  		key.locality = (LocalityRepr)(*row[2].(*tree.DString))
   119  		r.previousVersion[key] = localityStatus{(int32)(*row[3].(*tree.DInt))}
   120  	}
   121  
   122  	return nil
   123  }
   124  
   125  func (r *replicationCriticalLocalitiesReportSaver) updateTimestamp(
   126  	ctx context.Context, ex sqlutil.InternalExecutor, txn *kv.Txn, reportTS time.Time,
   127  ) error {
   128  	if !r.lastGenerated.IsZero() && reportTS == r.lastGenerated {
   129  		return errors.Errorf(
   130  			"the new time %s is the same as the time of the last update %s",
   131  			reportTS.String(),
   132  			r.lastGenerated.String(),
   133  		)
   134  	}
   135  
   136  	_, err := ex.Exec(
   137  		ctx,
   138  		"timestamp-upsert-replication-critical-localities",
   139  		txn,
   140  		"upsert into system.reports_meta(id, generated) values($1, $2)",
   141  		criticalLocalitiesReportID,
   142  		reportTS,
   143  	)
   144  	return err
   145  }
   146  
   147  // Save saves the report to the database.
   148  //
   149  // report should not be used by the caller any more after this call; the callee
   150  // takes ownership.
   151  // reportTS is the time that will be set in the updated_at column for every row.
   152  func (r *replicationCriticalLocalitiesReportSaver) Save(
   153  	ctx context.Context,
   154  	report LocalityReport,
   155  	reportTS time.Time,
   156  	db *kv.DB,
   157  	ex sqlutil.InternalExecutor,
   158  ) error {
   159  	r.lastUpdatedRowCount = 0
   160  	if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
   161  		err := r.loadPreviousVersion(ctx, ex, txn)
   162  		if err != nil {
   163  			return err
   164  		}
   165  
   166  		err = r.updateTimestamp(ctx, ex, txn, reportTS)
   167  		if err != nil {
   168  			return err
   169  		}
   170  
   171  		for key, status := range report {
   172  			if err := r.upsertLocality(
   173  				ctx, reportTS, txn, key, status, db, ex,
   174  			); err != nil {
   175  				return err
   176  			}
   177  		}
   178  
   179  		for key := range r.previousVersion {
   180  			if _, ok := report[key]; !ok {
   181  				_, err := ex.Exec(
   182  					ctx,
   183  					"delete-old-replication-critical-localities",
   184  					txn,
   185  					"delete from system.replication_critical_localities "+
   186  						"where zone_id = $1 and subzone_id = $2 and locality = $3",
   187  					key.ZoneID,
   188  					key.SubzoneID,
   189  					key.locality,
   190  				)
   191  
   192  				if err != nil {
   193  					return err
   194  				}
   195  				r.lastUpdatedRowCount++
   196  			}
   197  		}
   198  
   199  		return nil
   200  	}); err != nil {
   201  		return err
   202  	}
   203  
   204  	r.lastGenerated = reportTS
   205  	r.previousVersion = report
   206  
   207  	return nil
   208  }
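
// A rough sketch of the intended usage, assuming the caller already holds a
// *kv.DB and an sqlutil.InternalExecutor (db and executor below), plus a
// hypothetical buildReport helper that produces a fresh LocalityReport:
//
//	saver := makeReplicationCriticalLocalitiesReportSaver()
//	for {
//		report := buildReport()
//		if err := saver.Save(ctx, report, time.Now(), db, executor); err != nil {
//			return err
//		}
//		// Only the rows that differed from the previously-saved version were written.
//		_ = saver.LastUpdatedRowCount()
//		// ... wait for the next reporting interval ...
//	}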
   209  
   210  // upsertLocality upserts a row into system.replication_critical_localities.
   211  //
   212  // r.previousVersion is used to decide whether this is a new or changed violation.
   213  func (r *replicationCriticalLocalitiesReportSaver) upsertLocality(
   214  	ctx context.Context,
   215  	reportTS time.Time,
   216  	txn *kv.Txn,
   217  	key localityKey,
   218  	status localityStatus,
   219  	db *kv.DB,
   220  	ex sqlutil.InternalExecutor,
   221  ) error {
   222  	var err error
   223  	previousStatus, hasOldVersion := r.previousVersion[key]
   224  	if hasOldVersion && previousStatus.atRiskRanges == status.atRiskRanges {
   225  		// No change in the status so no update.
   226  		return nil
   227  	}
   228  
   229  	// Upsert the row: insert a new one or update the existing one.
   230  	_, err = ex.Exec(
   231  		ctx, "upsert-replication-critical-localities", txn,
   232  		"upsert into system.replication_critical_localities(report_id, zone_id, subzone_id, "+
   233  			"locality, at_risk_ranges) values($1, $2, $3, $4, $5)",
   234  		criticalLocalitiesReportID,
   235  		key.ZoneID, key.SubzoneID, key.locality, status.atRiskRanges,
   236  	)
   237  
   238  	if err != nil {
   239  		return err
   240  	}
   241  
   242  	r.lastUpdatedRowCount++
   243  	return nil
   244  }
   245  
   246  // criticalLocalitiesVisitor is a visitor that, when passed to visitRanges(), builds
   247  // a LocalityReport.
   248  type criticalLocalitiesVisitor struct {
   249  	allLocalities map[roachpb.NodeID]map[string]roachpb.Locality
   250  	cfg           *config.SystemConfig
   251  	storeResolver StoreResolver
   252  	nodeChecker   nodeChecker
   253  
   254  	// report is the output of the visitor. visit*() methods populate it.
   255  	// After visiting all the ranges, it can be retrieved with Report().
   256  	report   LocalityReport
   257  	visitErr bool
   258  
   259  	// prevZoneKey maintains state from one range to the next. This state can be
   260  	// reused when a range is covered by the same zone config as the previous one.
   261  	// Reusing it speeds up the report generation.
   262  	prevZoneKey ZoneKey
   263  }
   264  
   265  var _ rangeVisitor = &criticalLocalitiesVisitor{}
   266  
   267  func makeCriticalLocalitiesVisitor(
   268  	ctx context.Context,
   269  	nodeLocalities map[roachpb.NodeID]roachpb.Locality,
   270  	cfg *config.SystemConfig,
   271  	storeResolver StoreResolver,
   272  	nodeChecker nodeChecker,
   273  ) criticalLocalitiesVisitor {
   274  	allLocalities := expandLocalities(nodeLocalities)
   275  	v := criticalLocalitiesVisitor{
   276  		allLocalities: allLocalities,
   277  		cfg:           cfg,
   278  		storeResolver: storeResolver,
   279  		nodeChecker:   nodeChecker,
   280  	}
   281  	v.reset(ctx)
   282  	return v
   283  }
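
// Roughly how this visitor gets driven; in the actual code visitRanges()
// (defined elsewhere in this package) performs the iteration, and the range
// descriptors below are hypothetical:
//
//	v := makeCriticalLocalitiesVisitor(ctx, nodeLocalities, cfg, storeResolver, nodeChecker)
//	if err := v.visitNewZone(ctx, &desc1); err != nil {
//		return err
//	}
//	// desc2 is covered by the same zone config, so the cached prevZoneKey is reused.
//	v.visitSameZone(ctx, &desc2)
//	criticalLocalities := v.Report()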
   284  
   285  // expandLocalities expands each locality in its input into multiple localities,
   286  // each at a different level of granularity. For example, the locality
   287  // "region=r1,dc=dc1,az=az1" is expanded into ["region=r1", "region=r1,dc=dc1",
   288  // "region=r1,dc=dc1,az=az1"].
   289  // The localities are returned in a format convenient for the
   290  // criticalLocalitiesVisitor.
   291  func expandLocalities(
   292  	nodeLocalities map[roachpb.NodeID]roachpb.Locality,
   293  ) map[roachpb.NodeID]map[string]roachpb.Locality {
   294  	res := make(map[roachpb.NodeID]map[string]roachpb.Locality)
   295  	for nid, loc := range nodeLocalities {
   296  		if len(loc.Tiers) == 0 {
   297  			res[nid] = nil
   298  			continue
   299  		}
   300  		res[nid] = make(map[string]roachpb.Locality, len(loc.Tiers))
   301  		for i := range loc.Tiers {
   302  			partialLoc := roachpb.Locality{Tiers: make([]roachpb.Tier, i+1)}
   303  			copy(partialLoc.Tiers, loc.Tiers[:i+1])
   304  			res[nid][partialLoc.String()] = partialLoc
   305  		}
   306  	}
   307  	return res
   308  }
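
// A concrete (hypothetical) input/output pair for expandLocalities, showing the
// per-prefix map keyed by each prefix's string form:
//
//	in := map[roachpb.NodeID]roachpb.Locality{
//		1: {Tiers: []roachpb.Tier{{Key: "region", Value: "r1"}, {Key: "dc", Value: "dc1"}}},
//	}
//	out := expandLocalities(in)
//	// out[1] now has two entries:
//	//   "region=r1"        -> the locality region=r1
//	//   "region=r1,dc=dc1" -> the locality region=r1,dc=dc1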
   309  
   310  // failed is part of the rangeVisitor interface.
   311  func (v *criticalLocalitiesVisitor) failed() bool {
   312  	return v.visitErr
   313  }
   314  
   315  // Report returns the LocalityReport that was populated by previous visit*()
   316  // calls.
   317  func (v *criticalLocalitiesVisitor) Report() LocalityReport {
   318  	return v.report
   319  }
   320  
   321  // reset is part of the rangeVisitor interface.
   322  func (v *criticalLocalitiesVisitor) reset(ctx context.Context) {
   323  	*v = criticalLocalitiesVisitor{
   324  		allLocalities: v.allLocalities,
   325  		cfg:           v.cfg,
   326  		storeResolver: v.storeResolver,
   327  		nodeChecker:   v.nodeChecker,
   328  		report:        make(LocalityReport, len(v.report)),
   329  	}
   330  }
   331  
   332  // visitNewZone is part of the rangeVisitor interface.
   333  func (v *criticalLocalitiesVisitor) visitNewZone(
   334  	ctx context.Context, r *roachpb.RangeDescriptor,
   335  ) (retErr error) {
   336  
   337  	defer func() {
   338  		v.visitErr = retErr != nil
   339  	}()
   340  
   341  	// Get the zone.
   342  	var zKey ZoneKey
   343  	found, err := visitZones(ctx, r, v.cfg, ignoreSubzonePlaceholders,
   344  		func(_ context.Context, zone *zonepb.ZoneConfig, key ZoneKey) bool {
   345  			if !zoneChangesReplication(zone) {
   346  				return false
   347  			}
   348  			zKey = key
   349  			return true
   350  		})
   351  	if err != nil {
   352  		return errors.AssertionFailedf("unexpected error visiting zones: %s", err)
   353  	}
   354  	if !found {
   355  		return errors.AssertionFailedf("no suitable zone config found for range: %s", r)
   356  	}
   357  	v.prevZoneKey = zKey
   358  
   359  	v.countRange(ctx, zKey, r)
   360  	return nil
   361  }
   362  
   363  // visitSameZone is part of the rangeVisitor interface.
   364  func (v *criticalLocalitiesVisitor) visitSameZone(ctx context.Context, r *roachpb.RangeDescriptor) {
   365  	v.countRange(ctx, v.prevZoneKey, r)
   366  }
   367  
   368  func (v *criticalLocalitiesVisitor) countRange(
   369  	ctx context.Context, zoneKey ZoneKey, r *roachpb.RangeDescriptor,
   370  ) {
   371  	stores := v.storeResolver(r)
   372  
   373  	// Collect all the localities of all the replicas. Note that we collect
   374  	// "expanded" localities: if a replica has a multi-tier locality like
   375  	// "region=us-east,dc=new-york", we collect both "region=us-east" and
   376  	// "region=us-east,dc=new-york".
   377  	dedupLocal := make(map[string]roachpb.Locality)
   378  	for _, rep := range r.Replicas().All() {
   379  		for s, loc := range v.allLocalities[rep.NodeID] {
   380  			if _, ok := dedupLocal[s]; ok {
   381  				continue
   382  			}
   383  			dedupLocal[s] = loc
   384  		}
   385  	}
   386  
   387  	// Any of the localities of any of the nodes could be critical. We'll check
   388  	// them one by one.
   389  	for _, loc := range dedupLocal {
   390  		processLocalityForRange(ctx, r, zoneKey, v.report, loc, v.nodeChecker, stores)
   391  	}
   392  }
   393  
   394  // processLocalityForRange checks whether the given locality is critical for a
   395  // range with replicas on each of the given stores and, if so, records that in rep.
   396  func processLocalityForRange(
   397  	ctx context.Context,
   398  	r *roachpb.RangeDescriptor,
   399  	zoneKey ZoneKey,
   400  	rep LocalityReport,
   401  	loc roachpb.Locality,
   402  	nodeChecker nodeChecker,
   403  	storeDescs []roachpb.StoreDescriptor,
   404  ) {
   405  	// Compute the required quorum and the number of live nodes. If the number of
   406  	// live nodes gets lower than the required quorum then the range is already
   407  	// unavailable.
   408  	quorumCount := len(r.Replicas().Voters())/2 + 1
   409  	liveNodeCount := len(storeDescs)
   410  	for _, storeDesc := range storeDescs {
   411  		isStoreLive := nodeChecker(storeDesc.Node.NodeID)
   412  		if !isStoreLive {
   413  			if liveNodeCount >= quorumCount {
   414  				liveNodeCount--
   415  				if liveNodeCount < quorumCount {
   416  					break
   417  				}
   418  			}
   419  		}
   420  	}
   421  
   422  	localityToConstraints := func(loc roachpb.Locality) zonepb.ConstraintsConjunction {
   423  		c := zonepb.ConstraintsConjunction{
   424  			Constraints: make([]zonepb.Constraint, 0, len(loc.Tiers)),
   425  		}
   426  		for _, tier := range loc.Tiers {
   427  			c.Constraints = append(c.Constraints, zonepb.Constraint{
   428  				Type: zonepb.Constraint_REQUIRED, Key: tier.Key, Value: tier.Value,
   429  			})
   430  		}
   431  		return c
   432  	}
   433  
   434  	locStr := LocalityRepr(loc.String())
   435  	c := localityToConstraints(loc)
   436  	passCount := 0
   437  	for _, storeDesc := range storeDescs {
   438  		storeHasConstraint := true
   439  		for _, constraint := range c.Constraints {
   440  			// A store must match every tier of the locality; liveness is checked below.
   441  			if !zonepb.StoreMatchesConstraint(storeDesc, constraint) {
   442  				storeHasConstraint = false
   443  				break
   444  			}
   445  		}
   446  
   447  		if storeHasConstraint && nodeChecker(storeDesc.Node.NodeID) {
   448  			passCount++
   449  		}
   450  	}
   451  
   452  	// If the live nodes outside of the given locality are not enough to
   453  	// form quorum then this locality is critical.
   454  	if quorumCount > liveNodeCount-passCount {
   455  		rep.CountRangeAtRisk(zoneKey, locStr)
   456  	}
   457  }
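
// A worked example of the check above, with assumed numbers: a range with 5
// voting replicas has quorumCount = 5/2 + 1 = 3. If one of its stores sits on a
// dead node, liveNodeCount drops to 4. If the locality "region=us-east" matches
// 2 of the live stores, passCount = 2, leaving liveNodeCount - passCount = 2
// live nodes outside the locality, which is less than quorumCount, so
// "region=us-east" is counted as critical for the range.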