vitess.io/vitess@v0.16.2/go/vt/discovery/healthcheck.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package discovery provides a way to discover all tablets e.g. within a
    18  // specific shard and monitor their current health.
    19  //
    20  // Use the HealthCheck object to query for tablets and their health.
    21  //
    22  // For an example how to use the HealthCheck object, see vtgate/tabletgateway.go
    23  //
    24  // Tablets have to be manually added to the HealthCheck using AddTablet().
    25  // Alternatively, use a Watcher implementation which will constantly watch
    26  // a source (e.g. the topology) and add and remove tablets as they are
    27  // added or removed from the source.
    28  // For a Watcher example have a look at NewCellTabletsWatcher().
    29  //
    30  // Internally, the HealthCheck module is connected to each tablet and has a
    31  // streaming RPC (StreamHealth) open to receive periodic health infos.
    32  package discovery
    33  
    34  import (
    35  	"bytes"
    36  	"context"
    37  	"encoding/json"
    38  	"fmt"
    39  	"hash/crc32"
    40  	"html/template"
    41  	"net/http"
    42  	"sort"
    43  	"strings"
    44  	"sync"
    45  	"time"
    46  
    47  	"github.com/spf13/pflag"
    48  
    49  	"vitess.io/vitess/go/netutil"
    50  	"vitess.io/vitess/go/stats"
    51  	"vitess.io/vitess/go/vt/log"
    52  	"vitess.io/vitess/go/vt/proto/query"
    53  	"vitess.io/vitess/go/vt/proto/topodata"
    54  	"vitess.io/vitess/go/vt/proto/vtrpc"
    55  	"vitess.io/vitess/go/vt/servenv"
    56  	"vitess.io/vitess/go/vt/topo"
    57  	"vitess.io/vitess/go/vt/topo/topoproto"
    58  	"vitess.io/vitess/go/vt/vterrors"
    59  	"vitess.io/vitess/go/vt/vttablet/queryservice"
    60  )
    61  
var (
	// hcErrorCounters counts healthcheck errors, labelled by keyspace, shard
	// name and tablet type.
	hcErrorCounters = stats.NewCountersWithMultiLabels("HealthcheckErrors", "Healthcheck Errors", []string{"Keyspace", "ShardName", "TabletType"})

	// hcPrimaryPromotedCounters counts, per keyspace/shard, how often a tablet
	// was observed switching to the PRIMARY type (see updateHealth).
	hcPrimaryPromotedCounters = stats.NewCountersWithMultiLabels("HealthcheckPrimaryPromoted", "Primary promoted in keyspace/shard name because of health check errors", []string{"Keyspace", "ShardName"})
	// healthcheckOnce guards the one-time registration of the /debug/gateway
	// HTTP handler performed in NewHealthCheck.
	healthcheckOnce           sync.Once

	// TabletURLTemplateString is a flag to generate URLs for the tablets that vtgate discovers.
	TabletURLTemplateString = "http://{{.GetTabletHostPort}}"
	// tabletURLTemplate is the parsed form of TabletURLTemplateString; it is
	// (re)built by ParseTabletURLTemplateFromFlag.
	tabletURLTemplate       *template.Template

	// AllowedTabletTypes is the list of allowed tablet types. e.g. {PRIMARY, REPLICA}.
	AllowedTabletTypes []topodata.TabletType

	// KeyspacesToWatch - if provided this specifies which keyspaces should be
	// visible to the healthcheck. By default the healthcheck will watch all keyspaces.
	KeyspacesToWatch []string

	// tabletFilters are the keyspace|shard or keyrange filters to apply to the full set of tablets.
	// It may not be combined with KeyspacesToWatch (NewHealthCheck exits if both are set).
	tabletFilters []string

	// refreshInterval is the interval at which healthcheck refreshes its list of tablets from topo.
	refreshInterval = 1 * time.Minute

	// refreshKnownTablets tells us whether to process all tablets or only new tablets.
	refreshKnownTablets = true

	// topoReadConcurrency tells us how many topo reads are allowed in parallel.
	topoReadConcurrency = 32

	// waitAvailableTabletInterval is how long waitForTablets sleeps between
	// two consecutive checks for the requested tablets.
	waitAvailableTabletInterval = 100 * time.Millisecond
)
    94  
// See the documentation for NewHealthCheck below for an explanation of these parameters.
const (
	// DefaultHealthCheckRetryDelay is a 5 second retry delay.
	// NOTE(review): presumably the default for NewHealthCheck's retryDelay — confirm against callers.
	DefaultHealthCheckRetryDelay = 5 * time.Second
	// DefaultHealthCheckTimeout is a 1 minute health check timeout.
	// NOTE(review): presumably the default for NewHealthCheck's healthCheckTimeout — confirm against callers.
	DefaultHealthCheckTimeout    = 1 * time.Minute

	// DefaultTopoReadConcurrency is used as the default value for the topoReadConcurrency parameter of a TopologyWatcher.
	DefaultTopoReadConcurrency int = 5
	// DefaultTopologyWatcherRefreshInterval is used as the default value for
	// the refresh interval of a topology watcher.
	DefaultTopologyWatcherRefreshInterval = 1 * time.Minute
	// HealthCheckTemplate is the HTML code to display a TabletsCacheStatusList.
	// The template ranges over the list and renders one table row per entry
	// (cell, keyspace, shard, tablet type and the entry's StatusAsHTML).
	HealthCheckTemplate = `
<style>
  table {
    border-collapse: collapse;
  }
  td, th {
    border: 1px solid #999;
    padding: 0.2rem;
  }
</style>
<table>
  <tr>
    <th colspan="5">HealthCheck Tablet Cache</th>
  </tr>
  <tr>
    <th>Cell</th>
    <th>Keyspace</th>
    <th>Shard</th>
    <th>TabletType</th>
    <th>TabletHealth</th>
  </tr>
  {{range $i, $ts := .}}
  <tr>
    <td>{{github_com_vitessio_vitess_vtctld_srv_cell $ts.Cell}}</td>
    <td>{{github_com_vitessio_vitess_vtctld_srv_keyspace $ts.Cell $ts.Target.Keyspace}}</td>
    <td>{{$ts.Target.Shard}}</td>
    <td>{{$ts.Target.TabletType}}</td>
    <td>{{$ts.StatusAsHTML}}</td>
  </tr>
  {{end}}
</table>
`
)
   139  
   140  // ParseTabletURLTemplateFromFlag loads or reloads the URL template.
   141  func ParseTabletURLTemplateFromFlag() {
   142  	tabletURLTemplate = template.New("")
   143  	_, err := tabletURLTemplate.Parse(TabletURLTemplateString)
   144  	if err != nil {
   145  		log.Exitf("error parsing template: %v", err)
   146  	}
   147  }
   148  
   149  func init() {
   150  	for _, cmd := range []string{"vtgate", "vtcombo"} {
   151  		servenv.OnParseFor(cmd, registerDiscoveryFlags)
   152  		servenv.OnParseFor(cmd, registerWebUIFlags)
   153  	}
   154  
   155  	servenv.OnParseFor("vtctld", registerWebUIFlags)
   156  }
   157  
   158  func registerDiscoveryFlags(fs *pflag.FlagSet) {
   159  	fs.StringSliceVar(&tabletFilters, "tablet_filters", []string{}, "Specifies a comma-separated list of 'keyspace|shard_name or keyrange' values to filter the tablets to watch.")
   160  	fs.Var((*topoproto.TabletTypeListFlag)(&AllowedTabletTypes), "allowed_tablet_types", "Specifies the tablet types this vtgate is allowed to route queries to. Should be provided as a comma-separated set of tablet types.")
   161  	fs.StringSliceVar(&KeyspacesToWatch, "keyspaces_to_watch", []string{}, "Specifies which keyspaces this vtgate should have access to while routing queries or accessing the vschema.")
   162  }
   163  
   164  func registerWebUIFlags(fs *pflag.FlagSet) {
   165  	fs.StringVar(&TabletURLTemplateString, "tablet_url_template", "http://{{.GetTabletHostPort}}", "Format string describing debug tablet url formatting. See getTabletDebugURL() for how to customize this.")
   166  	fs.DurationVar(&refreshInterval, "tablet_refresh_interval", 1*time.Minute, "Tablet refresh interval.")
   167  	fs.BoolVar(&refreshKnownTablets, "tablet_refresh_known_tablets", true, "Whether to reload the tablet's address/port map from topo in case they change.")
   168  	fs.IntVar(&topoReadConcurrency, "topo_read_concurrency", 32, "Concurrency of topo reads.")
   169  	ParseTabletURLTemplateFromFlag()
   170  }
   171  
   172  // FilteringKeyspaces returns true if any keyspaces have been configured to be filtered.
   173  func FilteringKeyspaces() bool {
   174  	return len(KeyspacesToWatch) > 0
   175  }
   176  
// KeyspaceShardTabletType is a map key of the form
// "keyspace.shard.tabletType", as built by KeyFromTarget and KeyFromTablet.
type KeyspaceShardTabletType string

// tabletAliasString is the string form of a tablet alias (as produced by
// topoproto.TabletAliasString), used as a map key.
type tabletAliasString string
   179  
// HealthCheck declares what the TabletGateway needs from the HealthCheck.
type HealthCheck interface {
	// AddTablet adds the tablet.
	AddTablet(tablet *topodata.Tablet)

	// RemoveTablet removes the tablet.
	RemoveTablet(tablet *topodata.Tablet)

	// ReplaceTablet does a RemoveTablet and AddTablet in one call, effectively replacing the old tablet with the new.
	ReplaceTablet(old, new *topodata.Tablet)

	// CacheStatus returns a displayable version of the health check cache.
	CacheStatus() TabletsCacheStatusList

	// CacheStatusMap returns a map of the health check cache.
	CacheStatusMap() map[string]*TabletsCacheStatus

	// Close stops the healthcheck.
	Close() error

	// WaitForAllServingTablets waits for at least one healthy serving tablet in
	// each given target before returning.
	// It will return ctx.Err() if the context is canceled.
	// It will return an error if it can't read the necessary topology records.
	WaitForAllServingTablets(ctx context.Context, targets []*query.Target) error

	// TabletConnection returns the TabletConn of the given tablet.
	TabletConnection(alias *topodata.TabletAlias, target *query.Target) (queryservice.QueryService, error)

	// RegisterStats registers the connection counts stats.
	RegisterStats()

	// GetHealthyTabletStats returns only the healthy tablets.
	// The returned array is owned by the caller.
	// For TabletType_PRIMARY, this will only return at most one entry,
	// the most recent tablet of type primary.
	// This returns a copy of the data so that callers can access without
	// synchronization.
	GetHealthyTabletStats(target *query.Target) []*TabletHealth

	// GetTabletHealth returns the TabletHealth of the tablet that matches the given alias.
	GetTabletHealth(kst KeyspaceShardTabletType, alias *topodata.TabletAlias) (*TabletHealth, error)

	// GetTabletHealthByAlias returns the TabletHealth of the tablet that matches the given alias.
	GetTabletHealthByAlias(alias *topodata.TabletAlias) (*TabletHealth, error)

	// Subscribe adds a listener. Used by vtgate buffer to learn about primary changes.
	Subscribe() chan *TabletHealth

	// Unsubscribe removes a listener.
	Unsubscribe(c chan *TabletHealth)
}
   232  
// Compile-time check that *HealthCheckImpl satisfies the HealthCheck interface.
var _ HealthCheck = (*HealthCheckImpl)(nil)
   234  
   235  // Target includes cell which we ignore here
   236  // because tabletStatsCache is intended to be per-cell
   237  func KeyFromTarget(target *query.Target) KeyspaceShardTabletType {
   238  	return KeyspaceShardTabletType(fmt.Sprintf("%s.%s.%s", target.Keyspace, target.Shard, topoproto.TabletTypeLString(target.TabletType)))
   239  }
   240  
   241  // KeyFromTablet returns the KeyspaceShardTabletType that matches the given topodata.Tablet
   242  func KeyFromTablet(tablet *topodata.Tablet) KeyspaceShardTabletType {
   243  	return KeyspaceShardTabletType(fmt.Sprintf("%s.%s.%s", tablet.Keyspace, tablet.Shard, topoproto.TabletTypeLString(tablet.Type)))
   244  }
   245  
// HealthCheckImpl performs health checking and stores the results.
// The goal of this object is to maintain a StreamHealth RPC
// to a lot of tablets. Tablets are added / removed by calling the
// AddTablet / RemoveTablet methods (other discovery module objects
// can for instance watch the topology and call these).
// It contains a map of tabletHealthCheck objects by Alias.
// Each tabletHealthCheck object stores the health information for one tablet.
// A checkConn goroutine is spawned for each tabletHealthCheck, which is responsible for
// keeping that tabletHealthCheck up-to-date.
// If checkConn terminates for any reason, then the corresponding tabletHealthCheck object
// is removed from the map. When a tabletHealthCheck
// gets removed from the map, its cancelFunc gets called, which ensures that the associated
// checkConn goroutine eventually terminates.
type HealthCheckImpl struct {
	// Immutable fields set at construction time.
	retryDelay         time.Duration
	healthCheckTimeout time.Duration
	ts                 *topo.Server
	cell               string
	// mu protects the health maps below (healthByAlias, healthData, healthy).
	// The subscribers map has its own mutex, subMu.
	// NOTE(review): whether mu also guards cellAliases is not visible here — confirm.
	mu sync.Mutex
	// authoritative map of tabletHealth by alias.
	// It is set to nil by Close, which AddTablet uses to detect a closed healthcheck.
	healthByAlias map[tabletAliasString]*tabletHealthCheck
	// a map keyed by keyspace.shard.tabletType
	// contains a map of TabletHealth keyed by tablet alias for each tablet relevant to the keyspace.shard.tabletType
	// has to be kept in sync with healthByAlias
	healthData map[KeyspaceShardTabletType]map[tabletAliasString]*TabletHealth
	// another map keyed by keyspace.shard.tabletType, this one containing a sorted list of TabletHealth
	healthy map[KeyspaceShardTabletType][]*TabletHealth
	// connsWG keeps track of all launched Go routines that monitor tablet connections.
	// Close waits on it before returning.
	connsWG sync.WaitGroup
	// topology watchers that inform healthcheck of tablets being added and deleted
	topoWatchers []*TopologyWatcher
	// cellAliases is a cache of cell aliases
	cellAliases map[string]string
	// mutex to protect subscribers
	subMu sync.Mutex
	// subscribers is the set of channels that receive every broadcast TabletHealth.
	subscribers map[chan *TabletHealth]struct{}
}
   286  
   287  // NewHealthCheck creates a new HealthCheck object.
   288  // Parameters:
   289  // retryDelay.
   290  //
   291  //	The duration to wait before retrying to connect (e.g. after a failed connection
   292  //	attempt).
   293  //
   294  // healthCheckTimeout.
   295  //
   296  //	The duration for which we consider a health check response to be 'fresh'. If we don't get
   297  //	a health check response from a tablet for more than this duration, we consider the tablet
   298  //	not healthy.
   299  //
   300  // topoServer.
   301  //
   302  //	The topology server that this healthcheck object can use to retrieve cell or tablet information
   303  //
   304  // localCell.
   305  //
   306  //	The localCell for this healthcheck
   307  //
   308  // callback.
   309  //
   310  //	A function to call when there is a primary change. Used to notify vtgate's buffer to stop buffering.
   311  func NewHealthCheck(ctx context.Context, retryDelay, healthCheckTimeout time.Duration, topoServer *topo.Server, localCell, cellsToWatch string) *HealthCheckImpl {
   312  	log.Infof("loading tablets for cells: %v", cellsToWatch)
   313  
   314  	hc := &HealthCheckImpl{
   315  		ts:                 topoServer,
   316  		cell:               localCell,
   317  		retryDelay:         retryDelay,
   318  		healthCheckTimeout: healthCheckTimeout,
   319  		healthByAlias:      make(map[tabletAliasString]*tabletHealthCheck),
   320  		healthData:         make(map[KeyspaceShardTabletType]map[tabletAliasString]*TabletHealth),
   321  		healthy:            make(map[KeyspaceShardTabletType][]*TabletHealth),
   322  		subscribers:        make(map[chan *TabletHealth]struct{}),
   323  		cellAliases:        make(map[string]string),
   324  	}
   325  	var topoWatchers []*TopologyWatcher
   326  	var filter TabletFilter
   327  	cells := strings.Split(cellsToWatch, ",")
   328  	if cellsToWatch == "" {
   329  		cells = append(cells, localCell)
   330  	}
   331  
   332  	for _, c := range cells {
   333  		log.Infof("Setting up healthcheck for cell: %v", c)
   334  		if c == "" {
   335  			continue
   336  		}
   337  		if len(tabletFilters) > 0 {
   338  			if len(KeyspacesToWatch) > 0 {
   339  				log.Exitf("Only one of -keyspaces_to_watch and -tablet_filters may be specified at a time")
   340  			}
   341  
   342  			fbs, err := NewFilterByShard(tabletFilters)
   343  			if err != nil {
   344  				log.Exitf("Cannot parse tablet_filters parameter: %v", err)
   345  			}
   346  			filter = fbs
   347  		} else if len(KeyspacesToWatch) > 0 {
   348  			filter = NewFilterByKeyspace(KeyspacesToWatch)
   349  		}
   350  		topoWatchers = append(topoWatchers, NewCellTabletsWatcher(ctx, topoServer, hc, filter, c, refreshInterval, refreshKnownTablets, topoReadConcurrency))
   351  	}
   352  
   353  	hc.topoWatchers = topoWatchers
   354  	healthcheckOnce.Do(func() {
   355  		http.Handle("/debug/gateway", hc)
   356  	})
   357  
   358  	// start the topo watches here
   359  	for _, tw := range hc.topoWatchers {
   360  		tw.Start()
   361  	}
   362  
   363  	return hc
   364  }
   365  
   366  // AddTablet adds the tablet, and starts health check.
   367  // It does not block on making connection.
   368  // name is an optional tag for the tablet, e.g. an alternative address.
   369  func (hc *HealthCheckImpl) AddTablet(tablet *topodata.Tablet) {
   370  	// check whether grpc port is present on tablet, if not return
   371  	if tablet.PortMap["grpc"] == 0 {
   372  		return
   373  	}
   374  
   375  	log.Infof("Adding tablet to healthcheck: %v", tablet)
   376  	hc.mu.Lock()
   377  	defer hc.mu.Unlock()
   378  	if hc.healthByAlias == nil {
   379  		// already closed.
   380  		return
   381  	}
   382  	ctx, cancelFunc := context.WithCancel(context.Background())
   383  	target := &query.Target{
   384  		Keyspace:   tablet.Keyspace,
   385  		Shard:      tablet.Shard,
   386  		TabletType: tablet.Type,
   387  	}
   388  	thc := &tabletHealthCheck{
   389  		ctx:        ctx,
   390  		cancelFunc: cancelFunc,
   391  		Tablet:     tablet,
   392  		Target:     target,
   393  	}
   394  
   395  	// add to our datastore
   396  	key := KeyFromTarget(target)
   397  	tabletAlias := topoproto.TabletAliasString(tablet.Alias)
   398  	if _, ok := hc.healthByAlias[tabletAliasString(tabletAlias)]; ok {
   399  		// We should not add a tablet that we already have
   400  		log.Errorf("Program bug: tried to add existing tablet: %v to healthcheck", tabletAlias)
   401  		return
   402  	}
   403  	hc.healthByAlias[tabletAliasString(tabletAlias)] = thc
   404  	res := thc.SimpleCopy()
   405  	if _, ok := hc.healthData[key]; !ok {
   406  		hc.healthData[key] = make(map[tabletAliasString]*TabletHealth)
   407  	}
   408  	hc.healthData[key][tabletAliasString(tabletAlias)] = res
   409  
   410  	hc.broadcast(res)
   411  	hc.connsWG.Add(1)
   412  	go thc.checkConn(hc)
   413  }
   414  
   415  // RemoveTablet removes the tablet, and stops the health check.
   416  // It does not block.
   417  func (hc *HealthCheckImpl) RemoveTablet(tablet *topodata.Tablet) {
   418  	hc.deleteTablet(tablet)
   419  }
   420  
   421  // ReplaceTablet removes the old tablet and adds the new tablet.
   422  func (hc *HealthCheckImpl) ReplaceTablet(old, new *topodata.Tablet) {
   423  	hc.RemoveTablet(old)
   424  	hc.AddTablet(new)
   425  }
   426  
   427  func (hc *HealthCheckImpl) deleteTablet(tablet *topodata.Tablet) {
   428  	log.Infof("Removing tablet from healthcheck: %v", tablet)
   429  	hc.mu.Lock()
   430  	defer hc.mu.Unlock()
   431  
   432  	tabletAlias := tabletAliasString(topoproto.TabletAliasString(tablet.Alias))
   433  	defer func() {
   434  		// We want to be sure the tablet is gone from the secondary
   435  		// maps even if it's already gone from the authoritative map.
   436  		// The tablet's type also may have recently changed as well,
   437  		// so ensure that the tablet we're removing is removed from
   438  		// any possible secondary map keys:
   439  		// key: keyspace.shard.tabletType -> val: map[tabletAlias]tabletHealth
   440  		for _, tabletType := range topoproto.AllTabletTypes {
   441  			key := KeyspaceShardTabletType(fmt.Sprintf("%s.%s.%s", tablet.Keyspace, tablet.Shard, topoproto.TabletTypeLString(tabletType)))
   442  			// delete from map by keyspace.shard.tabletType
   443  			ths, ok := hc.healthData[key]
   444  			if !ok {
   445  				continue
   446  			}
   447  			delete(ths, tabletAlias)
   448  			// delete from healthy list
   449  			healthy, ok := hc.healthy[key]
   450  			if ok && len(healthy) > 0 {
   451  				hc.recomputeHealthy(key)
   452  			}
   453  		}
   454  	}()
   455  	// delete from authoritative map
   456  	th, ok := hc.healthByAlias[tabletAlias]
   457  	if !ok {
   458  		log.Infof("We have no health data for tablet: %v, it might have been deleted already", tablet)
   459  		return
   460  	}
   461  	// Calling this will end the context associated with th.checkConn,
   462  	// which will call finalizeConn, which will close the connection.
   463  	th.cancelFunc()
   464  	delete(hc.healthByAlias, tabletAlias)
   465  }
   466  
// updateHealth stores a new health record for a tablet and refreshes the
// per-target bookkeeping derived from it.
//
//   - th is the new health record; it replaces the tablet's entry in healthData.
//   - prevTarget is the target the tablet was filed under before this update;
//     if it differs from th.Target the tablet is moved to the new target's map.
//   - trivialUpdate, when true, skips recomputing the healthy lists.
//   - up is only consulted for PRIMARY targets: it decides whether the tablet
//     is placed in, or removed from, the healthy list of its target.
//
// Updates for tablets that are no longer in healthByAlias are ignored.
// The record is broadcast to all subscribers at the end.
func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Target, trivialUpdate bool, up bool) {
	// hc.healthByAlias is authoritative, it should be updated
	hc.mu.Lock()
	defer hc.mu.Unlock()

	tabletAlias := tabletAliasString(topoproto.TabletAliasString(th.Tablet.Alias))
	// let's be sure that this tablet hasn't been deleted from the authoritative map
	// so that we're not racing to update it and in effect re-adding a copy of the
	// tablet record that was deleted
	if _, ok := hc.healthByAlias[tabletAlias]; !ok {
		log.Infof("Tablet %v has been deleted, skipping health update", th.Tablet)
		return
	}

	targetKey := KeyFromTarget(th.Target)
	targetChanged := prevTarget.TabletType != th.Target.TabletType || prevTarget.Keyspace != th.Target.Keyspace || prevTarget.Shard != th.Target.Shard
	if targetChanged {
		// Error counter has to be set here in case we get a new tablet type for the first time in a stream response
		hcErrorCounters.Add([]string{th.Target.Keyspace, th.Target.Shard, topoproto.TabletTypeLString(th.Target.TabletType)}, 0)
		// keyspace and shard are not expected to change, but just in case ...
		// move this tabletHealthCheck to the correct map
		oldTargetKey := KeyFromTarget(prevTarget)
		delete(hc.healthData[oldTargetKey], tabletAlias)
		_, ok := hc.healthData[targetKey]
		if !ok {
			hc.healthData[targetKey] = make(map[tabletAliasString]*TabletHealth)
		}
	}
	// add it to the map by target and create the map record if needed
	if _, ok := hc.healthData[targetKey]; !ok {
		hc.healthData[targetKey] = make(map[tabletAliasString]*TabletHealth)
	}
	hc.healthData[targetKey][tabletAlias] = th

	// For PRIMARY targets the healthy list holds at most one tablet and is
	// maintained here directly; non-primary lists are rebuilt further down.
	isPrimary := th.Target.TabletType == topodata.TabletType_PRIMARY
	switch {
	case isPrimary && up:
		if len(hc.healthy[targetKey]) == 0 {
			hc.healthy[targetKey] = append(hc.healthy[targetKey], th)
		} else {
			// We already have one up server, see if we
			// need to replace it.
			if th.PrimaryTermStartTime < hc.healthy[targetKey][0].PrimaryTermStartTime {
				log.Warningf("not marking healthy primary %s as Up for %s because its PrimaryTermStartTime is smaller than the highest known timestamp from previous PRIMARYs %s: %d < %d ",
					topoproto.TabletAliasString(th.Tablet.Alias),
					topoproto.KeyspaceShardString(th.Target.Keyspace, th.Target.Shard),
					topoproto.TabletAliasString(hc.healthy[targetKey][0].Tablet.Alias),
					th.PrimaryTermStartTime,
					hc.healthy[targetKey][0].PrimaryTermStartTime)
			} else {
				// Just replace it.
				hc.healthy[targetKey][0] = th
			}
		}
	case isPrimary && !up:
		if healthy, ok := hc.healthy[targetKey]; ok && len(healthy) > 0 {
			// isPrimary is true here therefore we should only have 1 tablet in healthy
			alias := tabletAliasString(topoproto.TabletAliasString(healthy[0].Tablet.Alias))
			// Clear healthy list for primary if the existing tablet is down
			if alias == tabletAlias {
				hc.healthy[targetKey] = []*TabletHealth{}
			}
		}
	}

	if !trivialUpdate {
		// We re-sort the healthy tablet list whenever we get a health update for tablets we can route to.
		// Tablets from other cells for non-primary targets should not trigger a re-sort;
		// they should also be excluded from healthy list.
		if th.Target.TabletType != topodata.TabletType_PRIMARY && hc.isIncluded(th.Target.TabletType, th.Tablet.Alias) {
			hc.recomputeHealthy(targetKey)
		}
		// NOTE(review): the inclusion check below uses the new tablet type
		// (th.Target.TabletType), not prevTarget's — confirm this is intended.
		if targetChanged && prevTarget.TabletType != topodata.TabletType_PRIMARY && hc.isIncluded(th.Target.TabletType, th.Tablet.Alias) { // also recompute old target's healthy list
			oldTargetKey := KeyFromTarget(prevTarget)
			hc.recomputeHealthy(oldTargetKey)
		}
	}

	// A tablet that is PRIMARY now but was not PRIMARY in its previous target
	// counts as a promotion.
	isNewPrimary := isPrimary && prevTarget.TabletType != topodata.TabletType_PRIMARY
	if isNewPrimary {
		log.Errorf("Adding 1 to PrimaryPromoted counter for target: %v, tablet: %v, tabletType: %v", prevTarget, topoproto.TabletAliasString(th.Tablet.Alias), th.Target.TabletType)
		hcPrimaryPromotedCounters.Add([]string{th.Target.Keyspace, th.Target.Shard}, 1)
	}

	// broadcast to subscribers
	hc.broadcast(th)
}
   554  
   555  func (hc *HealthCheckImpl) recomputeHealthy(key KeyspaceShardTabletType) {
   556  	all := hc.healthData[key]
   557  	allArray := make([]*TabletHealth, 0, len(all))
   558  	for _, s := range all {
   559  		// Only tablets in same cell / cellAlias are included in healthy list.
   560  		if hc.isIncluded(s.Tablet.Type, s.Tablet.Alias) {
   561  			allArray = append(allArray, s)
   562  		}
   563  	}
   564  	hc.healthy[key] = FilterStatsByReplicationLag(allArray)
   565  }
   566  
   567  // Subscribe adds a listener. Used by vtgate buffer to learn about primary changes.
   568  func (hc *HealthCheckImpl) Subscribe() chan *TabletHealth {
   569  	hc.subMu.Lock()
   570  	defer hc.subMu.Unlock()
   571  	c := make(chan *TabletHealth, 2)
   572  	hc.subscribers[c] = struct{}{}
   573  	return c
   574  }
   575  
   576  // Unsubscribe removes a listener.
   577  func (hc *HealthCheckImpl) Unsubscribe(c chan *TabletHealth) {
   578  	hc.subMu.Lock()
   579  	defer hc.subMu.Unlock()
   580  	delete(hc.subscribers, c)
   581  }
   582  
   583  func (hc *HealthCheckImpl) broadcast(th *TabletHealth) {
   584  	hc.subMu.Lock()
   585  	defer hc.subMu.Unlock()
   586  	for c := range hc.subscribers {
   587  		select {
   588  		case c <- th:
   589  		default:
   590  		}
   591  	}
   592  }
   593  
   594  // CacheStatus returns a displayable version of the cache.
   595  func (hc *HealthCheckImpl) CacheStatus() TabletsCacheStatusList {
   596  	tcsMap := hc.CacheStatusMap()
   597  	tcsl := make(TabletsCacheStatusList, 0, len(tcsMap))
   598  	for _, tcs := range tcsMap {
   599  		tcsl = append(tcsl, tcs)
   600  	}
   601  	sort.Sort(tcsl)
   602  	return tcsl
   603  }
   604  
   605  func (hc *HealthCheckImpl) CacheStatusMap() map[string]*TabletsCacheStatus {
   606  	tcsMap := make(map[string]*TabletsCacheStatus)
   607  	hc.mu.Lock()
   608  	defer hc.mu.Unlock()
   609  	for _, ths := range hc.healthData {
   610  		for _, th := range ths {
   611  			key := fmt.Sprintf("%v.%v.%v.%v", th.Tablet.Alias.Cell, th.Target.Keyspace, th.Target.Shard, th.Target.TabletType.String())
   612  			var tcs *TabletsCacheStatus
   613  			var ok bool
   614  			if tcs, ok = tcsMap[key]; !ok {
   615  				tcs = &TabletsCacheStatus{
   616  					Cell:   th.Tablet.Alias.Cell,
   617  					Target: th.Target,
   618  				}
   619  				tcsMap[key] = tcs
   620  			}
   621  			tcs.TabletsStats = append(tcs.TabletsStats, th)
   622  		}
   623  	}
   624  	return tcsMap
   625  }
   626  
   627  // Close stops the healthcheck.
   628  func (hc *HealthCheckImpl) Close() error {
   629  	hc.mu.Lock()
   630  	for _, th := range hc.healthByAlias {
   631  		th.cancelFunc()
   632  	}
   633  	hc.healthByAlias = nil
   634  	hc.healthData = nil
   635  	for _, tw := range hc.topoWatchers {
   636  		tw.Stop()
   637  	}
   638  	for s := range hc.subscribers {
   639  		close(s)
   640  	}
   641  	hc.subscribers = nil
   642  	// Release the lock early or a pending checkHealthCheckTimeout
   643  	// cannot get a read lock on it.
   644  	hc.mu.Unlock()
   645  
   646  	// Wait for the checkHealthCheckTimeout Go routine and each Go
   647  	// routine per tablet.
   648  	hc.connsWG.Wait()
   649  
   650  	return nil
   651  }
   652  
   653  // GetHealthyTabletStats returns only the healthy tablets.
   654  // The returned array is owned by the caller.
   655  // For TabletType_PRIMARY, this will only return at most one entry,
   656  // the most recent tablet of type primary.
   657  // This returns a copy of the data so that callers can access without
   658  // synchronization
   659  func (hc *HealthCheckImpl) GetHealthyTabletStats(target *query.Target) []*TabletHealth {
   660  	var result []*TabletHealth
   661  	hc.mu.Lock()
   662  	defer hc.mu.Unlock()
   663  	return append(result, hc.healthy[KeyFromTarget(target)]...)
   664  }
   665  
   666  // GetTabletStats returns all tablets for the given target.
   667  // The returned array is owned by the caller.
   668  // For TabletType_PRIMARY, this will only return at most one entry,
   669  // the most recent tablet of type primary.
   670  func (hc *HealthCheckImpl) GetTabletStats(target *query.Target) []*TabletHealth {
   671  	var result []*TabletHealth
   672  	hc.mu.Lock()
   673  	defer hc.mu.Unlock()
   674  	ths := hc.healthData[KeyFromTarget(target)]
   675  	for _, th := range ths {
   676  		result = append(result, th)
   677  	}
   678  	return result
   679  }
   680  
   681  // WaitForTablets waits for at least one tablet in the given
   682  // keyspace / shard / tablet type before returning. The tablets do not
   683  // have to be healthy.  It will return ctx.Err() if the context is canceled.
   684  func (hc *HealthCheckImpl) WaitForTablets(ctx context.Context, keyspace, shard string, tabletType topodata.TabletType) error {
   685  	targets := []*query.Target{
   686  		{
   687  			Keyspace:   keyspace,
   688  			Shard:      shard,
   689  			TabletType: tabletType,
   690  		},
   691  	}
   692  	return hc.waitForTablets(ctx, targets, false)
   693  }
   694  
   695  // WaitForAllServingTablets waits for at least one healthy serving tablet in
   696  // each given target before returning.
   697  // It will return ctx.Err() if the context is canceled.
   698  // It will return an error if it can't read the necessary topology records.
   699  func (hc *HealthCheckImpl) WaitForAllServingTablets(ctx context.Context, targets []*query.Target) error {
   700  	return hc.waitForTablets(ctx, targets, true)
   701  }
   702  
   703  // FilterTargetsByKeyspaces only returns the targets that are part of the provided keyspaces
   704  func FilterTargetsByKeyspaces(keyspaces []string, targets []*query.Target) []*query.Target {
   705  	filteredTargets := make([]*query.Target, 0)
   706  
   707  	// Keep them all if there are no keyspaces to watch
   708  	if len(KeyspacesToWatch) == 0 {
   709  		return append(filteredTargets, targets...)
   710  	}
   711  
   712  	// Let's remove from the target shards that are not in the keyspaceToWatch list.
   713  	for _, target := range targets {
   714  		for _, keyspaceToWatch := range keyspaces {
   715  			if target.Keyspace == keyspaceToWatch {
   716  				filteredTargets = append(filteredTargets, target)
   717  			}
   718  		}
   719  	}
   720  	return filteredTargets
   721  }
   722  
   723  // waitForTablets is the internal method that polls for tablets.
   724  func (hc *HealthCheckImpl) waitForTablets(ctx context.Context, targets []*query.Target, requireServing bool) error {
   725  	targets = FilterTargetsByKeyspaces(KeyspacesToWatch, targets)
   726  
   727  	for {
   728  		// We nil targets as we find them.
   729  		allPresent := true
   730  		for i, target := range targets {
   731  			if target == nil {
   732  				continue
   733  			}
   734  
   735  			var tabletHealths []*TabletHealth
   736  			if requireServing {
   737  				tabletHealths = hc.GetHealthyTabletStats(target)
   738  			} else {
   739  				tabletHealths = hc.GetTabletStats(target)
   740  			}
   741  			if len(tabletHealths) == 0 {
   742  				allPresent = false
   743  			} else {
   744  				targets[i] = nil
   745  			}
   746  		}
   747  
   748  		if allPresent {
   749  			// we found everything we needed
   750  			return nil
   751  		}
   752  
   753  		// Unblock after the sleep or when the context has expired.
   754  		timer := time.NewTimer(waitAvailableTabletInterval)
   755  		select {
   756  		case <-ctx.Done():
   757  			timer.Stop()
   758  			for _, target := range targets {
   759  				if target != nil {
   760  					log.Infof("couldn't find tablets for target: %v", target)
   761  				}
   762  			}
   763  			return ctx.Err()
   764  		case <-timer.C:
   765  		}
   766  	}
   767  }
   768  
   769  // GetTabletHealthByAlias results the TabletHealth of the tablet that matches the given alias
   770  func (hc *HealthCheckImpl) GetTabletHealthByAlias(alias *topodata.TabletAlias) (*TabletHealth, error) {
   771  	hc.mu.Lock()
   772  	defer hc.mu.Unlock()
   773  
   774  	if hd, ok := hc.healthByAlias[tabletAliasString(topoproto.TabletAliasString(alias))]; ok {
   775  		return hd.SimpleCopy(), nil
   776  	}
   777  	return nil, fmt.Errorf("could not find tablet: %s", alias.String())
   778  }
   779  
   780  // GetTabletHealth results the TabletHealth of the tablet that matches the given alias and KeyspaceShardTabletType
   781  // The data is retrieved from the healthData map.
   782  func (hc *HealthCheckImpl) GetTabletHealth(kst KeyspaceShardTabletType, alias *topodata.TabletAlias) (*TabletHealth, error) {
   783  	hc.mu.Lock()
   784  	defer hc.mu.Unlock()
   785  
   786  	if hd, ok := hc.healthData[kst]; ok {
   787  		if th, ok := hd[tabletAliasString(topoproto.TabletAliasString(alias))]; ok {
   788  			return th, nil
   789  		}
   790  	}
   791  	return nil, fmt.Errorf("could not find tablet: %s", alias.String())
   792  }
   793  
   794  // TabletConnection returns the Connection to a given tablet.
   795  func (hc *HealthCheckImpl) TabletConnection(alias *topodata.TabletAlias, target *query.Target) (queryservice.QueryService, error) {
   796  	hc.mu.Lock()
   797  	thc := hc.healthByAlias[tabletAliasString(topoproto.TabletAliasString(alias))]
   798  	hc.mu.Unlock()
   799  	if thc == nil || thc.Conn == nil {
   800  		// TODO: test that throws this error
   801  		return nil, vterrors.Errorf(vtrpc.Code_NOT_FOUND, "tablet: %v is either down or nonexistent", alias)
   802  	}
   803  	return thc.Connection(), nil
   804  }
   805  
   806  // getAliasByCell should only be called while holding hc.mu
   807  func (hc *HealthCheckImpl) getAliasByCell(cell string) string {
   808  	if alias, ok := hc.cellAliases[cell]; ok {
   809  		return alias
   810  	}
   811  
   812  	alias := topo.GetAliasByCell(context.Background(), hc.ts, cell)
   813  	// Currently cell aliases have to be non-overlapping.
   814  	// If that changes, this will need to change to account for overlaps.
   815  	hc.cellAliases[cell] = alias
   816  
   817  	return alias
   818  }
   819  
   820  func (hc *HealthCheckImpl) isIncluded(tabletType topodata.TabletType, tabletAlias *topodata.TabletAlias) bool {
   821  	if tabletType == topodata.TabletType_PRIMARY {
   822  		return true
   823  	}
   824  	if tabletAlias.Cell == hc.cell {
   825  		return true
   826  	}
   827  	if hc.getAliasByCell(tabletAlias.Cell) == hc.getAliasByCell(hc.cell) {
   828  		return true
   829  	}
   830  	return false
   831  }
   832  
   833  // topologyWatcherMaxRefreshLag returns the maximum lag since the watched
   834  // cells were refreshed from the topo server
   835  func (hc *HealthCheckImpl) topologyWatcherMaxRefreshLag() time.Duration {
   836  	var lag time.Duration
   837  	for _, tw := range hc.topoWatchers {
   838  		cellLag := tw.RefreshLag()
   839  		if cellLag > lag {
   840  			lag = cellLag
   841  		}
   842  	}
   843  	return lag
   844  }
   845  
   846  // topologyWatcherChecksum returns a checksum of the topology watcher state
   847  func (hc *HealthCheckImpl) topologyWatcherChecksum() int64 {
   848  	var checksum int64
   849  	for _, tw := range hc.topoWatchers {
   850  		checksum = checksum ^ int64(tw.TopoChecksum())
   851  	}
   852  	return checksum
   853  }
   854  
   855  // RegisterStats registers the connection counts stats
   856  func (hc *HealthCheckImpl) RegisterStats() {
   857  	stats.NewGaugeDurationFunc(
   858  		"TopologyWatcherMaxRefreshLag",
   859  		"maximum time since the topology watcher refreshed a cell",
   860  		hc.topologyWatcherMaxRefreshLag,
   861  	)
   862  
   863  	stats.NewGaugeFunc(
   864  		"TopologyWatcherChecksum",
   865  		"crc32 checksum of the topology watcher state",
   866  		hc.topologyWatcherChecksum,
   867  	)
   868  
   869  	stats.NewGaugesFuncWithMultiLabels(
   870  		"HealthcheckConnections",
   871  		"the number of healthcheck connections registered",
   872  		[]string{"Keyspace", "ShardName", "TabletType"},
   873  		hc.servingConnStats)
   874  
   875  	stats.NewGaugeFunc(
   876  		"HealthcheckChecksum",
   877  		"crc32 checksum of the current healthcheck state",
   878  		hc.stateChecksum)
   879  }
   880  
   881  // ServeHTTP is part of the http.Handler interface. It renders the current state of the discovery gateway tablet cache into json.
   882  func (hc *HealthCheckImpl) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
   883  	w.Header().Set("Content-Type", "application/json; charset=utf-8")
   884  	status := hc.CacheStatus()
   885  	b, err := json.MarshalIndent(status, "", " ")
   886  	if err != nil {
   887  		// Error logged
   888  		if _, err := w.Write([]byte(err.Error())); err != nil {
   889  			log.Errorf("write to buffer error failed: %v", err)
   890  		}
   891  
   892  		return
   893  	}
   894  
   895  	buf := bytes.NewBuffer(nil)
   896  	json.HTMLEscape(buf, b)
   897  
   898  	// Error logged
   899  	if _, err := w.Write(buf.Bytes()); err != nil {
   900  		log.Errorf("write to buffer bytes failed: %v", err)
   901  	}
   902  }
   903  
   904  // servingConnStats returns the number of serving tablets per keyspace/shard/tablet type.
   905  func (hc *HealthCheckImpl) servingConnStats() map[string]int64 {
   906  	res := make(map[string]int64)
   907  	hc.mu.Lock()
   908  	defer hc.mu.Unlock()
   909  	for key, ths := range hc.healthData {
   910  		for _, th := range ths {
   911  			if th.Serving && th.LastError == nil {
   912  				res[string(key)]++
   913  			}
   914  		}
   915  	}
   916  	return res
   917  }
   918  
   919  // stateChecksum returns a crc32 checksum of the healthcheck state
   920  func (hc *HealthCheckImpl) stateChecksum() int64 {
   921  	// CacheStatus is sorted so this should be stable across vtgates
   922  	cacheStatus := hc.CacheStatus()
   923  	var buf bytes.Buffer
   924  	for _, st := range cacheStatus {
   925  		fmt.Fprintf(&buf,
   926  			"%v%v%v%v\n",
   927  			st.Cell,
   928  			st.Target.Keyspace,
   929  			st.Target.Shard,
   930  			st.Target.TabletType.String(),
   931  		)
   932  		sort.Sort(st.TabletsStats)
   933  		for _, ts := range st.TabletsStats {
   934  			fmt.Fprintf(&buf, "%v%v\n", ts.Serving, ts.PrimaryTermStartTime)
   935  		}
   936  	}
   937  
   938  	return int64(crc32.ChecksumIEEE(buf.Bytes()))
   939  }
   940  
   941  // TabletToMapKey creates a key to the map from tablet's host and ports.
   942  // It should only be used in discovery and related module.
   943  func TabletToMapKey(tablet *topodata.Tablet) string {
   944  	parts := make([]string, 0, 1)
   945  	for name, port := range tablet.PortMap {
   946  		parts = append(parts, netutil.JoinHostPort(name, port))
   947  	}
   948  	sort.Strings(parts)
   949  	parts = append([]string{tablet.Hostname}, parts...)
   950  	return strings.Join(parts, ",")
   951  }