vitess.io/vitess@v0.16.2/go/vt/vtgate/tabletgateway.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vtgate
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math/rand"
    23  	"sort"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"github.com/spf13/pflag"
    29  
    30  	"vitess.io/vitess/go/mysql/collations"
    31  	"vitess.io/vitess/go/vt/discovery"
    32  	"vitess.io/vitess/go/vt/log"
    33  	"vitess.io/vitess/go/vt/servenv"
    34  	"vitess.io/vitess/go/vt/srvtopo"
    35  	"vitess.io/vitess/go/vt/topo"
    36  	"vitess.io/vitess/go/vt/topo/topoproto"
    37  	"vitess.io/vitess/go/vt/vterrors"
    38  	"vitess.io/vitess/go/vt/vtgate/buffer"
    39  	"vitess.io/vitess/go/vt/vttablet/queryservice"
    40  
    41  	querypb "vitess.io/vitess/go/vt/proto/query"
    42  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    43  	vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
    44  )
    45  
var (
	// Compile-time assertion that *discovery.HealthCheckImpl satisfies the
	// discovery.HealthCheck interface.
	_ discovery.HealthCheck = (*discovery.HealthCheckImpl)(nil)
	// CellsToWatch is the list of cells the healthcheck operates over. If it is empty, only the local cell is watched
	CellsToWatch string

	// bufferImplementation selects how the failover buffer is fed with events.
	// setupBuffering accepts "healthcheck" and "keyspace_events"; any other
	// value terminates the process via log.Exitf.
	bufferImplementation = "keyspace_events"
	// initialTabletTimeout bounds how long WaitForTablets waits for serving
	// tablets before giving up with a warning.
	initialTabletTimeout = 30 * time.Second
	// retryCount is the number of times a query will be retried on error
	retryCount = 2
)
    56  
    57  func init() {
    58  	servenv.OnParseFor("vtgate", func(fs *pflag.FlagSet) {
    59  		fs.StringVar(&CellsToWatch, "cells_to_watch", "", "comma-separated list of cells for watching tablets")
    60  		fs.StringVar(&bufferImplementation, "buffer_implementation", "keyspace_events", "Allowed values: healthcheck (legacy implementation), keyspace_events (default)")
    61  		fs.DurationVar(&initialTabletTimeout, "gateway_initial_tablet_timeout", 30*time.Second, "At startup, the tabletGateway will wait up to this duration to get at least one tablet per keyspace/shard/tablet type")
    62  		fs.IntVar(&retryCount, "retry-count", 2, "retry count")
    63  	})
    64  }
    65  
// TabletGateway implements the Gateway interface.
// This implementation uses the new healthcheck module.
type TabletGateway struct {
	// QueryService is the embedded query service; NewTabletGateway sets it to
	// a wrapper that routes every call through withRetry.
	queryservice.QueryService
	// hc supplies the healthy tablets and their connections.
	hc                   discovery.HealthCheck
	// kev is only set by setupBuffering when the "keyspace_events" buffer
	// implementation is selected; it is nil otherwise.
	kev                  *discovery.KeyspaceEventWatcher
	// srvTopoServer is used to find the targets to wait for and to create kev.
	srvTopoServer        srvtopo.Server
	// localCell is the cell whose tablets shuffleTablets moves to the front.
	localCell            string
	// retryCount is copied from the package-level retryCount at construction.
	retryCount           int
	// defaultConnCollation is read and written with sync/atomic only;
	// updateDefaultConnCollation treats 0 as "not recorded yet".
	defaultConnCollation uint32

	// mu protects the fields of this group.
	mu sync.Mutex
	// statusAggregators is a map indexed by the key
	// keyspace/shard/tablet_type.
	statusAggregators map[string]*TabletStatusAggregator

	// buffer, if enabled, buffers requests during a detected PRIMARY failover.
	buffer *buffer.Buffer
}
    86  
    87  func createHealthCheck(ctx context.Context, retryDelay, timeout time.Duration, ts *topo.Server, cell, cellsToWatch string) discovery.HealthCheck {
    88  	return discovery.NewHealthCheck(ctx, retryDelay, timeout, ts, cell, cellsToWatch)
    89  }
    90  
    91  // NewTabletGateway creates and returns a new TabletGateway
    92  func NewTabletGateway(ctx context.Context, hc discovery.HealthCheck, serv srvtopo.Server, localCell string) *TabletGateway {
    93  	// hack to accomodate various users of gateway + tests
    94  	if hc == nil {
    95  		var topoServer *topo.Server
    96  		if serv != nil {
    97  			var err error
    98  			topoServer, err = serv.GetTopoServer()
    99  			if err != nil {
   100  				log.Exitf("Unable to create new TabletGateway: %v", err)
   101  			}
   102  		}
   103  		hc = createHealthCheck(ctx, healthCheckRetryDelay, healthCheckTimeout, topoServer, localCell, CellsToWatch)
   104  	}
   105  	gw := &TabletGateway{
   106  		hc:                hc,
   107  		srvTopoServer:     serv,
   108  		localCell:         localCell,
   109  		retryCount:        retryCount,
   110  		statusAggregators: make(map[string]*TabletStatusAggregator),
   111  	}
   112  	gw.setupBuffering(ctx)
   113  	gw.QueryService = queryservice.Wrap(nil, gw.withRetry)
   114  	return gw
   115  }
   116  
   117  func (gw *TabletGateway) setupBuffering(ctx context.Context) {
   118  	cfg := buffer.NewConfigFromFlags()
   119  	gw.buffer = buffer.New(cfg)
   120  
   121  	switch bufferImplementation {
   122  	case "healthcheck":
   123  		// subscribe to healthcheck updates so that buffer can be notified if needed
   124  		// we run this in a separate goroutine so that normal processing doesn't need to block
   125  		hcChan := gw.hc.Subscribe()
   126  		bufferCtx, bufferCancel := context.WithCancel(ctx)
   127  
   128  		go func(ctx context.Context, c chan *discovery.TabletHealth, buffer *buffer.Buffer) {
   129  			defer bufferCancel()
   130  
   131  			for {
   132  				select {
   133  				case <-ctx.Done():
   134  					return
   135  				case result := <-hcChan:
   136  					if result == nil {
   137  						return
   138  					}
   139  					if result.Target.TabletType == topodatapb.TabletType_PRIMARY {
   140  						buffer.ProcessPrimaryHealth(result)
   141  					}
   142  				}
   143  			}
   144  		}(bufferCtx, hcChan, gw.buffer)
   145  
   146  	case "keyspace_events":
   147  		gw.kev = discovery.NewKeyspaceEventWatcher(ctx, gw.srvTopoServer, gw.hc, gw.localCell)
   148  		ksChan := gw.kev.Subscribe()
   149  		bufferCtx, bufferCancel := context.WithCancel(ctx)
   150  
   151  		go func(ctx context.Context, c chan *discovery.KeyspaceEvent, buffer *buffer.Buffer) {
   152  			defer bufferCancel()
   153  
   154  			for {
   155  				select {
   156  				case <-ctx.Done():
   157  					return
   158  				case result := <-ksChan:
   159  					if result == nil {
   160  						return
   161  					}
   162  					buffer.HandleKeyspaceEvent(result)
   163  				}
   164  			}
   165  		}(bufferCtx, ksChan, gw.buffer)
   166  
   167  	default:
   168  		log.Exitf("unknown buffering implementation for TabletGateway: %q", bufferImplementation)
   169  	}
   170  }
   171  
   172  // QueryServiceByAlias satisfies the Gateway interface
   173  func (gw *TabletGateway) QueryServiceByAlias(alias *topodatapb.TabletAlias, target *querypb.Target) (queryservice.QueryService, error) {
   174  	qs, err := gw.hc.TabletConnection(alias, target)
   175  	return queryservice.Wrap(qs, gw.withShardError), NewShardError(err, target)
   176  }
   177  
// RegisterStats registers the stats to export the lag since the last refresh
// and the checksum of the topology.
//
// The gateway itself registers nothing here; the call is delegated to the
// underlying health check.
func (gw *TabletGateway) RegisterStats() {
	gw.hc.RegisterStats()
}
   183  
   184  // WaitForTablets is part of the Gateway interface.
   185  func (gw *TabletGateway) WaitForTablets(tabletTypesToWait []topodatapb.TabletType) (err error) {
   186  	log.Infof("Gateway waiting for serving tablets of types %v ...", tabletTypesToWait)
   187  	ctx, cancel := context.WithTimeout(context.Background(), initialTabletTimeout)
   188  	defer cancel()
   189  
   190  	defer func() {
   191  		switch err {
   192  		case nil:
   193  			// Log so we know everything is fine.
   194  			log.Infof("Waiting for tablets completed")
   195  		case context.DeadlineExceeded:
   196  			// In this scenario, we were able to reach the
   197  			// topology service, but some tablets may not be
   198  			// ready. We just warn and keep going.
   199  			log.Warningf("Timeout waiting for all keyspaces / shards to have healthy tablets of types %v, may be in degraded mode", tabletTypesToWait)
   200  			err = nil
   201  		}
   202  	}()
   203  
   204  	// Skip waiting for tablets if we are not told to do so.
   205  	if len(tabletTypesToWait) == 0 {
   206  		return nil
   207  	}
   208  
   209  	// Finds the targets to look for.
   210  	targets, err := srvtopo.FindAllTargets(ctx, gw.srvTopoServer, gw.localCell, tabletTypesToWait)
   211  	if err != nil {
   212  		return err
   213  	}
   214  	return gw.hc.WaitForAllServingTablets(ctx, targets)
   215  }
   216  
// Close shuts down underlying connections.
// This function hides the inner implementation.
//
// The failover buffer is shut down first, then the health check is closed and
// its error is returned. The context argument is ignored.
func (gw *TabletGateway) Close(_ context.Context) error {
	gw.buffer.Shutdown()
	return gw.hc.Close()
}
   223  
   224  // CacheStatus returns a list of TabletCacheStatus per
   225  // keyspace/shard/tablet_type.
   226  func (gw *TabletGateway) CacheStatus() TabletCacheStatusList {
   227  	gw.mu.Lock()
   228  	res := make(TabletCacheStatusList, 0, len(gw.statusAggregators))
   229  	for _, aggr := range gw.statusAggregators {
   230  		res = append(res, aggr.GetCacheStatus())
   231  	}
   232  	gw.mu.Unlock()
   233  	sort.Sort(res)
   234  	return res
   235  }
   236  
// withRetry picks a healthy tablet for target and runs inner against that
// tablet's connection. When inner reports (through its boolean result) that
// the call may be retried, the tablet is remembered as already tried and
// another one is picked; at most gw.retryCount+1 attempts are made.
//
// Requests with inTransaction set are only accepted for PRIMARY targets; for
// transactions the caller is expected to connect to a specific tablet instead.
// If discovery.AllowedTabletTypes is non-empty, the target's tablet type must
// be listed in it. Errors from these two precondition checks are returned
// as-is, without shard information.
//
// For non-transactional PRIMARY requests the failover buffer is consulted
// before each attempt until the request has been buffered once. When no
// healthy tablet is found and the keyspace event watcher reports a resharding
// or a reparent in progress, a CLUSTER_EVENT error is recorded and the next
// attempt is started, which hands that error to the buffer.
//
// withRetry also adds shard information to errors returned from the inner QueryService, so
// withShardError should not be combined with withRetry.
//
// The QueryService and string arguments are ignored.
func (gw *TabletGateway) withRetry(ctx context.Context, target *querypb.Target, _ queryservice.QueryService,
	_ string, inTransaction bool, inner func(ctx context.Context, target *querypb.Target, conn queryservice.QueryService) (bool, error)) error {
	// for transactions, we connect to a specific tablet instead of letting gateway choose one
	if inTransaction && target.TabletType != topodatapb.TabletType_PRIMARY {
		return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "tabletGateway's query service can only be used for non-transactional queries on replicas")
	}
	var tabletLastUsed *topodatapb.Tablet
	// err carries the outcome of the previous attempt into the next iteration
	// and is what is finally returned (with shard information added).
	var err error
	// invalidTablets holds the aliases of tablets that must not be picked again.
	invalidTablets := make(map[string]bool)

	// Reject tablet types this vtgate is not allowed to serve.
	if len(discovery.AllowedTabletTypes) > 0 {
		var match bool
		for _, allowed := range discovery.AllowedTabletTypes {
			if allowed == target.TabletType {
				match = true
				break
			}
		}
		if !match {
			return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "requested tablet type %v is not part of the allowed tablet types for this vtgate: %+v", target.TabletType.String(), discovery.AllowedTabletTypes)
		}
	}

	bufferedOnce := false
	for i := 0; i < gw.retryCount+1; i++ {
		// Check if we should buffer PRIMARY queries which failed due to an ongoing
		// failover.
		// Note: We only buffer once and only "!inTransaction" queries i.e.
		// a) no transaction is necessary (e.g. critical reads) or
		// b) no transaction was created yet.
		if !bufferedOnce && !inTransaction && target.TabletType == topodatapb.TabletType_PRIMARY {
			// The next call blocks if we should buffer during a failover.
			// err is the error of the previous attempt (nil on the first one).
			retryDone, bufferErr := gw.buffer.WaitForFailoverEnd(ctx, target.Keyspace, target.Shard, err)

			// Request may have been buffered.
			if retryDone != nil {
				// We're going to retry this request as part of a buffer drain.
				// Notify the buffer after we retried. The deferred call runs when
				// withRetry returns; bufferedOnce ensures it is registered at
				// most once.
				defer retryDone()
				bufferedOnce = true
			}

			if bufferErr != nil {
				err = vterrors.Wrapf(bufferErr,
					"failed to automatically buffer and retry failed request during failover. original err (type=%T): %v",
					err, err)
				break
			}
		}

		tablets := gw.hc.GetHealthyTabletStats(target)
		if len(tablets) == 0 {
			// if we have a keyspace event watcher, check if the reason why our primary is not available is that it's currently being resharded
			// or if a reparent operation is in progress.
			// In both cases the attempt is consumed and the loop moves on, so
			// that the recorded error reaches the buffer check above.
			if kev := gw.kev; kev != nil {
				if kev.TargetIsBeingResharded(target) {
					err = vterrors.Errorf(vtrpcpb.Code_CLUSTER_EVENT, "current keyspace is being resharded")
					continue
				}
				if kev.PrimaryIsNotServing(target) {
					err = vterrors.Errorf(vtrpcpb.Code_CLUSTER_EVENT, "primary is not serving, there is a reparent operation in progress")
					continue
				}
			}

			// fail fast if there is no tablet
			err = vterrors.Errorf(vtrpcpb.Code_UNAVAILABLE, "no healthy tablet available for '%s'", target.String())
			break
		}
		// Local-cell tablets first, random order within each group.
		gw.shuffleTablets(gw.localCell, tablets)

		var th *discovery.TabletHealth
		// skip tablets we tried before
		for _, t := range tablets {
			if _, ok := invalidTablets[topoproto.TabletAliasString(t.Tablet.Alias)]; !ok {
				th = t
				break
			}
		}
		if th == nil {
			// Every healthy tablet has been tried already.
			// do not override error from last attempt.
			if err == nil {
				err = vterrors.VT14002()
			}
			break
		}

		tabletLastUsed = th.Tablet
		// execute
		if th.Conn == nil {
			// No connection for this tablet: exclude it and try another one.
			err = vterrors.VT14003(tabletLastUsed)
			invalidTablets[topoproto.TabletAliasString(tabletLastUsed.Alias)] = true
			continue
		}

		gw.updateDefaultConnCollation(tabletLastUsed)

		startTime := time.Now()
		var canRetry bool
		canRetry, err = inner(ctx, target, th.Conn)
		gw.updateStats(target, startTime, err)
		if canRetry {
			// inner allows a retry: exclude this tablet and go around again.
			invalidTablets[topoproto.TabletAliasString(tabletLastUsed.Alias)] = true
			continue
		}
		break
	}
	return NewShardError(err, target)
}
   354  
   355  // withShardError adds shard information to errors returned from the inner QueryService.
   356  func (gw *TabletGateway) withShardError(ctx context.Context, target *querypb.Target, conn queryservice.QueryService,
   357  	_ string, _ bool, inner func(ctx context.Context, target *querypb.Target, conn queryservice.QueryService) (bool, error)) error {
   358  	_, err := inner(ctx, target, conn)
   359  	return NewShardError(err, target)
   360  }
   361  
   362  func (gw *TabletGateway) updateStats(target *querypb.Target, startTime time.Time, err error) {
   363  	elapsed := time.Since(startTime)
   364  	aggr := gw.getStatsAggregator(target)
   365  	aggr.UpdateQueryInfo("", target.TabletType, elapsed, err != nil)
   366  }
   367  
   368  func (gw *TabletGateway) getStatsAggregator(target *querypb.Target) *TabletStatusAggregator {
   369  	key := fmt.Sprintf("%v/%v/%v", target.Keyspace, target.Shard, target.TabletType.String())
   370  
   371  	// get existing aggregator
   372  	gw.mu.Lock()
   373  	defer gw.mu.Unlock()
   374  	aggr, ok := gw.statusAggregators[key]
   375  	if ok {
   376  		return aggr
   377  	}
   378  	// create a new one if it doesn't exist yet
   379  	aggr = NewTabletStatusAggregator(target.Keyspace, target.Shard, target.TabletType, key)
   380  	gw.statusAggregators[key] = aggr
   381  	return aggr
   382  }
   383  
// shuffleTablets reorders tablets in place so that the tablets whose alias
// cell equals cell come first, and then randomizes the order inside the
// same-cell group and inside the other-cell group independently.
//
// If the slice contains only one of the two groups, the partition loop exits
// on its first pass with sameCellMax still at -1, and the whole slice is
// shuffled by the second shuffle loop.
func (gw *TabletGateway) shuffleTablets(cell string, tablets []*discovery.TabletHealth) {
	// sameCell / diffCell are the search cursors for the next same-cell and
	// next other-cell tablet; sameCellMax ends up as the last index of the
	// same-cell prefix.
	sameCell, diffCell, sameCellMax := 0, 0, -1
	length := len(tablets)

	// move all same cell tablets to the front, this is O(n)
	for {
		// Everything before diffCell is known to be in the same cell.
		sameCellMax = diffCell - 1
		sameCell = gw.nextTablet(cell, tablets, sameCell, length, true)
		diffCell = gw.nextTablet(cell, tablets, diffCell, length, false)
		// either no more diffs or no more same cells should stop the iteration
		if sameCell < 0 || diffCell < 0 {
			break
		}

		if sameCell < diffCell {
			// fast forward the `sameCell` lookup to `diffCell + 1`, `diffCell` unchanged
			sameCell = diffCell + 1
		} else {
			// sameCell > diffCell, swap needed
			tablets[sameCell], tablets[diffCell] = tablets[diffCell], tablets[sameCell]
			sameCell++
			diffCell++
		}
	}

	// shuffle in same cell tablets
	// (Fisher-Yates over the indexes [0, sameCellMax])
	for i := sameCellMax; i > 0; i-- {
		swap := rand.Intn(i + 1)
		tablets[i], tablets[swap] = tablets[swap], tablets[i]
	}

	// shuffle in diff cell tablets
	// (Fisher-Yates over the indexes [diffCellMin, length-1]; the swap index is
	// drawn from [diffCellMin, i])
	for i, diffCellMin := length-1, sameCellMax+1; i > diffCellMin; i-- {
		swap := rand.Intn(i-sameCellMax) + diffCellMin
		tablets[i], tablets[swap] = tablets[swap], tablets[i]
	}
}
   421  
   422  func (gw *TabletGateway) nextTablet(cell string, tablets []*discovery.TabletHealth, offset, length int, sameCell bool) int {
   423  	for ; offset < length; offset++ {
   424  		if (tablets[offset].Tablet.Alias.Cell == cell) == sameCell {
   425  			return offset
   426  		}
   427  	}
   428  	return -1
   429  }
   430  
// TabletsCacheStatus returns a displayable version of the health check cache.
// The list is produced by the underlying health check and returned unchanged.
func (gw *TabletGateway) TabletsCacheStatus() discovery.TabletsCacheStatusList {
	return gw.hc.CacheStatus()
}
   435  
   436  func (gw *TabletGateway) updateDefaultConnCollation(tablet *topodatapb.Tablet) {
   437  	if atomic.CompareAndSwapUint32(&gw.defaultConnCollation, 0, tablet.DefaultConnCollation) {
   438  		return
   439  	}
   440  	if atomic.LoadUint32(&gw.defaultConnCollation) != tablet.DefaultConnCollation {
   441  		log.Warning("this Vitess cluster has tablets with different default connection collations")
   442  	}
   443  }
   444  
   445  // DefaultConnCollation returns the default connection collation of this TabletGateway
   446  func (gw *TabletGateway) DefaultConnCollation() collations.ID {
   447  	return collations.ID(atomic.LoadUint32(&gw.defaultConnCollation))
   448  }
   449  
   450  // NewShardError returns a new error with the shard info amended.
   451  func NewShardError(in error, target *querypb.Target) error {
   452  	if in == nil {
   453  		return nil
   454  	}
   455  	if target != nil {
   456  		return vterrors.Wrapf(in, "target: %s.%s.%s", target.Keyspace, target.Shard, topoproto.TabletTypeLString(target.TabletType))
   457  	}
   458  	return in
   459  }