vitess.io/vitess@v0.16.2/go/vt/vtgate/tabletgateway.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vtgate 18 19 import ( 20 "context" 21 "fmt" 22 "math/rand" 23 "sort" 24 "sync" 25 "sync/atomic" 26 "time" 27 28 "github.com/spf13/pflag" 29 30 "vitess.io/vitess/go/mysql/collations" 31 "vitess.io/vitess/go/vt/discovery" 32 "vitess.io/vitess/go/vt/log" 33 "vitess.io/vitess/go/vt/servenv" 34 "vitess.io/vitess/go/vt/srvtopo" 35 "vitess.io/vitess/go/vt/topo" 36 "vitess.io/vitess/go/vt/topo/topoproto" 37 "vitess.io/vitess/go/vt/vterrors" 38 "vitess.io/vitess/go/vt/vtgate/buffer" 39 "vitess.io/vitess/go/vt/vttablet/queryservice" 40 41 querypb "vitess.io/vitess/go/vt/proto/query" 42 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 43 vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" 44 ) 45 46 var ( 47 _ discovery.HealthCheck = (*discovery.HealthCheckImpl)(nil) 48 // CellsToWatch is the list of cells the healthcheck operates over. 
If it is empty, only the local cell is watched 49 CellsToWatch string 50 51 bufferImplementation = "keyspace_events" 52 initialTabletTimeout = 30 * time.Second 53 // retryCount is the number of times a query will be retried on error 54 retryCount = 2 55 ) 56 57 func init() { 58 servenv.OnParseFor("vtgate", func(fs *pflag.FlagSet) { 59 fs.StringVar(&CellsToWatch, "cells_to_watch", "", "comma-separated list of cells for watching tablets") 60 fs.StringVar(&bufferImplementation, "buffer_implementation", "keyspace_events", "Allowed values: healthcheck (legacy implementation), keyspace_events (default)") 61 fs.DurationVar(&initialTabletTimeout, "gateway_initial_tablet_timeout", 30*time.Second, "At startup, the tabletGateway will wait up to this duration to get at least one tablet per keyspace/shard/tablet type") 62 fs.IntVar(&retryCount, "retry-count", 2, "retry count") 63 }) 64 } 65 66 // TabletGateway implements the Gateway interface. 67 // This implementation uses the new healthcheck module. 68 type TabletGateway struct { 69 queryservice.QueryService 70 hc discovery.HealthCheck 71 kev *discovery.KeyspaceEventWatcher 72 srvTopoServer srvtopo.Server 73 localCell string 74 retryCount int 75 defaultConnCollation uint32 76 77 // mu protects the fields of this group. 78 mu sync.Mutex 79 // statusAggregators is a map indexed by the key 80 // keyspace/shard/tablet_type. 81 statusAggregators map[string]*TabletStatusAggregator 82 83 // buffer, if enabled, buffers requests during a detected PRIMARY failover. 
84 buffer *buffer.Buffer 85 } 86 87 func createHealthCheck(ctx context.Context, retryDelay, timeout time.Duration, ts *topo.Server, cell, cellsToWatch string) discovery.HealthCheck { 88 return discovery.NewHealthCheck(ctx, retryDelay, timeout, ts, cell, cellsToWatch) 89 } 90 91 // NewTabletGateway creates and returns a new TabletGateway 92 func NewTabletGateway(ctx context.Context, hc discovery.HealthCheck, serv srvtopo.Server, localCell string) *TabletGateway { 93 // hack to accomodate various users of gateway + tests 94 if hc == nil { 95 var topoServer *topo.Server 96 if serv != nil { 97 var err error 98 topoServer, err = serv.GetTopoServer() 99 if err != nil { 100 log.Exitf("Unable to create new TabletGateway: %v", err) 101 } 102 } 103 hc = createHealthCheck(ctx, healthCheckRetryDelay, healthCheckTimeout, topoServer, localCell, CellsToWatch) 104 } 105 gw := &TabletGateway{ 106 hc: hc, 107 srvTopoServer: serv, 108 localCell: localCell, 109 retryCount: retryCount, 110 statusAggregators: make(map[string]*TabletStatusAggregator), 111 } 112 gw.setupBuffering(ctx) 113 gw.QueryService = queryservice.Wrap(nil, gw.withRetry) 114 return gw 115 } 116 117 func (gw *TabletGateway) setupBuffering(ctx context.Context) { 118 cfg := buffer.NewConfigFromFlags() 119 gw.buffer = buffer.New(cfg) 120 121 switch bufferImplementation { 122 case "healthcheck": 123 // subscribe to healthcheck updates so that buffer can be notified if needed 124 // we run this in a separate goroutine so that normal processing doesn't need to block 125 hcChan := gw.hc.Subscribe() 126 bufferCtx, bufferCancel := context.WithCancel(ctx) 127 128 go func(ctx context.Context, c chan *discovery.TabletHealth, buffer *buffer.Buffer) { 129 defer bufferCancel() 130 131 for { 132 select { 133 case <-ctx.Done(): 134 return 135 case result := <-hcChan: 136 if result == nil { 137 return 138 } 139 if result.Target.TabletType == topodatapb.TabletType_PRIMARY { 140 buffer.ProcessPrimaryHealth(result) 141 } 142 } 143 } 144 
}(bufferCtx, hcChan, gw.buffer) 145 146 case "keyspace_events": 147 gw.kev = discovery.NewKeyspaceEventWatcher(ctx, gw.srvTopoServer, gw.hc, gw.localCell) 148 ksChan := gw.kev.Subscribe() 149 bufferCtx, bufferCancel := context.WithCancel(ctx) 150 151 go func(ctx context.Context, c chan *discovery.KeyspaceEvent, buffer *buffer.Buffer) { 152 defer bufferCancel() 153 154 for { 155 select { 156 case <-ctx.Done(): 157 return 158 case result := <-ksChan: 159 if result == nil { 160 return 161 } 162 buffer.HandleKeyspaceEvent(result) 163 } 164 } 165 }(bufferCtx, ksChan, gw.buffer) 166 167 default: 168 log.Exitf("unknown buffering implementation for TabletGateway: %q", bufferImplementation) 169 } 170 } 171 172 // QueryServiceByAlias satisfies the Gateway interface 173 func (gw *TabletGateway) QueryServiceByAlias(alias *topodatapb.TabletAlias, target *querypb.Target) (queryservice.QueryService, error) { 174 qs, err := gw.hc.TabletConnection(alias, target) 175 return queryservice.Wrap(qs, gw.withShardError), NewShardError(err, target) 176 } 177 178 // RegisterStats registers the stats to export the lag since the last refresh 179 // and the checksum of the topology 180 func (gw *TabletGateway) RegisterStats() { 181 gw.hc.RegisterStats() 182 } 183 184 // WaitForTablets is part of the Gateway interface. 185 func (gw *TabletGateway) WaitForTablets(tabletTypesToWait []topodatapb.TabletType) (err error) { 186 log.Infof("Gateway waiting for serving tablets of types %v ...", tabletTypesToWait) 187 ctx, cancel := context.WithTimeout(context.Background(), initialTabletTimeout) 188 defer cancel() 189 190 defer func() { 191 switch err { 192 case nil: 193 // Log so we know everything is fine. 194 log.Infof("Waiting for tablets completed") 195 case context.DeadlineExceeded: 196 // In this scenario, we were able to reach the 197 // topology service, but some tablets may not be 198 // ready. We just warn and keep going. 
199 log.Warningf("Timeout waiting for all keyspaces / shards to have healthy tablets of types %v, may be in degraded mode", tabletTypesToWait) 200 err = nil 201 } 202 }() 203 204 // Skip waiting for tablets if we are not told to do so. 205 if len(tabletTypesToWait) == 0 { 206 return nil 207 } 208 209 // Finds the targets to look for. 210 targets, err := srvtopo.FindAllTargets(ctx, gw.srvTopoServer, gw.localCell, tabletTypesToWait) 211 if err != nil { 212 return err 213 } 214 return gw.hc.WaitForAllServingTablets(ctx, targets) 215 } 216 217 // Close shuts down underlying connections. 218 // This function hides the inner implementation. 219 func (gw *TabletGateway) Close(_ context.Context) error { 220 gw.buffer.Shutdown() 221 return gw.hc.Close() 222 } 223 224 // CacheStatus returns a list of TabletCacheStatus per 225 // keyspace/shard/tablet_type. 226 func (gw *TabletGateway) CacheStatus() TabletCacheStatusList { 227 gw.mu.Lock() 228 res := make(TabletCacheStatusList, 0, len(gw.statusAggregators)) 229 for _, aggr := range gw.statusAggregators { 230 res = append(res, aggr.GetCacheStatus()) 231 } 232 gw.mu.Unlock() 233 sort.Sort(res) 234 return res 235 } 236 237 // withRetry gets available connections and executes the action. If there are retryable errors, 238 // it retries retryCount times before failing. It does not retry if the connection is in 239 // the middle of a transaction. While returning the error check if it maybe a result of 240 // a resharding event, and set the re-resolve bit and let the upper layers 241 // re-resolve and retry. 242 // 243 // withRetry also adds shard information to errors returned from the inner QueryService, so 244 // withShardError should not be combined with withRetry. 
func (gw *TabletGateway) withRetry(ctx context.Context, target *querypb.Target, _ queryservice.QueryService,
	_ string, inTransaction bool, inner func(ctx context.Context, target *querypb.Target, conn queryservice.QueryService) (bool, error)) error {
	// The incoming QueryService and the string argument are ignored: the
	// connection handed to inner is always chosen from the health check below.

	// for transactions, we connect to a specific tablet instead of letting gateway choose one
	if inTransaction && target.TabletType != topodatapb.TabletType_PRIMARY {
		return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "tabletGateway's query service can only be used for non-transactional queries on replicas")
	}
	var tabletLastUsed *topodatapb.Tablet
	var err error
	// invalidTablets holds the alias strings of tablets that were already
	// tried (or had no connection) during this call, so they are skipped.
	invalidTablets := make(map[string]bool)

	// When an allow-list of tablet types is configured, reject targets whose
	// type is not on it before doing any work.
	if len(discovery.AllowedTabletTypes) > 0 {
		var match bool
		for _, allowed := range discovery.AllowedTabletTypes {
			if allowed == target.TabletType {
				match = true
				break
			}
		}
		if !match {
			return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "requested tablet type %v is not part of the allowed tablet types for this vtgate: %+v", target.TabletType.String(), discovery.AllowedTabletTypes)
		}
	}

	bufferedOnce := false
	// At most gw.retryCount+1 attempts: the initial one plus the retries.
	for i := 0; i < gw.retryCount+1; i++ {
		// Check if we should buffer PRIMARY queries which failed due to an ongoing
		// failover.
		// Note: We only buffer once and only "!inTransaction" queries i.e.
		// a) no transaction is necessary (e.g. critical reads) or
		// b) no transaction was created yet.
		if !bufferedOnce && !inTransaction && target.TabletType == topodatapb.TabletType_PRIMARY {
			// The next call blocks if we should buffer during a failover.
			// err is the error of the previous attempt (nil on the first pass).
			retryDone, bufferErr := gw.buffer.WaitForFailoverEnd(ctx, target.Keyspace, target.Shard, err)

			// Request may have been buffered.
			if retryDone != nil {
				// We're going to retry this request as part of a buffer drain.
				// Notify the buffer after we retried.
				// The defer sits inside the loop, but bufferedOnce guarantees it
				// is registered at most once per call.
				defer retryDone()
				bufferedOnce = true
			}

			if bufferErr != nil {
				err = vterrors.Wrapf(bufferErr,
					"failed to automatically buffer and retry failed request during failover. original err (type=%T): %v",
					err, err)
				break
			}
		}

		tablets := gw.hc.GetHealthyTabletStats(target)
		if len(tablets) == 0 {
			// if we have a keyspace event watcher, check if the reason why our primary is not available is that it's currently being resharded
			// or if a reparent operation is in progress.
			// In both cases the attempt is consumed and the loop goes around
			// again with a CLUSTER_EVENT error recorded in err.
			if kev := gw.kev; kev != nil {
				if kev.TargetIsBeingResharded(target) {
					err = vterrors.Errorf(vtrpcpb.Code_CLUSTER_EVENT, "current keyspace is being resharded")
					continue
				}
				if kev.PrimaryIsNotServing(target) {
					err = vterrors.Errorf(vtrpcpb.Code_CLUSTER_EVENT, "primary is not serving, there is a reparent operation in progress")
					continue
				}
			}

			// fail fast if there is no tablet
			err = vterrors.Errorf(vtrpcpb.Code_UNAVAILABLE, "no healthy tablet available for '%s'", target.String())
			break
		}
		// Reorder in place: local-cell tablets first, random order within each group.
		gw.shuffleTablets(gw.localCell, tablets)

		var th *discovery.TabletHealth
		// skip tablets we tried before
		for _, t := range tablets {
			if _, ok := invalidTablets[topoproto.TabletAliasString(t.Tablet.Alias)]; !ok {
				th = t
				break
			}
		}
		if th == nil {
			// do not override error from last attempt.
			if err == nil {
				err = vterrors.VT14002()
			}
			break
		}

		tabletLastUsed = th.Tablet
		// execute
		if th.Conn == nil {
			// No connection for this tablet: mark it as tried and move on.
			err = vterrors.VT14003(tabletLastUsed)
			invalidTablets[topoproto.TabletAliasString(tabletLastUsed.Alias)] = true
			continue
		}

		gw.updateDefaultConnCollation(tabletLastUsed)

		startTime := time.Now()
		var canRetry bool
		canRetry, err = inner(ctx, target, th.Conn)
		gw.updateStats(target, startTime, err)
		if canRetry {
			// inner asked for a retry: exclude this tablet from later attempts.
			invalidTablets[topoproto.TabletAliasString(tabletLastUsed.Alias)] = true
			continue
		}
		break
	}
	// NewShardError returns nil when err is nil.
	return NewShardError(err, target)
}

// withShardError adds shard information to errors returned from the inner QueryService.
// The retry flag returned by inner is discarded; inner is invoked exactly once
// with the connection that was passed in.
func (gw *TabletGateway) withShardError(ctx context.Context, target *querypb.Target, conn queryservice.QueryService,
	_ string, _ bool, inner func(ctx context.Context, target *querypb.Target, conn queryservice.QueryService) (bool, error)) error {
	_, err := inner(ctx, target, conn)
	return NewShardError(err, target)
}

// updateStats records one query against the aggregator for target: the time
// elapsed since startTime and whether err is non-nil.
func (gw *TabletGateway) updateStats(target *querypb.Target, startTime time.Time, err error) {
	elapsed := time.Since(startTime)
	aggr := gw.getStatsAggregator(target)
	// The first argument is always passed as an empty string here.
	aggr.UpdateQueryInfo("", target.TabletType, elapsed, err != nil)
}

// getStatsAggregator returns the TabletStatusAggregator stored under the key
// "keyspace/shard/tablet_type" for target, creating and storing a new one on
// first use. The map access is guarded by gw.mu.
func (gw *TabletGateway) getStatsAggregator(target *querypb.Target) *TabletStatusAggregator {
	key := fmt.Sprintf("%v/%v/%v", target.Keyspace, target.Shard, target.TabletType.String())

	// get existing aggregator
	gw.mu.Lock()
	defer gw.mu.Unlock()
	aggr, ok := gw.statusAggregators[key]
	if ok {
		return aggr
	}
	// create a new one if it doesn't exist yet
	aggr = NewTabletStatusAggregator(target.Keyspace, target.Shard, target.TabletType, key)
	gw.statusAggregators[key] = aggr
	return aggr
}

// shuffleTablets reorders tablets in place. Tablets whose alias cell equals
// cell are moved ahead of all other tablets, and then each of the two groups
// is put into a random order using math/rand.
func (gw *TabletGateway) shuffleTablets(cell string, tablets []*discovery.TabletHealth) {
	// sameCell and diffCell are scan positions for the next same-cell and the
	// next different-cell tablet; sameCellMax tracks the last index of the
	// same-cell prefix built so far (-1 while that prefix is empty).
	sameCell, diffCell, sameCellMax := 0, 0, -1
	length := len(tablets)

	// move all same cell tablets to the front, this is O(n)
	for {
		// Everything before diffCell has already been placed in the prefix.
		sameCellMax = diffCell - 1
		sameCell = gw.nextTablet(cell, tablets, sameCell, length, true)
		diffCell = gw.nextTablet(cell, tablets, diffCell, length, false)
		// either no more diffs or no more same cells should stop the iteration
		if sameCell < 0 || diffCell < 0 {
			break
		}

		if sameCell < diffCell {
			// fast forward the `sameCell` lookup to `diffCell + 1`, `diffCell` unchanged
			sameCell = diffCell + 1
		} else {
			// sameCell > diffCell, swap needed
			tablets[sameCell], tablets[diffCell] = tablets[diffCell], tablets[sameCell]
			sameCell++
			diffCell++
		}
	}

	// shuffle in same cell tablets
	// Walk from the end of the prefix down, swapping each slot with a random
	// slot at or before it.
	for i := sameCellMax; i > 0; i-- {
		swap := rand.Intn(i + 1)
		tablets[i], tablets[swap] = tablets[swap], tablets[i]
	}

	// shuffle in diff cell tablets
	// Same walk over the remainder; swap is drawn from [diffCellMin, i].
	for i, diffCellMin := length-1, sameCellMax+1; i > diffCellMin; i-- {
		swap := rand.Intn(i-sameCellMax) + diffCellMin
		tablets[i], tablets[swap] = tablets[swap], tablets[i]
	}
}

// nextTablet scans tablets[offset:length] and returns the index of the first
// tablet whose "alias cell equals cell" result matches sameCell, or -1 when
// there is none.
func (gw *TabletGateway) nextTablet(cell string, tablets []*discovery.TabletHealth, offset, length int, sameCell bool) int {
	for ; offset < length; offset++ {
		if (tablets[offset].Tablet.Alias.Cell == cell) == sameCell {
			return offset
		}
	}
	return -1
}

// TabletsCacheStatus returns a displayable version of the health check cache.
432 func (gw *TabletGateway) TabletsCacheStatus() discovery.TabletsCacheStatusList { 433 return gw.hc.CacheStatus() 434 } 435 436 func (gw *TabletGateway) updateDefaultConnCollation(tablet *topodatapb.Tablet) { 437 if atomic.CompareAndSwapUint32(&gw.defaultConnCollation, 0, tablet.DefaultConnCollation) { 438 return 439 } 440 if atomic.LoadUint32(&gw.defaultConnCollation) != tablet.DefaultConnCollation { 441 log.Warning("this Vitess cluster has tablets with different default connection collations") 442 } 443 } 444 445 // DefaultConnCollation returns the default connection collation of this TabletGateway 446 func (gw *TabletGateway) DefaultConnCollation() collations.ID { 447 return collations.ID(atomic.LoadUint32(&gw.defaultConnCollation)) 448 } 449 450 // NewShardError returns a new error with the shard info amended. 451 func NewShardError(in error, target *querypb.Target) error { 452 if in == nil { 453 return nil 454 } 455 if target != nil { 456 return vterrors.Wrapf(in, "target: %s.%s.%s", target.Keyspace, target.Shard, topoproto.TabletTypeLString(target.TabletType)) 457 } 458 return in 459 }