vitess.io/vitess@v0.16.2/go/vt/vtctld/tablet_data.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vtctld
    18  
    19  import (
    20  	"context"
    21  	"io"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/spf13/pflag"
    26  
    27  	"vitess.io/vitess/go/vt/grpcclient"
    28  	"vitess.io/vitess/go/vt/log"
    29  	"vitess.io/vitess/go/vt/servenv"
    30  	"vitess.io/vitess/go/vt/topo"
    31  	"vitess.io/vitess/go/vt/topo/topoproto"
    32  	"vitess.io/vitess/go/vt/vttablet/tabletconn"
    33  
    34  	querypb "vitess.io/vitess/go/vt/proto/query"
    35  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    36  )
    37  
    38  // This file maintains a tablet health cache. It establishes streaming
    39  // connections with tablets, and updates its internal state with the
    40  // result.
    41  
    42  var (
    43  	tabletHealthKeepAlive = 5 * time.Minute
    44  )
    45  
    46  type tabletHealth struct {
    47  	mu sync.Mutex
    48  
    49  	// result stores the most recent response.
    50  	result *querypb.StreamHealthResponse
    51  	// accessed stores the time of the most recent access.
    52  	accessed time.Time
    53  
    54  	// err stores the result of the stream attempt.
    55  	err error
    56  	// done is closed when the stream attempt ends.
    57  	done chan struct{}
    58  	// ready is closed when there is at least one result to read.
    59  	ready chan struct{}
    60  }
    61  
    62  func init() {
    63  	for _, cmd := range []string{"vtcombo", "vtctld"} {
    64  		servenv.OnParseFor(cmd, registerVtctlTabletFlags)
    65  	}
    66  }
    67  
    68  func registerVtctlTabletFlags(fs *pflag.FlagSet) {
    69  	fs.DurationVar(&tabletHealthKeepAlive, "tablet_health_keep_alive", tabletHealthKeepAlive, "close streaming tablet health connection if there are no requests for this long")
    70  }
    71  
    72  func newTabletHealth() *tabletHealth {
    73  	return &tabletHealth{
    74  		accessed: time.Now(),
    75  		ready:    make(chan struct{}),
    76  		done:     make(chan struct{}),
    77  	}
    78  }
    79  
    80  func (th *tabletHealth) lastResult(ctx context.Context) (*querypb.StreamHealthResponse, error) {
    81  	// Wait until at least the first result comes in, or the stream ends.
    82  	select {
    83  	case <-ctx.Done():
    84  		return nil, ctx.Err()
    85  	case <-th.ready:
    86  	case <-th.done:
    87  	}
    88  
    89  	th.mu.Lock()
    90  	defer th.mu.Unlock()
    91  
    92  	th.accessed = time.Now()
    93  	return th.result, th.err
    94  }
    95  
    96  func (th *tabletHealth) lastAccessed() time.Time {
    97  	th.mu.Lock()
    98  	defer th.mu.Unlock()
    99  
   100  	return th.accessed
   101  }
   102  
   103  func (th *tabletHealth) stream(ctx context.Context, ts *topo.Server, tabletAlias *topodatapb.TabletAlias) (err error) {
   104  	defer func() {
   105  		th.mu.Lock()
   106  		th.err = err
   107  		th.mu.Unlock()
   108  		close(th.done)
   109  	}()
   110  
   111  	ti, err := ts.GetTablet(ctx, tabletAlias)
   112  	if err != nil {
   113  		return err
   114  	}
   115  
   116  	conn, err := tabletconn.GetDialer()(ti.Tablet, grpcclient.FailFast(true))
   117  	if err != nil {
   118  		return err
   119  	}
   120  	defer conn.Close(ctx)
   121  
   122  	first := true
   123  	return conn.StreamHealth(ctx, func(shr *querypb.StreamHealthResponse) error {
   124  		th.mu.Lock()
   125  		th.result = shr
   126  		th.mu.Unlock()
   127  
   128  		if first {
   129  			// We got the first result, so we're ready to be accessed.
   130  			close(th.ready)
   131  			first = false
   132  		}
   133  		if time.Since(th.lastAccessed()) >= tabletHealthKeepAlive {
   134  			return io.EOF
   135  		}
   136  		return nil
   137  	})
   138  }
   139  
   140  type tabletHealthCache struct {
   141  	ts *topo.Server
   142  
   143  	// mu protects the map.
   144  	mu sync.Mutex
   145  
   146  	// tabletMap is keyed by topoproto.TabletAliasString(tablet alias).
   147  	tabletMap map[string]*tabletHealth
   148  }
   149  
   150  func newTabletHealthCache(ts *topo.Server) *tabletHealthCache {
   151  	return &tabletHealthCache{
   152  		ts:        ts,
   153  		tabletMap: make(map[string]*tabletHealth),
   154  	}
   155  }
   156  
   157  func (thc *tabletHealthCache) Get(ctx context.Context, tabletAlias *topodatapb.TabletAlias) (*querypb.StreamHealthResponse, error) {
   158  	thc.mu.Lock()
   159  
   160  	tabletAliasStr := topoproto.TabletAliasString(tabletAlias)
   161  	th, ok := thc.tabletMap[tabletAliasStr]
   162  	if !ok {
   163  		// No existing stream, so start one.
   164  		th = newTabletHealth()
   165  		thc.tabletMap[tabletAliasStr] = th
   166  
   167  		go func() {
   168  			log.Infof("starting health stream for tablet %v", tabletAlias)
   169  			err := th.stream(context.Background(), thc.ts, tabletAlias)
   170  			log.Infof("tablet %v health stream ended, error: %v", tabletAlias, err)
   171  			thc.delete(tabletAliasStr)
   172  		}()
   173  	}
   174  
   175  	thc.mu.Unlock()
   176  
   177  	return th.lastResult(ctx)
   178  }
   179  
   180  func (thc *tabletHealthCache) delete(tabletAliasStr string) {
   181  	thc.mu.Lock()
   182  	delete(thc.tabletMap, tabletAliasStr)
   183  	thc.mu.Unlock()
   184  }