vitess.io/vitess@v0.16.2/go/vt/vtctld/tablet_data.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vtctld 18 19 import ( 20 "context" 21 "io" 22 "sync" 23 "time" 24 25 "github.com/spf13/pflag" 26 27 "vitess.io/vitess/go/vt/grpcclient" 28 "vitess.io/vitess/go/vt/log" 29 "vitess.io/vitess/go/vt/servenv" 30 "vitess.io/vitess/go/vt/topo" 31 "vitess.io/vitess/go/vt/topo/topoproto" 32 "vitess.io/vitess/go/vt/vttablet/tabletconn" 33 34 querypb "vitess.io/vitess/go/vt/proto/query" 35 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 36 ) 37 38 // This file maintains a tablet health cache. It establishes streaming 39 // connections with tablets, and updates its internal state with the 40 // result. 41 42 var ( 43 tabletHealthKeepAlive = 5 * time.Minute 44 ) 45 46 type tabletHealth struct { 47 mu sync.Mutex 48 49 // result stores the most recent response. 50 result *querypb.StreamHealthResponse 51 // accessed stores the time of the most recent access. 52 accessed time.Time 53 54 // err stores the result of the stream attempt. 55 err error 56 // done is closed when the stream attempt ends. 57 done chan struct{} 58 // ready is closed when there is at least one result to read. 59 ready chan struct{} 60 } 61 62 func init() { 63 for _, cmd := range []string{"vtcombo", "vtctld"} { 64 servenv.OnParseFor(cmd, registerVtctlTabletFlags) 65 } 66 } 67 68 func registerVtctlTabletFlags(fs *pflag.FlagSet) { 69 fs.DurationVar(&tabletHealthKeepAlive, "tablet_health_keep_alive", tabletHealthKeepAlive, "close streaming tablet health connection if there are no requests for this long") 70 } 71 72 func newTabletHealth() *tabletHealth { 73 return &tabletHealth{ 74 accessed: time.Now(), 75 ready: make(chan struct{}), 76 done: make(chan struct{}), 77 } 78 } 79 80 func (th *tabletHealth) lastResult(ctx context.Context) (*querypb.StreamHealthResponse, error) { 81 // Wait until at least the first result comes in, or the stream ends. 82 select { 83 case <-ctx.Done(): 84 return nil, ctx.Err() 85 case <-th.ready: 86 case <-th.done: 87 } 88 89 th.mu.Lock() 90 defer th.mu.Unlock() 91 92 th.accessed = time.Now() 93 return th.result, th.err 94 } 95 96 func (th *tabletHealth) lastAccessed() time.Time { 97 th.mu.Lock() 98 defer th.mu.Unlock() 99 100 return th.accessed 101 } 102 103 func (th *tabletHealth) stream(ctx context.Context, ts *topo.Server, tabletAlias *topodatapb.TabletAlias) (err error) { 104 defer func() { 105 th.mu.Lock() 106 th.err = err 107 th.mu.Unlock() 108 close(th.done) 109 }() 110 111 ti, err := ts.GetTablet(ctx, tabletAlias) 112 if err != nil { 113 return err 114 } 115 116 conn, err := tabletconn.GetDialer()(ti.Tablet, grpcclient.FailFast(true)) 117 if err != nil { 118 return err 119 } 120 defer conn.Close(ctx) 121 122 first := true 123 return conn.StreamHealth(ctx, func(shr *querypb.StreamHealthResponse) error { 124 th.mu.Lock() 125 th.result = shr 126 th.mu.Unlock() 127 128 if first { 129 // We got the first result, so we're ready to be accessed. 130 close(th.ready) 131 first = false 132 } 133 if time.Since(th.lastAccessed()) >= tabletHealthKeepAlive { 134 return io.EOF 135 } 136 return nil 137 }) 138 } 139 140 type tabletHealthCache struct { 141 ts *topo.Server 142 143 // mu protects the map. 144 mu sync.Mutex 145 146 // tabletMap is keyed by topoproto.TabletAliasString(tablet alias). 147 tabletMap map[string]*tabletHealth 148 } 149 150 func newTabletHealthCache(ts *topo.Server) *tabletHealthCache { 151 return &tabletHealthCache{ 152 ts: ts, 153 tabletMap: make(map[string]*tabletHealth), 154 } 155 } 156 157 func (thc *tabletHealthCache) Get(ctx context.Context, tabletAlias *topodatapb.TabletAlias) (*querypb.StreamHealthResponse, error) { 158 thc.mu.Lock() 159 160 tabletAliasStr := topoproto.TabletAliasString(tabletAlias) 161 th, ok := thc.tabletMap[tabletAliasStr] 162 if !ok { 163 // No existing stream, so start one. 164 th = newTabletHealth() 165 thc.tabletMap[tabletAliasStr] = th 166 167 go func() { 168 log.Infof("starting health stream for tablet %v", tabletAlias) 169 err := th.stream(context.Background(), thc.ts, tabletAlias) 170 log.Infof("tablet %v health stream ended, error: %v", tabletAlias, err) 171 thc.delete(tabletAliasStr) 172 }() 173 } 174 175 thc.mu.Unlock() 176 177 return th.lastResult(ctx) 178 } 179 180 func (thc *tabletHealthCache) delete(tabletAliasStr string) { 181 thc.mu.Lock() 182 delete(thc.tabletMap, tabletAliasStr) 183 thc.mu.Unlock() 184 }