vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/repltracker/reader.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package repltracker 18 19 import ( 20 "fmt" 21 "sync" 22 "time" 23 24 "vitess.io/vitess/go/vt/vtgate/evalengine" 25 26 "vitess.io/vitess/go/vt/vterrors" 27 28 "context" 29 30 "vitess.io/vitess/go/sqltypes" 31 "vitess.io/vitess/go/timer" 32 "vitess.io/vitess/go/vt/log" 33 "vitess.io/vitess/go/vt/logutil" 34 "vitess.io/vitess/go/vt/sqlparser" 35 "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool" 36 "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" 37 38 querypb "vitess.io/vitess/go/vt/proto/query" 39 ) 40 41 const ( 42 sqlFetchMostRecentHeartbeat = "SELECT ts FROM %s.heartbeat WHERE keyspaceShard=%a" 43 ) 44 45 // heartbeatReader reads the heartbeat table at a configured interval in order 46 // to calculate replication lag. It is meant to be run on a replica, and paired 47 // with a heartbeatWriter on a primary. 48 // Lag is calculated by comparing the most recent timestamp in the heartbeat 49 // table against the current time at read time. 50 type heartbeatReader struct { 51 env tabletenv.Env 52 53 enabled bool 54 interval time.Duration 55 keyspaceShard string 56 now func() time.Time 57 errorLog *logutil.ThrottledLogger 58 59 runMu sync.Mutex 60 isOpen bool 61 pool *connpool.Pool 62 ticks *timer.Timer 63 64 lagMu sync.Mutex 65 lastKnownLag time.Duration 66 lastKnownError error 67 } 68 69 // newHeartbeatReader returns a new heartbeatReader. 70 func newHeartbeatReader(env tabletenv.Env) *heartbeatReader { 71 config := env.Config() 72 if config.ReplicationTracker.Mode != tabletenv.Heartbeat { 73 return &heartbeatReader{} 74 } 75 76 heartbeatInterval := config.ReplicationTracker.HeartbeatIntervalSeconds.Get() 77 return &heartbeatReader{ 78 env: env, 79 enabled: true, 80 now: time.Now, 81 interval: heartbeatInterval, 82 ticks: timer.NewTimer(heartbeatInterval), 83 errorLog: logutil.NewThrottledLogger("HeartbeatReporter", 60*time.Second), 84 pool: connpool.NewPool(env, "HeartbeatReadPool", tabletenv.ConnPoolConfig{ 85 Size: 1, 86 IdleTimeoutSeconds: env.Config().OltpReadPool.IdleTimeoutSeconds, 87 }), 88 } 89 } 90 91 // InitDBConfig initializes the target name for the heartbeatReader. 92 func (r *heartbeatReader) InitDBConfig(target *querypb.Target) { 93 r.keyspaceShard = fmt.Sprintf("%s:%s", target.Keyspace, target.Shard) 94 } 95 96 // Open starts the heartbeat ticker and opens the db pool. 97 func (r *heartbeatReader) Open() { 98 if !r.enabled { 99 return 100 } 101 r.runMu.Lock() 102 defer r.runMu.Unlock() 103 if r.isOpen { 104 return 105 } 106 log.Info("Heartbeat Reader: opening") 107 108 r.pool.Open(r.env.Config().DB.AppWithDB(), r.env.Config().DB.DbaWithDB(), r.env.Config().DB.AppDebugWithDB()) 109 r.ticks.Start(func() { r.readHeartbeat() }) 110 r.isOpen = true 111 } 112 113 // Close cancels the watchHeartbeat periodic ticker and closes the db pool. 114 func (r *heartbeatReader) Close() { 115 if !r.enabled { 116 return 117 } 118 r.runMu.Lock() 119 defer r.runMu.Unlock() 120 if !r.isOpen { 121 return 122 } 123 r.ticks.Stop() 124 r.pool.Close() 125 126 currentLagNs.Set(0) 127 128 r.isOpen = false 129 log.Info("Heartbeat Reader: closed") 130 } 131 132 // Status returns the most recently recorded lag measurement or error encountered. 133 func (r *heartbeatReader) Status() (time.Duration, error) { 134 r.lagMu.Lock() 135 defer r.lagMu.Unlock() 136 if r.lastKnownError != nil { 137 return 0, r.lastKnownError 138 } 139 return r.lastKnownLag, nil 140 } 141 142 // readHeartbeat reads from the heartbeat table exactly once, updating 143 // the last known lag and/or error, and incrementing counters. 144 func (r *heartbeatReader) readHeartbeat() { 145 defer r.env.LogError() 146 147 ctx, cancel := context.WithDeadline(context.Background(), r.now().Add(r.interval)) 148 defer cancel() 149 150 res, err := r.fetchMostRecentHeartbeat(ctx) 151 if err != nil { 152 r.recordError(vterrors.Wrap(err, "failed to read most recent heartbeat")) 153 return 154 } 155 ts, err := parseHeartbeatResult(res) 156 if err != nil { 157 r.recordError(vterrors.Wrap(err, "failed to parse heartbeat result")) 158 return 159 } 160 161 lag := r.now().Sub(time.Unix(0, ts)) 162 cumulativeLagNs.Add(lag.Nanoseconds()) 163 currentLagNs.Set(lag.Nanoseconds()) 164 heartbeatLagNsHistogram.Add(lag.Nanoseconds()) 165 reads.Add(1) 166 167 r.lagMu.Lock() 168 r.lastKnownLag = lag 169 r.lastKnownError = nil 170 r.lagMu.Unlock() 171 } 172 173 // fetchMostRecentHeartbeat fetches the most recently recorded heartbeat from the heartbeat table, 174 // returning a result with the timestamp of the heartbeat. 175 func (r *heartbeatReader) fetchMostRecentHeartbeat(ctx context.Context) (*sqltypes.Result, error) { 176 conn, err := r.pool.Get(ctx, nil) 177 if err != nil { 178 return nil, err 179 } 180 defer conn.Recycle() 181 sel, err := r.bindHeartbeatFetch() 182 if err != nil { 183 return nil, err 184 } 185 return conn.Exec(ctx, sel, 1, false) 186 } 187 188 // bindHeartbeatFetch takes a heartbeat read and adds the necessary 189 // fields to the query as bind vars. This is done to protect ourselves 190 // against a badly formed keyspace or shard name. 191 func (r *heartbeatReader) bindHeartbeatFetch() (string, error) { 192 bindVars := map[string]*querypb.BindVariable{ 193 "ks": sqltypes.StringBindVariable(r.keyspaceShard), 194 } 195 parsed := sqlparser.BuildParsedQuery(sqlFetchMostRecentHeartbeat, "_vt", ":ks") 196 bound, err := parsed.GenerateQuery(bindVars, nil) 197 if err != nil { 198 return "", err 199 } 200 return bound, nil 201 } 202 203 // parseHeartbeatResult turns a raw result into the timestamp for processing. 204 func parseHeartbeatResult(res *sqltypes.Result) (int64, error) { 205 if len(res.Rows) != 1 { 206 return 0, fmt.Errorf("failed to read heartbeat: writer query did not result in 1 row. Got %v", len(res.Rows)) 207 } 208 ts, err := evalengine.ToInt64(res.Rows[0][0]) 209 if err != nil { 210 return 0, err 211 } 212 return ts, nil 213 } 214 215 // recordError keeps track of the lastKnown error for reporting to the healthcheck. 216 // Errors tracked here are logged with throttling to cut down on log spam since 217 // operations can happen very frequently in this package. 218 func (r *heartbeatReader) recordError(err error) { 219 r.lagMu.Lock() 220 r.lastKnownError = err 221 r.lagMu.Unlock() 222 r.errorLog.Errorf("%v", err) 223 readErrors.Add(1) 224 }