vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/repltracker/reader.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package repltracker
    18  
    19  import (
    20  	"fmt"
    21  	"sync"
    22  	"time"
    23  
    24  	"vitess.io/vitess/go/vt/vtgate/evalengine"
    25  
    26  	"vitess.io/vitess/go/vt/vterrors"
    27  
    28  	"context"
    29  
    30  	"vitess.io/vitess/go/sqltypes"
    31  	"vitess.io/vitess/go/timer"
    32  	"vitess.io/vitess/go/vt/log"
    33  	"vitess.io/vitess/go/vt/logutil"
    34  	"vitess.io/vitess/go/vt/sqlparser"
    35  	"vitess.io/vitess/go/vt/vttablet/tabletserver/connpool"
    36  	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
    37  
    38  	querypb "vitess.io/vitess/go/vt/proto/query"
    39  )
    40  
    41  const (
    42  	sqlFetchMostRecentHeartbeat = "SELECT ts FROM %s.heartbeat WHERE keyspaceShard=%a"
    43  )
    44  
    45  // heartbeatReader reads the heartbeat table at a configured interval in order
    46  // to calculate replication lag. It is meant to be run on a replica, and paired
    47  // with a heartbeatWriter on a primary.
    48  // Lag is calculated by comparing the most recent timestamp in the heartbeat
    49  // table against the current time at read time.
    50  type heartbeatReader struct {
    51  	env tabletenv.Env
    52  
    53  	enabled       bool
    54  	interval      time.Duration
    55  	keyspaceShard string
    56  	now           func() time.Time
    57  	errorLog      *logutil.ThrottledLogger
    58  
    59  	runMu  sync.Mutex
    60  	isOpen bool
    61  	pool   *connpool.Pool
    62  	ticks  *timer.Timer
    63  
    64  	lagMu          sync.Mutex
    65  	lastKnownLag   time.Duration
    66  	lastKnownError error
    67  }
    68  
    69  // newHeartbeatReader returns a new heartbeatReader.
    70  func newHeartbeatReader(env tabletenv.Env) *heartbeatReader {
    71  	config := env.Config()
    72  	if config.ReplicationTracker.Mode != tabletenv.Heartbeat {
    73  		return &heartbeatReader{}
    74  	}
    75  
    76  	heartbeatInterval := config.ReplicationTracker.HeartbeatIntervalSeconds.Get()
    77  	return &heartbeatReader{
    78  		env:      env,
    79  		enabled:  true,
    80  		now:      time.Now,
    81  		interval: heartbeatInterval,
    82  		ticks:    timer.NewTimer(heartbeatInterval),
    83  		errorLog: logutil.NewThrottledLogger("HeartbeatReporter", 60*time.Second),
    84  		pool: connpool.NewPool(env, "HeartbeatReadPool", tabletenv.ConnPoolConfig{
    85  			Size:               1,
    86  			IdleTimeoutSeconds: env.Config().OltpReadPool.IdleTimeoutSeconds,
    87  		}),
    88  	}
    89  }
    90  
    91  // InitDBConfig initializes the target name for the heartbeatReader.
    92  func (r *heartbeatReader) InitDBConfig(target *querypb.Target) {
    93  	r.keyspaceShard = fmt.Sprintf("%s:%s", target.Keyspace, target.Shard)
    94  }
    95  
    96  // Open starts the heartbeat ticker and opens the db pool.
    97  func (r *heartbeatReader) Open() {
    98  	if !r.enabled {
    99  		return
   100  	}
   101  	r.runMu.Lock()
   102  	defer r.runMu.Unlock()
   103  	if r.isOpen {
   104  		return
   105  	}
   106  	log.Info("Heartbeat Reader: opening")
   107  
   108  	r.pool.Open(r.env.Config().DB.AppWithDB(), r.env.Config().DB.DbaWithDB(), r.env.Config().DB.AppDebugWithDB())
   109  	r.ticks.Start(func() { r.readHeartbeat() })
   110  	r.isOpen = true
   111  }
   112  
   113  // Close cancels the watchHeartbeat periodic ticker and closes the db pool.
   114  func (r *heartbeatReader) Close() {
   115  	if !r.enabled {
   116  		return
   117  	}
   118  	r.runMu.Lock()
   119  	defer r.runMu.Unlock()
   120  	if !r.isOpen {
   121  		return
   122  	}
   123  	r.ticks.Stop()
   124  	r.pool.Close()
   125  
   126  	currentLagNs.Set(0)
   127  
   128  	r.isOpen = false
   129  	log.Info("Heartbeat Reader: closed")
   130  }
   131  
   132  // Status returns the most recently recorded lag measurement or error encountered.
   133  func (r *heartbeatReader) Status() (time.Duration, error) {
   134  	r.lagMu.Lock()
   135  	defer r.lagMu.Unlock()
   136  	if r.lastKnownError != nil {
   137  		return 0, r.lastKnownError
   138  	}
   139  	return r.lastKnownLag, nil
   140  }
   141  
   142  // readHeartbeat reads from the heartbeat table exactly once, updating
   143  // the last known lag and/or error, and incrementing counters.
   144  func (r *heartbeatReader) readHeartbeat() {
   145  	defer r.env.LogError()
   146  
   147  	ctx, cancel := context.WithDeadline(context.Background(), r.now().Add(r.interval))
   148  	defer cancel()
   149  
   150  	res, err := r.fetchMostRecentHeartbeat(ctx)
   151  	if err != nil {
   152  		r.recordError(vterrors.Wrap(err, "failed to read most recent heartbeat"))
   153  		return
   154  	}
   155  	ts, err := parseHeartbeatResult(res)
   156  	if err != nil {
   157  		r.recordError(vterrors.Wrap(err, "failed to parse heartbeat result"))
   158  		return
   159  	}
   160  
   161  	lag := r.now().Sub(time.Unix(0, ts))
   162  	cumulativeLagNs.Add(lag.Nanoseconds())
   163  	currentLagNs.Set(lag.Nanoseconds())
   164  	heartbeatLagNsHistogram.Add(lag.Nanoseconds())
   165  	reads.Add(1)
   166  
   167  	r.lagMu.Lock()
   168  	r.lastKnownLag = lag
   169  	r.lastKnownError = nil
   170  	r.lagMu.Unlock()
   171  }
   172  
   173  // fetchMostRecentHeartbeat fetches the most recently recorded heartbeat from the heartbeat table,
   174  // returning a result with the timestamp of the heartbeat.
   175  func (r *heartbeatReader) fetchMostRecentHeartbeat(ctx context.Context) (*sqltypes.Result, error) {
   176  	conn, err := r.pool.Get(ctx, nil)
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  	defer conn.Recycle()
   181  	sel, err := r.bindHeartbeatFetch()
   182  	if err != nil {
   183  		return nil, err
   184  	}
   185  	return conn.Exec(ctx, sel, 1, false)
   186  }
   187  
   188  // bindHeartbeatFetch takes a heartbeat read and adds the necessary
   189  // fields to the query as bind vars. This is done to protect ourselves
   190  // against a badly formed keyspace or shard name.
   191  func (r *heartbeatReader) bindHeartbeatFetch() (string, error) {
   192  	bindVars := map[string]*querypb.BindVariable{
   193  		"ks": sqltypes.StringBindVariable(r.keyspaceShard),
   194  	}
   195  	parsed := sqlparser.BuildParsedQuery(sqlFetchMostRecentHeartbeat, "_vt", ":ks")
   196  	bound, err := parsed.GenerateQuery(bindVars, nil)
   197  	if err != nil {
   198  		return "", err
   199  	}
   200  	return bound, nil
   201  }
   202  
   203  // parseHeartbeatResult turns a raw result into the timestamp for processing.
   204  func parseHeartbeatResult(res *sqltypes.Result) (int64, error) {
   205  	if len(res.Rows) != 1 {
   206  		return 0, fmt.Errorf("failed to read heartbeat: writer query did not result in 1 row. Got %v", len(res.Rows))
   207  	}
   208  	ts, err := evalengine.ToInt64(res.Rows[0][0])
   209  	if err != nil {
   210  		return 0, err
   211  	}
   212  	return ts, nil
   213  }
   214  
   215  // recordError keeps track of the lastKnown error for reporting to the healthcheck.
   216  // Errors tracked here are logged with throttling to cut down on log spam since
   217  // operations can happen very frequently in this package.
   218  func (r *heartbeatReader) recordError(err error) {
   219  	r.lagMu.Lock()
   220  	r.lastKnownError = err
   221  	r.lagMu.Unlock()
   222  	r.errorLog.Errorf("%v", err)
   223  	readErrors.Add(1)
   224  }