vitess.io/vitess@v0.16.2/go/vt/vtgate/status.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vtgate
    18  
    19  import (
    20  	"fmt"
    21  	"strings"
    22  	"sync"
    23  	"time"
    24  
    25  	"vitess.io/vitess/go/stats"
    26  
    27  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    28  )
    29  
    30  const (
    31  	aggrChanSize = 10000
    32  
    33  	// StatusTemplate is the display part to use to show
    34  	// a TabletCacheStatusList.
    35  	StatusTemplate = `
    36  <style>
    37    table {
    38      border-collapse: collapse;
    39    }
    40    td, th {
    41      border: 1px solid #999;
    42      padding: 0.2rem;
    43    }
    44    table tr:nth-child(even) {
    45      background-color: #eee;
    46    }
    47    table tr:nth-child(odd) {
    48      background-color: #fff;
    49    }
    50  </style>
    51  <table>
    52    <tr>
    53      <th>Keyspace</th>
    54      <th>Shard</th>
    55      <th>TabletType</th>
    56      <th>Address</th>
    57      <th>Query Sent</th>
    58      <th>Query Error</th>
    59      <th>QPS (avg 1m)</th>
    60      <th>Latency (ms) (avg 1m)</th>
    61    </tr>
    62    {{range $i, $status := .}}
    63    <tr>
    64      <td>{{$status.Keyspace}}</td>
    65      <td>{{$status.Shard}}</td>
    66      <td>{{$status.TabletType}}</td>
    67      <td><a href="http://{{$status.Addr}}">{{$status.Name}}</a></td>
    68      <td>{{$status.QueryCount}}</td>
    69      <td>{{$status.QueryError}}</td>
    70      <td>{{$status.FormattedQPS}}</td>
    71      <td>{{$status.AvgLatency}}</td>
    72    </tr>
    73    {{end}}
    74  </table>
    75  `
    76  )
    77  
    78  var (
    79  	// aggrChan buffers queryInfo objects to be processed.
    80  	aggrChan chan *queryInfo
    81  	// muAggr protects below vars.
    82  	muAggr sync.Mutex
    83  	// aggregators holds all Aggregators created.
    84  	aggregators []*TabletStatusAggregator
    85  	// gatewayStatsChanFull tracks the number of times
    86  	// aggrChan becomes full.
    87  	gatewayStatsChanFull *stats.Counter
    88  )
    89  
    90  func init() {
    91  	// init global goroutines to aggregate stats.
    92  	aggrChan = make(chan *queryInfo, aggrChanSize)
    93  	gatewayStatsChanFull = stats.NewCounter("GatewayStatsChanFullCount", "The number of times the queryInfo buffer becomes full")
    94  	go resetAggregators()
    95  	go processQueryInfo()
    96  }
    97  
    98  // registerAggregator registers an aggregator to the global list.
    99  func registerAggregator(a *TabletStatusAggregator) {
   100  	muAggr.Lock()
   101  	defer muAggr.Unlock()
   102  	aggregators = append(aggregators, a)
   103  }
   104  
   105  // resetAggregators resets the next stats slot for all aggregators every second.
   106  func resetAggregators() {
   107  	ticker := time.NewTicker(time.Second)
   108  	for range ticker.C {
   109  		muAggr.Lock()
   110  		for _, a := range aggregators {
   111  			a.resetNextSlot()
   112  		}
   113  		muAggr.Unlock()
   114  	}
   115  }
   116  
   117  // processQueryInfo processes the next queryInfo object.
   118  func processQueryInfo() {
   119  	for qi := range aggrChan {
   120  		qi.aggr.processQueryInfo(qi)
   121  	}
   122  }
   123  
   124  //
   125  // TabletCacheStatus definitions
   126  //
   127  
   128  // TabletCacheStatus contains the status per destination for a gateway.
   129  type TabletCacheStatus struct {
   130  	Keyspace   string
   131  	Shard      string
   132  	TabletType topodatapb.TabletType
   133  	Name       string
   134  	Addr       string
   135  
   136  	QueryCount uint64
   137  	QueryError uint64
   138  	QPS        float64
   139  	AvgLatency float64 // in milliseconds
   140  }
   141  
   142  // FormattedQPS shows a 2 digit rounded value of QPS.
   143  // Used in the HTML template above.
   144  func (tcs *TabletCacheStatus) FormattedQPS() string {
   145  	return fmt.Sprintf("%.2f", tcs.QPS)
   146  }
   147  
   148  //
   149  // TabletStatusAggregator definitions
   150  //
   151  
   152  // TabletStatusAggregator tracks tablet status for a gateway.
   153  type TabletStatusAggregator struct {
   154  	Keyspace   string
   155  	Shard      string
   156  	TabletType topodatapb.TabletType
   157  	Name       string // the alternative name of a tablet
   158  	Addr       string // the host:port of a tablet
   159  
   160  	// mu protects below fields.
   161  	mu         sync.RWMutex
   162  	QueryCount uint64
   163  	QueryError uint64
   164  	// for QPS and latency (avg value over a minute)
   165  	tick               uint32
   166  	queryCountInMinute [60]uint64
   167  	latencyInMinute    [60]time.Duration
   168  }
   169  
   170  // queryInfo is sent over the aggregators channel to update the stats.
   171  type queryInfo struct {
   172  	aggr       *TabletStatusAggregator
   173  	addr       string
   174  	tabletType topodatapb.TabletType
   175  	elapsed    time.Duration
   176  	hasError   bool
   177  }
   178  
   179  // NewTabletStatusAggregator creates a TabletStatusAggregator.
   180  func NewTabletStatusAggregator(keyspace, shard string, tabletType topodatapb.TabletType, name string) *TabletStatusAggregator {
   181  	tsa := &TabletStatusAggregator{
   182  		Keyspace:   keyspace,
   183  		Shard:      shard,
   184  		TabletType: tabletType,
   185  		Name:       name,
   186  	}
   187  	registerAggregator(tsa)
   188  	return tsa
   189  }
   190  
   191  // UpdateQueryInfo updates the aggregator with the given information about a query.
   192  func (tsa *TabletStatusAggregator) UpdateQueryInfo(addr string, tabletType topodatapb.TabletType, elapsed time.Duration, hasError bool) {
   193  	qi := &queryInfo{
   194  		aggr:       tsa,
   195  		addr:       addr,
   196  		tabletType: tabletType,
   197  		elapsed:    elapsed,
   198  		hasError:   hasError,
   199  	}
   200  	select {
   201  	case aggrChan <- qi:
   202  	default:
   203  		gatewayStatsChanFull.Add(1)
   204  	}
   205  }
   206  
   207  func (tsa *TabletStatusAggregator) processQueryInfo(qi *queryInfo) {
   208  	tsa.mu.Lock()
   209  	defer tsa.mu.Unlock()
   210  	if tsa.TabletType != qi.tabletType {
   211  		tsa.TabletType = qi.tabletType
   212  		// reset counters
   213  		tsa.QueryCount = 0
   214  		tsa.QueryError = 0
   215  		for i := 0; i < len(tsa.queryCountInMinute); i++ {
   216  			tsa.queryCountInMinute[i] = 0
   217  		}
   218  		for i := 0; i < len(tsa.latencyInMinute); i++ {
   219  			tsa.latencyInMinute[i] = 0
   220  		}
   221  	}
   222  	if qi.addr != "" {
   223  		tsa.Addr = qi.addr
   224  	}
   225  	tsa.QueryCount++
   226  	tsa.queryCountInMinute[tsa.tick]++
   227  	tsa.latencyInMinute[tsa.tick] += qi.elapsed
   228  	if qi.hasError {
   229  		tsa.QueryError++
   230  	}
   231  }
   232  
   233  // GetCacheStatus returns a TabletCacheStatus representing the current gateway status.
   234  func (tsa *TabletStatusAggregator) GetCacheStatus() *TabletCacheStatus {
   235  	status := &TabletCacheStatus{
   236  		Keyspace: tsa.Keyspace,
   237  		Shard:    tsa.Shard,
   238  		Name:     tsa.Name,
   239  	}
   240  	tsa.mu.RLock()
   241  	defer tsa.mu.RUnlock()
   242  	status.TabletType = tsa.TabletType
   243  	status.Addr = tsa.Addr
   244  	status.QueryCount = tsa.QueryCount
   245  	status.QueryError = tsa.QueryError
   246  	var totalQuery uint64
   247  	for _, c := range tsa.queryCountInMinute {
   248  		totalQuery += c
   249  	}
   250  	var totalLatency time.Duration
   251  	for _, d := range tsa.latencyInMinute {
   252  		totalLatency += d
   253  	}
   254  	status.QPS = float64(totalQuery) / 60
   255  	if totalQuery > 0 {
   256  		status.AvgLatency = float64(totalLatency.Nanoseconds()) / float64(totalQuery) / 1000000
   257  	}
   258  	return status
   259  }
   260  
   261  // resetNextSlot resets the next tracking slot.
   262  func (tsa *TabletStatusAggregator) resetNextSlot() {
   263  	tsa.mu.Lock()
   264  	defer tsa.mu.Unlock()
   265  	tsa.tick = (tsa.tick + 1) % 60
   266  	tsa.queryCountInMinute[tsa.tick] = 0
   267  	tsa.latencyInMinute[tsa.tick] = time.Duration(0)
   268  }
   269  
   270  //
   271  // TabletCacheStatusList definitions
   272  //
   273  
   274  // TabletCacheStatusList is a slice of TabletCacheStatus.
   275  type TabletCacheStatusList []*TabletCacheStatus
   276  
   277  // Len is part of sort.Interface.
   278  func (gtcsl TabletCacheStatusList) Len() int {
   279  	return len(gtcsl)
   280  }
   281  
   282  // Less is part of sort.Interface.
   283  func (gtcsl TabletCacheStatusList) Less(i, j int) bool {
   284  	iKey := strings.Join([]string{gtcsl[i].Keyspace, gtcsl[i].Shard, string(gtcsl[i].TabletType), gtcsl[i].Name}, ".")
   285  	jKey := strings.Join([]string{gtcsl[j].Keyspace, gtcsl[j].Shard, string(gtcsl[j].TabletType), gtcsl[j].Name}, ".")
   286  	return iKey < jKey
   287  }
   288  
   289  // Swap is part of sort.Interface.
   290  func (gtcsl TabletCacheStatusList) Swap(i, j int) {
   291  	gtcsl[i], gtcsl[j] = gtcsl[j], gtcsl[i]
   292  }