vitess.io/vitess@v0.16.2/go/vt/vtgate/status.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vtgate 18 19 import ( 20 "fmt" 21 "strings" 22 "sync" 23 "time" 24 25 "vitess.io/vitess/go/stats" 26 27 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 28 ) 29 30 const ( 31 aggrChanSize = 10000 32 33 // StatusTemplate is the display part to use to show 34 // a TabletCacheStatusList. 35 StatusTemplate = ` 36 <style> 37 table { 38 border-collapse: collapse; 39 } 40 td, th { 41 border: 1px solid #999; 42 padding: 0.2rem; 43 } 44 table tr:nth-child(even) { 45 background-color: #eee; 46 } 47 table tr:nth-child(odd) { 48 background-color: #fff; 49 } 50 </style> 51 <table> 52 <tr> 53 <th>Keyspace</th> 54 <th>Shard</th> 55 <th>TabletType</th> 56 <th>Address</th> 57 <th>Query Sent</th> 58 <th>Query Error</th> 59 <th>QPS (avg 1m)</th> 60 <th>Latency (ms) (avg 1m)</th> 61 </tr> 62 {{range $i, $status := .}} 63 <tr> 64 <td>{{$status.Keyspace}}</td> 65 <td>{{$status.Shard}}</td> 66 <td>{{$status.TabletType}}</td> 67 <td><a href="http://{{$status.Addr}}">{{$status.Name}}</a></td> 68 <td>{{$status.QueryCount}}</td> 69 <td>{{$status.QueryError}}</td> 70 <td>{{$status.FormattedQPS}}</td> 71 <td>{{$status.AvgLatency}}</td> 72 </tr> 73 {{end}} 74 </table> 75 ` 76 ) 77 78 var ( 79 // aggrChan buffers queryInfo objects to be processed. 80 aggrChan chan *queryInfo 81 // muAggr protects below vars. 82 muAggr sync.Mutex 83 // aggregators holds all Aggregators created. 84 aggregators []*TabletStatusAggregator 85 // gatewayStatsChanFull tracks the number of times 86 // aggrChan becomes full. 87 gatewayStatsChanFull *stats.Counter 88 ) 89 90 func init() { 91 // init global goroutines to aggregate stats. 92 aggrChan = make(chan *queryInfo, aggrChanSize) 93 gatewayStatsChanFull = stats.NewCounter("GatewayStatsChanFullCount", "The number of times the queryInfo buffer becomes full") 94 go resetAggregators() 95 go processQueryInfo() 96 } 97 98 // registerAggregator registers an aggregator to the global list. 99 func registerAggregator(a *TabletStatusAggregator) { 100 muAggr.Lock() 101 defer muAggr.Unlock() 102 aggregators = append(aggregators, a) 103 } 104 105 // resetAggregators resets the next stats slot for all aggregators every second. 106 func resetAggregators() { 107 ticker := time.NewTicker(time.Second) 108 for range ticker.C { 109 muAggr.Lock() 110 for _, a := range aggregators { 111 a.resetNextSlot() 112 } 113 muAggr.Unlock() 114 } 115 } 116 117 // processQueryInfo processes the next queryInfo object. 118 func processQueryInfo() { 119 for qi := range aggrChan { 120 qi.aggr.processQueryInfo(qi) 121 } 122 } 123 124 // 125 // TabletCacheStatus definitions 126 // 127 128 // TabletCacheStatus contains the status per destination for a gateway. 129 type TabletCacheStatus struct { 130 Keyspace string 131 Shard string 132 TabletType topodatapb.TabletType 133 Name string 134 Addr string 135 136 QueryCount uint64 137 QueryError uint64 138 QPS float64 139 AvgLatency float64 // in milliseconds 140 } 141 142 // FormattedQPS shows a 2 digit rounded value of QPS. 143 // Used in the HTML template above. 144 func (tcs *TabletCacheStatus) FormattedQPS() string { 145 return fmt.Sprintf("%.2f", tcs.QPS) 146 } 147 148 // 149 // TabletStatusAggregator definitions 150 // 151 152 // TabletStatusAggregator tracks tablet status for a gateway. 153 type TabletStatusAggregator struct { 154 Keyspace string 155 Shard string 156 TabletType topodatapb.TabletType 157 Name string // the alternative name of a tablet 158 Addr string // the host:port of a tablet 159 160 // mu protects below fields. 161 mu sync.RWMutex 162 QueryCount uint64 163 QueryError uint64 164 // for QPS and latency (avg value over a minute) 165 tick uint32 166 queryCountInMinute [60]uint64 167 latencyInMinute [60]time.Duration 168 } 169 170 // queryInfo is sent over the aggregators channel to update the stats. 171 type queryInfo struct { 172 aggr *TabletStatusAggregator 173 addr string 174 tabletType topodatapb.TabletType 175 elapsed time.Duration 176 hasError bool 177 } 178 179 // NewTabletStatusAggregator creates a TabletStatusAggregator. 180 func NewTabletStatusAggregator(keyspace, shard string, tabletType topodatapb.TabletType, name string) *TabletStatusAggregator { 181 tsa := &TabletStatusAggregator{ 182 Keyspace: keyspace, 183 Shard: shard, 184 TabletType: tabletType, 185 Name: name, 186 } 187 registerAggregator(tsa) 188 return tsa 189 } 190 191 // UpdateQueryInfo updates the aggregator with the given information about a query. 192 func (tsa *TabletStatusAggregator) UpdateQueryInfo(addr string, tabletType topodatapb.TabletType, elapsed time.Duration, hasError bool) { 193 qi := &queryInfo{ 194 aggr: tsa, 195 addr: addr, 196 tabletType: tabletType, 197 elapsed: elapsed, 198 hasError: hasError, 199 } 200 select { 201 case aggrChan <- qi: 202 default: 203 gatewayStatsChanFull.Add(1) 204 } 205 } 206 207 func (tsa *TabletStatusAggregator) processQueryInfo(qi *queryInfo) { 208 tsa.mu.Lock() 209 defer tsa.mu.Unlock() 210 if tsa.TabletType != qi.tabletType { 211 tsa.TabletType = qi.tabletType 212 // reset counters 213 tsa.QueryCount = 0 214 tsa.QueryError = 0 215 for i := 0; i < len(tsa.queryCountInMinute); i++ { 216 tsa.queryCountInMinute[i] = 0 217 } 218 for i := 0; i < len(tsa.latencyInMinute); i++ { 219 tsa.latencyInMinute[i] = 0 220 } 221 } 222 if qi.addr != "" { 223 tsa.Addr = qi.addr 224 } 225 tsa.QueryCount++ 226 tsa.queryCountInMinute[tsa.tick]++ 227 tsa.latencyInMinute[tsa.tick] += qi.elapsed 228 if qi.hasError { 229 tsa.QueryError++ 230 } 231 } 232 233 // GetCacheStatus returns a TabletCacheStatus representing the current gateway status. 234 func (tsa *TabletStatusAggregator) GetCacheStatus() *TabletCacheStatus { 235 status := &TabletCacheStatus{ 236 Keyspace: tsa.Keyspace, 237 Shard: tsa.Shard, 238 Name: tsa.Name, 239 } 240 tsa.mu.RLock() 241 defer tsa.mu.RUnlock() 242 status.TabletType = tsa.TabletType 243 status.Addr = tsa.Addr 244 status.QueryCount = tsa.QueryCount 245 status.QueryError = tsa.QueryError 246 var totalQuery uint64 247 for _, c := range tsa.queryCountInMinute { 248 totalQuery += c 249 } 250 var totalLatency time.Duration 251 for _, d := range tsa.latencyInMinute { 252 totalLatency += d 253 } 254 status.QPS = float64(totalQuery) / 60 255 if totalQuery > 0 { 256 status.AvgLatency = float64(totalLatency.Nanoseconds()) / float64(totalQuery) / 1000000 257 } 258 return status 259 } 260 261 // resetNextSlot resets the next tracking slot. 262 func (tsa *TabletStatusAggregator) resetNextSlot() { 263 tsa.mu.Lock() 264 defer tsa.mu.Unlock() 265 tsa.tick = (tsa.tick + 1) % 60 266 tsa.queryCountInMinute[tsa.tick] = 0 267 tsa.latencyInMinute[tsa.tick] = time.Duration(0) 268 } 269 270 // 271 // TabletCacheStatusList definitions 272 // 273 274 // TabletCacheStatusList is a slice of TabletCacheStatus. 275 type TabletCacheStatusList []*TabletCacheStatus 276 277 // Len is part of sort.Interface. 278 func (gtcsl TabletCacheStatusList) Len() int { 279 return len(gtcsl) 280 } 281 282 // Less is part of sort.Interface. 283 func (gtcsl TabletCacheStatusList) Less(i, j int) bool { 284 iKey := strings.Join([]string{gtcsl[i].Keyspace, gtcsl[i].Shard, string(gtcsl[i].TabletType), gtcsl[i].Name}, ".") 285 jKey := strings.Join([]string{gtcsl[j].Keyspace, gtcsl[j].Shard, string(gtcsl[j].TabletType), gtcsl[j].Name}, ".") 286 return iKey < jKey 287 } 288 289 // Swap is part of sort.Interface. 290 func (gtcsl TabletCacheStatusList) Swap(i, j int) { 291 gtcsl[i], gtcsl[j] = gtcsl[j], gtcsl[i] 292 }