vitess.io/vitess@v0.16.2/go/vt/vtctld/api_utils.go (about) 1 /* 2 Copyright 2022 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vtctld 18 19 import ( 20 "fmt" 21 "sort" 22 "strings" 23 24 "vitess.io/vitess/go/vt/discovery" 25 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 26 "vitess.io/vitess/go/vt/topo/topoproto" 27 ) 28 29 const ( 30 // tabletMissing represents a missing/non-existent tablet for any metric. 31 tabletMissing = -1 32 // These values represent the threshold for replication lag. 33 lagThresholdDegraded = 60 34 lagThresholdUnhealthy = 120 35 // These values represent the health of the tablet - 1 is healthy, 2 is degraded, 3 is unhealthy 36 tabletHealthy = 0 37 tabletDegraded = 1 38 tabletUnhealthy = 2 39 ) 40 41 type ( 42 // yLabel is used to keep track of the cell and type labels of the heatmap. 43 yLabel struct { 44 CellLabel label 45 TypeLabels []label 46 } 47 48 // label is used to keep track of one label of a heatmap and how many rows it should span. 49 label struct { 50 Name string 51 Rowspan int 52 } 53 54 // heatmap stores all the needed info to construct the heatmap. 55 heatmap struct { 56 // Data is a 2D array of values of the specified metric. 57 Data [][]float64 58 // Aliases is a 2D array holding references to the tablet aliases. 59 Aliases [][]*topodatapb.TabletAlias 60 KeyspaceLabel label 61 CellAndTypeLabels []yLabel 62 ShardLabels []string 63 64 // YGridLines is used to draw gridLines on the map in the right places. 65 YGridLines []float64 66 } 67 68 topologyInfo struct { 69 Keyspaces []string 70 Cells []string 71 TabletTypes []string 72 } 73 ) 74 75 // availableTabletTypes is an array of tabletTypes that are being considered to display on the heatmap. 76 // Note: this list must always be sorted by the order they should appear (i.e. PRIMARY first, then REPLICA, then RDONLY) 77 var availableTabletTypes = []topodatapb.TabletType{topodatapb.TabletType_PRIMARY, topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY} 78 79 func makeStringTypeList(types []topodatapb.TabletType) []string { 80 var list []string 81 for _, t := range types { 82 list = append(list, t.String()) 83 } 84 return list 85 } 86 87 func sortTypes(types map[topodatapb.TabletType]bool) []topodatapb.TabletType { 88 var listOfTypes []topodatapb.TabletType 89 for _, tabType := range availableTabletTypes { 90 if t := types[tabType]; t { 91 listOfTypes = append(listOfTypes, tabType) 92 } 93 } 94 return listOfTypes 95 } 96 97 func health(stat *discovery.TabletHealth) float64 { 98 // The tablet is unhealthy if there is an health error. 99 if stat.Stats.HealthError != "" { 100 return tabletUnhealthy 101 } 102 103 // The tablet is healthy/degraded/unheathy depending on the lag. 104 lag := stat.Stats.ReplicationLagSeconds 105 switch { 106 case lag >= lagThresholdUnhealthy: 107 return tabletUnhealthy 108 case lag >= lagThresholdDegraded: 109 return tabletDegraded 110 } 111 112 // The tablet is degraded if there was an error previously. 113 if stat.LastError != nil { 114 return tabletDegraded 115 } 116 117 // The tablet is healthy or degraded based on serving status. 118 if !stat.Serving { 119 return tabletDegraded 120 } 121 122 // All else is ok so tablet is healthy. 123 return tabletHealthy 124 } 125 126 func replicationLag(stat *discovery.TabletHealth) float64 { 127 return float64(stat.Stats.ReplicationLagSeconds) 128 } 129 130 func qps(stat *discovery.TabletHealth) float64 { 131 return stat.Stats.Qps 132 } 133 134 func getTabletHealthWithCellFilter(hc discovery.HealthCheck, ks, shard, cell string, tabletType topodatapb.TabletType) []*discovery.TabletHealth { 135 tabletTypeStr := topoproto.TabletTypeLString(tabletType) 136 m := hc.CacheStatusMap() 137 key := fmt.Sprintf("%v.%v.%v.%v", cell, ks, shard, strings.ToUpper(tabletTypeStr)) 138 if _, ok := m[key]; !ok { 139 return nil 140 } 141 return m[key].TabletsStats 142 } 143 144 func getShardInKeyspace(hc discovery.HealthCheck, ks string) []string { 145 shards := []string{} 146 shardsMap := map[string]bool{} 147 cache := hc.CacheStatus() 148 for _, status := range cache { 149 if status.Target.Keyspace != ks { 150 continue 151 } 152 if ok := shardsMap[status.Target.Shard]; !ok { 153 shardsMap[status.Target.Shard] = true 154 shards = append(shards, status.Target.Shard) 155 } 156 } 157 return shards 158 } 159 160 func getTabletTypesForKeyspaceShardAndCell(hc discovery.HealthCheck, ks, shard, cell string) []topodatapb.TabletType { 161 tabletTypes := []topodatapb.TabletType{} 162 tabletTypeMap := map[topodatapb.TabletType]bool{} 163 cache := hc.CacheStatus() 164 for _, status := range cache { 165 if status.Target.Keyspace != ks || status.Cell != cell || status.Target.Shard != shard { 166 continue 167 } 168 if ok := tabletTypeMap[status.Target.TabletType]; !ok { 169 tabletTypeMap[status.Target.TabletType] = true 170 tabletTypes = append(tabletTypes, status.Target.TabletType) 171 } 172 } 173 return tabletTypes 174 } 175 176 func getTopologyInfo(healthcheck discovery.HealthCheck, selectedKeyspace, selectedCell string) *topologyInfo { 177 return &topologyInfo{ 178 Keyspaces: keyspacesLocked(healthcheck, "all"), 179 Cells: cellsInTopology(healthcheck, selectedKeyspace), 180 TabletTypes: makeStringTypeList(typesInTopology(healthcheck, selectedKeyspace, selectedCell)), 181 } 182 } 183 184 // keyspacesLocked returns the keyspaces to be displayed in the heatmap based on the dropdown filters. 185 // It returns one keyspace if a specific one was chosen or returns all of them if 'all' is chosen. 186 // This method is used by heatmapData to traverse over desired keyspaces and 187 // topologyInfo to send all available options for the keyspace dropdown. 188 func keyspacesLocked(healthcheck discovery.HealthCheck, keyspace string) []string { 189 if keyspace != "all" { 190 return []string{keyspace} 191 } 192 seenKs := map[string]bool{} 193 keyspaces := []string{} 194 cache := healthcheck.CacheStatus() 195 for _, status := range cache { 196 if _, ok := seenKs[status.Target.Keyspace]; !ok { 197 seenKs[status.Target.Keyspace] = true 198 keyspaces = append(keyspaces, status.Target.Keyspace) 199 } 200 } 201 sort.Strings(keyspaces) 202 return keyspaces 203 } 204 205 func getShardsForKeyspace(healthcheck discovery.HealthCheck, keyspace string) []string { 206 seenShards := map[string]bool{} 207 shards := []string{} 208 cache := healthcheck.CacheStatus() 209 for _, status := range cache { 210 if status.Target.Keyspace != keyspace { 211 continue 212 } 213 if _, ok := seenShards[status.Target.Shard]; !ok { 214 seenShards[status.Target.Shard] = true 215 shards = append(shards, status.Target.Shard) 216 } 217 } 218 sort.Strings(shards) 219 return shards 220 } 221 222 // cellsInTopology returns all the cells in the given keyspace. 223 // If all keyspaces is chosen, it returns the cells from every keyspace. 224 // This method is used by topologyInfo to send all available options for the cell dropdown 225 func cellsInTopology(healthcheck discovery.HealthCheck, keyspace string) []string { 226 kss := []string{keyspace} 227 if keyspace == "all" { 228 kss = keyspacesLocked(healthcheck, keyspace) 229 } 230 cells := map[string]bool{} 231 cache := healthcheck.CacheStatus() 232 for _, status := range cache { 233 found := false 234 for _, ks := range kss { 235 if status.Target.Keyspace == ks { 236 found = true 237 break 238 } 239 } 240 if !found { 241 continue 242 } 243 if _, ok := cells[status.Cell]; !ok { 244 cells[status.Cell] = true 245 } 246 } 247 var cellList []string 248 for cell := range cells { 249 cellList = append(cellList, cell) 250 } 251 sort.Strings(cellList) 252 return cellList 253 } 254 255 // typesInTopology returns all the types in the given keyspace and cell. 256 // If all keyspaces and cells is chosen, it returns the types from every cell in every keyspace. 257 // This method is used by topologyInfo to send all available options for the tablet type dropdown 258 func typesInTopology(healthcheck discovery.HealthCheck, keyspace, cell string) []topodatapb.TabletType { 259 keyspaces := keyspacesLocked(healthcheck, keyspace) 260 types := make(map[topodatapb.TabletType]bool) 261 // Going through the shards in every cell in every keyspace to get existing tablet types 262 for _, ks := range keyspaces { 263 cellsPerKeyspace := cellsLocked(healthcheck, ks, cell) 264 for _, cl := range cellsPerKeyspace { 265 shardsPerKeyspace := getShardInKeyspace(healthcheck, ks) 266 for _, s := range shardsPerKeyspace { 267 typesPerShard := getTabletTypesForKeyspaceShardAndCell(healthcheck, ks, s, cl) 268 for _, t := range typesPerShard { 269 types[t] = true 270 if len(types) == len(availableTabletTypes) { 271 break 272 } 273 } 274 } 275 } 276 } 277 typesList := sortTypes(types) 278 return typesList 279 } 280 281 // tabletTypesLocked returns the tablet types needed to be displayed in the heatmap based on the dropdown filters. 282 // It returns tablet type if a specific one was chosen or returns all of them if 'all' is chosen for keyspace and/or cell. 283 // This method is used by heatmapData to traverse over the desired tablet types. 284 func tabletTypesLocked(healthcheck discovery.HealthCheck, keyspace, cell, tabletType string) []topodatapb.TabletType { 285 if tabletType != "all" { 286 tabletTypeObj, _ := topoproto.ParseTabletType(tabletType) 287 return []topodatapb.TabletType{tabletTypeObj} 288 } 289 return typesInTopology(healthcheck, keyspace, cell) 290 } 291 292 // cellsLocked returns the cells needed to be displayed in the heatmap based on the dropdown filters. 293 // returns one cell if a specific one was chosen or returns all of them if 'all' is chosen. 294 // This method is used by heatmapData to traverse over the desired cells. 295 func cellsLocked(healthcheck discovery.HealthCheck, keyspace, cell string) []string { 296 if cell != "all" { 297 return []string{cell} 298 } 299 return cellsInTopology(healthcheck, keyspace) 300 } 301 302 // aggregatedData gets heatmapData by taking the average of the metric value of all tablets within the keyspace and cell of the 303 // specified type (or from all types if 'all' was selected). 304 func aggregatedData(healthcheck discovery.HealthCheck, keyspace, cell, selectedType, selectedMetric string, metricFunc func(stats *discovery.TabletHealth) float64) ([][]float64, [][]*topodatapb.TabletAlias, yLabel) { 305 shards := getShardsForKeyspace(healthcheck, keyspace) 306 tabletTypes := tabletTypesLocked(healthcheck, keyspace, cell, selectedType) 307 308 var cellData [][]float64 309 dataRow := make([]float64, len(shards)) 310 // This loop goes through each shard in the (keyspace-cell) combination. 311 for shardIndex, shard := range shards { 312 var sum, count float64 313 hasTablets := false 314 unhealthyFound := false 315 // Going through all the types of tablets and aggregating their information. 316 for _, tabletType := range tabletTypes { 317 tablets := getTabletHealthWithCellFilter(healthcheck, keyspace, shard, cell, tabletType) 318 if len(tablets) == 0 { 319 continue 320 } 321 for _, tablet := range tablets { 322 hasTablets = true 323 // If even one tablet is unhealthy then the entire group becomes unhealthy. 324 metricVal := metricFunc(tablet) 325 if (selectedMetric == "health" && metricVal == tabletUnhealthy) || 326 (selectedMetric == "lag" && metricVal > lagThresholdUnhealthy) { 327 sum = metricVal 328 count = 1 329 unhealthyFound = true 330 break 331 } 332 sum += metricVal 333 count++ 334 } 335 if unhealthyFound { 336 break 337 } 338 } 339 if hasTablets { 340 dataRow[shardIndex] = sum / count 341 } else { 342 dataRow[shardIndex] = tabletMissing 343 } 344 } 345 cellData = append(cellData, dataRow) 346 cellLabel := yLabel{ 347 CellLabel: label{Name: cell, Rowspan: 1}, 348 } 349 350 return cellData, nil, cellLabel 351 } 352 353 func unaggregatedData(healthcheck discovery.HealthCheck, keyspace, cell, selectedType string, metricFunc func(stats *discovery.TabletHealth) float64) ([][]float64, [][]*topodatapb.TabletAlias, yLabel) { 354 // This loop goes through every nested label (in this case, tablet type). 355 var cellData [][]float64 356 var cellAliases [][]*topodatapb.TabletAlias 357 var cellLabel yLabel 358 cellLabelSpan := 0 359 tabletTypes := tabletTypesLocked(healthcheck, keyspace, cell, selectedType) 360 shards := getShardsForKeyspace(healthcheck, keyspace) 361 for _, tabletType := range tabletTypes { 362 maxRowLength := 0 363 364 // The loop calculates the maximum number of rows needed. 365 for _, shard := range shards { 366 tabletsCount := len(getTabletHealthWithCellFilter(healthcheck, keyspace, shard, cell, tabletType)) 367 if maxRowLength < tabletsCount { 368 maxRowLength = tabletsCount 369 } 370 } 371 372 // dataRowsPerType is a 2D array that will hold the data of the tablets of one (cell, type) combination. 373 dataRowsPerType := make([][]float64, maxRowLength) 374 // aliasRowsPerType is a 2D array that will hold the aliases of the tablets of one (cell, type) combination. 375 aliasRowsPerType := make([][]*topodatapb.TabletAlias, maxRowLength) 376 for i := range dataRowsPerType { 377 dataRowsPerType[i] = make([]float64, len(shards)) 378 aliasRowsPerType[i] = make([]*topodatapb.TabletAlias, len(shards)) 379 } 380 381 // Filling in the 2D array with tablet data by columns. 382 for shardIndex, shard := range shards { 383 for tabletIndex := 0; tabletIndex < maxRowLength; tabletIndex++ { 384 // If the key doesn't exist then the tablet must not exist so that data is set to -1 (tabletMissing). 385 filteredHealthData := getTabletHealthWithCellFilter(healthcheck, keyspace, shard, cell, tabletType) 386 if tabletIndex < len(filteredHealthData) { 387 dataRowsPerType[tabletIndex][shardIndex] = metricFunc(filteredHealthData[tabletIndex]) 388 aliasRowsPerType[tabletIndex][shardIndex] = filteredHealthData[tabletIndex].Tablet.Alias 389 } else { 390 dataRowsPerType[tabletIndex][shardIndex] = tabletMissing 391 aliasRowsPerType[tabletIndex][shardIndex] = nil 392 } 393 } 394 } 395 396 if maxRowLength > 0 { 397 cellLabel.TypeLabels = append(cellLabel.TypeLabels, label{Name: tabletType.String(), Rowspan: maxRowLength}) 398 } 399 cellLabelSpan += maxRowLength 400 401 for i := 0; i < len(dataRowsPerType); i++ { 402 cellData = append(cellData, dataRowsPerType[i]) 403 cellAliases = append(cellAliases, aliasRowsPerType[i]) 404 } 405 } 406 407 cellLabel.CellLabel = label{Name: cell, Rowspan: cellLabelSpan} 408 409 return cellData, cellAliases, cellLabel 410 } 411 412 // heatmapData returns a 2D array of data (based on the specified metric) as well as the labels for the heatmap. 413 func heatmapData(healthcheck discovery.HealthCheck, selectedKeyspace, selectedCell, selectedTabletType, selectedMetric string) ([]heatmap, error) { 414 // Get the metric data. 415 var metricFunc func(stats *discovery.TabletHealth) float64 416 switch selectedMetric { 417 case "lag": 418 metricFunc = replicationLag 419 case "qps": 420 metricFunc = qps 421 case "health": 422 metricFunc = health 423 default: 424 return nil, fmt.Errorf("invalid metric: %v Select 'lag', 'cpu', or 'qps'", selectedMetric) 425 } 426 427 // Get the proper data (unaggregated tablets or aggregated tablets by types) 428 aggregated := false 429 if selectedKeyspace == "all" && selectedTabletType == "all" { 430 aggregated = true 431 } 432 433 keyspaces := keyspacesLocked(healthcheck, selectedKeyspace) 434 var heatmaps []heatmap 435 for _, keyspace := range keyspaces { 436 var h heatmap 437 h.ShardLabels = getShardsForKeyspace(healthcheck, keyspace) 438 keyspaceLabelSpan := 0 439 440 cells := cellsLocked(healthcheck, keyspace, selectedCell) 441 // The loop goes through every outer label (in this case, cell). 442 for _, cell := range cells { 443 var cellData [][]float64 444 var cellAliases [][]*topodatapb.TabletAlias 445 var cellLabel yLabel 446 447 if aggregated { 448 cellData, cellAliases, cellLabel = aggregatedData(healthcheck, keyspace, cell, selectedTabletType, selectedMetric, metricFunc) 449 } else { 450 cellData, cellAliases, cellLabel = unaggregatedData(healthcheck, keyspace, cell, selectedTabletType, metricFunc) 451 } 452 453 if cellLabel.CellLabel.Rowspan > 0 { 454 // Iterating over the rows of data for the current cell. 455 for i := 0; i < len(cellData); i++ { 456 // Adding the data in reverse to match the format that the plotly map takes in. 457 h.Data = append([][]float64{cellData[i]}, h.Data...) 458 if cellAliases != nil { 459 h.Aliases = append([][]*topodatapb.TabletAlias{cellAliases[i]}, h.Aliases...) 460 } 461 } 462 h.CellAndTypeLabels = append(h.CellAndTypeLabels, cellLabel) 463 } 464 keyspaceLabelSpan += cellLabel.CellLabel.Rowspan 465 } 466 467 // Setting the values for the yGridLines by going in reverse and subtracting 0.5 as an offset. 468 sum := 0 469 for c := len(h.CellAndTypeLabels) - 1; c >= 0; c-- { 470 // If the current view is aggregated then we need to traverse the cell labels 471 // to calculate the values for the grid line since that is the innermost label. 472 // For example if h.CellAndTypeLabels = 473 // { CellLabel: {Name: 'cell1', Rowspan: 2}, TypeLabels: nil }, 474 // { CellLabel: {Name: 'cell2', Rowspan: 3}, TypeLabels: nil }, 475 // then the resulting array will be [2.5, 4.5] which specifies the grid line indexes 476 // starting from 0 which is at the bottom of the heatmap. 477 if h.CellAndTypeLabels[c].TypeLabels == nil { 478 sum += h.CellAndTypeLabels[c].CellLabel.Rowspan 479 h.YGridLines = append(h.YGridLines, float64(sum)-0.5) 480 continue 481 } 482 // Otherwise traverse the type labels because that is the innermost label. 483 // For example if h.CellAndTypeLabels = 484 // { CellLabel: {Name: 'cell1', Rowspan: 3}, TypeLabels: [{Name: 'Primary', Rowspan: 1}, {Name: 'Replica', Rowspan: 2}] }, 485 // { CellLabel: {Name: 'cell2', Rowspan: 3}, TypeLabels: [{Name: 'Primary', Rowspan: 1}, {Name: 'Replica', Rowspan: 2}] }, 486 // then the resulting array will be [1.5, 2.5, 4.5, 5.5] which specifies the grid line indexes 487 // starting from 0 which is at the bottom of the heatmap. 488 for t := len(h.CellAndTypeLabels[c].TypeLabels) - 1; t >= 0; t-- { 489 sum += h.CellAndTypeLabels[c].TypeLabels[t].Rowspan 490 h.YGridLines = append(h.YGridLines, float64(sum)-0.5) 491 } 492 } 493 494 h.KeyspaceLabel = label{Name: keyspace, Rowspan: keyspaceLabelSpan} 495 496 heatmaps = append(heatmaps, h) 497 } 498 499 return heatmaps, nil 500 }