vitess.io/vitess@v0.16.2/go/vt/vtctld/api_utils.go (about)

     1  /*
     2  Copyright 2022 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vtctld
    18  
    19  import (
    20  	"fmt"
    21  	"sort"
    22  	"strings"
    23  
    24  	"vitess.io/vitess/go/vt/discovery"
    25  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    26  	"vitess.io/vitess/go/vt/topo/topoproto"
    27  )
    28  
const (
	// tabletMissing represents a missing/non-existent tablet for any metric.
	tabletMissing = -1
	// These values represent the thresholds for replication lag; health compares
	// ReplicationLagSeconds against them.
	lagThresholdDegraded  = 60
	lagThresholdUnhealthy = 120
	// These values represent the health of the tablet - 0 is healthy, 1 is degraded, 2 is unhealthy.
	tabletHealthy   = 0
	tabletDegraded  = 1
	tabletUnhealthy = 2
)
    40  
type (
	// yLabel is used to keep track of the cell and type labels of the heatmap.
	// TypeLabels is left nil by aggregatedData, where the cell is the innermost label.
	yLabel struct {
		CellLabel  label
		TypeLabels []label
	}

	// label is used to keep track of one label of a heatmap and how many rows it should span.
	label struct {
		Name    string
		Rowspan int
	}

	// heatmap stores all the needed info to construct the heatmap.
	heatmap struct {
		// Data is a 2D array of values of the specified metric.
		// Missing tablets are recorded as tabletMissing.
		Data [][]float64
		// Aliases is a 2D array holding references to the tablet aliases.
		// It is only filled in for unaggregated views.
		Aliases           [][]*topodatapb.TabletAlias
		KeyspaceLabel     label
		CellAndTypeLabels []yLabel
		ShardLabels       []string

		// YGridLines is used to draw gridLines on the map in the right places.
		YGridLines []float64
	}

	// topologyInfo holds the keyspace, cell and tablet type names assembled by
	// getTopologyInfo as the available options for the heatmap dropdown filters.
	topologyInfo struct {
		Keyspaces   []string
		Cells       []string
		TabletTypes []string
	}
)
    74  
// availableTabletTypes is the list of tablet types that are considered for display on the heatmap.
// sortTypes iterates over it to order its result, so this list must always be sorted by the
// order the types should appear in (i.e. PRIMARY first, then REPLICA, then RDONLY).
var availableTabletTypes = []topodatapb.TabletType{topodatapb.TabletType_PRIMARY, topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY}
    78  
    79  func makeStringTypeList(types []topodatapb.TabletType) []string {
    80  	var list []string
    81  	for _, t := range types {
    82  		list = append(list, t.String())
    83  	}
    84  	return list
    85  }
    86  
    87  func sortTypes(types map[topodatapb.TabletType]bool) []topodatapb.TabletType {
    88  	var listOfTypes []topodatapb.TabletType
    89  	for _, tabType := range availableTabletTypes {
    90  		if t := types[tabType]; t {
    91  			listOfTypes = append(listOfTypes, tabType)
    92  		}
    93  	}
    94  	return listOfTypes
    95  }
    96  
    97  func health(stat *discovery.TabletHealth) float64 {
    98  	// The tablet is unhealthy if there is an health error.
    99  	if stat.Stats.HealthError != "" {
   100  		return tabletUnhealthy
   101  	}
   102  
   103  	// The tablet is healthy/degraded/unheathy depending on the lag.
   104  	lag := stat.Stats.ReplicationLagSeconds
   105  	switch {
   106  	case lag >= lagThresholdUnhealthy:
   107  		return tabletUnhealthy
   108  	case lag >= lagThresholdDegraded:
   109  		return tabletDegraded
   110  	}
   111  
   112  	// The tablet is degraded if there was an error previously.
   113  	if stat.LastError != nil {
   114  		return tabletDegraded
   115  	}
   116  
   117  	// The tablet is healthy or degraded based on serving status.
   118  	if !stat.Serving {
   119  		return tabletDegraded
   120  	}
   121  
   122  	// All else is ok so tablet is healthy.
   123  	return tabletHealthy
   124  }
   125  
   126  func replicationLag(stat *discovery.TabletHealth) float64 {
   127  	return float64(stat.Stats.ReplicationLagSeconds)
   128  }
   129  
   130  func qps(stat *discovery.TabletHealth) float64 {
   131  	return stat.Stats.Qps
   132  }
   133  
   134  func getTabletHealthWithCellFilter(hc discovery.HealthCheck, ks, shard, cell string, tabletType topodatapb.TabletType) []*discovery.TabletHealth {
   135  	tabletTypeStr := topoproto.TabletTypeLString(tabletType)
   136  	m := hc.CacheStatusMap()
   137  	key := fmt.Sprintf("%v.%v.%v.%v", cell, ks, shard, strings.ToUpper(tabletTypeStr))
   138  	if _, ok := m[key]; !ok {
   139  		return nil
   140  	}
   141  	return m[key].TabletsStats
   142  }
   143  
   144  func getShardInKeyspace(hc discovery.HealthCheck, ks string) []string {
   145  	shards := []string{}
   146  	shardsMap := map[string]bool{}
   147  	cache := hc.CacheStatus()
   148  	for _, status := range cache {
   149  		if status.Target.Keyspace != ks {
   150  			continue
   151  		}
   152  		if ok := shardsMap[status.Target.Shard]; !ok {
   153  			shardsMap[status.Target.Shard] = true
   154  			shards = append(shards, status.Target.Shard)
   155  		}
   156  	}
   157  	return shards
   158  }
   159  
   160  func getTabletTypesForKeyspaceShardAndCell(hc discovery.HealthCheck, ks, shard, cell string) []topodatapb.TabletType {
   161  	tabletTypes := []topodatapb.TabletType{}
   162  	tabletTypeMap := map[topodatapb.TabletType]bool{}
   163  	cache := hc.CacheStatus()
   164  	for _, status := range cache {
   165  		if status.Target.Keyspace != ks || status.Cell != cell || status.Target.Shard != shard {
   166  			continue
   167  		}
   168  		if ok := tabletTypeMap[status.Target.TabletType]; !ok {
   169  			tabletTypeMap[status.Target.TabletType] = true
   170  			tabletTypes = append(tabletTypes, status.Target.TabletType)
   171  		}
   172  	}
   173  	return tabletTypes
   174  }
   175  
   176  func getTopologyInfo(healthcheck discovery.HealthCheck, selectedKeyspace, selectedCell string) *topologyInfo {
   177  	return &topologyInfo{
   178  		Keyspaces:   keyspacesLocked(healthcheck, "all"),
   179  		Cells:       cellsInTopology(healthcheck, selectedKeyspace),
   180  		TabletTypes: makeStringTypeList(typesInTopology(healthcheck, selectedKeyspace, selectedCell)),
   181  	}
   182  }
   183  
   184  // keyspacesLocked returns the keyspaces to be displayed in the heatmap based on the dropdown filters.
   185  // It returns one keyspace if a specific one was chosen or returns all of them if 'all' is chosen.
   186  // This method is used by heatmapData to traverse over desired keyspaces and
   187  // topologyInfo to send all available options for the keyspace dropdown.
   188  func keyspacesLocked(healthcheck discovery.HealthCheck, keyspace string) []string {
   189  	if keyspace != "all" {
   190  		return []string{keyspace}
   191  	}
   192  	seenKs := map[string]bool{}
   193  	keyspaces := []string{}
   194  	cache := healthcheck.CacheStatus()
   195  	for _, status := range cache {
   196  		if _, ok := seenKs[status.Target.Keyspace]; !ok {
   197  			seenKs[status.Target.Keyspace] = true
   198  			keyspaces = append(keyspaces, status.Target.Keyspace)
   199  		}
   200  	}
   201  	sort.Strings(keyspaces)
   202  	return keyspaces
   203  }
   204  
   205  func getShardsForKeyspace(healthcheck discovery.HealthCheck, keyspace string) []string {
   206  	seenShards := map[string]bool{}
   207  	shards := []string{}
   208  	cache := healthcheck.CacheStatus()
   209  	for _, status := range cache {
   210  		if status.Target.Keyspace != keyspace {
   211  			continue
   212  		}
   213  		if _, ok := seenShards[status.Target.Shard]; !ok {
   214  			seenShards[status.Target.Shard] = true
   215  			shards = append(shards, status.Target.Shard)
   216  		}
   217  	}
   218  	sort.Strings(shards)
   219  	return shards
   220  }
   221  
   222  // cellsInTopology returns all the cells in the given keyspace.
   223  // If all keyspaces is chosen, it returns the cells from every keyspace.
   224  // This method is used by topologyInfo to send all available options for the cell dropdown
   225  func cellsInTopology(healthcheck discovery.HealthCheck, keyspace string) []string {
   226  	kss := []string{keyspace}
   227  	if keyspace == "all" {
   228  		kss = keyspacesLocked(healthcheck, keyspace)
   229  	}
   230  	cells := map[string]bool{}
   231  	cache := healthcheck.CacheStatus()
   232  	for _, status := range cache {
   233  		found := false
   234  		for _, ks := range kss {
   235  			if status.Target.Keyspace == ks {
   236  				found = true
   237  				break
   238  			}
   239  		}
   240  		if !found {
   241  			continue
   242  		}
   243  		if _, ok := cells[status.Cell]; !ok {
   244  			cells[status.Cell] = true
   245  		}
   246  	}
   247  	var cellList []string
   248  	for cell := range cells {
   249  		cellList = append(cellList, cell)
   250  	}
   251  	sort.Strings(cellList)
   252  	return cellList
   253  }
   254  
   255  // typesInTopology returns all the types in the given keyspace and cell.
   256  // If all keyspaces and cells is chosen, it returns the types from every cell in every keyspace.
   257  // This method is used by topologyInfo to send all available options for the tablet type dropdown
   258  func typesInTopology(healthcheck discovery.HealthCheck, keyspace, cell string) []topodatapb.TabletType {
   259  	keyspaces := keyspacesLocked(healthcheck, keyspace)
   260  	types := make(map[topodatapb.TabletType]bool)
   261  	// Going through the shards in every cell in every keyspace to get existing tablet types
   262  	for _, ks := range keyspaces {
   263  		cellsPerKeyspace := cellsLocked(healthcheck, ks, cell)
   264  		for _, cl := range cellsPerKeyspace {
   265  			shardsPerKeyspace := getShardInKeyspace(healthcheck, ks)
   266  			for _, s := range shardsPerKeyspace {
   267  				typesPerShard := getTabletTypesForKeyspaceShardAndCell(healthcheck, ks, s, cl)
   268  				for _, t := range typesPerShard {
   269  					types[t] = true
   270  					if len(types) == len(availableTabletTypes) {
   271  						break
   272  					}
   273  				}
   274  			}
   275  		}
   276  	}
   277  	typesList := sortTypes(types)
   278  	return typesList
   279  }
   280  
   281  // tabletTypesLocked returns the tablet types needed to be displayed in the heatmap based on the dropdown filters.
   282  // It returns tablet type if a specific one was chosen or returns all of them if 'all' is chosen for keyspace and/or cell.
   283  // This method is used by heatmapData to traverse over the desired tablet types.
   284  func tabletTypesLocked(healthcheck discovery.HealthCheck, keyspace, cell, tabletType string) []topodatapb.TabletType {
   285  	if tabletType != "all" {
   286  		tabletTypeObj, _ := topoproto.ParseTabletType(tabletType)
   287  		return []topodatapb.TabletType{tabletTypeObj}
   288  	}
   289  	return typesInTopology(healthcheck, keyspace, cell)
   290  }
   291  
   292  // cellsLocked returns the cells needed to be displayed in the heatmap based on the dropdown filters.
   293  // returns one cell if a specific one was chosen or returns all of them if 'all' is chosen.
   294  // This method is used by heatmapData to traverse over the desired cells.
   295  func cellsLocked(healthcheck discovery.HealthCheck, keyspace, cell string) []string {
   296  	if cell != "all" {
   297  		return []string{cell}
   298  	}
   299  	return cellsInTopology(healthcheck, keyspace)
   300  }
   301  
   302  // aggregatedData gets heatmapData by taking the average of the metric value of all tablets within the keyspace and cell of the
   303  // specified type (or from all types if 'all' was selected).
   304  func aggregatedData(healthcheck discovery.HealthCheck, keyspace, cell, selectedType, selectedMetric string, metricFunc func(stats *discovery.TabletHealth) float64) ([][]float64, [][]*topodatapb.TabletAlias, yLabel) {
   305  	shards := getShardsForKeyspace(healthcheck, keyspace)
   306  	tabletTypes := tabletTypesLocked(healthcheck, keyspace, cell, selectedType)
   307  
   308  	var cellData [][]float64
   309  	dataRow := make([]float64, len(shards))
   310  	// This loop goes through each shard in the (keyspace-cell) combination.
   311  	for shardIndex, shard := range shards {
   312  		var sum, count float64
   313  		hasTablets := false
   314  		unhealthyFound := false
   315  		// Going through all the types of tablets and aggregating their information.
   316  		for _, tabletType := range tabletTypes {
   317  			tablets := getTabletHealthWithCellFilter(healthcheck, keyspace, shard, cell, tabletType)
   318  			if len(tablets) == 0 {
   319  				continue
   320  			}
   321  			for _, tablet := range tablets {
   322  				hasTablets = true
   323  				// If even one tablet is unhealthy then the entire group becomes unhealthy.
   324  				metricVal := metricFunc(tablet)
   325  				if (selectedMetric == "health" && metricVal == tabletUnhealthy) ||
   326  					(selectedMetric == "lag" && metricVal > lagThresholdUnhealthy) {
   327  					sum = metricVal
   328  					count = 1
   329  					unhealthyFound = true
   330  					break
   331  				}
   332  				sum += metricVal
   333  				count++
   334  			}
   335  			if unhealthyFound {
   336  				break
   337  			}
   338  		}
   339  		if hasTablets {
   340  			dataRow[shardIndex] = sum / count
   341  		} else {
   342  			dataRow[shardIndex] = tabletMissing
   343  		}
   344  	}
   345  	cellData = append(cellData, dataRow)
   346  	cellLabel := yLabel{
   347  		CellLabel: label{Name: cell, Rowspan: 1},
   348  	}
   349  
   350  	return cellData, nil, cellLabel
   351  }
   352  
   353  func unaggregatedData(healthcheck discovery.HealthCheck, keyspace, cell, selectedType string, metricFunc func(stats *discovery.TabletHealth) float64) ([][]float64, [][]*topodatapb.TabletAlias, yLabel) {
   354  	// This loop goes through every nested label (in this case, tablet type).
   355  	var cellData [][]float64
   356  	var cellAliases [][]*topodatapb.TabletAlias
   357  	var cellLabel yLabel
   358  	cellLabelSpan := 0
   359  	tabletTypes := tabletTypesLocked(healthcheck, keyspace, cell, selectedType)
   360  	shards := getShardsForKeyspace(healthcheck, keyspace)
   361  	for _, tabletType := range tabletTypes {
   362  		maxRowLength := 0
   363  
   364  		// The loop calculates the maximum number of rows needed.
   365  		for _, shard := range shards {
   366  			tabletsCount := len(getTabletHealthWithCellFilter(healthcheck, keyspace, shard, cell, tabletType))
   367  			if maxRowLength < tabletsCount {
   368  				maxRowLength = tabletsCount
   369  			}
   370  		}
   371  
   372  		// dataRowsPerType is a 2D array that will hold the data of the tablets of one (cell, type) combination.
   373  		dataRowsPerType := make([][]float64, maxRowLength)
   374  		// aliasRowsPerType is a 2D array that will hold the aliases of the tablets of one (cell, type) combination.
   375  		aliasRowsPerType := make([][]*topodatapb.TabletAlias, maxRowLength)
   376  		for i := range dataRowsPerType {
   377  			dataRowsPerType[i] = make([]float64, len(shards))
   378  			aliasRowsPerType[i] = make([]*topodatapb.TabletAlias, len(shards))
   379  		}
   380  
   381  		// Filling in the 2D array with tablet data by columns.
   382  		for shardIndex, shard := range shards {
   383  			for tabletIndex := 0; tabletIndex < maxRowLength; tabletIndex++ {
   384  				// If the key doesn't exist then the tablet must not exist so that data is set to -1 (tabletMissing).
   385  				filteredHealthData := getTabletHealthWithCellFilter(healthcheck, keyspace, shard, cell, tabletType)
   386  				if tabletIndex < len(filteredHealthData) {
   387  					dataRowsPerType[tabletIndex][shardIndex] = metricFunc(filteredHealthData[tabletIndex])
   388  					aliasRowsPerType[tabletIndex][shardIndex] = filteredHealthData[tabletIndex].Tablet.Alias
   389  				} else {
   390  					dataRowsPerType[tabletIndex][shardIndex] = tabletMissing
   391  					aliasRowsPerType[tabletIndex][shardIndex] = nil
   392  				}
   393  			}
   394  		}
   395  
   396  		if maxRowLength > 0 {
   397  			cellLabel.TypeLabels = append(cellLabel.TypeLabels, label{Name: tabletType.String(), Rowspan: maxRowLength})
   398  		}
   399  		cellLabelSpan += maxRowLength
   400  
   401  		for i := 0; i < len(dataRowsPerType); i++ {
   402  			cellData = append(cellData, dataRowsPerType[i])
   403  			cellAliases = append(cellAliases, aliasRowsPerType[i])
   404  		}
   405  	}
   406  
   407  	cellLabel.CellLabel = label{Name: cell, Rowspan: cellLabelSpan}
   408  
   409  	return cellData, cellAliases, cellLabel
   410  }
   411  
   412  // heatmapData returns a 2D array of data (based on the specified metric) as well as the labels for the heatmap.
   413  func heatmapData(healthcheck discovery.HealthCheck, selectedKeyspace, selectedCell, selectedTabletType, selectedMetric string) ([]heatmap, error) {
   414  	// Get the metric data.
   415  	var metricFunc func(stats *discovery.TabletHealth) float64
   416  	switch selectedMetric {
   417  	case "lag":
   418  		metricFunc = replicationLag
   419  	case "qps":
   420  		metricFunc = qps
   421  	case "health":
   422  		metricFunc = health
   423  	default:
   424  		return nil, fmt.Errorf("invalid metric: %v Select 'lag', 'cpu', or 'qps'", selectedMetric)
   425  	}
   426  
   427  	// Get the proper data (unaggregated tablets or aggregated tablets by types)
   428  	aggregated := false
   429  	if selectedKeyspace == "all" && selectedTabletType == "all" {
   430  		aggregated = true
   431  	}
   432  
   433  	keyspaces := keyspacesLocked(healthcheck, selectedKeyspace)
   434  	var heatmaps []heatmap
   435  	for _, keyspace := range keyspaces {
   436  		var h heatmap
   437  		h.ShardLabels = getShardsForKeyspace(healthcheck, keyspace)
   438  		keyspaceLabelSpan := 0
   439  
   440  		cells := cellsLocked(healthcheck, keyspace, selectedCell)
   441  		// The loop goes through every outer label (in this case, cell).
   442  		for _, cell := range cells {
   443  			var cellData [][]float64
   444  			var cellAliases [][]*topodatapb.TabletAlias
   445  			var cellLabel yLabel
   446  
   447  			if aggregated {
   448  				cellData, cellAliases, cellLabel = aggregatedData(healthcheck, keyspace, cell, selectedTabletType, selectedMetric, metricFunc)
   449  			} else {
   450  				cellData, cellAliases, cellLabel = unaggregatedData(healthcheck, keyspace, cell, selectedTabletType, metricFunc)
   451  			}
   452  
   453  			if cellLabel.CellLabel.Rowspan > 0 {
   454  				// Iterating over the rows of data for the current cell.
   455  				for i := 0; i < len(cellData); i++ {
   456  					// Adding the data in reverse to match the format that the plotly map takes in.
   457  					h.Data = append([][]float64{cellData[i]}, h.Data...)
   458  					if cellAliases != nil {
   459  						h.Aliases = append([][]*topodatapb.TabletAlias{cellAliases[i]}, h.Aliases...)
   460  					}
   461  				}
   462  				h.CellAndTypeLabels = append(h.CellAndTypeLabels, cellLabel)
   463  			}
   464  			keyspaceLabelSpan += cellLabel.CellLabel.Rowspan
   465  		}
   466  
   467  		// Setting the values for the yGridLines by going in reverse and subtracting 0.5 as an offset.
   468  		sum := 0
   469  		for c := len(h.CellAndTypeLabels) - 1; c >= 0; c-- {
   470  			// If the current view is aggregated then we need to traverse the cell labels
   471  			// to calculate the values for the grid line since that is the innermost label.
   472  			// For example if h.CellAndTypeLabels =
   473  			//   { CellLabel: {Name: 'cell1', Rowspan: 2}, TypeLabels: nil },
   474  			//   { CellLabel: {Name: 'cell2', Rowspan: 3}, TypeLabels: nil },
   475  			// then the resulting array will be [2.5, 4.5] which specifies the grid line indexes
   476  			// starting from 0 which is at the bottom of the heatmap.
   477  			if h.CellAndTypeLabels[c].TypeLabels == nil {
   478  				sum += h.CellAndTypeLabels[c].CellLabel.Rowspan
   479  				h.YGridLines = append(h.YGridLines, float64(sum)-0.5)
   480  				continue
   481  			}
   482  			// Otherwise traverse the type labels because that is the innermost label.
   483  			// For example if h.CellAndTypeLabels =
   484  			//   { CellLabel: {Name: 'cell1', Rowspan: 3}, TypeLabels: [{Name: 'Primary', Rowspan: 1},  {Name: 'Replica', Rowspan: 2}] },
   485  			//   { CellLabel: {Name: 'cell2', Rowspan: 3}, TypeLabels: [{Name: 'Primary', Rowspan: 1},  {Name: 'Replica', Rowspan: 2}] },
   486  			// then the resulting array will be [1.5, 2.5, 4.5, 5.5] which specifies the grid line indexes
   487  			// starting from 0 which is at the bottom of the heatmap.
   488  			for t := len(h.CellAndTypeLabels[c].TypeLabels) - 1; t >= 0; t-- {
   489  				sum += h.CellAndTypeLabels[c].TypeLabels[t].Rowspan
   490  				h.YGridLines = append(h.YGridLines, float64(sum)-0.5)
   491  			}
   492  		}
   493  
   494  		h.KeyspaceLabel = label{Name: keyspace, Rowspan: keyspaceLabelSpan}
   495  
   496  		heatmaps = append(heatmaps, h)
   497  	}
   498  
   499  	return heatmaps, nil
   500  }