agones.dev/agones@v1.54.0/pkg/metrics/controller_metrics.go (about)

     1  // Copyright 2019 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metrics
    16  
    17  import (
    18  	"go.opencensus.io/stats"
    19  	"go.opencensus.io/stats/view"
    20  	"go.opencensus.io/tag"
    21  )
    22  
    23  const (
    24  	fleetRolloutPercent                     = "fleet_rollout_percent"
    25  	fleetReplicaCountName                   = "fleets_replicas_count"
    26  	fleetAutoscalerBufferLimitName          = "fleet_autoscalers_buffer_limits"
    27  	fleetAutoscalterBufferSizeName          = "fleet_autoscalers_buffer_size"
    28  	fleetAutoscalerCurrentReplicaCountName  = "fleet_autoscalers_current_replicas_count"
    29  	fleetAutoscalersDesiredReplicaCountName = "fleet_autoscalers_desired_replicas_count"
    30  	fleetAutoscalersAbleToScaleName         = "fleet_autoscalers_able_to_scale"
    31  	fleetAutoscalersLimitedName             = "fleet_autoscalers_limited"
    32  	fleetCountersName                       = "fleet_counters"
    33  	fleetListsName                          = "fleet_lists"
    34  	gameServersCountName                    = "gameservers_count"
    35  	gameServersTotalName                    = "gameservers_total"
    36  	gameServersPlayerConnectedTotalName     = "gameserver_player_connected_total"
    37  	gameServersPlayerCapacityTotalName      = "gameserver_player_capacity_total"
    38  	nodeCountName                           = "nodes_count"
    39  	gameServersNodeCountName                = "gameservers_node_count"
    40  	gameServerStateDurationName             = "gameserver_state_duration"
    41  )
    42  
    43  var (
    44  	// fleetAutoscalerViews are metric views associated with FleetAutoscalers
    45  	fleetAutoscalerViews = []string{fleetAutoscalerBufferLimitName, fleetAutoscalterBufferSizeName, fleetAutoscalerCurrentReplicaCountName,
    46  		fleetAutoscalersDesiredReplicaCountName, fleetAutoscalersAbleToScaleName, fleetAutoscalersLimitedName}
    47  	// fleetViews are metric views associated with Fleets
    48  	fleetViews = append([]string{fleetRolloutPercent, fleetReplicaCountName, gameServersCountName, gameServersTotalName, gameServersPlayerConnectedTotalName, gameServersPlayerCapacityTotalName, gameServerStateDurationName, fleetCountersName, fleetListsName}, fleetAutoscalerViews...)
    49  
    50  	stateDurationSeconds           = []float64{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}
    51  	fleetRolloutPercentStats       = stats.Int64("fleets/rollout_percent", "The current fleet rollout percentage", "1")
    52  	fleetsReplicasCountStats       = stats.Int64("fleets/replicas_count", "The count of replicas per fleet", "1")
    53  	fasBufferLimitsCountStats      = stats.Int64("fas/buffer_limits", "The buffer limits of autoscalers", "1")
    54  	fasBufferSizeStats             = stats.Int64("fas/buffer_size", "The buffer size value of autoscalers", "1")
    55  	fasCurrentReplicasStats        = stats.Int64("fas/current_replicas_count", "The current replicas cout as seen by autoscalers", "1")
    56  	fasDesiredReplicasStats        = stats.Int64("fas/desired_replicas_count", "The desired replicas cout as seen by autoscalers", "1")
    57  	fasAbleToScaleStats            = stats.Int64("fas/able_to_scale", "The fleet autoscaler can access the fleet to scale (0 indicates false, 1 indicates true)", "1")
    58  	fasLimitedStats                = stats.Int64("fas/limited", "The fleet autoscaler is capped (0 indicates false, 1 indicates true)", "1")
    59  	fleetCountersStats             = stats.Int64("fleets/counters", "Aggregated Counters counts and capacity across GameServers in the Fleet", "1")
    60  	fleetListsStats                = stats.Int64("fleets/lists", "Aggregated Lists counts and capacity across GameServers in the Fleet", "1")
    61  	gameServerCountStats           = stats.Int64("gameservers/count", "The count of gameservers", "1")
    62  	gameServerTotalStats           = stats.Int64("gameservers/total", "The total of gameservers", "1")
    63  	gameServerPlayerConnectedTotal = stats.Int64("gameservers/player_connected", "The total number of players connected to gameservers", "1")
    64  	gameServerPlayerCapacityTotal  = stats.Int64("gameservers/player_capacity", "The available player capacity for gameservers", "1")
    65  	nodesCountStats                = stats.Int64("nodes/count", "The count of nodes in the cluster", "1")
    66  	gsPerNodesCountStats           = stats.Int64("gameservers_node/count", "The count of gameservers per node in the cluster", "1")
    67  	gsStateDurationSec             = stats.Float64("gameservers_state/duration", "The duration of gameservers to be in a particular state", stats.UnitSeconds)
    68  
    69  	stateViews = []*view.View{
    70  		{
    71  			Name:        fleetRolloutPercent,
    72  			Measure:     fleetRolloutPercentStats,
    73  			Description: "Measures the current progress of fleet rollout",
    74  			Aggregation: view.LastValue(),
    75  			TagKeys:     []tag.Key{keyName, keyType, keyNamespace},
    76  		},
    77  		{
    78  			Name:        fleetReplicaCountName,
    79  			Measure:     fleetsReplicasCountStats,
    80  			Description: "The number of replicas per fleet",
    81  			Aggregation: view.LastValue(),
    82  			TagKeys:     []tag.Key{keyName, keyType, keyNamespace},
    83  		},
    84  		{
    85  			Name:        fleetAutoscalerBufferLimitName,
    86  			Measure:     fasBufferLimitsCountStats,
    87  			Description: "The limits of buffer based fleet autoscalers",
    88  			Aggregation: view.LastValue(),
    89  			TagKeys:     []tag.Key{keyName, keyType, keyFleetName, keyNamespace},
    90  		},
    91  		{
    92  			Name:        fleetAutoscalterBufferSizeName,
    93  			Measure:     fasBufferSizeStats,
    94  			Description: "The buffer size of fleet autoscalers",
    95  			Aggregation: view.LastValue(),
    96  			TagKeys:     []tag.Key{keyName, keyType, keyFleetName, keyNamespace},
    97  		},
    98  		{
    99  			Name:        fleetAutoscalerCurrentReplicaCountName,
   100  			Measure:     fasCurrentReplicasStats,
   101  			Description: "The current replicas count as seen by autoscalers",
   102  			Aggregation: view.LastValue(),
   103  			TagKeys:     []tag.Key{keyName, keyFleetName, keyNamespace},
   104  		},
   105  		{
   106  			Name:        fleetAutoscalersDesiredReplicaCountName,
   107  			Measure:     fasDesiredReplicasStats,
   108  			Description: "The desired replicas count as seen by autoscalers",
   109  			Aggregation: view.LastValue(),
   110  			TagKeys:     []tag.Key{keyName, keyFleetName, keyNamespace},
   111  		},
   112  		{
   113  			Name:        fleetAutoscalersAbleToScaleName,
   114  			Measure:     fasAbleToScaleStats,
   115  			Description: "The fleet autoscaler can access the fleet to scale",
   116  			Aggregation: view.LastValue(),
   117  			TagKeys:     []tag.Key{keyName, keyFleetName, keyNamespace},
   118  		},
   119  		{
   120  			Name:        fleetAutoscalersLimitedName,
   121  			Measure:     fasLimitedStats,
   122  			Description: "The fleet autoscaler is capped",
   123  			Aggregation: view.LastValue(),
   124  			TagKeys:     []tag.Key{keyName, keyFleetName, keyNamespace},
   125  		},
   126  		{
   127  			Name:        fleetCountersName,
   128  			Measure:     fleetCountersStats,
   129  			Description: "Aggregated Counters counts and capacity across GameServers in the Fleet",
   130  			Aggregation: view.LastValue(),
   131  			TagKeys:     []tag.Key{keyName, keyNamespace, keyType, keyCounter},
   132  		},
   133  		{
   134  			Name:        fleetListsName,
   135  			Measure:     fleetListsStats,
   136  			Description: "Aggregated Lists counts and capacity across GameServers in the Fleet",
   137  			Aggregation: view.LastValue(),
   138  			TagKeys:     []tag.Key{keyName, keyNamespace, keyType, keyList},
   139  		},
   140  		{
   141  			Name:        gameServersCountName,
   142  			Measure:     gameServerCountStats,
   143  			Description: "The number of gameservers",
   144  			Aggregation: view.LastValue(),
   145  			TagKeys:     []tag.Key{keyType, keyFleetName, keyNamespace},
   146  		},
   147  		{
   148  			Name:        gameServersTotalName,
   149  			Measure:     gameServerTotalStats,
   150  			Description: "The total of gameservers",
   151  			Aggregation: view.Count(),
   152  			TagKeys:     []tag.Key{keyType, keyFleetName, keyNamespace},
   153  		},
   154  		{
   155  			Name:        gameServersPlayerConnectedTotalName,
   156  			Measure:     gameServerPlayerConnectedTotal,
   157  			Description: "The current amount of players connected in gameservers",
   158  			Aggregation: view.LastValue(),
   159  			TagKeys:     []tag.Key{keyFleetName, keyName, keyNamespace},
   160  		},
   161  		{
   162  			Name:        gameServersPlayerCapacityTotalName,
   163  			Measure:     gameServerPlayerCapacityTotal,
   164  			Description: "The available player capacity per gameserver",
   165  			Aggregation: view.LastValue(),
   166  			TagKeys:     []tag.Key{keyFleetName, keyName, keyNamespace},
   167  		},
   168  		{
   169  			Name:        nodeCountName,
   170  			Measure:     nodesCountStats,
   171  			Description: "The count of nodes in the cluster",
   172  			Aggregation: view.LastValue(),
   173  			TagKeys:     []tag.Key{keyEmpty},
   174  		},
   175  		{
   176  			Name:        gameServersNodeCountName,
   177  			Measure:     gsPerNodesCountStats,
   178  			Description: "The count of gameservers per node in the cluster",
   179  			Aggregation: view.Distribution(0.00001, 1.00001, 2.00001, 3.00001, 4.00001, 5.00001, 6.00001, 7.00001, 8.00001, 9.00001, 10.00001, 11.00001, 12.00001, 13.00001, 14.00001, 15.00001, 16.00001, 32.00001, 40.00001, 50.00001, 60.00001, 70.00001, 80.00001, 90.00001, 100.00001, 110.00001, 120.00001),
   180  		},
   181  		{
   182  			Name:        gameServerStateDurationName,
   183  			Measure:     gsStateDurationSec,
   184  			Description: "The time gameserver exists in the current state in seconds",
   185  			Aggregation: view.Distribution(stateDurationSeconds...),
   186  			TagKeys:     []tag.Key{keyType, keyFleetName, keyNamespace},
   187  		},
   188  	}
   189  )
   190  
   191  // register all our state views to OpenCensus
   192  func registerViews() {
   193  	for _, v := range stateViews {
   194  		if err := view.Register(v); err != nil {
   195  			logger.WithError(err).Error("could not register view")
   196  		}
   197  	}
   198  }
   199  
   200  // unregister views, this is only useful for tests as it trigger reporting.
   201  func unRegisterViews() {
   202  	for _, v := range stateViews {
   203  		view.Unregister(v)
   204  	}
   205  }
   206  
   207  // resetViews resets the values of an entire view.
   208  // Since we have no way to delete a gauge, we have to reset
   209  // the whole thing and start from scratch.
   210  func resetViews(names []string) {
   211  	for _, v := range stateViews {
   212  		for _, name := range names {
   213  			if v.Name == name {
   214  				view.Unregister(v)
   215  				if err := view.Register(v); err != nil {
   216  					logger.WithError(err).Error("could not register view")
   217  				}
   218  			}
   219  		}
   220  	}
   221  }