agones.dev/agones@v1.53.0/pkg/gameserverallocations/metrics.go (about)

     1  // Copyright 2019 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gameserverallocations
    16  
    17  import (
    18  	"context"
    19  	"strconv"
    20  	"time"
    21  
    22  	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
    23  	allocationv1 "agones.dev/agones/pkg/apis/allocation/v1"
    24  	listerv1 "agones.dev/agones/pkg/client/listers/agones/v1"
    25  	mt "agones.dev/agones/pkg/metrics"
    26  	"agones.dev/agones/pkg/util/runtime"
    27  	"github.com/sirupsen/logrus"
    28  	"go.opencensus.io/stats"
    29  	"go.opencensus.io/stats/view"
    30  	"go.opencensus.io/tag"
    31  	k8sruntime "k8s.io/apimachinery/pkg/runtime"
    32  )
    33  
    34  var (
    35  	logger = runtime.NewLoggerWithSource("metrics")
    36  
    37  	keyFleetName          = mt.MustTagKey("fleet_name")
    38  	keyClusterName        = mt.MustTagKey("cluster_name")
    39  	keyMultiCluster       = mt.MustTagKey("is_multicluster")
    40  	keyStatus             = mt.MustTagKey("status")
    41  	keySchedulingStrategy = mt.MustTagKey("scheduling_strategy")
    42  
    43  	gameServerAllocationsLatency    = stats.Float64("gameserver_allocations/latency", "The duration of gameserver allocations", "s")
    44  	gameServerAllocationsRetryTotal = stats.Int64("gameserver_allocations/errors", "The errors of gameserver allocations", "1")
    45  
    46  	stateViews = []*view.View{
    47  		{
    48  			Name:        "gameserver_allocations_duration_seconds",
    49  			Measure:     gameServerAllocationsLatency,
    50  			Description: "The distribution of gameserver allocation requests latencies.",
    51  			Aggregation: view.Distribution(0, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1, 2, 3),
    52  			TagKeys:     []tag.Key{keyFleetName, keyClusterName, keyMultiCluster, keyStatus, keySchedulingStrategy},
    53  		},
    54  		{
    55  			Name:        "gameserver_allocations_retry_total",
    56  			Measure:     gameServerAllocationsRetryTotal,
    57  			Description: "The count of gameserver allocation retry until it succeeds",
    58  			Aggregation: view.Distribution(1, 2, 3, 4, 5),
    59  			TagKeys:     []tag.Key{keyFleetName, keyClusterName, keyMultiCluster, keyStatus, keySchedulingStrategy},
    60  		},
    61  	}
    62  )
    63  
    64  // register all our state views to OpenCensus
    65  func registerViews() {
    66  	for _, v := range stateViews {
    67  		if err := view.Register(v); err != nil {
    68  			logger.WithError(err).Error("could not register view")
    69  		}
    70  	}
    71  }
    72  
    73  // unregister views, this is only useful for tests as it trigger reporting.
    74  func unRegisterViews() {
    75  	for _, v := range stateViews {
    76  		view.Unregister(v)
    77  	}
    78  }
    79  
    80  // default set of tags for latency metric
    81  var latencyTags = []tag.Mutator{
    82  	tag.Insert(keyMultiCluster, "none"),
    83  	tag.Insert(keyClusterName, "none"),
    84  	tag.Insert(keySchedulingStrategy, "none"),
    85  	tag.Insert(keyFleetName, "none"),
    86  	tag.Insert(keyStatus, "none"),
    87  }
    88  
    89  type metrics struct {
    90  	ctx              context.Context
    91  	gameServerLister listerv1.GameServerLister
    92  	logger           *logrus.Entry
    93  	start            time.Time
    94  }
    95  
    96  // mutate the current set of metric tags
    97  func (r *metrics) mutate(m ...tag.Mutator) {
    98  	var err error
    99  	r.ctx, err = tag.New(r.ctx, m...)
   100  	if err != nil {
   101  		r.logger.WithError(err).Warn("failed to mutate request context.")
   102  	}
   103  }
   104  
   105  // setStatus set the latency status tag.
   106  func (r *metrics) setStatus(status string) {
   107  	r.mutate(tag.Update(keyStatus, status))
   108  }
   109  
   110  // setError set the latency status tag as error.
   111  func (r *metrics) setError() {
   112  	r.mutate(tag.Update(keyStatus, "error"))
   113  }
   114  
   115  // setRequest set request metric tags.
   116  func (r *metrics) setRequest(in *allocationv1.GameServerAllocation) {
   117  	tags := []tag.Mutator{
   118  		tag.Update(keySchedulingStrategy, string(in.Spec.Scheduling)),
   119  	}
   120  
   121  	tags = append(tags, tag.Update(keyMultiCluster, strconv.FormatBool(in.Spec.MultiClusterSetting.Enabled)))
   122  	r.mutate(tags...)
   123  }
   124  
   125  // setResponse set response metric tags.
   126  func (r *metrics) setResponse(o k8sruntime.Object) {
   127  	out, ok := o.(*allocationv1.GameServerAllocation)
   128  	if out == nil || !ok {
   129  		return
   130  	}
   131  	r.setStatus(string(out.Status.State))
   132  	var tags []tag.Mutator
   133  	// sets the fleet name tag if possible
   134  	if out.Status.State == allocationv1.GameServerAllocationAllocated && out.Status.Source == localAllocationSource {
   135  		gs, err := r.gameServerLister.GameServers(out.Namespace).Get(out.Status.GameServerName)
   136  		if err != nil {
   137  			r.logger.WithError(err).Warnf("failed to get gameserver:%s namespace:%s", out.Status.GameServerName, out.Namespace)
   138  			return
   139  		}
   140  		fleetName := gs.Labels[agonesv1.FleetNameLabel]
   141  		if fleetName != "" {
   142  			tags = append(tags, tag.Update(keyFleetName, fleetName))
   143  		}
   144  	}
   145  	r.mutate(tags...)
   146  }
   147  
   148  // record the current allocation latency.
   149  func (r *metrics) record() {
   150  	stats.Record(r.ctx, gameServerAllocationsLatency.M(time.Since(r.start).Seconds()))
   151  }
   152  
   153  // record the current allocation retry rate.
   154  func (r *metrics) recordAllocationRetrySuccess(ctx context.Context, retryCount int) {
   155  	mt.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(keyStatus, "Success")},
   156  		gameServerAllocationsRetryTotal.M(int64(retryCount)))
   157  }