go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gce/appengine/backend/internal/metrics/instances.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metrics
    16  
    17  import (
    18  	"context"
    19  	"time"
    20  
    21  	"go.chromium.org/luci/common/clock"
    22  	"go.chromium.org/luci/common/errors"
    23  	"go.chromium.org/luci/common/logging"
    24  	"go.chromium.org/luci/common/tsmon"
    25  	"go.chromium.org/luci/common/tsmon/field"
    26  	"go.chromium.org/luci/common/tsmon/metric"
    27  	"go.chromium.org/luci/gae/service/datastore"
    28  )
    29  
// Metrics describing GCE instances.
//
// The three "gce/instances/*" Int metrics are set by updateInstances, which
// init registers as a tsmon global callback. The two exported Counter metrics
// are only declared here; nothing in this file modifies them.
var (
	// configuredInstances reports the number of GCE instances configured to
	// exist. Fields: prefix, project.
	configuredInstances = metric.NewInt(
		"gce/instances/configured",
		"The number of GCE instances configured to exist.",
		nil,
		field.String("prefix"),
		field.String("project"),
	)

	// createdInstances reports the number of GCE instances created.
	// Fields: prefix, project, zone.
	createdInstances = metric.NewInt(
		"gce/instances/created",
		"The number of GCE instances created.",
		nil,
		field.String("prefix"),
		field.String("project"),
		field.String("zone"),
	)

	// CreatedInstanceChecked is a cumulative counter of GCE instances created
	// and checked. Fields: prefix, project, zone, instance.
	CreatedInstanceChecked = metric.NewCounter(
		"gce/instance/creation/checked",
		"Cumulative number of GCE instances created and checked.",
		nil,
		field.String("prefix"),
		field.String("project"),
		field.String("zone"),
		field.String("instance"),
	)

	// DestroyInstanceUnchecked is a cumulative counter of GCE instances
	// requested to destroy. Fields: prefix, project, zone, instance.
	DestroyInstanceUnchecked = metric.NewCounter(
		"gce/instance/destroy/unchecked",
		"Cumulative number of GCE instances requested to destroy.",
		nil,
		field.String("prefix"),
		field.String("project"),
		field.String("zone"),
		field.String("instance"),
	)

	// connectedInstances reports the number of GCE instances connected to
	// Swarming. Fields: prefix, project, server, zone.
	connectedInstances = metric.NewInt(
		"gce/instances/connected",
		"The number of GCE instances connected to Swarming.",
		nil,
		field.String("prefix"),
		field.String("project"),
		field.String("server"),
		field.String("zone"),
	)
)
    78  
// configuredCount encapsulates a count of configured VMs.
//
// InstanceCount.AddConfigured keeps one entry per distinct Project, and
// updateInstances reports Count as the value of the configured-instances
// metric with Project as its "project" field.
type configuredCount struct {
	Count   int
	Project string
}
    84  
// createdCount encapsulates a count of created VMs.
//
// InstanceCount.AddCreated keeps one entry per distinct (Project, Zone) pair,
// and updateInstances reports Count as the value of the created-instances
// metric with Project and Zone as its "project" and "zone" fields.
type createdCount struct {
	Count   int
	Project string
	Zone    string
}
    91  
// connectedCount encapsulates a count of connected VMs.
//
// InstanceCount.AddConnected keeps one entry per distinct (Project, Server,
// Zone) triple, and updateInstances reports Count as the value of the
// connected-instances metric with Project, Server and Zone as its "project",
// "server" and "zone" fields.
type connectedCount struct {
	Count   int
	Project string
	Server  string
	Zone    string
}
    99  
// InstanceCount is a root entity representing a count for instances with a
// common prefix.
//
// Counts are accumulated in memory with AddConfigured, AddCreated and
// AddConnected, written to the datastore by Update, and later read back by
// updateInstances, which turns them into metric values.
type InstanceCount struct {
	// _extra is where unknown properties are put into memory.
	// Extra properties are not written to the datastore.
	_extra datastore.PropertyMap `gae:"-,extra"`
	// _kind is the entity's kind in the datastore.
	_kind string `gae:"$kind,InstanceCount"`
	// ID is the unique identifier for this count. Update sets it to the prefix.
	ID string `gae:"$id"`
	// Prefix is the prefix for this count. Update sets it, and updateInstances
	// uses it as the "prefix" field of every metric value it reports.
	Prefix string `gae:"prefix"`
	// Computed is the time this count was computed. Update sets it to the
	// current time in UTC; updateInstances deletes entities whose Computed is
	// more than 10 minutes old instead of reporting them.
	Computed time.Time `gae:"computed"`
	// Configured is a slice of configuredCounts.
	Configured []configuredCount `gae:"configured,noindex"`
	// Connected is a slice of connectedCounts.
	Connected []connectedCount `gae:"connected,noindex"`
	// Created is a slice of createdCounts.
	Created []createdCount `gae:"created,noindex"`
}
   121  
   122  // AddConfigured increments the count of configured VMs for the given project.
   123  func (ic *InstanceCount) AddConfigured(n int, project string) {
   124  	for i, c := range ic.Configured {
   125  		if c.Project == project {
   126  			ic.Configured[i].Count += n
   127  			return
   128  		}
   129  	}
   130  	ic.Configured = append(ic.Configured, configuredCount{
   131  		Count:   n,
   132  		Project: project,
   133  	})
   134  }
   135  
   136  // AddCreated increments the count of created VMs for the given project and
   137  // zone.
   138  func (ic *InstanceCount) AddCreated(n int, project, zone string) {
   139  	for i, c := range ic.Created {
   140  		if c.Project == project && c.Zone == zone {
   141  			ic.Created[i].Count += n
   142  			return
   143  		}
   144  	}
   145  	ic.Created = append(ic.Created, createdCount{
   146  		Count:   n,
   147  		Project: project,
   148  		Zone:    zone,
   149  	})
   150  }
   151  
   152  // AddConnected increments the count of connected VMs for the given project,
   153  // server, and zone.
   154  func (ic *InstanceCount) AddConnected(n int, project, server, zone string) {
   155  	for i, c := range ic.Connected {
   156  		if c.Project == project && c.Server == server && c.Zone == zone {
   157  			ic.Connected[i].Count += n
   158  			return
   159  		}
   160  	}
   161  	ic.Connected = append(ic.Connected, connectedCount{
   162  		Count:   n,
   163  		Project: project,
   164  		Server:  server,
   165  		Zone:    zone,
   166  	})
   167  }
   168  
   169  // Update updates metrics for all known counts of VMs for the given prefix.
   170  func (ic *InstanceCount) Update(c context.Context, prefix string) error {
   171  	// Prefixes are globally unique, so we can use them as IDs.
   172  	ic.ID = prefix
   173  	ic.Computed = clock.Now(c).UTC()
   174  	ic.Prefix = prefix
   175  	if err := datastore.Put(c, ic); err != nil {
   176  		return errors.Annotate(err, "failed to store count").Err()
   177  	}
   178  	return nil
   179  }
   180  
   181  // updateInstances sets GCE instance metrics.
   182  func updateInstances(c context.Context) {
   183  	now := clock.Now(c)
   184  	q := datastore.NewQuery("InstanceCount").Order("computed")
   185  	if err := datastore.Run(c, q, func(ic *InstanceCount) {
   186  		if now.Sub(ic.Computed) > 10*time.Minute {
   187  			logging.Debugf(c, "deleting outdated count %q", ic.Prefix)
   188  			if err := datastore.Delete(c, ic); err != nil {
   189  				logging.Errorf(c, "%s", err)
   190  			}
   191  			return
   192  		}
   193  		for _, conf := range ic.Configured {
   194  			configuredInstances.Set(c, int64(conf.Count), ic.Prefix, conf.Project)
   195  		}
   196  		for _, crea := range ic.Created {
   197  			createdInstances.Set(c, int64(crea.Count), ic.Prefix, crea.Project, crea.Zone)
   198  		}
   199  		for _, conn := range ic.Connected {
   200  			connectedInstances.Set(c, int64(conn.Count), ic.Prefix, conn.Project, conn.Server, conn.Zone)
   201  		}
   202  	}); err != nil {
   203  		errors.Log(c, errors.Annotate(err, "failed to fetch counts").Err())
   204  	}
   205  }
   206  
   207  func init() {
   208  	tsmon.RegisterGlobalCallback(updateInstances, configuredInstances, connectedInstances, createdInstances)
   209  }