go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gce/appengine/backend/internal/metrics/instances.go (about) 1 // Copyright 2019 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package metrics 16 17 import ( 18 "context" 19 "time" 20 21 "go.chromium.org/luci/common/clock" 22 "go.chromium.org/luci/common/errors" 23 "go.chromium.org/luci/common/logging" 24 "go.chromium.org/luci/common/tsmon" 25 "go.chromium.org/luci/common/tsmon/field" 26 "go.chromium.org/luci/common/tsmon/metric" 27 "go.chromium.org/luci/gae/service/datastore" 28 ) 29 30 var ( 31 configuredInstances = metric.NewInt( 32 "gce/instances/configured", 33 "The number of GCE instances configured to exist.", 34 nil, 35 field.String("prefix"), 36 field.String("project"), 37 ) 38 39 createdInstances = metric.NewInt( 40 "gce/instances/created", 41 "The number of GCE instances created.", 42 nil, 43 field.String("prefix"), 44 field.String("project"), 45 field.String("zone"), 46 ) 47 48 CreatedInstanceChecked = metric.NewCounter( 49 "gce/instance/creation/checked", 50 "Cumulative number of GCE instances created and checked.", 51 nil, 52 field.String("prefix"), 53 field.String("project"), 54 field.String("zone"), 55 field.String("instance"), 56 ) 57 58 DestroyInstanceUnchecked = metric.NewCounter( 59 "gce/instance/destroy/unchecked", 60 "Cumulative number of GCE instances requested to destroy.", 61 nil, 62 field.String("prefix"), 63 field.String("project"), 64 field.String("zone"), 65 field.String("instance"), 66 ) 67 68 connectedInstances = metric.NewInt( 69 "gce/instances/connected", 70 "The number of GCE instances connected to Swarming.", 71 nil, 72 field.String("prefix"), 73 field.String("project"), 74 field.String("server"), 75 field.String("zone"), 76 ) 77 ) 78 79 // configuredCount encapsulates a count of configured VMs. 80 type configuredCount struct { 81 Count int 82 Project string 83 } 84 85 // createdCount encapsulates a count of created VMs. 86 type createdCount struct { 87 Count int 88 Project string 89 Zone string 90 } 91 92 // connectedCount encapsulates a count of connected VMs. 93 type connectedCount struct { 94 Count int 95 Project string 96 Server string 97 Zone string 98 } 99 100 // InstanceCount is a root entity representing a count for instances with a 101 // common prefix. 102 type InstanceCount struct { 103 // _extra is where unknown properties are put into memory. 104 // Extra properties are not written to the datastore. 105 _extra datastore.PropertyMap `gae:"-,extra"` 106 // _kind is the entity's kind in the datastore. 107 _kind string `gae:"$kind,InstanceCount"` 108 // ID is the unique identifier for this count. 109 ID string `gae:"$id"` 110 // Prefix is the prefix for this count. 111 Prefix string `gae:"prefix"` 112 // Computed is the time this count was computed. 113 Computed time.Time `gae:"computed"` 114 // Configured is a slice of configuredCounts. 115 Configured []configuredCount `gae:"configured,noindex"` 116 // Connected is a slice of connectedCounts. 117 Connected []connectedCount `gae:"connected,noindex"` 118 // Created is a slice of createdCounts. 119 Created []createdCount `gae:"created,noindex"` 120 } 121 122 // AddConfigured increments the count of configured VMs for the given project. 123 func (ic *InstanceCount) AddConfigured(n int, project string) { 124 for i, c := range ic.Configured { 125 if c.Project == project { 126 ic.Configured[i].Count += n 127 return 128 } 129 } 130 ic.Configured = append(ic.Configured, configuredCount{ 131 Count: n, 132 Project: project, 133 }) 134 } 135 136 // AddCreated increments the count of created VMs for the given project and 137 // zone. 138 func (ic *InstanceCount) AddCreated(n int, project, zone string) { 139 for i, c := range ic.Created { 140 if c.Project == project && c.Zone == zone { 141 ic.Created[i].Count += n 142 return 143 } 144 } 145 ic.Created = append(ic.Created, createdCount{ 146 Count: n, 147 Project: project, 148 Zone: zone, 149 }) 150 } 151 152 // AddConnected increments the count of connected VMs for the given project, 153 // server, and zone. 154 func (ic *InstanceCount) AddConnected(n int, project, server, zone string) { 155 for i, c := range ic.Connected { 156 if c.Project == project && c.Server == server && c.Zone == zone { 157 ic.Connected[i].Count += n 158 return 159 } 160 } 161 ic.Connected = append(ic.Connected, connectedCount{ 162 Count: n, 163 Project: project, 164 Server: server, 165 Zone: zone, 166 }) 167 } 168 169 // Update updates metrics for all known counts of VMs for the given prefix. 170 func (ic *InstanceCount) Update(c context.Context, prefix string) error { 171 // Prefixes are globally unique, so we can use them as IDs. 172 ic.ID = prefix 173 ic.Computed = clock.Now(c).UTC() 174 ic.Prefix = prefix 175 if err := datastore.Put(c, ic); err != nil { 176 return errors.Annotate(err, "failed to store count").Err() 177 } 178 return nil 179 } 180 181 // updateInstances sets GCE instance metrics. 182 func updateInstances(c context.Context) { 183 now := clock.Now(c) 184 q := datastore.NewQuery("InstanceCount").Order("computed") 185 if err := datastore.Run(c, q, func(ic *InstanceCount) { 186 if now.Sub(ic.Computed) > 10*time.Minute { 187 logging.Debugf(c, "deleting outdated count %q", ic.Prefix) 188 if err := datastore.Delete(c, ic); err != nil { 189 logging.Errorf(c, "%s", err) 190 } 191 return 192 } 193 for _, conf := range ic.Configured { 194 configuredInstances.Set(c, int64(conf.Count), ic.Prefix, conf.Project) 195 } 196 for _, crea := range ic.Created { 197 createdInstances.Set(c, int64(crea.Count), ic.Prefix, crea.Project, crea.Zone) 198 } 199 for _, conn := range ic.Connected { 200 connectedInstances.Set(c, int64(conn.Count), ic.Prefix, conn.Project, conn.Server, conn.Zone) 201 } 202 }); err != nil { 203 errors.Log(c, errors.Annotate(err, "failed to fetch counts").Err()) 204 } 205 } 206 207 func init() { 208 tsmon.RegisterGlobalCallback(updateInstances, configuredInstances, connectedInstances, createdInstances) 209 }