go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gce/appengine/backend/internal/metrics/tasks.go (about) 1 // Copyright 2019 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package metrics 16 17 import ( 18 "context" 19 "time" 20 21 "go.chromium.org/luci/common/clock" 22 "go.chromium.org/luci/common/errors" 23 "go.chromium.org/luci/common/logging" 24 "go.chromium.org/luci/common/tsmon" 25 "go.chromium.org/luci/common/tsmon/field" 26 "go.chromium.org/luci/common/tsmon/metric" 27 "go.chromium.org/luci/gae/service/datastore" 28 ) 29 30 var ( 31 tasksExecuting = metric.NewInt( 32 "gce/tasks/executing", 33 "The number of task queue tasks currently executing.", 34 nil, 35 field.String("queue"), 36 ) 37 38 tasksPending = metric.NewInt( 39 "gce/tasks/pending", 40 "The number of task queue tasks currently pending.", 41 nil, 42 field.String("queue"), 43 ) 44 45 tasksTotal = metric.NewInt( 46 "gce/tasks/total", 47 "The total number of task queue tasks in the queue.", 48 nil, 49 field.String("queue"), 50 ) 51 ) 52 53 // TaskCount is a root entity representing a count of task queue tasks. 54 type TaskCount struct { 55 // _extra is where unknown properties are put into memory. 56 // Extra properties are not written to the datastore. 57 _extra datastore.PropertyMap `gae:"-,extra"` 58 // _kind is the entity's kind in the datastore. 59 _kind string `gae:"$kind,TaskCount"` 60 // ID is the unique identifier for this count. 61 ID string `gae:"$id"` 62 // Queue is the task queue for this count. 63 Queue string `gae:"queue"` 64 // Computed is the time this count was computed. 65 Computed time.Time `gae:"computed"` 66 // Executing is a count of currently executing tasks. 67 Executing int `gae:"executing,noindex"` 68 // Total is a count of the total number of tasks in the queue. 69 Total int `gae:"pending,noindex"` 70 } 71 72 // Update updates metrics for counts of tasks for the given queue. 73 func (tc *TaskCount) Update(c context.Context, queue string, exec, tot int) error { 74 // Queue names are globally unique, so we can use them as IDs. 75 tc.ID = queue 76 tc.Computed = clock.Now(c).UTC() 77 tc.Queue = queue 78 tc.Executing = exec 79 tc.Total = tot 80 if err := datastore.Put(c, tc); err != nil { 81 return errors.Annotate(err, "failed to store count").Err() 82 } 83 return nil 84 } 85 86 // updateTasks sets task queue task metrics. 87 func updateTasks(c context.Context) { 88 now := clock.Now(c) 89 q := datastore.NewQuery("TaskCount").Order("computed") 90 if err := datastore.Run(c, q, func(tc *TaskCount) { 91 if now.Sub(tc.Computed) > 5*time.Minute { 92 logging.Debugf(c, "deleting outdated count %q", tc.Queue) 93 if err := datastore.Delete(c, tc); err != nil { 94 logging.Errorf(c, "%s", err) 95 } 96 return 97 } 98 tasksExecuting.Set(c, int64(tc.Executing), tc.Queue) 99 tasksPending.Set(c, int64(tc.Total-tc.Executing), tc.Queue) 100 tasksTotal.Set(c, int64(tc.Total), tc.Queue) 101 }); err != nil { 102 errors.Log(c, errors.Annotate(err, "failed to fetch counts").Err()) 103 } 104 } 105 106 func init() { 107 tsmon.RegisterGlobalCallback(updateTasks, tasksExecuting, tasksPending, tasksTotal) 108 }