go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gce/appengine/backend/cron.go (about) 1 // Copyright 2018 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package backend 16 17 import ( 18 "context" 19 "net/http" 20 "reflect" 21 22 "github.com/golang/protobuf/proto" 23 24 "go.chromium.org/luci/appengine/tq" 25 "go.chromium.org/luci/common/errors" 26 "go.chromium.org/luci/common/logging" 27 "go.chromium.org/luci/gae/service/datastore" 28 "go.chromium.org/luci/gae/service/taskqueue" 29 "go.chromium.org/luci/server/router" 30 31 "go.chromium.org/luci/gce/api/config/v1" 32 "go.chromium.org/luci/gce/api/tasks/v1" 33 "go.chromium.org/luci/gce/appengine/backend/internal/metrics" 34 "go.chromium.org/luci/gce/appengine/model" 35 ) 36 37 // newHTTPHandler returns a router.Handler which invokes the given function. 38 func newHTTPHandler(f func(c context.Context) error) router.Handler { 39 return func(c *router.Context) { 40 c.Writer.Header().Set("Content-Type", "text/plain") 41 42 if err := f(c.Request.Context()); err != nil { 43 errors.Log(c.Request.Context(), err) 44 c.Writer.WriteHeader(http.StatusInternalServerError) 45 return 46 } 47 48 c.Writer.WriteHeader(http.StatusOK) 49 } 50 } 51 52 // payloadFn is a function which receives an ID and returns a proto.Message to 53 // use as the Payload in a *tq.Task. 54 type payloadFn func(string) proto.Message 55 56 // payloadFactory returns a payloadFn which can be called to return a 57 // proto.Message to use as the Payload in a *tq.Task. 58 func payloadFactory(t tasks.Task) payloadFn { 59 rt := reflect.TypeOf(t).Elem() 60 return func(id string) proto.Message { 61 p := reflect.New(rt) 62 p.Elem().FieldByName("Id").SetString(id) 63 return p.Interface().(proto.Message) 64 } 65 } 66 67 // trigger triggers a task queue task for each key returned by the given query. 68 func trigger(c context.Context, t tasks.Task, q *datastore.Query) error { 69 tasks := make([]*tq.Task, 0) 70 newPayload := payloadFactory(t) 71 addTask := func(k *datastore.Key) { 72 tasks = append(tasks, &tq.Task{ 73 Payload: newPayload(k.StringID()), 74 }) 75 } 76 if err := datastore.Run(c, q, addTask); err != nil { 77 return errors.Annotate(err, "failed to fetch keys").Err() 78 } 79 logging.Debugf(c, "scheduling %d tasks", len(tasks)) 80 if err := getDispatcher(c).AddTask(c, tasks...); err != nil { 81 return errors.Annotate(err, "failed to schedule tasks").Err() 82 } 83 return nil 84 } 85 86 // countVMsAsync schedules task queue tasks to count VMs for each config. 87 func countVMsAsync(c context.Context) error { 88 return trigger(c, &tasks.CountVMs{}, datastore.NewQuery(model.ConfigKind)) 89 } 90 91 // createInstancesAsync schedules task queue tasks to create each GCE instance. 92 func createInstancesAsync(c context.Context) error { 93 return trigger(c, &tasks.CreateInstance{}, datastore.NewQuery(model.VMKind).Eq("url", "")) 94 } 95 96 // expandConfigsAsync schedules task queue tasks to expand each config. 97 func expandConfigsAsync(c context.Context) error { 98 return trigger(c, &tasks.ExpandConfig{}, datastore.NewQuery(model.ConfigKind)) 99 } 100 101 // manageBotsAsync schedules task queue tasks to manage each Swarming bot. 102 func manageBotsAsync(c context.Context) error { 103 return trigger(c, &tasks.ManageBot{}, datastore.NewQuery(model.VMKind).Gt("url", "")) 104 } 105 106 // drainVMsAsync comapres the config table to the vm table and determines the VMs that can be 107 // drained. It schedules a drainVM task for each of those VMs 108 func drainVMsAsync(c context.Context) error { 109 configMap := make(map[string]*config.Config) 110 // Get all the configs in datastore 111 qC := datastore.NewQuery("Config") 112 if err := datastore.Run(c, qC, func(cfg *model.Config) { 113 configMap[cfg.ID] = cfg.Config 114 }); err != nil { 115 return errors.Annotate(err, "failed to list Config").Err() 116 } 117 vmMap := make(map[string]*model.VM) 118 qV := datastore.NewQuery("VM") 119 if err := datastore.Run(c, qV, func(vm *model.VM) { 120 vmMap[vm.ID] = vm 121 }); err != nil { 122 return errors.Annotate(err, "failed to list VMs").Err() 123 } 124 /* Config dictate how many VMs can be online for any given prefix. Check if there are 125 * more bots assigned than required by the config and drain them. 126 */ 127 //TODO(anushruth): Delete VMs based on uptime instead of ID. 128 var taskList []*tq.Task 129 for id, vm := range vmMap { 130 if configMap[vm.Config].GetCurrentAmount() <= vm.Index { 131 taskList = append(taskList, &tq.Task{ 132 Payload: &tasks.DrainVM{ 133 Id: id, 134 }, 135 }) 136 } 137 } 138 if len(taskList) > 0 { 139 if err := getDispatcher(c).AddTask(c, taskList...); err != nil { 140 return errors.Annotate(err, "failed to schedule tasks").Err() 141 } 142 } 143 return nil 144 } 145 146 // auditInstances schedules an audit task for every project:zone combination 147 func auditInstances(c context.Context) error { 148 var projects []string 149 addProject := func(p *model.Project) { 150 proj := p.Config.GetProject() 151 projects = append(projects, proj) 152 } 153 q := datastore.NewQuery(model.ProjectKind) 154 if err := datastore.Run(c, q, addProject); err != nil { 155 return errors.Annotate(err, "failed to schedule audits").Err() 156 } 157 jobs := make([]*tq.Task, 0) 158 srv := getCompute(c).Stable.Zones 159 for _, proj := range projects { 160 zoneList, err := srv.List(proj).Context(c).Do() 161 if err != nil { 162 logging.Errorf(c, "Failed to list zones for %s. %v", proj, err) 163 continue 164 } 165 for _, zone := range zoneList.Items { 166 jobs = append(jobs, &tq.Task{ 167 Payload: &tasks.AuditProject{ 168 Project: proj, 169 Zone: zone.Name, 170 }, 171 }) 172 } 173 } 174 if err := getDispatcher(c).AddTask(c, jobs...); err != nil { 175 return errors.Annotate(err, "failed to schedule tasks").Err() 176 } 177 return nil 178 } 179 180 // reportQuotasAsync schedules task queue tasks to report quota in each project. 181 func reportQuotasAsync(c context.Context) error { 182 return trigger(c, &tasks.ReportQuota{}, datastore.NewQuery(model.ProjectKind)) 183 } 184 185 // countTasks counts tasks for each queue. 186 func countTasks(c context.Context) error { 187 qs := getDispatcher(c).GetQueues() 188 logging.Debugf(c, "found %d task queues", len(qs)) 189 for _, q := range qs { 190 s, err := taskqueue.Stats(c, q) 191 switch { 192 case err != nil: 193 return errors.Annotate(err, "failed to get %q task queue stats", q).Err() 194 case len(s) < 1: 195 return errors.Reason("failed to get %q task queue stats", q).Err() 196 } 197 t := &metrics.TaskCount{} 198 if err := t.Update(c, q, s[0].InFlight, s[0].Tasks); err != nil { 199 return errors.Annotate(err, "failed to update %q task queue count", q).Err() 200 } 201 } 202 return nil 203 }