go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gce/appengine/backend/cron.go (about)

     1  // Copyright 2018 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package backend
    16  
    17  import (
    18  	"context"
    19  	"net/http"
    20  	"reflect"
    21  
    22  	"github.com/golang/protobuf/proto"
    23  
    24  	"go.chromium.org/luci/appengine/tq"
    25  	"go.chromium.org/luci/common/errors"
    26  	"go.chromium.org/luci/common/logging"
    27  	"go.chromium.org/luci/gae/service/datastore"
    28  	"go.chromium.org/luci/gae/service/taskqueue"
    29  	"go.chromium.org/luci/server/router"
    30  
    31  	"go.chromium.org/luci/gce/api/config/v1"
    32  	"go.chromium.org/luci/gce/api/tasks/v1"
    33  	"go.chromium.org/luci/gce/appengine/backend/internal/metrics"
    34  	"go.chromium.org/luci/gce/appengine/model"
    35  )
    36  
    37  // newHTTPHandler returns a router.Handler which invokes the given function.
    38  func newHTTPHandler(f func(c context.Context) error) router.Handler {
    39  	return func(c *router.Context) {
    40  		c.Writer.Header().Set("Content-Type", "text/plain")
    41  
    42  		if err := f(c.Request.Context()); err != nil {
    43  			errors.Log(c.Request.Context(), err)
    44  			c.Writer.WriteHeader(http.StatusInternalServerError)
    45  			return
    46  		}
    47  
    48  		c.Writer.WriteHeader(http.StatusOK)
    49  	}
    50  }
    51  
    52  // payloadFn is a function which receives an ID and returns a proto.Message to
    53  // use as the Payload in a *tq.Task.
    54  type payloadFn func(string) proto.Message
    55  
    56  // payloadFactory returns a payloadFn which can be called to return a
    57  // proto.Message to use as the Payload in a *tq.Task.
    58  func payloadFactory(t tasks.Task) payloadFn {
    59  	rt := reflect.TypeOf(t).Elem()
    60  	return func(id string) proto.Message {
    61  		p := reflect.New(rt)
    62  		p.Elem().FieldByName("Id").SetString(id)
    63  		return p.Interface().(proto.Message)
    64  	}
    65  }
    66  
    67  // trigger triggers a task queue task for each key returned by the given query.
    68  func trigger(c context.Context, t tasks.Task, q *datastore.Query) error {
    69  	tasks := make([]*tq.Task, 0)
    70  	newPayload := payloadFactory(t)
    71  	addTask := func(k *datastore.Key) {
    72  		tasks = append(tasks, &tq.Task{
    73  			Payload: newPayload(k.StringID()),
    74  		})
    75  	}
    76  	if err := datastore.Run(c, q, addTask); err != nil {
    77  		return errors.Annotate(err, "failed to fetch keys").Err()
    78  	}
    79  	logging.Debugf(c, "scheduling %d tasks", len(tasks))
    80  	if err := getDispatcher(c).AddTask(c, tasks...); err != nil {
    81  		return errors.Annotate(err, "failed to schedule tasks").Err()
    82  	}
    83  	return nil
    84  }
    85  
    86  // countVMsAsync schedules task queue tasks to count VMs for each config.
    87  func countVMsAsync(c context.Context) error {
    88  	return trigger(c, &tasks.CountVMs{}, datastore.NewQuery(model.ConfigKind))
    89  }
    90  
    91  // createInstancesAsync schedules task queue tasks to create each GCE instance.
    92  func createInstancesAsync(c context.Context) error {
    93  	return trigger(c, &tasks.CreateInstance{}, datastore.NewQuery(model.VMKind).Eq("url", ""))
    94  }
    95  
    96  // expandConfigsAsync schedules task queue tasks to expand each config.
    97  func expandConfigsAsync(c context.Context) error {
    98  	return trigger(c, &tasks.ExpandConfig{}, datastore.NewQuery(model.ConfigKind))
    99  }
   100  
   101  // manageBotsAsync schedules task queue tasks to manage each Swarming bot.
   102  func manageBotsAsync(c context.Context) error {
   103  	return trigger(c, &tasks.ManageBot{}, datastore.NewQuery(model.VMKind).Gt("url", ""))
   104  }
   105  
   106  // drainVMsAsync comapres the config table to the vm table and determines the VMs that can be
   107  // drained. It schedules a drainVM task for each of those VMs
   108  func drainVMsAsync(c context.Context) error {
   109  	configMap := make(map[string]*config.Config)
   110  	// Get all the configs in datastore
   111  	qC := datastore.NewQuery("Config")
   112  	if err := datastore.Run(c, qC, func(cfg *model.Config) {
   113  		configMap[cfg.ID] = cfg.Config
   114  	}); err != nil {
   115  		return errors.Annotate(err, "failed to list Config").Err()
   116  	}
   117  	vmMap := make(map[string]*model.VM)
   118  	qV := datastore.NewQuery("VM")
   119  	if err := datastore.Run(c, qV, func(vm *model.VM) {
   120  		vmMap[vm.ID] = vm
   121  	}); err != nil {
   122  		return errors.Annotate(err, "failed to list VMs").Err()
   123  	}
   124  	/* Config dictate how many VMs can be online for any given prefix. Check if there are
   125  	 * more bots assigned than required by the config and drain them.
   126  	 */
   127  	//TODO(anushruth): Delete VMs based on uptime instead of ID.
   128  	var taskList []*tq.Task
   129  	for id, vm := range vmMap {
   130  		if configMap[vm.Config].GetCurrentAmount() <= vm.Index {
   131  			taskList = append(taskList, &tq.Task{
   132  				Payload: &tasks.DrainVM{
   133  					Id: id,
   134  				},
   135  			})
   136  		}
   137  	}
   138  	if len(taskList) > 0 {
   139  		if err := getDispatcher(c).AddTask(c, taskList...); err != nil {
   140  			return errors.Annotate(err, "failed to schedule tasks").Err()
   141  		}
   142  	}
   143  	return nil
   144  }
   145  
   146  // auditInstances schedules an audit task for every project:zone combination
   147  func auditInstances(c context.Context) error {
   148  	var projects []string
   149  	addProject := func(p *model.Project) {
   150  		proj := p.Config.GetProject()
   151  		projects = append(projects, proj)
   152  	}
   153  	q := datastore.NewQuery(model.ProjectKind)
   154  	if err := datastore.Run(c, q, addProject); err != nil {
   155  		return errors.Annotate(err, "failed to schedule audits").Err()
   156  	}
   157  	jobs := make([]*tq.Task, 0)
   158  	srv := getCompute(c).Stable.Zones
   159  	for _, proj := range projects {
   160  		zoneList, err := srv.List(proj).Context(c).Do()
   161  		if err != nil {
   162  			logging.Errorf(c, "Failed to list zones for %s. %v", proj, err)
   163  			continue
   164  		}
   165  		for _, zone := range zoneList.Items {
   166  			jobs = append(jobs, &tq.Task{
   167  				Payload: &tasks.AuditProject{
   168  					Project: proj,
   169  					Zone:    zone.Name,
   170  				},
   171  			})
   172  		}
   173  	}
   174  	if err := getDispatcher(c).AddTask(c, jobs...); err != nil {
   175  		return errors.Annotate(err, "failed to schedule tasks").Err()
   176  	}
   177  	return nil
   178  }
   179  
   180  // reportQuotasAsync schedules task queue tasks to report quota in each project.
   181  func reportQuotasAsync(c context.Context) error {
   182  	return trigger(c, &tasks.ReportQuota{}, datastore.NewQuery(model.ProjectKind))
   183  }
   184  
   185  // countTasks counts tasks for each queue.
   186  func countTasks(c context.Context) error {
   187  	qs := getDispatcher(c).GetQueues()
   188  	logging.Debugf(c, "found %d task queues", len(qs))
   189  	for _, q := range qs {
   190  		s, err := taskqueue.Stats(c, q)
   191  		switch {
   192  		case err != nil:
   193  			return errors.Annotate(err, "failed to get %q task queue stats", q).Err()
   194  		case len(s) < 1:
   195  			return errors.Reason("failed to get %q task queue stats", q).Err()
   196  		}
   197  		t := &metrics.TaskCount{}
   198  		if err := t.Update(c, q, s[0].InFlight, s[0].Tasks); err != nil {
   199  			return errors.Annotate(err, "failed to update %q task queue count", q).Err()
   200  		}
   201  	}
   202  	return nil
   203  }