go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/buildbucket/appengine/tasks/update_build_task.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tasks
    16  
    17  import (
    18  	"context"
    19  	"encoding/base64"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"strconv"
    24  	"strings"
    25  	"time"
    26  
    27  	"google.golang.org/api/pubsub/v1"
    28  	"google.golang.org/grpc/codes"
    29  	"google.golang.org/protobuf/proto"
    30  	"google.golang.org/protobuf/types/known/timestamppb"
    31  
    32  	"go.chromium.org/luci/common/clock"
    33  	"go.chromium.org/luci/common/errors"
    34  	"go.chromium.org/luci/common/logging"
    35  	"go.chromium.org/luci/common/retry/transient"
    36  	"go.chromium.org/luci/gae/service/datastore"
    37  	"go.chromium.org/luci/gae/service/info"
    38  	"go.chromium.org/luci/grpc/appstatus"
    39  	"go.chromium.org/luci/server/caching"
    40  
    41  	"go.chromium.org/luci/buildbucket/appengine/common"
    42  	"go.chromium.org/luci/buildbucket/appengine/internal/buildstatus"
    43  	"go.chromium.org/luci/buildbucket/appengine/internal/config"
    44  	"go.chromium.org/luci/buildbucket/appengine/internal/metrics"
    45  	"go.chromium.org/luci/buildbucket/appengine/model"
    46  	pb "go.chromium.org/luci/buildbucket/proto"
    47  	"go.chromium.org/luci/buildbucket/protoutil"
    48  )
    49  
    50  type pushRequest struct {
    51  	Message      pubsub.PubsubMessage `json:"message"`
    52  	Subscription string               `json:"subscription"`
    53  }
    54  
    55  type buildTaskUpdate struct {
    56  	*pb.BuildTaskUpdate
    57  
    58  	// Message id of the pubsub message that sent this request.
    59  	msgID string
    60  
    61  	// Subscription of the pubsub message that sent this request.
    62  	subscription string
    63  }
    64  
    65  func unpackUpdateBuildTaskMsg(ctx context.Context, body io.Reader) (req buildTaskUpdate, err error) {
    66  	req = buildTaskUpdate{}
    67  
    68  	blob, err := io.ReadAll(body)
    69  	if err != nil {
    70  		return req, errors.Annotate(err, "failed to read the request body").Tag(transient.Tag).Err()
    71  	}
    72  
    73  	// process pubsub message
    74  	var msg pushRequest
    75  	if err := json.Unmarshal(blob, &msg); err != nil {
    76  		return req, errors.Annotate(err, "failed to unmarshal UpdateBuildTask PubSub message").Err()
    77  	}
    78  	// process UpdateBuildTask message data
    79  	data, err := base64.StdEncoding.DecodeString(msg.Message.Data)
    80  	if err != nil {
    81  		return req, errors.Annotate(err, "cannot decode UpdateBuildTask message data as base64").Err()
    82  	}
    83  
    84  	bldTskUpdte := &pb.BuildTaskUpdate{}
    85  	if err := (proto.UnmarshalOptions{DiscardUnknown: true}).Unmarshal(data, bldTskUpdte); err != nil {
    86  		return req, errors.Annotate(err, "failed to unmarshal the BuildTaskUpdate pubsub data").Err()
    87  	}
    88  	req.BuildTaskUpdate = bldTskUpdte
    89  	req.subscription = msg.Subscription
    90  	req.msgID = msg.Message.MessageId
    91  	return req, nil
    92  }
    93  
    94  func validateTaskStatus(taskStatus pb.Status, allowPending bool) error {
    95  	switch taskStatus {
    96  	case pb.Status_ENDED_MASK,
    97  		pb.Status_STATUS_UNSPECIFIED:
    98  		return errors.Reason("task.status: invalid status %s for UpdateBuildTask", taskStatus).Err()
    99  	}
   100  
   101  	if !allowPending && taskStatus == pb.Status_SCHEDULED {
   102  		return errors.Reason("task.status: invalid status %s for UpdateBuildTask", taskStatus).Err()
   103  	}
   104  
   105  	if _, ok := pb.Status_value[taskStatus.String()]; !ok {
   106  		return errors.Reason("task.status: invalid status %s for UpdateBuildTask", taskStatus).Err()
   107  	}
   108  	return nil
   109  }
   110  
   111  func validatePubsubSubscription(ctx context.Context, req buildTaskUpdate) error {
   112  	globalCfg, err := config.GetSettingsCfg(ctx)
   113  	if err != nil {
   114  		return errors.Annotate(err, "error fetching service config").Err()
   115  	}
   116  
   117  	target := req.GetTask().GetId().GetTarget()
   118  	if target == "" {
   119  		return errors.Reason("could not validate message. task.id.target not provided.").Err()
   120  	}
   121  
   122  	isValid := false
   123  	for _, backend := range globalCfg.Backends {
   124  		if backend.Target == target {
   125  			var subscription string
   126  			switch backend.Mode.(type) {
   127  			case *pb.BackendSetting_LiteMode_:
   128  				return errors.Reason("backend target %s is in lite mode. The task update isn't supported", target).Err()
   129  			case *pb.BackendSetting_FullMode_:
   130  				subscription = fmt.Sprintf("projects/%s/subscriptions/%s", info.AppID(ctx), backend.GetFullMode().GetPubsubId())
   131  			}
   132  			if subscription == req.subscription {
   133  				isValid = true
   134  			}
   135  			break
   136  		}
   137  	}
   138  
   139  	if !isValid {
   140  		return errors.Reason("pubsub subscription %s did not match the one configured for target %s", req.subscription, target).Err()
   141  	}
   142  	return nil
   143  }
   144  
   145  func validateTask(task *pb.Task, allowPending bool) error {
   146  	if task.GetId().GetId() == "" {
   147  		return errors.Reason("task.id: required").Err()
   148  	}
   149  	if task.GetUpdateId() == 0 {
   150  		return errors.Reason("task.UpdateId: required").Err()
   151  	}
   152  	if err := validateTaskStatus(task.Status, allowPending); err != nil {
   153  		return errors.Annotate(err, "task.Status").Err()
   154  	}
   155  	detailsInKb := float64(len(task.GetDetails().String()) / 1024)
   156  	if detailsInKb > 10 {
   157  		return errors.Reason("task.details is greater than 10 kb").Err()
   158  	}
   159  	return nil
   160  }
   161  
   162  // validateBuildTaskUpdate ensures that the build_id, task, status, and details
   163  // are correctly set be sender.
   164  func validateBuildTaskUpdate(ctx context.Context, req *pb.BuildTaskUpdate) error {
   165  	if req.BuildId == "" {
   166  		return errors.Reason("build_id required").Err()
   167  	}
   168  	return validateTask(req.Task, false)
   169  }
   170  
   171  // validateBuildTask ensures that the taskID provided in the request matches
   172  // the taskID that is stored in the build model. If there is no task associated
   173  // with the build, an error is returned and the update message is lost.
   174  func validateBuildTask(ctx context.Context, req *pb.BuildTaskUpdate, infra *model.BuildInfra) error {
   175  	switch {
   176  	case infra.Proto.GetBackend() == nil:
   177  		return appstatus.Errorf(codes.NotFound, "Build %s does not support task backend", req.BuildId)
   178  	case infra.Proto.Backend.GetTask().GetId().GetId() == "":
   179  		return appstatus.Errorf(codes.NotFound, "No task is associated with the build. Cannot update.")
   180  	case infra.Proto.Backend.Task.Id.GetTarget() != req.Task.Id.GetTarget() || (infra.Proto.Backend.Task.Id.GetId() != "" && infra.Proto.Backend.Task.Id.GetId() != req.Task.Id.GetId()):
   181  		return errors.Reason("TaskID in request does not match TaskID associated with build").Err()
   182  	}
   183  	if protoutil.IsEnded(infra.Proto.Backend.Task.Status) {
   184  		return appstatus.Errorf(codes.FailedPrecondition, "cannot update an ended task")
   185  	}
   186  	return nil
   187  }
   188  
   189  func prepareUpdate(ctx context.Context, build *model.Build, infra *model.BuildInfra, task *pb.Task) ([]any, error) {
   190  	if task.UpdateId <= infra.Proto.Backend.Task.UpdateId {
   191  		// Returning nil since there is no work to do here.
   192  		// The task in the request is outdated.
   193  		return nil, nil
   194  	}
   195  	// Required fields to change
   196  	now := clock.Now(ctx)
   197  	build.Proto.UpdateTime = timestamppb.New(now)
   198  	proto.Merge(infra.Proto.Backend.Task, task)
   199  
   200  	toSave := []any{build, infra}
   201  
   202  	bs, steps, err := updateBuildStatusOnTaskStatusChange(ctx, build, nil, &buildstatus.StatusWithDetails{Status: task.Status}, now)
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  	if bs != nil {
   207  		toSave = append(toSave, bs)
   208  	}
   209  	if steps != nil {
   210  		toSave = append(toSave, steps)
   211  	}
   212  	return toSave, nil
   213  }
   214  
   215  func updateTaskEntity(ctx context.Context, req *pb.BuildTaskUpdate, buildID int64) error {
   216  	var build *model.Build
   217  	setBuildToEnd := false
   218  	txErr := datastore.RunInTransaction(ctx, func(ctx context.Context) error {
   219  		entities, err := common.GetBuildEntities(ctx, buildID, model.BuildKind, model.BuildInfraKind)
   220  		if err != nil {
   221  			return errors.Annotate(err, "invalid Build or BuildInfra").Err()
   222  		}
   223  
   224  		build = entities[0].(*model.Build)
   225  		infra := entities[1].(*model.BuildInfra)
   226  
   227  		if protoutil.IsEnded(build.Status) {
   228  			// Cannot update an ended build.
   229  			logging.Infof(ctx, "build %d is ended", build.ID)
   230  			return nil
   231  		}
   232  
   233  		toSave, err := prepareUpdate(ctx, build, infra, req.Task)
   234  		if err != nil {
   235  			return err
   236  		}
   237  
   238  		setBuildToEnd = protoutil.IsEnded(req.Task.Status)
   239  		return datastore.Put(ctx, toSave)
   240  	}, nil)
   241  
   242  	if txErr != nil {
   243  		return txErr
   244  	}
   245  
   246  	if setBuildToEnd {
   247  		metrics.BuildCompleted(ctx, build)
   248  	}
   249  	return nil
   250  }
   251  
   252  // updateBuildTask allows the Backend to preemptively update the
   253  // status of the task (e.g. if it knows that the task has crashed, etc.).
   254  func updateBuildTask(ctx context.Context, req buildTaskUpdate) error {
   255  	buildID, err := strconv.ParseInt(req.GetBuildId(), 10, 64)
   256  	if err != nil {
   257  		return errors.Annotate(err, "bad build id").Err()
   258  	}
   259  	if err := validatePubsubSubscription(ctx, req); err != nil {
   260  		return errors.Annotate(err, "pubsub subscription").Err()
   261  	}
   262  	if err := validateBuildTaskUpdate(ctx, req.BuildTaskUpdate); err != nil {
   263  		return errors.Annotate(err, "invalid BuildTaskUpdate").Err()
   264  	}
   265  	logging.Infof(ctx, "Received an BuildTaskUpdate message for build %q", req.BuildId)
   266  
   267  	// TODO(b/288158829): remove it once the root cause for the Skia failure is found.
   268  	if strings.Contains(req.subscription, "skia") {
   269  		logging.Debugf(ctx, "BuildTaskUpdate.Task: %v", req.BuildTaskUpdate.Task)
   270  	}
   271  
   272  	entities, err := common.GetBuildEntities(ctx, buildID, model.BuildInfraKind)
   273  	if err != nil {
   274  		return errors.Annotate(err, "invalid buildInfra").Err()
   275  	}
   276  	infra := entities[0].(*model.BuildInfra)
   277  
   278  	// Pre-check if the task can be updated before updating it with a transaction.
   279  	// Ensures that the taskID provided in the request matches the taskID that is
   280  	// stored in the build model. If there is no task associated with the build model,
   281  	// an error is returned and the update message is lost.
   282  	err = validateBuildTask(ctx, req.BuildTaskUpdate, infra)
   283  	if err != nil {
   284  		return errors.Annotate(err, "invalid task").Err()
   285  	}
   286  
   287  	err = updateTaskEntity(ctx, req.BuildTaskUpdate, buildID)
   288  	if err != nil {
   289  		if _, isAppStatusErr := appstatus.Get(err); isAppStatusErr {
   290  			return err
   291  		} else {
   292  			return appstatus.Errorf(codes.Internal, "failed to update the build entity: %s", err)
   293  		}
   294  	}
   295  
   296  	return nil
   297  }
   298  
   299  // UpdateBuildTask handles task backend PubSub push messages produced by various
   300  // task backends.
   301  // For a retryable error, it will be tagged with transient.Tag.
   302  func UpdateBuildTask(ctx context.Context, body io.Reader) error {
   303  	req, err := unpackUpdateBuildTaskMsg(ctx, body)
   304  	if err != nil {
   305  		return err
   306  	}
   307  
   308  	// Try not to process same message more than once.
   309  	cache := caching.GlobalCache(ctx, "update-build-task-pubsub-msg-id")
   310  	if cache == nil {
   311  		return errors.Reason("global cache is not found").Tag(transient.Tag).Err()
   312  	}
   313  	msgCached, err := cache.Get(ctx, req.msgID)
   314  	switch {
   315  	case err == caching.ErrCacheMiss: // no-op, continue
   316  	case err != nil:
   317  		return errors.Annotate(err, "failed to read %s from the global cache", req.msgID).Tag(transient.Tag).Err()
   318  	case msgCached != nil:
   319  		logging.Infof(ctx, "seen this message %s before, ignoring", req.msgID)
   320  		return nil
   321  	}
   322  	err = updateBuildTask(ctx, req)
   323  	if err != nil {
   324  		return err
   325  	}
   326  
   327  	return cache.Set(ctx, req.msgID, []byte{1}, 10*time.Minute)
   328  }