go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/run/impl/longops/postgerritmessage.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package longops
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"strings"
    21  	"sync"
    22  	"time"
    23  
    24  	"google.golang.org/protobuf/types/known/timestamppb"
    25  
    26  	"go.chromium.org/luci/common/clock"
    27  	"go.chromium.org/luci/common/errors"
    28  	"go.chromium.org/luci/common/logging"
    29  	gerritpb "go.chromium.org/luci/common/proto/gerrit"
    30  	"go.chromium.org/luci/common/retry"
    31  	"go.chromium.org/luci/common/retry/transient"
    32  	"go.chromium.org/luci/common/sync/parallel"
    33  	"go.chromium.org/luci/cv/internal/common"
    34  	"go.chromium.org/luci/cv/internal/common/lease"
    35  	"go.chromium.org/luci/cv/internal/gerrit"
    36  	"go.chromium.org/luci/cv/internal/run"
    37  	"go.chromium.org/luci/cv/internal/run/eventpb"
    38  	"go.chromium.org/luci/cv/internal/run/impl/util"
    39  )
    40  
// PostGerritMessageOp posts the given message to gerrit.
//
// The message is posted on every CL of the Run; see Do.
//
// PostGerritMessageOp is a single-use object.
type PostGerritMessageOp struct {
	// All public fields must be set.
	*Base
	// GFactory is handed to the util helpers that query and mutate Gerrit CLs.
	GFactory gerrit.Factory
	// Env is not read by the methods in this file.
	// NOTE(review): presumably kept for parity with other long ops — confirm.
	Env      *common.Env

	// These private fields are set internally as implementation details.

	// lock is held while the per-CL workers in Do update latestPostedAt.
	lock           sync.Mutex
	// latestPostedAt is the most recent time at which the message was posted
	// across all CLs processed by Do; zero until the first CL succeeds.
	latestPostedAt time.Time
}
    54  
    55  // Do actually posts the message.
    56  func (op *PostGerritMessageOp) Do(ctx context.Context) (*eventpb.LongOpCompleted, error) {
    57  	op.assertCalledOnce()
    58  
    59  	if op.IsCancelRequested() {
    60  		return &eventpb.LongOpCompleted{Status: eventpb.LongOpCompleted_CANCELLED}, nil
    61  	}
    62  
    63  	rcls, err := run.LoadRunCLs(ctx, op.Run.ID, op.Run.CLs)
    64  	if err != nil {
    65  		return nil, err
    66  	}
    67  
    68  	if op.Op.GetDeadline() == nil {
    69  		panic(errors.New("PostGerritMessageOp: missing deadline"))
    70  	}
    71  
    72  	errs := make(errors.MultiError, len(rcls))
    73  	poolError := parallel.WorkPool(min(len(rcls), 8), func(work chan<- func() error) {
    74  		for i, rcl := range rcls {
    75  			i, rcl := i, rcl
    76  			work <- func() error {
    77  				switch posted, err := op.doCL(ctx, rcl); {
    78  				case err != nil:
    79  					// all errors will be aggregated across all CLs below.
    80  					errs[i] = err
    81  				default:
    82  					op.lock.Lock()
    83  					if op.latestPostedAt.IsZero() || op.latestPostedAt.Before(posted) {
    84  						op.latestPostedAt = posted
    85  					}
    86  					op.lock.Unlock()
    87  				}
    88  				return nil
    89  			}
    90  		}
    91  	})
    92  	if poolError != nil {
    93  		panic(fmt.Errorf("unexpected WorkPool error %w", poolError))
    94  	}
    95  
    96  	var hasCancelled, hasFailed bool
    97  	for i, err := range errs {
    98  		switch {
    99  		case err == nil:
   100  		case errors.Unwrap(err) == errCancelHonored:
   101  			hasCancelled = true
   102  		default:
   103  			hasFailed = true
   104  			logging.Warningf(ctx, "failed to post gerrit message on CL %d %q: %s",
   105  				rcls[i].ID, rcls[i].ExternalID, err)
   106  		}
   107  	}
   108  	result := &eventpb.LongOpCompleted{
   109  		Result: &eventpb.LongOpCompleted_PostGerritMessage_{
   110  			PostGerritMessage: &eventpb.LongOpCompleted_PostGerritMessage{},
   111  		},
   112  	}
   113  	switch {
   114  	case hasFailed:
   115  		result.Status = eventpb.LongOpCompleted_FAILED
   116  	case hasCancelled:
   117  		result.Status = eventpb.LongOpCompleted_CANCELLED
   118  	default:
   119  		result.Status = eventpb.LongOpCompleted_SUCCEEDED
   120  		result.GetPostGerritMessage().Time = timestamppb.New(op.latestPostedAt)
   121  	}
   122  	// doCL() retries on transient failures until 30 secs before the op
   123  	// deadline. If any failure cases, this returns nil in error to prevent
   124  	// the TQ task from being retried.
   125  	return result, nil
   126  }
   127  
   128  func (op *PostGerritMessageOp) doCL(ctx context.Context, rcl *run.RunCL) (time.Time, error) {
   129  	ctx = logging.SetField(ctx, "cl", rcl.ID)
   130  	if rcl.Detail.GetGerrit() == nil {
   131  		panic(fmt.Errorf("CL %d is not a Gerrit CL", rcl.ID))
   132  	}
   133  	req, err := op.makeSetReviewReq(rcl)
   134  	if err != nil {
   135  		return notPosted, err
   136  	}
   137  
   138  	var lastNonDeadlineErr error
   139  	var postedAt time.Time
   140  	queryOpts := []gerritpb.QueryOption{gerritpb.QueryOption_MESSAGES}
   141  	err = retry.Retry(clock.Tag(ctx, common.LaunchRetryClockTag), op.makeRetryFactory(), func() error {
   142  		if op.IsCancelRequested() {
   143  			return errCancelHonored
   144  		}
   145  
   146  		var err error
   147  		switch postedAt, err = util.IsActionTakenOnGerritCL(ctx, op.GFactory, rcl, queryOpts, op.hasGerritMessagePosted); {
   148  		case postedAt != notPosted:
   149  			logging.Debugf(ctx, "PostGerritMessageOp: the CL already has this message at %s", postedAt)
   150  			return nil
   151  		case err == nil:
   152  		case errors.Unwrap(err) != context.DeadlineExceeded:
   153  			lastNonDeadlineErr = err
   154  			fallthrough
   155  		default:
   156  			return errors.Annotate(err, "failed to check if message was already posted").Err()
   157  		}
   158  		switch err = util.MutateGerritCL(ctx, op.GFactory, rcl, req, 2*time.Minute, "post-gerrit-message"); {
   159  		case err == nil:
   160  			// NOTE: to avoid another round-trip to Gerrit, use the CV time here even
   161  			// though it isn't the same as what Gerrit recorded.
   162  			postedAt = clock.Now(ctx).Truncate(time.Second)
   163  		case errors.Unwrap(err) != context.DeadlineExceeded:
   164  			lastNonDeadlineErr = err
   165  			fallthrough
   166  		default:
   167  			logging.Debugf(ctx, "PostGerritMessageOp: failed to mutate Gerrit CL: %s", err)
   168  		}
   169  		return err
   170  	}, nil)
   171  
   172  	switch {
   173  	case err == nil:
   174  		return postedAt, nil
   175  	case errors.Unwrap(err) == context.DeadlineExceeded && lastNonDeadlineErr != nil:
   176  		// if the deadline error occurred after retries, then returns the last
   177  		// error before the deadline error. It should be more informative than
   178  		// `context deadline exceeded`.
   179  		return notPosted, lastNonDeadlineErr
   180  	default:
   181  		return notPosted, err
   182  	}
   183  }
   184  
   185  // hasGerritMessagePosted returns when the gerrit message was posted on a CL or
   186  // zero time.
   187  func (op *PostGerritMessageOp) hasGerritMessagePosted(rcl *run.RunCL, ci *gerritpb.ChangeInfo) time.Time {
   188  	// Look at latest messages first for efficiency,
   189  	// and skip all messages which are too old.
   190  	clTriggeredAt := rcl.Trigger.Time.AsTime()
   191  	msg := strings.TrimSpace(op.Op.GetPostGerritMessage().GetMessage())
   192  	for i := len(ci.GetMessages()) - 1; i >= 0; i-- {
   193  		switch m := ci.GetMessages()[i]; {
   194  		case m.GetDate().AsTime().Before(clTriggeredAt):
   195  			return notPosted
   196  		// Gerrit might prepend some metadata around the posted msg such as the
   197  		// patchset this is posted on to the msg. We use contains check instead
   198  		// of equality to work around this.
   199  		case strings.Contains(m.Message, msg):
   200  			// This msg has been already posted to gerrit.
   201  			return m.GetDate().AsTime()
   202  		}
   203  	}
   204  
   205  	return notPosted
   206  }
   207  
   208  func (op *PostGerritMessageOp) makeSetReviewReq(rcl *run.RunCL) (*gerritpb.SetReviewRequest, error) {
   209  	return &gerritpb.SetReviewRequest{
   210  		Project:    rcl.Detail.GetGerrit().GetInfo().GetProject(),
   211  		Number:     rcl.Detail.GetGerrit().GetInfo().GetNumber(),
   212  		RevisionId: rcl.Detail.GetGerrit().GetInfo().GetCurrentRevision(),
   213  		Tag:        op.Run.Mode.GerritMessageTag(),
   214  		Notify:     gerritpb.Notify_NOTIFY_NONE,
   215  		Message:    op.Op.GetPostGerritMessage().GetMessage(),
   216  	}, nil
   217  }
   218  
   219  func (op *PostGerritMessageOp) makeRetryFactory() retry.Factory {
   220  	return lease.RetryIfLeased(transient.Only(func() retry.Iterator {
   221  		return &retry.ExponentialBackoff{
   222  			Limited: retry.Limited{
   223  				Delay:   100 * time.Millisecond,
   224  				Retries: -1, // unlimited
   225  			},
   226  			Multiplier: 2,
   227  			MaxDelay:   1 * time.Minute,
   228  		}
   229  	}))
   230  }