go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/run/impl/longops/postgerritmessage.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package longops 16 17 import ( 18 "context" 19 "fmt" 20 "strings" 21 "sync" 22 "time" 23 24 "google.golang.org/protobuf/types/known/timestamppb" 25 26 "go.chromium.org/luci/common/clock" 27 "go.chromium.org/luci/common/errors" 28 "go.chromium.org/luci/common/logging" 29 gerritpb "go.chromium.org/luci/common/proto/gerrit" 30 "go.chromium.org/luci/common/retry" 31 "go.chromium.org/luci/common/retry/transient" 32 "go.chromium.org/luci/common/sync/parallel" 33 "go.chromium.org/luci/cv/internal/common" 34 "go.chromium.org/luci/cv/internal/common/lease" 35 "go.chromium.org/luci/cv/internal/gerrit" 36 "go.chromium.org/luci/cv/internal/run" 37 "go.chromium.org/luci/cv/internal/run/eventpb" 38 "go.chromium.org/luci/cv/internal/run/impl/util" 39 ) 40 41 // PostGerritMessageOp posts the given message to gerrit. 42 // 43 // PostGerritMessageOp is a single-use object. 44 type PostGerritMessageOp struct { 45 // All public fields must be set. 46 *Base 47 GFactory gerrit.Factory 48 Env *common.Env 49 50 // These private fields are set internally as implementation details. 51 lock sync.Mutex 52 latestPostedAt time.Time 53 } 54 55 // Do actually posts the message. 56 func (op *PostGerritMessageOp) Do(ctx context.Context) (*eventpb.LongOpCompleted, error) { 57 op.assertCalledOnce() 58 59 if op.IsCancelRequested() { 60 return &eventpb.LongOpCompleted{Status: eventpb.LongOpCompleted_CANCELLED}, nil 61 } 62 63 rcls, err := run.LoadRunCLs(ctx, op.Run.ID, op.Run.CLs) 64 if err != nil { 65 return nil, err 66 } 67 68 if op.Op.GetDeadline() == nil { 69 panic(errors.New("PostGerritMessageOp: missing deadline")) 70 } 71 72 errs := make(errors.MultiError, len(rcls)) 73 poolError := parallel.WorkPool(min(len(rcls), 8), func(work chan<- func() error) { 74 for i, rcl := range rcls { 75 i, rcl := i, rcl 76 work <- func() error { 77 switch posted, err := op.doCL(ctx, rcl); { 78 case err != nil: 79 // all errors will be aggregated across all CLs below. 80 errs[i] = err 81 default: 82 op.lock.Lock() 83 if op.latestPostedAt.IsZero() || op.latestPostedAt.Before(posted) { 84 op.latestPostedAt = posted 85 } 86 op.lock.Unlock() 87 } 88 return nil 89 } 90 } 91 }) 92 if poolError != nil { 93 panic(fmt.Errorf("unexpected WorkPool error %w", poolError)) 94 } 95 96 var hasCancelled, hasFailed bool 97 for i, err := range errs { 98 switch { 99 case err == nil: 100 case errors.Unwrap(err) == errCancelHonored: 101 hasCancelled = true 102 default: 103 hasFailed = true 104 logging.Warningf(ctx, "failed to post gerrit message on CL %d %q: %s", 105 rcls[i].ID, rcls[i].ExternalID, err) 106 } 107 } 108 result := &eventpb.LongOpCompleted{ 109 Result: &eventpb.LongOpCompleted_PostGerritMessage_{ 110 PostGerritMessage: &eventpb.LongOpCompleted_PostGerritMessage{}, 111 }, 112 } 113 switch { 114 case hasFailed: 115 result.Status = eventpb.LongOpCompleted_FAILED 116 case hasCancelled: 117 result.Status = eventpb.LongOpCompleted_CANCELLED 118 default: 119 result.Status = eventpb.LongOpCompleted_SUCCEEDED 120 result.GetPostGerritMessage().Time = timestamppb.New(op.latestPostedAt) 121 } 122 // doCL() retries on transient failures until 30 secs before the op 123 // deadline. If any failure cases, this returns nil in error to prevent 124 // the TQ task from being retried. 125 return result, nil 126 } 127 128 func (op *PostGerritMessageOp) doCL(ctx context.Context, rcl *run.RunCL) (time.Time, error) { 129 ctx = logging.SetField(ctx, "cl", rcl.ID) 130 if rcl.Detail.GetGerrit() == nil { 131 panic(fmt.Errorf("CL %d is not a Gerrit CL", rcl.ID)) 132 } 133 req, err := op.makeSetReviewReq(rcl) 134 if err != nil { 135 return notPosted, err 136 } 137 138 var lastNonDeadlineErr error 139 var postedAt time.Time 140 queryOpts := []gerritpb.QueryOption{gerritpb.QueryOption_MESSAGES} 141 err = retry.Retry(clock.Tag(ctx, common.LaunchRetryClockTag), op.makeRetryFactory(), func() error { 142 if op.IsCancelRequested() { 143 return errCancelHonored 144 } 145 146 var err error 147 switch postedAt, err = util.IsActionTakenOnGerritCL(ctx, op.GFactory, rcl, queryOpts, op.hasGerritMessagePosted); { 148 case postedAt != notPosted: 149 logging.Debugf(ctx, "PostGerritMessageOp: the CL already has this message at %s", postedAt) 150 return nil 151 case err == nil: 152 case errors.Unwrap(err) != context.DeadlineExceeded: 153 lastNonDeadlineErr = err 154 fallthrough 155 default: 156 return errors.Annotate(err, "failed to check if message was already posted").Err() 157 } 158 switch err = util.MutateGerritCL(ctx, op.GFactory, rcl, req, 2*time.Minute, "post-gerrit-message"); { 159 case err == nil: 160 // NOTE: to avoid another round-trip to Gerrit, use the CV time here even 161 // though it isn't the same as what Gerrit recorded. 162 postedAt = clock.Now(ctx).Truncate(time.Second) 163 case errors.Unwrap(err) != context.DeadlineExceeded: 164 lastNonDeadlineErr = err 165 fallthrough 166 default: 167 logging.Debugf(ctx, "PostGerritMessageOp: failed to mutate Gerrit CL: %s", err) 168 } 169 return err 170 }, nil) 171 172 switch { 173 case err == nil: 174 return postedAt, nil 175 case errors.Unwrap(err) == context.DeadlineExceeded && lastNonDeadlineErr != nil: 176 // if the deadline error occurred after retries, then returns the last 177 // error before the deadline error. It should be more informative than 178 // `context deadline exceeded`. 179 return notPosted, lastNonDeadlineErr 180 default: 181 return notPosted, err 182 } 183 } 184 185 // hasGerritMessagePosted returns when the gerrit message was posted on a CL or 186 // zero time. 187 func (op *PostGerritMessageOp) hasGerritMessagePosted(rcl *run.RunCL, ci *gerritpb.ChangeInfo) time.Time { 188 // Look at latest messages first for efficiency, 189 // and skip all messages which are too old. 190 clTriggeredAt := rcl.Trigger.Time.AsTime() 191 msg := strings.TrimSpace(op.Op.GetPostGerritMessage().GetMessage()) 192 for i := len(ci.GetMessages()) - 1; i >= 0; i-- { 193 switch m := ci.GetMessages()[i]; { 194 case m.GetDate().AsTime().Before(clTriggeredAt): 195 return notPosted 196 // Gerrit might prepend some metadata around the posted msg such as the 197 // patchset this is posted on to the msg. We use contains check instead 198 // of equality to work around this. 199 case strings.Contains(m.Message, msg): 200 // This msg has been already posted to gerrit. 201 return m.GetDate().AsTime() 202 } 203 } 204 205 return notPosted 206 } 207 208 func (op *PostGerritMessageOp) makeSetReviewReq(rcl *run.RunCL) (*gerritpb.SetReviewRequest, error) { 209 return &gerritpb.SetReviewRequest{ 210 Project: rcl.Detail.GetGerrit().GetInfo().GetProject(), 211 Number: rcl.Detail.GetGerrit().GetInfo().GetNumber(), 212 RevisionId: rcl.Detail.GetGerrit().GetInfo().GetCurrentRevision(), 213 Tag: op.Run.Mode.GerritMessageTag(), 214 Notify: gerritpb.Notify_NOTIFY_NONE, 215 Message: op.Op.GetPostGerritMessage().GetMessage(), 216 }, nil 217 } 218 219 func (op *PostGerritMessageOp) makeRetryFactory() retry.Factory { 220 return lease.RetryIfLeased(transient.Only(func() retry.Iterator { 221 return &retry.ExponentialBackoff{ 222 Limited: retry.Limited{ 223 Delay: 100 * time.Millisecond, 224 Retries: -1, // unlimited 225 }, 226 Multiplier: 2, 227 MaxDelay: 1 * time.Minute, 228 } 229 })) 230 }