go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/luciexe/invoke/subprocess.go (about) 1 // Copyright 2019 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package invoke 16 17 import ( 18 "bytes" 19 "context" 20 "os/exec" 21 "sync" 22 "sync/atomic" 23 24 "google.golang.org/protobuf/proto" 25 "google.golang.org/protobuf/reflect/protoreflect" 26 "google.golang.org/protobuf/types/known/timestamppb" 27 28 "go.chromium.org/luci/common/clock" 29 "go.chromium.org/luci/common/data/stringset" 30 "go.chromium.org/luci/common/errors" 31 "go.chromium.org/luci/common/logging" 32 "go.chromium.org/luci/lucictx" 33 "go.chromium.org/luci/luciexe" 34 35 bbpb "go.chromium.org/luci/buildbucket/proto" 36 ) 37 38 // Subprocess represents a running luciexe. 39 type Subprocess struct { 40 Step *bbpb.Step 41 collectPath string 42 43 ctx context.Context 44 cmd *exec.Cmd 45 46 closeChannels chan<- struct{} 47 allClosed <-chan error 48 49 waitOnce sync.Once 50 build *bbpb.Build 51 err errors.MultiError 52 firstDeadlineEvent atomic.Value // stores lucictx.DeadlineEvent 53 } 54 55 // Start launches a binary implementing the luciexe protocol and returns 56 // immediately with a *Subprocess. 57 // 58 // Args: 59 // - ctx will be used for deadlines/cancellation of the started luciexe. 60 // - luciexeArgs[0] must be the full absolute path to the luciexe binary. 61 // - input must be the Build message you wish to pass to the luciexe binary. 62 // - opts is optional (may be nil to take all defaults) 63 // 64 // Callers MUST call Wait and/or cancel the context or this will leak handles 65 // for the process' stdout/stderr. 66 // 67 // This assumes that the current process is already operating within a "host 68 // application" environment. See "go.chromium.org/luci/luciexe" for details. 69 // 70 // The caller SHOULD immediately take Subprocess.Step, append it to the current 71 // Build state, and send that (e.g. using `exe.BuildSender`). Otherwise this 72 // luciexe's steps will not show up in the Build. 73 func Start(ctx context.Context, luciexeArgs []string, input *bbpb.Build, opts *Options) (*Subprocess, error) { 74 initialBuildData, err := proto.Marshal(mkInitialBuild(ctx, input)) 75 if err != nil { 76 return nil, errors.Annotate(err, "marshalling initial Build").Err() 77 } 78 79 launchOpts, _, err := opts.rationalize(ctx) 80 if err != nil { 81 return nil, errors.Annotate(err, "normalizing options").Err() 82 } 83 84 closeChannels := make(chan struct{}) 85 allClosed := make(chan error) 86 go func() { 87 select { 88 case <-ctx.Done(): 89 case <-closeChannels: 90 } 91 err := errors.NewLazyMultiError(2) 92 err.Assign(0, errors.Annotate(launchOpts.stdout.Close(), "closing stdout").Err()) 93 err.Assign(1, errors.Annotate(launchOpts.stderr.Close(), "closing stderr").Err()) 94 allClosed <- err.Get() 95 }() 96 97 args := make([]string, 0, len(luciexeArgs)+len(launchOpts.args)-1) 98 args = append(args, luciexeArgs[1:]...) 99 args = append(args, launchOpts.args...) 100 101 cmd := exec.CommandContext(ctx, luciexeArgs[0], args...) 102 cmd.Env = launchOpts.env.Sorted() 103 cmd.Dir = launchOpts.workDir 104 cmd.Stdin = bytes.NewBuffer(initialBuildData) 105 cmd.Stdout = launchOpts.stdout 106 cmd.Stderr = launchOpts.stderr 107 setSysProcAttr(cmd) 108 109 // NOTE: Technically this is racy; if `ctx` expires immediately after we check 110 // this, then we'll return no error, but CommandContext will kill the process 111 // straight away. 112 // 113 // However, in tests, when you've misconfigured the Deadline on ctx (e.g. 114 // using a fake clock), this check is generally not racy, and can provide 115 // a very valuable hint that's clearer than getting an error from Wait(). 116 if err := ctx.Err(); err != nil { 117 // clean up stdout/stderr 118 close(closeChannels) 119 <-allClosed 120 return nil, errors.Annotate(err, "prior to starting subprocess").Err() 121 } 122 123 if err := cmd.Start(); err != nil { 124 // clean up stdout/stderr 125 close(closeChannels) 126 <-allClosed 127 return nil, errors.Annotate(err, "launching luciexe").Err() 128 } 129 130 s := &Subprocess{ 131 Step: launchOpts.step, 132 collectPath: launchOpts.collectPath, 133 ctx: ctx, 134 cmd: cmd, 135 136 closeChannels: closeChannels, 137 allClosed: allClosed, 138 } 139 140 if deadlineEvtCh := lucictx.SoftDeadlineDone(ctx); deadlineEvtCh != nil { 141 go func() { 142 select { 143 case <-closeChannels: 144 // luciexe subprocess exits normally 145 case evt := <-deadlineEvtCh: 146 s.firstDeadlineEvent.Store(evt) 147 logging.Warningf(ctx, "got SoftDeadline event %s", evt) 148 149 if evt == lucictx.InterruptEvent || evt == lucictx.TimeoutEvent { 150 logging.Infof(ctx, "sending Terminate") 151 if err := s.terminate(); err != nil { 152 logging.Errorf(ctx, "failed to terminate luciexe subprocess, reason: %s", err) 153 } 154 } 155 // if evt == lucictx.ClosureEvent, it means that ctx.Done() is closed, 156 // which means that CommandContext has already sent Kill to the process. 157 } 158 }() 159 } 160 return s, nil 161 } 162 163 // Wait waits for the subprocess to terminate. 164 // 165 // If Options.CollectOutput (default: false) was specified, this will return the 166 // final Build message, as reported by the luciexe. 167 // 168 // In all cases, finalBuild.StatusDetails will indicate if this Subprocess 169 // instructed the luciexe to stop via timeout from deadlineEvtCh passed to Start. 170 // 171 // If you wish to cancel the subprocess (e.g. due to a timeout or deadline), 172 // make sure to pass a cancelable/deadline context to Start(). 173 // 174 // Calling this multiple times is OK; it will return the same values every time. 175 func (s *Subprocess) Wait() (finalBuild *bbpb.Build, err error) { 176 s.waitOnce.Do(func() { 177 defer func() { 178 if s.build == nil { 179 s.build = &bbpb.Build{} 180 } 181 // If our process saw a timeout or we think we're in the grace period now, 182 // then we indicate that here. 183 if s.firstDeadlineEvent.Load() == lucictx.TimeoutEvent { 184 proto.Merge(s.build, &bbpb.Build{ 185 StatusDetails: &bbpb.StatusDetails{ 186 Timeout: &bbpb.StatusDetails_Timeout{}, 187 }, 188 }) 189 } 190 }() 191 192 defer func() { 193 var errMsg string 194 195 // We need to check both evt and ctxErr since they can race. 196 ctxErr := s.ctx.Err() 197 evt := s.firstDeadlineEvent.Load() 198 switch { 199 case evt == lucictx.InterruptEvent: 200 errMsg = "luciexe process is interrupted" 201 case evt == lucictx.TimeoutEvent || ctxErr == context.DeadlineExceeded: 202 errMsg = "luciexe process timed out" 203 case evt == lucictx.ClosureEvent || ctxErr == context.Canceled: 204 errMsg = "luciexe process's context is cancelled" 205 } 206 207 if errMsg != "" { 208 s.err.MaybeAdd(errors.New(errMsg)) 209 } 210 }() 211 // No matter what, we want to close stdout/stderr; if none of the other 212 // return values have set `err`, it will be set to the result of closing 213 // stdout/stderr. 214 defer func() { 215 close(s.closeChannels) 216 s.err.MaybeAdd(<-s.allClosed) 217 }() 218 219 err := s.cmd.Wait() 220 s.err.MaybeAdd(errors.Annotate(err, "waiting for luciexe").Err()) 221 222 // Even if the Wait fails (e.g. process returns non-0 exit code, or other 223 // issue), still try to read the build output. 224 s.build, err = luciexe.ReadBuildFile(s.collectPath) 225 s.err.MaybeAdd(err) 226 }) 227 return s.build, s.err.AsError() 228 } 229 230 // fieldsToClear are a set of fields that MUST be cleared in the initial build 231 // to luciexe. 232 var fieldsToClear = stringset.NewFromSlice( 233 "end_time", 234 "status_details", 235 "summary_markdown", 236 "steps", 237 "output", 238 "update_time", 239 ) 240 241 func mkInitialBuild(ctx context.Context, input *bbpb.Build) *bbpb.Build { 242 ib := &bbpb.Build{} 243 ibr := ib.ProtoReflect() 244 input.ProtoReflect().Range(func(field protoreflect.FieldDescriptor, val protoreflect.Value) bool { 245 if !fieldsToClear.Has(string(field.Name())) { 246 ibr.Set(field, val) 247 } 248 return true 249 }) 250 now := clock.Now(ctx) 251 if ib.CreateTime == nil { 252 ib.CreateTime = timestamppb.New(now) 253 } 254 if ib.StartTime == nil { 255 ib.StartTime = timestamppb.New(now) 256 } 257 ib.Status = bbpb.Status_STARTED 258 return ib 259 }