go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/luciexe/invoke/subprocess.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package invoke
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"os/exec"
    21  	"sync"
    22  	"sync/atomic"
    23  
    24  	"google.golang.org/protobuf/proto"
    25  	"google.golang.org/protobuf/reflect/protoreflect"
    26  	"google.golang.org/protobuf/types/known/timestamppb"
    27  
    28  	"go.chromium.org/luci/common/clock"
    29  	"go.chromium.org/luci/common/data/stringset"
    30  	"go.chromium.org/luci/common/errors"
    31  	"go.chromium.org/luci/common/logging"
    32  	"go.chromium.org/luci/lucictx"
    33  	"go.chromium.org/luci/luciexe"
    34  
    35  	bbpb "go.chromium.org/luci/buildbucket/proto"
    36  )
    37  
// Subprocess represents a running luciexe.
//
// Instances are created by Start; call Wait to collect the result.
type Subprocess struct {
	// Step is taken from the rationalized launch options in Start. Callers of
	// Start are expected to append it to their current Build state.
	Step        *bbpb.Step
	// collectPath is the path handed to luciexe.ReadBuildFile by Wait to read
	// the final Build.
	collectPath string

	// ctx is the context given to Start; Wait inspects ctx.Err() to describe
	// why the subprocess stopped.
	ctx context.Context
	// cmd is the started luciexe process.
	cmd *exec.Cmd

	// closeChannels is closed by Wait to tell the background goroutines started
	// in Start that the subprocess is finished (triggering stdout/stderr close).
	closeChannels chan<- struct{}
	// allClosed yields the combined error from closing stdout and stderr.
	allClosed     <-chan error

	// waitOnce guards the body of Wait so that it runs at most once; build and
	// err hold the cached results returned by every call to Wait.
	waitOnce           sync.Once
	build              *bbpb.Build
	err                errors.MultiError
	// firstDeadlineEvent records the event received from
	// lucictx.SoftDeadlineDone, if any arrived before Wait closed closeChannels.
	firstDeadlineEvent atomic.Value // stores lucictx.DeadlineEvent
}
    54  
    55  // Start launches a binary implementing the luciexe protocol and returns
    56  // immediately with a *Subprocess.
    57  //
    58  // Args:
    59  //   - ctx will be used for deadlines/cancellation of the started luciexe.
    60  //   - luciexeArgs[0] must be the full absolute path to the luciexe binary.
    61  //   - input must be the Build message you wish to pass to the luciexe binary.
    62  //   - opts is optional (may be nil to take all defaults)
    63  //
    64  // Callers MUST call Wait and/or cancel the context or this will leak handles
    65  // for the process' stdout/stderr.
    66  //
    67  // This assumes that the current process is already operating within a "host
    68  // application" environment. See "go.chromium.org/luci/luciexe" for details.
    69  //
    70  // The caller SHOULD immediately take Subprocess.Step, append it to the current
    71  // Build state, and send that (e.g. using `exe.BuildSender`). Otherwise this
    72  // luciexe's steps will not show up in the Build.
    73  func Start(ctx context.Context, luciexeArgs []string, input *bbpb.Build, opts *Options) (*Subprocess, error) {
    74  	initialBuildData, err := proto.Marshal(mkInitialBuild(ctx, input))
    75  	if err != nil {
    76  		return nil, errors.Annotate(err, "marshalling initial Build").Err()
    77  	}
    78  
    79  	launchOpts, _, err := opts.rationalize(ctx)
    80  	if err != nil {
    81  		return nil, errors.Annotate(err, "normalizing options").Err()
    82  	}
    83  
    84  	closeChannels := make(chan struct{})
    85  	allClosed := make(chan error)
    86  	go func() {
    87  		select {
    88  		case <-ctx.Done():
    89  		case <-closeChannels:
    90  		}
    91  		err := errors.NewLazyMultiError(2)
    92  		err.Assign(0, errors.Annotate(launchOpts.stdout.Close(), "closing stdout").Err())
    93  		err.Assign(1, errors.Annotate(launchOpts.stderr.Close(), "closing stderr").Err())
    94  		allClosed <- err.Get()
    95  	}()
    96  
    97  	args := make([]string, 0, len(luciexeArgs)+len(launchOpts.args)-1)
    98  	args = append(args, luciexeArgs[1:]...)
    99  	args = append(args, launchOpts.args...)
   100  
   101  	cmd := exec.CommandContext(ctx, luciexeArgs[0], args...)
   102  	cmd.Env = launchOpts.env.Sorted()
   103  	cmd.Dir = launchOpts.workDir
   104  	cmd.Stdin = bytes.NewBuffer(initialBuildData)
   105  	cmd.Stdout = launchOpts.stdout
   106  	cmd.Stderr = launchOpts.stderr
   107  	setSysProcAttr(cmd)
   108  
   109  	// NOTE: Technically this is racy; if `ctx` expires immediately after we check
   110  	// this, then we'll return no error, but CommandContext will kill the process
   111  	// straight away.
   112  	//
   113  	// However, in tests, when you've misconfigured the Deadline on ctx (e.g.
   114  	// using a fake clock), this check is generally not racy, and can provide
   115  	// a very valuable hint that's clearer than getting an error from Wait().
   116  	if err := ctx.Err(); err != nil {
   117  		// clean up stdout/stderr
   118  		close(closeChannels)
   119  		<-allClosed
   120  		return nil, errors.Annotate(err, "prior to starting subprocess").Err()
   121  	}
   122  
   123  	if err := cmd.Start(); err != nil {
   124  		// clean up stdout/stderr
   125  		close(closeChannels)
   126  		<-allClosed
   127  		return nil, errors.Annotate(err, "launching luciexe").Err()
   128  	}
   129  
   130  	s := &Subprocess{
   131  		Step:        launchOpts.step,
   132  		collectPath: launchOpts.collectPath,
   133  		ctx:         ctx,
   134  		cmd:         cmd,
   135  
   136  		closeChannels: closeChannels,
   137  		allClosed:     allClosed,
   138  	}
   139  
   140  	if deadlineEvtCh := lucictx.SoftDeadlineDone(ctx); deadlineEvtCh != nil {
   141  		go func() {
   142  			select {
   143  			case <-closeChannels:
   144  				// luciexe subprocess exits normally
   145  			case evt := <-deadlineEvtCh:
   146  				s.firstDeadlineEvent.Store(evt)
   147  				logging.Warningf(ctx, "got SoftDeadline event %s", evt)
   148  
   149  				if evt == lucictx.InterruptEvent || evt == lucictx.TimeoutEvent {
   150  					logging.Infof(ctx, "sending Terminate")
   151  					if err := s.terminate(); err != nil {
   152  						logging.Errorf(ctx, "failed to terminate luciexe subprocess, reason: %s", err)
   153  					}
   154  				}
   155  				// if evt == lucictx.ClosureEvent, it means that ctx.Done() is closed,
   156  				// which means that CommandContext has already sent Kill to the process.
   157  			}
   158  		}()
   159  	}
   160  	return s, nil
   161  }
   162  
   163  // Wait waits for the subprocess to terminate.
   164  //
   165  // If Options.CollectOutput (default: false) was specified, this will return the
   166  // final Build message, as reported by the luciexe.
   167  //
   168  // In all cases, finalBuild.StatusDetails will indicate if this Subprocess
   169  // instructed the luciexe to stop via timeout from deadlineEvtCh passed to Start.
   170  //
   171  // If you wish to cancel the subprocess (e.g. due to a timeout or deadline),
   172  // make sure to pass a cancelable/deadline context to Start().
   173  //
   174  // Calling this multiple times is OK; it will return the same values every time.
   175  func (s *Subprocess) Wait() (finalBuild *bbpb.Build, err error) {
   176  	s.waitOnce.Do(func() {
   177  		defer func() {
   178  			if s.build == nil {
   179  				s.build = &bbpb.Build{}
   180  			}
   181  			// If our process saw a timeout or we think we're in the grace period now,
   182  			// then we indicate that here.
   183  			if s.firstDeadlineEvent.Load() == lucictx.TimeoutEvent {
   184  				proto.Merge(s.build, &bbpb.Build{
   185  					StatusDetails: &bbpb.StatusDetails{
   186  						Timeout: &bbpb.StatusDetails_Timeout{},
   187  					},
   188  				})
   189  			}
   190  		}()
   191  
   192  		defer func() {
   193  			var errMsg string
   194  
   195  			// We need to check both evt and ctxErr since they can race.
   196  			ctxErr := s.ctx.Err()
   197  			evt := s.firstDeadlineEvent.Load()
   198  			switch {
   199  			case evt == lucictx.InterruptEvent:
   200  				errMsg = "luciexe process is interrupted"
   201  			case evt == lucictx.TimeoutEvent || ctxErr == context.DeadlineExceeded:
   202  				errMsg = "luciexe process timed out"
   203  			case evt == lucictx.ClosureEvent || ctxErr == context.Canceled:
   204  				errMsg = "luciexe process's context is cancelled"
   205  			}
   206  
   207  			if errMsg != "" {
   208  				s.err.MaybeAdd(errors.New(errMsg))
   209  			}
   210  		}()
   211  		// No matter what, we want to close stdout/stderr; if none of the other
   212  		// return values have set `err`, it will be set to the result of closing
   213  		// stdout/stderr.
   214  		defer func() {
   215  			close(s.closeChannels)
   216  			s.err.MaybeAdd(<-s.allClosed)
   217  		}()
   218  
   219  		err := s.cmd.Wait()
   220  		s.err.MaybeAdd(errors.Annotate(err, "waiting for luciexe").Err())
   221  
   222  		// Even if the Wait fails (e.g. process returns non-0 exit code, or other
   223  		// issue), still try to read the build output.
   224  		s.build, err = luciexe.ReadBuildFile(s.collectPath)
   225  		s.err.MaybeAdd(err)
   226  	})
   227  	return s.build, s.err.AsError()
   228  }
   229  
   230  // fieldsToClear are a set of fields that MUST be cleared in the initial build
   231  // to luciexe.
   232  var fieldsToClear = stringset.NewFromSlice(
   233  	"end_time",
   234  	"status_details",
   235  	"summary_markdown",
   236  	"steps",
   237  	"output",
   238  	"update_time",
   239  )
   240  
   241  func mkInitialBuild(ctx context.Context, input *bbpb.Build) *bbpb.Build {
   242  	ib := &bbpb.Build{}
   243  	ibr := ib.ProtoReflect()
   244  	input.ProtoReflect().Range(func(field protoreflect.FieldDescriptor, val protoreflect.Value) bool {
   245  		if !fieldsToClear.Has(string(field.Name())) {
   246  			ibr.Set(field, val)
   247  		}
   248  		return true
   249  	})
   250  	now := clock.Now(ctx)
   251  	if ib.CreateTime == nil {
   252  		ib.CreateTime = timestamppb.New(now)
   253  	}
   254  	if ib.StartTime == nil {
   255  		ib.StartTime = timestamppb.New(now)
   256  	}
   257  	ib.Status = bbpb.Status_STARTED
   258  	return ib
   259  }