go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/luciexe/host/host.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package host implements the 'Host Application' portion of the luciexe
    16  // protocol.
    17  //
    18  // It manages a local Logdog Butler service, and also runs all LUCI Auth related
    19  // daemons. It intercepts and interprets build.proto streams within the Butler
    20  // context, merging them as necessary.
    21  package host
    22  
    23  import (
    24  	"context"
    25  	"strings"
    26  	"time"
    27  
    28  	bbpb "go.chromium.org/luci/buildbucket/proto"
    29  	"go.chromium.org/luci/common/clock"
    30  	"go.chromium.org/luci/common/logging"
    31  	"go.chromium.org/luci/lucictx"
    32  )
    33  
        // maxLogFlushWaitTime is the upper bound on the wait time computed by
        // calcLogFlushWaitTime, i.e. the longest Run's "flush u/" cleanup step will
        // wait for user logs to drain.
        //
        // NOTE(review): declared as a var rather than a const, presumably so tests
        // can shorten it -- confirm before changing.
    34  var maxLogFlushWaitTime = 30 * time.Second
    35  
    36  // Run executes `cb` in a "luciexe" host environment.
    37  //
    38  // The merged Build objects collected from the host environment (i.e. generated
    39  // within `cb`) will be pushed to the returned "<-chan *bbpb.Build" as `cb`
    40  // executes.
    41  //
    42  // Error during starting up the host environment will be directly returned.
    43  // But `cb` does not return anything to avoid messy semantics because it's run
    44  // in a goroutine; If `cb` could error out, it's recommended to make your own
    45  // `chan error` and have `cb` push to that.
    46  //
    47  // The context should be used for cancellation of the callback function; It's up
    48  // to the `cb` implementation to respect the cancelled context.
    49  //
    50  // When the callback function completes, Run closes the returned channel.
    51  //
    52  // Blocking the returned channel may block the execution of `cb`.
    53  //
    54  // NOTE: This modifies the environment (i.e. with os.Setenv) while `cb` is
    55  // running. Be careful when using Run concurrently with other code. You MUST
    56  // completely drain the returned channel in order to be guaranteed that all
    57  // side-effects of Run have been unwound.
    58  func Run(ctx context.Context, options *Options, cb func(context.Context, Options, <-chan lucictx.DeadlineEvent, func())) (<-chan *bbpb.Build, error) {
    59  	var opts Options
    60  	if options != nil {
    61  		opts = *options
    62  	}
    63  	if err := opts.initialize(); err != nil {
    64  		return nil, err
    65  	}
    66  	logging.Infof(ctx, "starting luciexe host env with: %+v", opts)
    67  
    68  	// cleanup will accumulate all of the cleanup functions as we set up the
    69  	// environment. If an error occurs before we can start the user code (`cb`),
    70  	// the defer below will run them all. Otherwise they'll be transferred to the
    71  	// goroutine.
    72  	var cleanup cleanupSlice
    73  	defer cleanup.run(ctx)
    74  
    75  	cleanupComplete := make(chan struct{})
    76  	cleanup.add("cleanupComplete", func() error {
    77  		close(cleanupComplete)
    78  		return nil
    79  	})
    80  
    81  	// First, capture the entire env to restore it later.
    82  	cleanup.add("restoreEnv", restoreEnv())
    83  
    84  	logging.Infof(ctx, "starting auth services")
    85  	if err := cleanup.concat(startAuthServices(ctx, &opts)); err != nil {
    86  		return nil, err
    87  	}
    88  
    89  	logging.Infof(ctx, "starting butler")
    90  	butler, err := startButler(ctx, &opts)
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  	cleanup.add("butler", func() error {
    95  		butler.Activate()
    96  		return butler.Wait()
    97  	})
    98  
    99  	logging.Infof(ctx, "starting build.proto merging agent")
   100  	agent, err := spyOn(ctx, butler, opts.BaseBuild)
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  	cleanup.add("buildmerge spy", func() error {
   105  		agent.Close()
   106  		logging.Infof(ctx, "waiting for buildmerge spy to finish")
   107  		<-agent.DrainC
   108  		return nil
   109  	})
   110  
   111  	buildCh := make(chan *bbpb.Build)
   112  	go func() {
   113  		defer close(buildCh)
   114  		for build := range agent.MergedBuildC {
   115  			buildCh <- build
   116  		}
   117  		<-cleanupComplete
   118  	}()
   119  
   120  	// Transfer ownership of cleanups to goroutine
   121  	userCleanup := cleanup
   122  	userCleanup.add("flush u/", func() error {
   123  		wt := calcLogFlushWaitTime(ctx)
   124  		cctx, cancel := clock.WithTimeout(ctx, wt)
   125  		defer cancel()
   126  		logging.Infof(ctx, "waiting up to %s for user logs to flush", wt)
   127  		leftovers := butler.DrainNamespace(cctx, agent.UserNamespace)
   128  		if len(leftovers) > 0 {
   129  			builder := strings.Builder{}
   130  			for _, leftover := range leftovers {
   131  				builder.WriteString("\n  ")
   132  				builder.WriteString(string(leftover))
   133  			}
   134  			logging.Errorf(
   135  				ctx, "failed to flush the following logs:\n  %s", builder.String())
   136  		}
   137  		return nil
   138  	})
   139  	userCleanup.add("butler.Activate", func() error {
   140  		butler.Activate()
   141  		return nil
   142  	})
   143  	cleanup = nil
   144  
   145  	// Buildbucket assigns some grace period to the surrounding task which is
   146  	// more than what the user requested in `input.Build.GracePeriod`. We
   147  	// reserve the difference here so the user task only gets what they asked
   148  	// for.
   149  	deadline := lucictx.GetDeadline(ctx)
   150  	toReserve := deadline.GracePeriodDuration() - opts.BaseBuild.GracePeriod.AsDuration()
   151  	logging.Infof(
   152  		ctx, "Reserving %s out of %s of grace_period from LUCI_CONTEXT.",
   153  		toReserve, lucictx.GetDeadline(ctx).GracePeriodDuration())
   154  	dctx, shutdown := lucictx.TrackSoftDeadline(ctx, toReserve)
   155  
   156  	go func() {
   157  		defer userCleanup.run(ctx)
   158  		logging.Infof(ctx, "invoking host environment callback")
   159  
   160  		cb(dctx, opts, lucictx.SoftDeadlineDone(dctx), shutdown)
   161  	}()
   162  
   163  	return buildCh, nil
   164  }
   165  
   166  // If ctx has the deadline set, waitTime is min(half of the remaining time
   167  // towards deadline, `maxLogFlushWaitTime`). Otherwise, waitTime is the same
   168  // as `maxLogFlushWaitTime`.
   169  func calcLogFlushWaitTime(ctx context.Context) time.Duration {
   170  	if deadline, ok := ctx.Deadline(); ok {
   171  		switch waitTime := deadline.Sub(clock.Now(ctx)) / 2; {
   172  		case waitTime < 0:
   173  			return 0
   174  		case waitTime > maxLogFlushWaitTime:
   175  			return maxLogFlushWaitTime
   176  		default:
   177  			return waitTime
   178  		}
   179  	}
   180  	return maxLogFlushWaitTime
   181  }