github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/pkg/inputprocessor/inputprocessor.go (about)

     1  // Copyright 2023 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package inputprocessor is used to find non-obvious inputs for action types like C++ compile,
    16  // Java compile, C++ link etc.
    17  package inputprocessor
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"runtime"
    24  	"sync"
    25  	"time"
    26  
    27  	ppb "github.com/bazelbuild/reclient/api/proxy"
    28  	"github.com/bazelbuild/reclient/internal/pkg/cppdependencyscanner"
    29  	"github.com/bazelbuild/reclient/internal/pkg/features"
    30  	iproc "github.com/bazelbuild/reclient/internal/pkg/inputprocessor"
    31  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/archive"
    32  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/clangcl"
    33  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/clanglink"
    34  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/clanglint"
    35  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/cppcompile"
    36  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/d8"
    37  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/headerabi"
    38  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/javac"
    39  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/metalava"
    40  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/nacl"
    41  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/r8"
    42  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/tool"
    43  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/typescript"
    44  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/depscache"
    45  	"github.com/bazelbuild/reclient/internal/pkg/labels"
    46  	"github.com/bazelbuild/reclient/internal/pkg/localresources"
    47  	"github.com/bazelbuild/reclient/internal/pkg/logger"
    48  	"github.com/bazelbuild/reclient/internal/pkg/logger/event"
    49  
    50  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/cache"
    51  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/command"
    52  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/filemetadata"
    53  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/outerr"
    54  	"golang.org/x/sync/semaphore"
    55  
    56  	"google.golang.org/grpc/codes"
    57  	"google.golang.org/grpc/status"
    58  )
    59  
    60  const (
    61  	// shallowLabel is the label key to indicate whether to use the shallow
    62  	// input processor for the command.
    63  	shallowLabel = "shallow"
    64  )
    65  
    66  var (
    67  	// ErrIPTimeout is an error returned when IP action times out
    68  	ErrIPTimeout = errors.New("Input Processor timeout")
    69  	// shallowFallbackConfig denotes whether a specific action type, identified by a set of
    70  	// labels, can fallback to shallow input processor or not, when their
    71  	// primary input processor fails.
    72  	// The default behaviour is to fallback to shallow mode if a set of labels are NOT present
    73  	// in the following config.
    74  	// If a CPP compile and remote execution strategy is specified, shallow fallback will be disabled.
    75  	shallowFallbackConfig = map[labels.Labels]map[ppb.ExecutionStrategy_Value]bool{
    76  		labels.HeaderAbiDumpLabels(): {ppb.ExecutionStrategy_UNSPECIFIED: false},
    77  		labels.ClangLintLabels():     {ppb.ExecutionStrategy_UNSPECIFIED: false},
    78  		labels.ClangCppLabels(): {ppb.ExecutionStrategy_REMOTE: false,
    79  			ppb.ExecutionStrategy_REMOTE_LOCAL_FALLBACK: false},
    80  		labels.ClangCLCppLabels(): {ppb.ExecutionStrategy_REMOTE: false,
    81  			ppb.ExecutionStrategy_REMOTE_LOCAL_FALLBACK: false},
    82  		labels.NaClLabels(): {ppb.ExecutionStrategy_REMOTE: false,
    83  			ppb.ExecutionStrategy_REMOTE_LOCAL_FALLBACK: false},
    84  	}
    85  )
    86  
    87  // Executor can run commands and retrieve their outputs.
    88  type Executor interface {
    89  	Execute(ctx context.Context, cmd *command.Command) (string, string, error)
    90  	ExecuteInBackground(ctx context.Context, cmd *command.Command, oe outerr.OutErr, ch chan *command.Result) error
    91  }
    92  
    93  // InputProcessor retrieves the input spec for commands.
    94  type InputProcessor struct {
    95  	cppDepScanner   cppcompile.CPPDependencyScanner
    96  	cppLinkDeepScan bool
    97  	depScanTimeout  time.Duration
    98  	executor        Executor
    99  	resMgr          *localresources.Manager
   100  	fmc             filemetadata.Cache
   101  	depsCache       *depscache.Cache
   102  	nfc             cache.SingleFlight
   103  	fsc             cache.SingleFlight
   104  	slots           *semaphore.Weighted
   105  
   106  	// logger is a logger for input processor events that span a single reproxy run.
   107  	logger *logger.Logger
   108  }
   109  
   110  type depsCacheMode int
   111  
   112  const (
   113  	noDepsCache depsCacheMode = iota
   114  	reproxyDepsCache
   115  	gomaDepsCache
   116  )
   117  
   118  // Options adds extra control for the input processor
   119  type Options struct {
   120  	EnableDepsCache    bool
   121  	CacheDir           string
   122  	LogDir             string
   123  	DepsCacheMaxMb     int
   124  	CppLinkDeepScan    bool
   125  	IPTimeout          time.Duration
   126  	DepsScannerAddress string
   127  	ProxyServerAddress string
   128  }
   129  
   130  // TODO(b/169675226): Replace usage with sync.OnceFunc when we upgrade to go 1.21
   131  func onceFunc(f func()) func() {
   132  	var once sync.Once
   133  	return func() {
   134  		once.Do(f)
   135  	}
   136  }
   137  
   138  // NewInputProcessor creates a new input processor.
   139  // Its resources are bound by the local resources manager.
   140  func NewInputProcessor(ctx context.Context, executor Executor, resMgr *localresources.Manager, fmc filemetadata.Cache, l *logger.Logger, opt *Options) (*InputProcessor, func(), error) {
   141  	useDepsCache := opt.CacheDir != "" && opt.EnableDepsCache
   142  	depScanner, err := cppdependencyscanner.New(ctx, executor, opt.CacheDir, opt.LogDir, opt.DepsCacheMaxMb, useDepsCache && !features.GetConfig().ExperimentalGomaDepsCache, opt.DepsScannerAddress, opt.ProxyServerAddress)
   143  	if err != nil {
   144  		return nil, func() {}, err
   145  	}
   146  	ip := newInputProcessor(depScanner, opt.IPTimeout, opt.CppLinkDeepScan, executor, resMgr, fmc, l)
   147  	cleanup := func() {}
   148  	if useDepsCache && (!depScanner.Capabilities().GetCaching() || features.GetConfig().ExperimentalGomaDepsCache) {
   149  		ip.depsCache, cleanup = newDepsCache(fmc, opt.CacheDir, l)
   150  	}
   151  	return ip, onceFunc(func() {
   152  		cleanup()
   153  		depScanner.Close()
   154  	}), nil
   155  }
   156  
   157  // NewInputProcessorWithStubDependencyScanner creates a new input processor with given parallelism
   158  // and a stub CPP dependency scanner. It is meant to be only used for testing.
   159  func NewInputProcessorWithStubDependencyScanner(ds cppcompile.CPPDependencyScanner, cppLinkDeepScan bool, executor Executor, resMgr *localresources.Manager) *InputProcessor {
   160  	return newInputProcessor(ds, 0, cppLinkDeepScan, executor, resMgr, nil, nil)
   161  }
   162  
   163  func newInputProcessor(ds cppcompile.CPPDependencyScanner, depScanTimeout time.Duration, cppLinkDeepScan bool, executor Executor, resMgr *localresources.Manager, fmc filemetadata.Cache, l *logger.Logger) *InputProcessor {
   164  	return &InputProcessor{
   165  		cppDepScanner:   ds,
   166  		cppLinkDeepScan: cppLinkDeepScan,
   167  		depScanTimeout:  depScanTimeout,
   168  		executor:        executor,
   169  		resMgr:          resMgr,
   170  		fmc:             fmc,
   171  		slots:           semaphore.NewWeighted(int64(runtime.NumCPU())),
   172  		logger:          l,
   173  	}
   174  }
   175  
   176  func newDepsCache(fmc filemetadata.Cache, depsCacheDir string, l *logger.Logger) (*depscache.Cache, func()) {
   177  	dc := depscache.New(fmc)
   178  	dc.Logger = l
   179  	go dc.LoadFromDir(depsCacheDir)
   180  	return dc, func() {
   181  		dc.WriteToDisk(depsCacheDir)
   182  	}
   183  }
   184  
   185  // ProcessInputsOptions encapsulates options for a ProcessInputs call.
   186  type ProcessInputsOptions struct {
   187  	// ExecutionID is the ID of the action.
   188  	ExecutionID string
   189  	// Cmd is the list of args.
   190  	Cmd []string
   191  	// WorkingDir is the working directory of the action.
   192  	WorkingDir string
   193  	// ExecRoot is the exec root of the action.
   194  	ExecRoot string
   195  	// Inputs is the InputSpec passed explicitly with the action request.
   196  	Inputs *command.InputSpec
   197  	// Labels is a map of label keys to values.
   198  	Labels map[string]string
   199  	// ToolchainInputs is a list of toolchain inputs in addition to the toolchains
   200  	// inferred from the command.
   201  	ToolchainInputs []string
   202  
   203  	// WindowsCross indicates whether use linux worker for Windows.
   204  	WindowsCross bool
   205  
   206  	// ExecStrategy indicates which execution strategy was used
   207  	ExecStrategy ppb.ExecutionStrategy_Value
   208  
   209  	// CmdEnvironment captures the environment of the command to be executed, in the form "key=value" strings.
   210  	CmdEnvironment []string
   211  }
   212  
   213  // CommandIO encapsulates the inputs and outputs a command. All paths are relative to the
   214  // exec root.
   215  type CommandIO struct {
   216  	// InputSpec holds information about files and environment variables required to
   217  	// run the command.
   218  	InputSpec *command.InputSpec
   219  	// OutputFiles is a list of output files produced by the command.
   220  	OutputFiles []string
   221  	// OutputDirectories is a list of output directories produced by the command.
   222  	OutputDirectories []string
   223  	// EmiitedDependencyFile is the name of the dependency file produced by the command.
   224  	EmittedDependencyFile string
   225  	// UsedShallowMode indicates whether the shallow input processor was used to
   226  	// determine inputs.
   227  	UsedShallowMode bool
   228  }
   229  
   230  // ProcessInputs receives a valid action command and returns the set of inputs needed to
   231  // successfully run the command remotely. Also returns a struct of parsed flags and the
   232  // .d file produced by the command if exists.
   233  func (p *InputProcessor) ProcessInputs(ctx context.Context, opts *ProcessInputsOptions, rec *logger.LogRecord) (*CommandIO, error) {
   234  	st := time.Now()
   235  	defer rec.RecordEventTime(event.ProcessInputs, st)
   236  	lbls := labels.FromMap(opts.Labels)
   237  
   238  	// We set shallow fallback based on the labels and execution strategy.
   239  	shallowFallback := true
   240  	if m, ok := shallowFallbackConfig[lbls]; ok {
   241  		if s, ok := m[opts.ExecStrategy]; ok {
   242  			shallowFallback = s
   243  		}
   244  	}
   245  	// The code here is a temporary hack to make CLs easier to review. The entire input
   246  	// processor package under pkg/ should be removed and replaced with
   247  	// internal/pkg/inputprocessor, where there will be only one definition of
   248  	// ProcessInputsOptions and CommandIO.
   249  	options := iproc.Options{
   250  		ExecutionID:     opts.ExecutionID,
   251  		Cmd:             opts.Cmd,
   252  		WorkingDir:      opts.WorkingDir,
   253  		ExecRoot:        opts.ExecRoot,
   254  		Inputs:          opts.Inputs,
   255  		Labels:          opts.Labels,
   256  		ToolchainInputs: opts.ToolchainInputs,
   257  		ShallowFallback: shallowFallback,
   258  		WindowsCross:    opts.WindowsCross,
   259  	}
   260  	var pp iproc.Preprocessor
   261  	bp := &iproc.BasePreprocessor{Ctx: ctx,
   262  		Executor:            p.executor,
   263  		ResourceManager:     p.resMgr,
   264  		FileMetadataCache:   p.fmc,
   265  		NormalizedFileCache: &p.nfc,
   266  		FileStatCache:       &p.fsc,
   267  	}
   268  	cp := &cppcompile.Preprocessor{
   269  		BasePreprocessor: bp,
   270  		CPPDepScanner:    p.cppDepScanner,
   271  		Rec:              rec,
   272  		DepsCache:        p.depsCache,
   273  		CmdEnvironment:   opts.CmdEnvironment,
   274  		DepScanTimeout:   p.depScanTimeout,
   275  		Slots:            p.slots,
   276  	}
   277  	switch lbls {
   278  	case labels.ToolLabels():
   279  		pp = &tool.Preprocessor{
   280  			BasePreprocessor: bp,
   281  		}
   282  	// SignAPKLabels is equivalent to ToolLabels, but
   283  	// is kept for historical reasons and for distinction.
   284  	case labels.SignAPKLabels():
   285  		pp = &tool.Preprocessor{
   286  			BasePreprocessor: bp,
   287  		}
   288  	case labels.D8Labels():
   289  		pp = &d8.Preprocessor{
   290  			BasePreprocessor: bp,
   291  		}
   292  	case labels.R8Labels():
   293  		pp = &r8.Preprocessor{
   294  			BasePreprocessor: bp,
   295  		}
   296  	case labels.MetalavaLabels():
   297  		pp = &metalava.Preprocessor{
   298  			BasePreprocessor: bp,
   299  		}
   300  	case labels.ClangCppLabels():
   301  		pp = cp
   302  	case labels.ClangLintLabels():
   303  		pp = &clanglint.Preprocessor{
   304  			Preprocessor: cp,
   305  		}
   306  	case labels.HeaderAbiDumpLabels():
   307  		pp = &headerabi.Preprocessor{
   308  			Preprocessor: cp,
   309  		}
   310  	case labels.ClangCLCppLabels():
   311  		pp = &clangcl.Preprocessor{
   312  			Preprocessor: cp,
   313  		}
   314  	case labels.NaClLabels():
   315  		pp = &nacl.Preprocessor{
   316  			Preprocessor: cp,
   317  		}
   318  	case labels.ClangLinkLabels():
   319  		pp = &clanglink.Preprocessor{
   320  			BasePreprocessor: bp,
   321  			ARDeepScan:       p.cppLinkDeepScan,
   322  		}
   323  	case labels.NaClLinkLabels():
   324  		pp = &clanglink.Preprocessor{
   325  			BasePreprocessor: bp,
   326  			ARDeepScan:       p.cppLinkDeepScan,
   327  		}
   328  	case labels.JavacLabels():
   329  		pp = &javac.Preprocessor{
   330  			BasePreprocessor: bp,
   331  		}
   332  	case labels.LLVMArLabels():
   333  		pp = &archive.Preprocessor{
   334  			BasePreprocessor: bp,
   335  		}
   336  	case labels.TscLabels():
   337  		pp = &typescript.Preprocessor{
   338  			BasePreprocessor: bp,
   339  		}
   340  	}
   341  	if pp != nil {
   342  		ch := make(chan bool)
   343  		var res *iproc.ActionSpec
   344  		var err error
   345  		go func() {
   346  			res, err = iproc.Compute(pp, options)
   347  			// in a general sense ErrIPTimeout represents an error caused by IP execution
   348  			// exceeding IPTimeout value (set by ip_timeout) flag; however,
   349  			// at the moment IPTimeout is used only by cpp dependency scanner.
   350  			// If, in the future, it will be used more widely, more error types might need to be
   351  			// translated to ErrIPTimeout
   352  			if errors.Is(err, cppdependencyscanner.ErrDepsScanTimeout) {
   353  				err = fmt.Errorf("%w: %v", ErrIPTimeout, err)
   354  			}
   355  			close(ch)
   356  		}()
   357  		select {
   358  		case <-ch:
   359  			if err != nil {
   360  				return nil, err
   361  			}
   362  			return &CommandIO{
   363  				InputSpec:             res.InputSpec,
   364  				OutputFiles:           res.OutputFiles,
   365  				OutputDirectories:     res.OutputDirectories,
   366  				EmittedDependencyFile: res.EmittedDependencyFile,
   367  				UsedShallowMode:       res.UsedShallowMode,
   368  			}, nil
   369  		case <-ctx.Done():
   370  			return nil, fmt.Errorf("context was cancelled before completing input processing")
   371  		}
   372  
   373  	}
   374  	return nil, status.Errorf(codes.Unimplemented, "unsupported labels: %v", opts.Labels)
   375  }