github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/internal/pkg/inputprocessor/inputprocessor.go (about)

     1  // Copyright 2023 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package inputprocessor contains code for processing actions to determine their inputs/outputs.
    16  package inputprocessor
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"io/fs"
    22  	"os"
    23  	"path/filepath"
    24  	"strconv"
    25  	"strings"
    26  	"sync"
    27  
    28  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/flags"
    29  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/flagsparser"
    30  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/toolchain"
    31  	"github.com/bazelbuild/reclient/internal/pkg/localresources"
    32  	"github.com/bazelbuild/reclient/internal/pkg/pathtranslator"
    33  
    34  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/cache"
    35  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/command"
    36  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/filemetadata"
    37  	log "github.com/golang/glog"
    38  )
    39  
const (
	// shallowLabel is the key looked up in Options.Labels to indicate whether to
	// use the shallow input processor for the command. Its value is parsed with
	// strconv.ParseBool in Init.
	shallowLabel = "shallow"
)
    45  
var (
	// normalizedFileErrCache remembers the text of path-normalization errors that
	// FilterInputsUnderExecRoot has already logged, so that each distinct message
	// is only warned about once unless verbose logging (V(3)) is enabled.
	normalizedFileErrCache = &sync.Map{}
)
    49  
// Options encapsulates options for initializing a preprocessor.
type Options struct {
	// ExecutionID is the ID of the action. It is used as a prefix in log messages.
	ExecutionID string
	// Cmd is the list of args.
	Cmd []string
	// WorkingDir is the working directory of the action.
	WorkingDir string
	// ExecRoot is the exec root of the action.
	ExecRoot string
	// Inputs is the InputSpec passed explicitly with the action request. Init
	// stores this pointer in the ActionSpec without copying it, so later
	// processing appends to the same InputSpec.
	Inputs *command.InputSpec
	// Labels is a map of label keys to values.
	Labels map[string]string
	// ToolchainInputs is a list of toolchain inputs in addition to the toolchains
	// inferred from the command.
	ToolchainInputs []string
	// ShallowFallback indicates whether preprocessing is allowed to fall back to shallow
	// mode if an error is encountered.
	ShallowFallback bool

	// WindowsCross indicates whether to use Linux worker for Windows.
	WindowsCross bool
}
    74  
// ActionSpec encapsulates the inputs and outputs of a command. All paths are relative to the
// exec root.
type ActionSpec struct {
	// InputSpec holds information about files and environment variables required to
	// run the command.
	InputSpec *command.InputSpec
	// OutputFiles is a list of output files produced by the command.
	OutputFiles []string
	// OutputDirectories is a list of output directories produced by the command.
	OutputDirectories []string
	// EmittedDependencyFile is the name of the dependency file produced by the command.
	EmittedDependencyFile string
	// UsedShallowMode indicates whether the shallow input processor was used to
	// determine inputs.
	UsedShallowMode bool
}
    91  
// Executor can run commands and retrieve their outputs.
type Executor interface {
	// Execute runs cmd under ctx.
	// NOTE(review): the two string results are presumably the command's stdout and
	// stderr, in that order — confirm against the implementers.
	Execute(ctx context.Context, cmd *command.Command) (string, string, error)
}
    96  
// Preprocessor is an interface for determining specs of an action. Refer to Compute() below to
// see how implementers of the interface should be called: Init, ParseFlags,
// ProcessToolchains, optionally ComputeSpec, then Sanitize and Spec.
type Preprocessor interface {
	// Init initializes the preprocessor with the given options.
	Init(options Options)
	// ParseFlags parses the command's flags and populates the ActionSpec object with inferred
	// information. Returning an error means preprocessing should not continue, and the user
	// should Sanitize() the currently processed ActionSpec and then read it if available using
	// Spec().
	ParseFlags() error
	// ProcessToolchains determines toolchain inputs required for the command. Returning an
	// error means preprocessing should not continue, and the user should Sanitize() the
	// currently processed ActionSpec and then read it if available using Spec().
	ProcessToolchains() error
	// ComputeSpec computes any further action specification that is not immediately inferrable
	// from flags or toolchain configuration. Returning an error means preprocessing should not
	// continue, and the user should Sanitize() the currently processed ActionSpec and then read
	// it if available using Spec().
	ComputeSpec() error
	// Sanitize cleans up the spec by removing unwanted entries. Normally this includes
	// deduping and removing non-existent inputs.
	Sanitize()
	// Spec retrieves the ActionSpec currently inferred for the options passed to the
	// context.
	Spec() (*ActionSpec, error)
	// Error returns the fatal error encountered during preprocessing, if one exists.
	Error() error
}
   125  
// BasePreprocessor is the base preprocessor with commonly used objects and preprocessing logic.
type BasePreprocessor struct {
	// Ctx is the context to use for internal preprocessing actions.
	Ctx context.Context
	// Executor is an entity that can execute commands on the local system.
	Executor Executor
	// ResourceManager manages available local resources to ensure local operations do not
	// overwhelm the machine.
	ResourceManager *localresources.Manager
	// Options is the options used to initialize the preprocessor. Should not change once the
	// preprocessor is initialized.
	Options Options
	// Flags is the set of flags determined from parsing the command. Should not change once
	// ParseFlags has been called.
	Flags *flags.CommandFlags
	// Err is an error encountered during preprocessing. An action that has an error could
	// not be remotely executed or cached. When set, Spec() returns it instead of the spec.
	Err error
	// actionSpec is the specification of the action as currently determined by the
	// preprocessor.
	actionSpec *ActionSpec
	// FileMetadataCache is used to obtain the metadata of files. When nil,
	// FilterVirtualInputs falls back to os.Stat (optionally through FileStatCache).
	FileMetadataCache filemetadata.Cache
	// NormalizedFileCache is used to cache normalized paths. May be nil, in which
	// case paths are normalized on every call.
	NormalizedFileCache *cache.SingleFlight
	// FileStatCache caches the results of os.Stat calls. May be nil.
	FileStatCache *cache.SingleFlight
}
   154  
   155  // Init initializes the preprocessor with the given options.
   156  func (c *BasePreprocessor) Init(options Options) {
   157  	c.Options = options
   158  	c.actionSpec = &ActionSpec{
   159  		InputSpec: c.Options.Inputs,
   160  	}
   161  
   162  	if c.actionSpec.InputSpec == nil {
   163  		c.actionSpec.InputSpec = &command.InputSpec{}
   164  	}
   165  
   166  	v, ok := options.Labels[shallowLabel]
   167  	if !ok {
   168  		return
   169  	}
   170  	shallow, err := strconv.ParseBool(v)
   171  	if err != nil {
   172  		log.Warningf("Failed to parse shallow label value: %v", err)
   173  	}
   174  	c.actionSpec.UsedShallowMode = shallow
   175  }
   176  
   177  // ParseFlags parses the commands flags and populates the ActionSpec object with inferred
   178  // information.
   179  func (c *BasePreprocessor) ParseFlags() error {
   180  	f, err := flagsparser.CommandFlags(c.Ctx, c.Options.Cmd, c.Options.WorkingDir, c.Options.ExecRoot)
   181  	if err != nil {
   182  		c.Err = fmt.Errorf("flag parsing failed. %v", err)
   183  		return c.Err
   184  	}
   185  	c.Flags = f
   186  	c.FlagsToActionSpec()
   187  	return nil
   188  }
   189  
   190  // ProcessToolchains determines toolchain inputs required for the command.
   191  func (c *BasePreprocessor) ProcessToolchains() error {
   192  	if c.Flags == nil {
   193  		c.Err = fmt.Errorf("no flags set")
   194  		return c.Err
   195  	}
   196  	tp := &toolchain.InputProcessor{}
   197  	tci, err := tp.ProcessToolchainInputs(c.Ctx, c.Options.ExecRoot, c.Options.WorkingDir, c.Flags.ExecutablePath, c.Options.ToolchainInputs, c.FileMetadataCache)
   198  	if err != nil {
   199  		c.actionSpec.UsedShallowMode = true
   200  		return fmt.Errorf("toolchain processing failed. %v", err)
   201  	}
   202  	c.AppendSpec(&ActionSpec{InputSpec: tci})
   203  	return nil
   204  }
   205  
// ComputeSpec computes any further action specification that is not immediately inferrable
// from flags or toolchain configuration. The base implementation does nothing and always
// returns nil.
func (c *BasePreprocessor) ComputeSpec() error {
	return nil
}
   211  
// Sanitize cleans up the spec by removing unwanted entries. Normally this includes
// deduping and removing non-existent inputs. It runs, in order:
// FilterInputsUnderExecRoot, RewriteEnvironmentVariables and FilterVirtualInputs.
func (c *BasePreprocessor) Sanitize() {
	c.FilterInputsUnderExecRoot()
	c.RewriteEnvironmentVariables()
	c.FilterVirtualInputs()
}
   219  
   220  // Spec retrieves the ActionSpec currently inferred for the options passed to the context.
   221  // For complete input/output processing, call ComputeSpec first.
   222  func (c *BasePreprocessor) Spec() (*ActionSpec, error) {
   223  	if c.Err != nil {
   224  		return nil, c.Err
   225  	}
   226  	return c.actionSpec, nil
   227  }
   228  
// Error returns the fatal error encountered during input processing, if one exists.
// It returns c.Err unchanged, which is nil when no fatal error has been recorded.
func (c *BasePreprocessor) Error() error {
	return c.Err
}
   233  
   234  // FlagsToActionSpec populates the ActionSpec struct with inputs parsed fromt the command flags.
   235  func (c *BasePreprocessor) FlagsToActionSpec() {
   236  	if c.actionSpec == nil || c.actionSpec.InputSpec == nil {
   237  		c.actionSpec = &ActionSpec{InputSpec: c.Options.Inputs}
   238  	}
   239  	allInputs := append(c.Flags.TargetFilePaths, c.Flags.Dependencies...)
   240  	if c.Flags.ExecutablePath != "" {
   241  		allInputs = append(allInputs, c.Flags.ExecutablePath)
   242  	}
   243  	allInputs = pathtranslator.ListRelToExecRoot(c.Options.ExecRoot, c.Options.WorkingDir, allInputs)
   244  	//TODO(b/161932505): remove virtual inputs from CommandFlags.
   245  	var vi []*command.VirtualInput
   246  	for _, od := range pathtranslator.ListRelToExecRoot(c.Options.ExecRoot, c.Options.WorkingDir, c.Flags.VirtualDirectories) {
   247  		vi = append(vi, &command.VirtualInput{Path: od, IsEmptyDirectory: true})
   248  	}
   249  	c.AppendSpec(&ActionSpec{
   250  		InputSpec: &command.InputSpec{
   251  			Inputs:        allInputs,
   252  			VirtualInputs: vi,
   253  		},
   254  		OutputFiles:           pathtranslator.ListRelToExecRoot(c.Options.ExecRoot, c.Options.WorkingDir, c.Flags.OutputFilePaths),
   255  		OutputDirectories:     pathtranslator.ListRelToExecRoot(c.Options.ExecRoot, c.Options.WorkingDir, c.Flags.OutputDirPaths),
   256  		EmittedDependencyFile: pathtranslator.RelToExecRoot(c.Options.ExecRoot, c.Options.WorkingDir, c.Flags.EmittedDependencyFile),
   257  	})
   258  }
   259  
   260  // FilterInputsUnderExecRoot removes any inputs that do not exist under the exec root.
   261  func (c *BasePreprocessor) FilterInputsUnderExecRoot() {
   262  	if c.actionSpec == nil || c.actionSpec.InputSpec == nil {
   263  		return
   264  	}
   265  	pn := newPathNormalizer(c.Options.WindowsCross)
   266  	var filtered []string
   267  	m := make(map[string]bool)
   268  	for _, f := range c.actionSpec.InputSpec.Inputs {
   269  		if f == "" {
   270  			continue
   271  		}
   272  		var normalized string
   273  		var err error
   274  		if c.NormalizedFileCache != nil {
   275  			cache, e, loaded := c.NormalizedFileCache.Load(f)
   276  			if !loaded {
   277  				cache, e = pn.normalize(c.Options.ExecRoot, f)
   278  				if e == nil {
   279  					c.NormalizedFileCache.Store(f, cache)
   280  				}
   281  			}
   282  			normalized = cache.(string)
   283  			err = e
   284  		} else {
   285  			normalized, err = pn.normalize(c.Options.ExecRoot, f)
   286  		}
   287  		if err != nil {
   288  			//Example error msg we want to get rid of:
   289  			//c23aff58-b24a-41fb-8676-3f2886f544eb: failed to normalize out/rbe-build/python3 @ /tmpfs/source/chromium/src/: stat /tmpfs/source/chromium/src/out/rbe-build/python3: no such file or directory
   290  			//with these variables defined:
   291  			//c.Options.ExecutionID: c23aff58-b24a-41fb-8676-3f2886f544eb
   292  			//f: out/rbe-build/python3
   293  			//c.Options.ExecRoot: /tmpfs/source/chromium/src/
   294  			//err: stat /tmpfs/source/chromium/src/out/rbe-build/python3: no such file or directory
   295  			//
   296  			//These error msgs are all generated from this line: cache, e = pn.normalize(c.Options.ExecRoot, f)
   297  			// TODO(b/302290967) Update normalize() to prevent this error msg from generating.
   298  			_, loaded := normalizedFileErrCache.LoadOrStore(err.Error(), struct{}{})
   299  			if !loaded || bool(log.V(3)) {
   300  				log.Warningf("%v: failed to normalize %s @ %s: %v", c.Options.ExecutionID, f, c.Options.ExecRoot, err)
   301  			}
   302  			continue
   303  		}
   304  		if normalized != f {
   305  			log.V(2).Infof("%v: normalize %s -> %s @ %s", c.Options.ExecutionID, f, normalized, c.Options.ExecRoot)
   306  		}
   307  		if m[normalized] {
   308  			log.V(1).Infof("%v: drop duplicate %s (normalized from %s) @ %s", c.Options.ExecutionID, normalized, f, c.Options.ExecRoot)
   309  			continue
   310  		}
   311  		m[normalized] = true
   312  		filtered = append(filtered, normalized)
   313  	}
   314  	c.actionSpec.InputSpec.Inputs = filtered
   315  
   316  	// need to normalize other fields in InputSpec?
   317  }
   318  
   319  // FilterVirtualInputs removes virtual inputs that are not physically existing directories.
   320  func (c *BasePreprocessor) FilterVirtualInputs() {
   321  	if c.actionSpec == nil || c.actionSpec.InputSpec == nil {
   322  		return
   323  	}
   324  
   325  	var filtered []*command.VirtualInput
   326  	for _, vi := range c.actionSpec.InputSpec.VirtualInputs {
   327  		virtualInputPath := vi.Path
   328  		isDir := false
   329  		path := filepath.Join(c.Options.ExecRoot, virtualInputPath)
   330  		if c.FileMetadataCache != nil {
   331  			fileMetadata := c.FileMetadataCache.Get(path)
   332  			isDir = fileMetadata.IsDirectory
   333  		} else {
   334  			var stat fs.FileInfo
   335  			var err error
   336  			if c.FileStatCache != nil {
   337  				cache, e, loaded := c.FileStatCache.Load(path)
   338  				if !loaded {
   339  					cache, e = os.Stat(path)
   340  					if e == nil {
   341  						c.FileStatCache.Store(path, cache)
   342  					}
   343  				}
   344  				stat = cache.(fs.FileInfo)
   345  				err = e
   346  			} else {
   347  				stat, err = os.Stat(path)
   348  			}
   349  			if err != nil {
   350  				log.Warningf("Failed to determine whether virtual input %v exists", virtualInputPath)
   351  			} else {
   352  				isDir = stat.IsDir()
   353  			}
   354  		}
   355  
   356  		if (isDir && vi.IsEmptyDirectory) || !vi.IsEmptyDirectory {
   357  			filtered = append(filtered, vi)
   358  		}
   359  	}
   360  
   361  	c.actionSpec.InputSpec.VirtualInputs = filtered
   362  }
   363  
   364  // RewriteEnvironmentVariables makes all environment variables specified in the input spec relative
   365  // to the working directory.
   366  func (c *BasePreprocessor) RewriteEnvironmentVariables() {
   367  	if c.actionSpec == nil || c.actionSpec.InputSpec == nil {
   368  		return
   369  	}
   370  	for k, v := range c.actionSpec.InputSpec.EnvironmentVariables {
   371  		var vals []string
   372  		for _, val := range strings.Split(v, string(os.PathListSeparator)) {
   373  			if !filepath.IsAbs(val) {
   374  				// If the path is not absolute, assume it is relative to the working directory.
   375  				// Otherwise the action is incorrect even in local execution.
   376  				vals = append(vals, val)
   377  				continue
   378  			}
   379  			newVal := val
   380  			if pathtranslator.RelToExecRoot(c.Options.ExecRoot, c.Options.WorkingDir, val) != "" {
   381  				newVal = pathtranslator.RelToWorkingDir(c.Options.ExecRoot, c.Options.WorkingDir, val)
   382  			}
   383  			vals = append(vals, newVal)
   384  		}
   385  		c.actionSpec.InputSpec.EnvironmentVariables[k] = strings.Join(vals, string(os.PathListSeparator))
   386  	}
   387  }
   388  
   389  // AppendSpec appends the given ActionSpec to the ActionSpec stored in the preprocessor.
   390  func (c *BasePreprocessor) AppendSpec(s *ActionSpec) {
   391  	if c.actionSpec == nil || c.actionSpec.InputSpec == nil {
   392  		c.actionSpec = &ActionSpec{InputSpec: &command.InputSpec{}}
   393  	}
   394  	if s == nil {
   395  		s = &ActionSpec{InputSpec: &command.InputSpec{}}
   396  	}
   397  	if s.InputSpec == nil {
   398  		s.InputSpec = &command.InputSpec{}
   399  	}
   400  
   401  	c.actionSpec.InputSpec.Inputs = append(c.actionSpec.InputSpec.Inputs, s.InputSpec.Inputs...)
   402  	c.actionSpec.InputSpec.VirtualInputs = append(c.actionSpec.InputSpec.VirtualInputs, s.InputSpec.VirtualInputs...)
   403  	c.actionSpec.InputSpec.InputExclusions = append(c.actionSpec.InputSpec.InputExclusions, s.InputSpec.InputExclusions...)
   404  	if c.actionSpec.InputSpec.EnvironmentVariables == nil && len(s.InputSpec.EnvironmentVariables) > 0 {
   405  		c.actionSpec.InputSpec.EnvironmentVariables = make(map[string]string)
   406  	}
   407  	for k, v := range s.InputSpec.EnvironmentVariables {
   408  		c.actionSpec.InputSpec.EnvironmentVariables[k] = v
   409  	}
   410  	c.actionSpec.OutputFiles = append(c.actionSpec.OutputFiles, s.OutputFiles...)
   411  	c.actionSpec.OutputDirectories = append(c.actionSpec.OutputDirectories, s.OutputDirectories...)
   412  	if s.EmittedDependencyFile != "" {
   413  		c.actionSpec.EmittedDependencyFile = s.EmittedDependencyFile
   414  	}
   415  	if !c.actionSpec.UsedShallowMode {
   416  		c.actionSpec.UsedShallowMode = s.UsedShallowMode
   417  	}
   418  }
   419  
   420  // Compute computes the ActionSpec using the given preprocessor and options.
   421  func Compute(p Preprocessor, options Options) (*ActionSpec, error) {
   422  	p.Init(options)
   423  	if err := p.ParseFlags(); err != nil {
   424  		if options.ShallowFallback {
   425  			log.Warningf("%v: Encountered error when parsing flags: %v", options.ExecutionID, err)
   426  			p.Sanitize()
   427  			return p.Spec()
   428  		}
   429  		return nil, err
   430  	}
   431  	if err := p.ProcessToolchains(); err != nil {
   432  		if options.ShallowFallback {
   433  			log.Warningf("%v: Encountered error when processing toolchains: %v", options.ExecutionID, err)
   434  			p.Sanitize()
   435  			return p.Spec()
   436  		}
   437  		return nil, err
   438  	}
   439  	s, _ := p.Spec()
   440  	if s.UsedShallowMode {
   441  		p.Sanitize()
   442  		return p.Spec()
   443  	}
   444  	if err := p.ComputeSpec(); err != nil {
   445  		if options.ShallowFallback {
   446  			log.Warningf("%v: Encountered error when computing spec: %v", options.ExecutionID, err)
   447  			p.Sanitize()
   448  			return p.Spec()
   449  		}
   450  		return nil, err
   451  	}
   452  	p.Sanitize()
   453  	return p.Spec()
   454  }