github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/internal/pkg/inputprocessor/action/cppcompile/preprocessor.go (about)

     1  // Copyright 2023 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cppcompile
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"os"
    22  	"path/filepath"
    23  	"strings"
    24  	"sync"
    25  	"time"
    26  
    27  	spb "github.com/bazelbuild/reclient/api/scandeps"
    28  	"github.com/bazelbuild/reclient/internal/pkg/cppdependencyscanner"
    29  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor"
    30  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/depscache"
    31  	"github.com/bazelbuild/reclient/internal/pkg/inputprocessor/flags"
    32  	"github.com/bazelbuild/reclient/internal/pkg/logger"
    33  	"github.com/bazelbuild/reclient/internal/pkg/logger/event"
    34  	"github.com/bazelbuild/reclient/internal/pkg/pathtranslator"
    35  
    36  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/cache"
    37  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/digest"
    38  	"golang.org/x/sync/semaphore"
    39  
    40  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/command"
    41  
    42  	log "github.com/golang/glog"
    43  )
    44  
const (
	// NOTE(review): cpus and ramMBs are not referenced in the visible part of
	// this file; presumably they are resource estimates consumed elsewhere in
	// the package — confirm before changing or removing them.
	cpus                 = 1
	ramMBs               = 64
	// clangCompilerArgFlag is the flag key whose value BuildCommandLine checks
	// against toRemoveXclangFlags to decide whether the flag must be dropped.
	clangCompilerArgFlag = "-Xclang"
)
    50  
    51  var (
    52  	toAbsArgs = map[string]bool{
    53  		"--sysroot":              true,
    54  		"--sysroot=":             true,
    55  		"-isysroot":              true,
    56  		"-fprofile-sample-use=":  true,
    57  		"-fsanitize-blacklist=":  true,
    58  		"-fsanitize-ignorelist=": true,
    59  		"-fprofile-list=":        true,
    60  	}
    61  	// These arguments are unsupported in the current version of clang-scan-deps and
    62  	// need to be removed, otherwise they cause input processing to fail early in
    63  	// argument parsing.
    64  	toRemoveArgs = map[string]struct{}{
    65  		"-fno-experimental-new-pass-manager": struct{}{},
    66  		"-fexperimental-new-pass-manager":    struct{}{},
    67  	}
    68  	// These Xclang flags are unsupported, need to be removed before calling clang-scan-deps.
    69  	toRemoveXclangFlags = map[string]struct{}{
    70  		// Clang-scan-deps removes comments in the source code for optimized
    71  		// preprocessing, which results in removal of "expected-*" directives
    72  		// in comments. Consequently when the "-verify" flag is used and no
    73  		// "expected-*" directives are found, the preprocessor fails with error.
    74  		// Thus, remove the verify flag before calling clang-scan-deps.
    75  		// TODO(b/148145163): Fix it in clang-scan-deps lexer in the long term.
    76  		"-verify": struct{}{},
    77  		// Clang-scan-deps is not able to process the `-fallow-half-arguments-and-returns`
    78  		// flag, actions with this flag will not be able to reach to the RBE and
    79  		// cause remote_failures. Thus, this flag is removed.
    80  		// For details about "-fallow-half-arguments-and-returns", see b/296438658.
    81  		"-fallow-half-arguments-and-returns": struct{}{},
    82  	}
    83  	virtualInputFlags = map[string]bool{
    84  		"-I":                        true,
    85  		"-isystem":                  true,
    86  		"-isysroot":                 true,
    87  		"--sysroot=":                true,
    88  		"--sysroot":                 true,
    89  		"-internal-isystem":         true,
    90  		"-internal-externc-isystem": true,
    91  	}
    92  )
    93  
// CPPDependencyScanner is an interface to dependency scanner which provides
// functionality to find input dependencies given a compile command.
type CPPDependencyScanner interface {
	// ProcessInputs scans the compile command for the given source file, run
	// from directory with the cmdEnv environment. FindDependencies treats the
	// returned values as the list of input paths, a flag telling whether the
	// scanner served the result from a cache, and an error.
	ProcessInputs(ctx context.Context, execID string, command []string, filename, directory string, cmdEnv []string) ([]string, bool, error)
	// Capabilities reports what the scanner supports; ComputeSpec consults
	// GetExpectsResourceDir on it to decide whether to add -resource-dir.
	Capabilities() *spb.CapabilitiesResponse
}
   100  
// resourceDirInfo is a cached resource directory lookup for one compiler
// executable, as stored by Preprocessor.ResourceDir.
type resourceDirInfo struct {
	// fileInfo is the os.Stat result of the executable taken when the entry
	// was stored; it is compared with os.SameFile to validate the entry.
	fileInfo    os.FileInfo
	// resourceDir is the resource directory derived from the executable's output.
	resourceDir string
}
   105  
var (
	// resourceDirsMu is held by Preprocessor.ResourceDir for its whole lookup,
	// including every access to resourceDirs.
	resourceDirsMu sync.Mutex
	// resourceDirs caches resource directory lookups, keyed by the path of
	// the compiler executable (args[0]).
	resourceDirs   = map[string]resourceDirInfo{}
	// virtualInputCache is a cache for the virtual inputs calculated from a given path.
	virtualInputCache = cache.SingleFlight{}
)
   112  
// VirtualInputsProcessor processes the flags and appends virtual inputs to the command's InputSpec.
type VirtualInputsProcessor interface {
	// IsVirtualInput returns true if the specified flag should result in appending a virtual input.
	IsVirtualInput(flag string) bool
	// AppendVirtualInput appends a virtual input to res and returns the extended
	// slice. The path argument should be exec root relative.
	AppendVirtualInput(res []*command.VirtualInput, flag, path string) []*command.VirtualInput
}
   120  
// Preprocessor is the preprocessor of clang cpp compile actions.
type Preprocessor struct {
	*inputprocessor.BasePreprocessor
	// CPPDepScanner is used to perform include processing by invoking
	// clang-scan-deps library.
	// https://github.com/llvm/llvm-project/tree/main/clang/tools/clang-scan-deps
	CPPDepScanner CPPDependencyScanner

	// DepScanTimeout is the max duration allowed for CPP dependency scanning operation
	// before it's interrupted. A value <= 0 means no timeout is applied.
	DepScanTimeout time.Duration

	// Rec receives the event timings recorded during dependency lookup. When
	// it is nil, FindDependencies and the deps cache lookup create a new record.
	Rec *logger.LogRecord

	// DepsCache is a cache for cpp header dependencies.
	DepsCache *depscache.Cache

	// CmdEnvironment captures the environment of the command to be executed, in the form "key=value" strings.
	CmdEnvironment []string

	// Slots, when non-nil, is acquired (weight 1) by FindDependencies before
	// the dependency scanner is invoked and released when the scan returns.
	Slots *semaphore.Weighted

	// testOnlySetDone, when non-nil, is called at the end of saveDeps.
	testOnlySetDone func()
}
   145  
   146  // ParseFlags parses the commands flags and populates the ActionSpec object with inferred
   147  // information.
   148  func (p *Preprocessor) ParseFlags() error {
   149  	f, err := ClangParser{}.ParseFlags(p.Ctx, p.Options.Cmd, p.Options.WorkingDir, p.Options.ExecRoot)
   150  	if err != nil {
   151  		p.Err = fmt.Errorf("flag parsing failed. %v", err)
   152  		return p.Err
   153  	}
   154  	p.Flags = f
   155  	p.FlagsToActionSpec()
   156  	return nil
   157  }
   158  
   159  // ComputeSpec computes cpp header dependencies.
   160  func (p *Preprocessor) ComputeSpec() error {
   161  	s := &inputprocessor.ActionSpec{InputSpec: &command.InputSpec{}}
   162  	defer p.AppendSpec(s)
   163  
   164  	args := p.BuildCommandLine("-o", false, toAbsArgs)
   165  	if p.CPPDepScanner.Capabilities().GetExpectsResourceDir() {
   166  		args = p.addResourceDir(args)
   167  	}
   168  
   169  	headerInputFiles, err := p.FindDependencies(args)
   170  	if err != nil {
   171  		s.UsedShallowMode = true
   172  		return err
   173  	}
   174  
   175  	s.InputSpec = &command.InputSpec{
   176  		Inputs:        headerInputFiles,
   177  		VirtualInputs: VirtualInputs(p.Flags, p),
   178  	}
   179  	return nil
   180  }
   181  
   182  // FindDependencies finds the dependencies of the given adjusted command args
   183  // using the current include scanner.
   184  func (p *Preprocessor) FindDependencies(args []string) ([]string, error) {
   185  	dg := digest.NewFromBlob([]byte(strings.Join(args, " ")))
   186  	key := depscache.Key{
   187  		CommandDigest: dg.String(),
   188  		SrcFilePath:   filepath.Join(p.Flags.ExecRoot, p.Flags.WorkingDirectory, p.Flags.TargetFilePaths[0]),
   189  	}
   190  	res, ok := p.getFromDepsCache(key)
   191  	if ok {
   192  		return pathtranslator.ListRelToExecRoot(p.Flags.ExecRoot, "", res), nil
   193  	}
   194  
   195  	from := time.Now()
   196  	evt := event.InputProcessorWait
   197  	if p.Rec == nil {
   198  		p.Rec = logger.NewLogRecord()
   199  	}
   200  	if p.Slots != nil {
   201  		if err := p.Slots.Acquire(p.Ctx, 1); err != nil {
   202  			return nil, err
   203  		}
   204  	}
   205  	p.Rec.RecordEventTime(evt, from)
   206  
   207  	from = time.Now()
   208  	evt = event.CPPInputProcessor
   209  	usedCache := false
   210  	defer func() {
   211  		if p.Slots != nil {
   212  			p.Slots.Release(1)
   213  		}
   214  		if usedCache {
   215  			evt = event.InputProcessorCacheLookup
   216  		}
   217  		if p.Rec == nil {
   218  			p.Rec = logger.NewLogRecord()
   219  		}
   220  		p.Rec.RecordEventTime(evt, from)
   221  	}()
   222  
   223  	if len(p.Flags.TargetFilePaths) != 1 {
   224  		return nil, fmt.Errorf("expected exactly one targetfilepath, got: %v", p.Flags.TargetFilePaths)
   225  	}
   226  
   227  	filename := p.Flags.TargetFilePaths[0]
   228  	directory := filepath.Join(p.Flags.ExecRoot, p.Flags.WorkingDirectory)
   229  	if !filepath.IsAbs(filename) {
   230  		filename = filepath.Join(directory, filename)
   231  	}
   232  	dsCtx, cancel := maybeWithTimeout(p.Ctx, p.DepScanTimeout)
   233  	defer cancel()
   234  	var err error
   235  	if res, usedCache, err = p.CPPDepScanner.ProcessInputs(dsCtx, p.Options.ExecutionID, args, filename, directory, p.CmdEnvironment); err != nil {
   236  		if errors.Is(err, context.DeadlineExceeded) {
   237  			return nil, fmt.Errorf("%w: %v", cppdependencyscanner.ErrDepsScanTimeout, err)
   238  		}
   239  		return nil, fmt.Errorf("cpp dependency scanner's ProcessInputs failed: %w", err)
   240  	}
   241  
   242  	headerInputFiles := pathtranslator.ListRelToExecRoot(p.Flags.ExecRoot, "", res)
   243  	go p.saveDeps(key, headerInputFiles)
   244  	return headerInputFiles, nil
   245  }
   246  
   247  // BuildCommandLine builds a command line arguments from flags
   248  func (p *Preprocessor) BuildCommandLine(outputFlag string, outputFlagJoined bool, toAbsArgs map[string]bool) []string {
   249  	directory := filepath.Join(p.Flags.ExecRoot, p.Flags.WorkingDirectory)
   250  	filename := p.Flags.TargetFilePaths[0]
   251  	executablePath := p.Flags.ExecutablePath
   252  	if !filepath.IsAbs(executablePath) {
   253  		executablePath = filepath.Join(directory, executablePath)
   254  	}
   255  	if !filepath.IsAbs(p.Flags.TargetFilePaths[0]) {
   256  		filename = filepath.Join(directory, p.Flags.TargetFilePaths[0])
   257  	}
   258  	args := []string{executablePath}
   259  	for _, flag := range p.Flags.Flags {
   260  		key, value := flag.Key, flag.Value
   261  		if key == clangCompilerArgFlag {
   262  			if _, present := toRemoveXclangFlags[value]; present {
   263  				continue
   264  			}
   265  		}
   266  		// Some arguments have files/directories that must be
   267  		// relative to the current working directory or be
   268  		// an absolute path for clang.
   269  		// (b/157729681) This is a placeholder fix for flags not
   270  		// using the provided working directory.
   271  		if _, present := toAbsArgs[key]; present {
   272  			if !filepath.IsAbs(value) {
   273  				value = filepath.Join(directory, value)
   274  			} else {
   275  				value = filepath.Clean(value)
   276  			}
   277  		}
   278  		if _, present := toRemoveArgs[key]; present {
   279  			continue
   280  		}
   281  		args = appendFlag(args, flag.OriginalKey(), value, flag.Joined)
   282  	}
   283  
   284  	// When the input processor fails, we log the error and the error
   285  	// output from clang-scan-deps has too much clutter about unused
   286  	// arguments for assembly actions. Hence supress them.
   287  	args = append(args, "-Qunused-arguments")
   288  
   289  	for _, v := range p.Flags.OutputFilePaths {
   290  		if v != p.Flags.EmittedDependencyFile {
   291  			args = appendFlag(args, outputFlag, v, outputFlagJoined)
   292  		}
   293  	}
   294  	args = append(args, filename)
   295  	return args
   296  }
   297  
   298  // SaveDeps adjusts all header paths to be absolute and saves them in the deps cache. This should be called in a go routine
   299  func (p *Preprocessor) saveDeps(key depscache.Key, headerInputFiles []string) {
   300  	absInputFiles := make([]string, len(headerInputFiles))
   301  	for i, path := range headerInputFiles {
   302  		if filepath.IsAbs(path) {
   303  			absInputFiles[i] = path
   304  			continue
   305  		}
   306  		absInputFiles[i] = filepath.Join(p.Flags.ExecRoot, "", path)
   307  	}
   308  	if err := p.DepsCache.SetDeps(key, absInputFiles); err != nil {
   309  		log.Errorf("SetDeps(%v) failed: %v", key, err)
   310  	}
   311  	if p.testOnlySetDone != nil {
   312  		p.testOnlySetDone()
   313  	}
   314  }
   315  
   316  func (p *Preprocessor) getFromDepsCache(key depscache.Key) ([]string, bool) {
   317  	from := time.Now()
   318  	evt := event.InputProcessorCacheLookup
   319  	deps, ok := p.DepsCache.GetDeps(key)
   320  	if ok {
   321  		log.V(1).Infof("Found Deps Cache Hit for %v", key)
   322  		if p.Rec == nil {
   323  			p.Rec = logger.NewLogRecord()
   324  		}
   325  		p.Rec.RecordEventTime(evt, from)
   326  		return deps, true
   327  	}
   328  	return nil, false
   329  }
   330  
   331  func appendFlag(slice []string, key, value string, joined bool) []string {
   332  	if joined {
   333  		return appendNonEmpty(slice, key+value)
   334  	}
   335  	return appendNonEmpty(appendNonEmpty(slice, key), value)
   336  }
   337  
   338  func appendNonEmpty(slice []string, value string) []string {
   339  	if value != "" {
   340  		slice = append(slice, value)
   341  	}
   342  	return slice
   343  }
   344  
   345  func extractVirtualSubdirectories(path string) []string {
   346  	viFn := func() (interface{}, error) {
   347  		var vis []string
   348  		cpath := ""
   349  		lastElem := ""
   350  		for _, elem := range strings.Split(path, string(filepath.Separator)) {
   351  			// A 'go back' has been hit, handle the various cases.
   352  			if elem == ".." {
   353  				// We've hit the first '..' in a possible sequence of '..'.
   354  				// But we are at a directory that needs to be a virtual input.
   355  				if lastElem != "" && lastElem != ".." {
   356  					vis = append(vis, filepath.Clean(cpath))
   357  				}
   358  			}
   359  			cpath = filepath.Join(cpath, elem)
   360  			lastElem = elem
   361  		}
   362  		// cpath needs to be added as a virtual input, only if lastElem isn't '..' or there's
   363  		// nothing in vis.  (Still needs adding if cpath is just full of '..' elements)
   364  		if lastElem != ".." || len(vis) == 0 {
   365  			vis = append(vis, filepath.Clean(cpath))
   366  		}
   367  		return vis, nil
   368  	}
   369  	val, err := virtualInputCache.LoadOrStore(path, viFn)
   370  	if err != nil {
   371  		log.Errorf("failed to process include directory path for virtual inputs %v", path)
   372  		return []string{path}
   373  	}
   374  	return val.([]string)
   375  }
   376  
   377  // VirtualInputs returns paths extracted from virtualInputFlags. If paths are absolute, they're transformed to working dir relative.
   378  func VirtualInputs(f *flags.CommandFlags, vip VirtualInputsProcessor) []*command.VirtualInput {
   379  	var res []*command.VirtualInput
   380  	for _, flag := range f.Flags {
   381  		if vip.IsVirtualInput(flag.Key) {
   382  			path := flag.Value
   383  			if path == "" {
   384  				log.Warningf("Invalid flag specification, missing value after flag %s", flag.Key)
   385  				continue
   386  			}
   387  			if filepath.IsAbs(path) {
   388  				path, _ = filepath.Rel(filepath.Join(f.ExecRoot, f.WorkingDirectory), path)
   389  			}
   390  			path = filepath.Join(f.WorkingDirectory, path)
   391  			for _, p := range extractVirtualSubdirectories(path) {
   392  				res = vip.AppendVirtualInput(res, flag.Key, p)
   393  			}
   394  		}
   395  	}
   396  	return res
   397  }
   398  
   399  // IsVirtualInput returns true if the flag specifies a virtual input to be added to InputSpec.
   400  func (p *Preprocessor) IsVirtualInput(flag string) bool {
   401  	return virtualInputFlags[flag]
   402  }
   403  
   404  // AppendVirtualInput appends a virtual input to res.
   405  func (p *Preprocessor) AppendVirtualInput(res []*command.VirtualInput, flag, path string) []*command.VirtualInput {
   406  	return append(res, &command.VirtualInput{Path: path, IsEmptyDirectory: true})
   407  }
   408  
   409  func (p *Preprocessor) addResourceDir(args []string) []string {
   410  	for _, arg := range args {
   411  		if arg == "-resource-dir" {
   412  			return args
   413  		}
   414  	}
   415  	resourceDir := p.resourceDir(args)
   416  	if resourceDir != "" {
   417  		return append(args, "-resource-dir", resourceDir)
   418  	}
   419  	return args
   420  }
   421  
   422  func getObjFilePath(fname string) string {
   423  	return strings.TrimSuffix(fname, filepath.Ext(fname)) + ".o"
   424  }
   425  
   426  func (p *Preprocessor) resourceDir(args []string) string {
   427  	return p.ResourceDir(args, "-print-resource-dir", func(stdout string) (string, error) {
   428  		return strings.TrimSpace(stdout), nil
   429  	})
   430  }
   431  
// StdoutToResourceDirMapper maps the standard output of the compiler
// invocation made by ResourceDir to a resource directory. When it returns a
// non-nil error, ResourceDir logs the error and returns the mapped value
// without caching it.
type StdoutToResourceDirMapper func(string) (string, error)
   434  
   435  // ResourceDir extracts clang command and runs it with provided resource the resourceDirFlag
   436  // to return resource directory relative to the clang
   437  // it maps the command output to resourceDir with provided stdoutToResourceDirMapper argument
   438  // cache the results for reuse.
   439  // It returns resource directory path associated with the give invocation command.
   440  func (p *Preprocessor) ResourceDir(args []string, resourceDirFlag string, stdoutToResourceDirMapper StdoutToResourceDirMapper) string {
   441  	if len(args) == 0 {
   442  		return ""
   443  	}
   444  	if !filepath.IsAbs(args[0]) {
   445  		return ""
   446  	}
   447  	resourceDirsMu.Lock()
   448  	defer resourceDirsMu.Unlock()
   449  	if p.Executor == nil {
   450  		log.Errorf("Executor is not set in Preprocessor")
   451  		return ""
   452  	}
   453  	fi, err := os.Stat(args[0])
   454  	if err != nil {
   455  		log.Errorf("Failed to access %s: %v", args[0], err)
   456  		return ""
   457  	}
   458  	ri, ok := resourceDirs[args[0]]
   459  	if ok {
   460  		if os.SameFile(ri.fileInfo, fi) {
   461  			return ri.resourceDir
   462  		}
   463  		log.Infof("%s seems to be updated: %v -> %v", args[0], ri.fileInfo, fi)
   464  	}
   465  	stdout, stderr, err := p.Executor.Execute(p.Ctx, &command.Command{
   466  		Args:       []string{args[0], resourceDirFlag},
   467  		WorkingDir: "/",
   468  	})
   469  	if err != nil {
   470  		log.Warningf("failed to execute \"%v %v\" : %v\nstdout:%s\nstderr:%s", args[0], resourceDirFlag, err, stdout, stderr)
   471  		return ""
   472  	}
   473  	resourceDir, err := stdoutToResourceDirMapper(stdout)
   474  	if err != nil {
   475  		log.Warning(err)
   476  		return resourceDir
   477  	}
   478  	resourceDirs[args[0]] = resourceDirInfo{
   479  		fileInfo:    fi,
   480  		resourceDir: resourceDir,
   481  	}
   482  	return resourceDir
   483  }
   484  
   485  func maybeWithTimeout(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) {
   486  	if timeout <= 0 {
   487  		return context.WithCancel(ctx)
   488  	}
   489  	return context.WithTimeout(ctx, timeout)
   490  }