github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/pkg/inputprocessor/inputprocessor.go (about) 1 // Copyright 2023 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package inputprocessor is used to find non-obvious inputs for action types like C++ compile, 16 // Java compile, C++ link etc. 17 package inputprocessor 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "runtime" 24 "sync" 25 "time" 26 27 ppb "github.com/bazelbuild/reclient/api/proxy" 28 "github.com/bazelbuild/reclient/internal/pkg/cppdependencyscanner" 29 "github.com/bazelbuild/reclient/internal/pkg/features" 30 iproc "github.com/bazelbuild/reclient/internal/pkg/inputprocessor" 31 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/archive" 32 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/clangcl" 33 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/clanglink" 34 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/clanglint" 35 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/cppcompile" 36 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/d8" 37 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/headerabi" 38 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/javac" 39 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/metalava" 40 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/nacl" 41 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/r8" 42 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/tool" 43 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/action/typescript" 44 "github.com/bazelbuild/reclient/internal/pkg/inputprocessor/depscache" 45 "github.com/bazelbuild/reclient/internal/pkg/labels" 46 "github.com/bazelbuild/reclient/internal/pkg/localresources" 47 "github.com/bazelbuild/reclient/internal/pkg/logger" 48 "github.com/bazelbuild/reclient/internal/pkg/logger/event" 49 50 "github.com/bazelbuild/remote-apis-sdks/go/pkg/cache" 51 "github.com/bazelbuild/remote-apis-sdks/go/pkg/command" 52 "github.com/bazelbuild/remote-apis-sdks/go/pkg/filemetadata" 53 "github.com/bazelbuild/remote-apis-sdks/go/pkg/outerr" 54 "golang.org/x/sync/semaphore" 55 56 "google.golang.org/grpc/codes" 57 "google.golang.org/grpc/status" 58 ) 59 60 const ( 61 // shallowLabel is the label key to indicate whether to use the shallow 62 // input processor for the command. 63 shallowLabel = "shallow" 64 ) 65 66 var ( 67 // ErrIPTimeout is an error returned when IP action times out 68 ErrIPTimeout = errors.New("Input Processor timeout") 69 // shallowFallbackConfig denotes whether a specific action type, identified by a set of 70 // labels, can fallback to shallow input processor or not, when their 71 // primary input processor fails. 72 // The default behaviour is to fallback to shallow mode if a set of labels are NOT present 73 // in the following config. 74 // If a CPP compile and remote execution strategy is specified, shallow fallback will be disabled. 75 shallowFallbackConfig = map[labels.Labels]map[ppb.ExecutionStrategy_Value]bool{ 76 labels.HeaderAbiDumpLabels(): {ppb.ExecutionStrategy_UNSPECIFIED: false}, 77 labels.ClangLintLabels(): {ppb.ExecutionStrategy_UNSPECIFIED: false}, 78 labels.ClangCppLabels(): {ppb.ExecutionStrategy_REMOTE: false, 79 ppb.ExecutionStrategy_REMOTE_LOCAL_FALLBACK: false}, 80 labels.ClangCLCppLabels(): {ppb.ExecutionStrategy_REMOTE: false, 81 ppb.ExecutionStrategy_REMOTE_LOCAL_FALLBACK: false}, 82 labels.NaClLabels(): {ppb.ExecutionStrategy_REMOTE: false, 83 ppb.ExecutionStrategy_REMOTE_LOCAL_FALLBACK: false}, 84 } 85 ) 86 87 // Executor can run commands and retrieve their outputs. 88 type Executor interface { 89 Execute(ctx context.Context, cmd *command.Command) (string, string, error) 90 ExecuteInBackground(ctx context.Context, cmd *command.Command, oe outerr.OutErr, ch chan *command.Result) error 91 } 92 93 // InputProcessor retrieves the input spec for commands. 94 type InputProcessor struct { 95 cppDepScanner cppcompile.CPPDependencyScanner 96 cppLinkDeepScan bool 97 depScanTimeout time.Duration 98 executor Executor 99 resMgr *localresources.Manager 100 fmc filemetadata.Cache 101 depsCache *depscache.Cache 102 nfc cache.SingleFlight 103 fsc cache.SingleFlight 104 slots *semaphore.Weighted 105 106 // logger is a logger for input processor events that span a single reproxy run. 107 logger *logger.Logger 108 } 109 110 type depsCacheMode int 111 112 const ( 113 noDepsCache depsCacheMode = iota 114 reproxyDepsCache 115 gomaDepsCache 116 ) 117 118 // Options adds extra control for the input processor 119 type Options struct { 120 EnableDepsCache bool 121 CacheDir string 122 LogDir string 123 DepsCacheMaxMb int 124 CppLinkDeepScan bool 125 IPTimeout time.Duration 126 DepsScannerAddress string 127 ProxyServerAddress string 128 } 129 130 // TODO(b/169675226): Replace usage with sync.OnceFunc when we upgrade to go 1.21 131 func onceFunc(f func()) func() { 132 var once sync.Once 133 return func() { 134 once.Do(f) 135 } 136 } 137 138 // NewInputProcessor creates a new input processor. 139 // Its resources are bound by the local resources manager. 140 func NewInputProcessor(ctx context.Context, executor Executor, resMgr *localresources.Manager, fmc filemetadata.Cache, l *logger.Logger, opt *Options) (*InputProcessor, func(), error) { 141 useDepsCache := opt.CacheDir != "" && opt.EnableDepsCache 142 depScanner, err := cppdependencyscanner.New(ctx, executor, opt.CacheDir, opt.LogDir, opt.DepsCacheMaxMb, useDepsCache && !features.GetConfig().ExperimentalGomaDepsCache, opt.DepsScannerAddress, opt.ProxyServerAddress) 143 if err != nil { 144 return nil, func() {}, err 145 } 146 ip := newInputProcessor(depScanner, opt.IPTimeout, opt.CppLinkDeepScan, executor, resMgr, fmc, l) 147 cleanup := func() {} 148 if useDepsCache && (!depScanner.Capabilities().GetCaching() || features.GetConfig().ExperimentalGomaDepsCache) { 149 ip.depsCache, cleanup = newDepsCache(fmc, opt.CacheDir, l) 150 } 151 return ip, onceFunc(func() { 152 cleanup() 153 depScanner.Close() 154 }), nil 155 } 156 157 // NewInputProcessorWithStubDependencyScanner creates a new input processor with given parallelism 158 // and a stub CPP dependency scanner. It is meant to be only used for testing. 159 func NewInputProcessorWithStubDependencyScanner(ds cppcompile.CPPDependencyScanner, cppLinkDeepScan bool, executor Executor, resMgr *localresources.Manager) *InputProcessor { 160 return newInputProcessor(ds, 0, cppLinkDeepScan, executor, resMgr, nil, nil) 161 } 162 163 func newInputProcessor(ds cppcompile.CPPDependencyScanner, depScanTimeout time.Duration, cppLinkDeepScan bool, executor Executor, resMgr *localresources.Manager, fmc filemetadata.Cache, l *logger.Logger) *InputProcessor { 164 return &InputProcessor{ 165 cppDepScanner: ds, 166 cppLinkDeepScan: cppLinkDeepScan, 167 depScanTimeout: depScanTimeout, 168 executor: executor, 169 resMgr: resMgr, 170 fmc: fmc, 171 slots: semaphore.NewWeighted(int64(runtime.NumCPU())), 172 logger: l, 173 } 174 } 175 176 func newDepsCache(fmc filemetadata.Cache, depsCacheDir string, l *logger.Logger) (*depscache.Cache, func()) { 177 dc := depscache.New(fmc) 178 dc.Logger = l 179 go dc.LoadFromDir(depsCacheDir) 180 return dc, func() { 181 dc.WriteToDisk(depsCacheDir) 182 } 183 } 184 185 // ProcessInputsOptions encapsulates options for a ProcessInputs call. 186 type ProcessInputsOptions struct { 187 // ExecutionID is the ID of the action. 188 ExecutionID string 189 // Cmd is the list of args. 190 Cmd []string 191 // WorkingDir is the working directory of the action. 192 WorkingDir string 193 // ExecRoot is the exec root of the action. 194 ExecRoot string 195 // Inputs is the InputSpec passed explicitly with the action request. 196 Inputs *command.InputSpec 197 // Labels is a map of label keys to values. 198 Labels map[string]string 199 // ToolchainInputs is a list of toolchain inputs in addition to the toolchains 200 // inferred from the command. 201 ToolchainInputs []string 202 203 // WindowsCross indicates whether use linux worker for Windows. 204 WindowsCross bool 205 206 // ExecStrategy indicates which execution strategy was used 207 ExecStrategy ppb.ExecutionStrategy_Value 208 209 // CmdEnvironment captures the environment of the command to be executed, in the form "key=value" strings. 210 CmdEnvironment []string 211 } 212 213 // CommandIO encapsulates the inputs and outputs a command. All paths are relative to the 214 // exec root. 215 type CommandIO struct { 216 // InputSpec holds information about files and environment variables required to 217 // run the command. 218 InputSpec *command.InputSpec 219 // OutputFiles is a list of output files produced by the command. 220 OutputFiles []string 221 // OutputDirectories is a list of output directories produced by the command. 222 OutputDirectories []string 223 // EmiitedDependencyFile is the name of the dependency file produced by the command. 224 EmittedDependencyFile string 225 // UsedShallowMode indicates whether the shallow input processor was used to 226 // determine inputs. 227 UsedShallowMode bool 228 } 229 230 // ProcessInputs receives a valid action command and returns the set of inputs needed to 231 // successfully run the command remotely. Also returns a struct of parsed flags and the 232 // .d file produced by the command if exists. 233 func (p *InputProcessor) ProcessInputs(ctx context.Context, opts *ProcessInputsOptions, rec *logger.LogRecord) (*CommandIO, error) { 234 st := time.Now() 235 defer rec.RecordEventTime(event.ProcessInputs, st) 236 lbls := labels.FromMap(opts.Labels) 237 238 // We set shallow fallback based on the labels and execution strategy. 239 shallowFallback := true 240 if m, ok := shallowFallbackConfig[lbls]; ok { 241 if s, ok := m[opts.ExecStrategy]; ok { 242 shallowFallback = s 243 } 244 } 245 // The code here is a temporary hack to make CLs easier to review. The entire input 246 // processor package under pkg/ should be removed and replaced with 247 // internal/pkg/inputprocessor, where there will be only one definition of 248 // ProcessInputsOptions and CommandIO. 249 options := iproc.Options{ 250 ExecutionID: opts.ExecutionID, 251 Cmd: opts.Cmd, 252 WorkingDir: opts.WorkingDir, 253 ExecRoot: opts.ExecRoot, 254 Inputs: opts.Inputs, 255 Labels: opts.Labels, 256 ToolchainInputs: opts.ToolchainInputs, 257 ShallowFallback: shallowFallback, 258 WindowsCross: opts.WindowsCross, 259 } 260 var pp iproc.Preprocessor 261 bp := &iproc.BasePreprocessor{Ctx: ctx, 262 Executor: p.executor, 263 ResourceManager: p.resMgr, 264 FileMetadataCache: p.fmc, 265 NormalizedFileCache: &p.nfc, 266 FileStatCache: &p.fsc, 267 } 268 cp := &cppcompile.Preprocessor{ 269 BasePreprocessor: bp, 270 CPPDepScanner: p.cppDepScanner, 271 Rec: rec, 272 DepsCache: p.depsCache, 273 CmdEnvironment: opts.CmdEnvironment, 274 DepScanTimeout: p.depScanTimeout, 275 Slots: p.slots, 276 } 277 switch lbls { 278 case labels.ToolLabels(): 279 pp = &tool.Preprocessor{ 280 BasePreprocessor: bp, 281 } 282 // SignAPKLabels is equivalent to ToolLabels, but 283 // is kept for historical reasons and for distinction. 284 case labels.SignAPKLabels(): 285 pp = &tool.Preprocessor{ 286 BasePreprocessor: bp, 287 } 288 case labels.D8Labels(): 289 pp = &d8.Preprocessor{ 290 BasePreprocessor: bp, 291 } 292 case labels.R8Labels(): 293 pp = &r8.Preprocessor{ 294 BasePreprocessor: bp, 295 } 296 case labels.MetalavaLabels(): 297 pp = &metalava.Preprocessor{ 298 BasePreprocessor: bp, 299 } 300 case labels.ClangCppLabels(): 301 pp = cp 302 case labels.ClangLintLabels(): 303 pp = &clanglint.Preprocessor{ 304 Preprocessor: cp, 305 } 306 case labels.HeaderAbiDumpLabels(): 307 pp = &headerabi.Preprocessor{ 308 Preprocessor: cp, 309 } 310 case labels.ClangCLCppLabels(): 311 pp = &clangcl.Preprocessor{ 312 Preprocessor: cp, 313 } 314 case labels.NaClLabels(): 315 pp = &nacl.Preprocessor{ 316 Preprocessor: cp, 317 } 318 case labels.ClangLinkLabels(): 319 pp = &clanglink.Preprocessor{ 320 BasePreprocessor: bp, 321 ARDeepScan: p.cppLinkDeepScan, 322 } 323 case labels.NaClLinkLabels(): 324 pp = &clanglink.Preprocessor{ 325 BasePreprocessor: bp, 326 ARDeepScan: p.cppLinkDeepScan, 327 } 328 case labels.JavacLabels(): 329 pp = &javac.Preprocessor{ 330 BasePreprocessor: bp, 331 } 332 case labels.LLVMArLabels(): 333 pp = &archive.Preprocessor{ 334 BasePreprocessor: bp, 335 } 336 case labels.TscLabels(): 337 pp = &typescript.Preprocessor{ 338 BasePreprocessor: bp, 339 } 340 } 341 if pp != nil { 342 ch := make(chan bool) 343 var res *iproc.ActionSpec 344 var err error 345 go func() { 346 res, err = iproc.Compute(pp, options) 347 // in a general sense ErrIPTimeout represents an error caused by IP execution 348 // exceeding IPTimeout value (set by ip_timeout) flag; however, 349 // at the moment IPTimeout is used only by cpp dependency scanner. 350 // If, in the future, it will be used more widely, more error types might need to be 351 // translated to ErrIPTimeout 352 if errors.Is(err, cppdependencyscanner.ErrDepsScanTimeout) { 353 err = fmt.Errorf("%w: %v", ErrIPTimeout, err) 354 } 355 close(ch) 356 }() 357 select { 358 case <-ch: 359 if err != nil { 360 return nil, err 361 } 362 return &CommandIO{ 363 InputSpec: res.InputSpec, 364 OutputFiles: res.OutputFiles, 365 OutputDirectories: res.OutputDirectories, 366 EmittedDependencyFile: res.EmittedDependencyFile, 367 UsedShallowMode: res.UsedShallowMode, 368 }, nil 369 case <-ctx.Done(): 370 return nil, fmt.Errorf("context was cancelled before completing input processing") 371 } 372 373 } 374 return nil, status.Errorf(codes.Unimplemented, "unsupported labels: %v", opts.Labels) 375 }