github.com/distbuild/reclient@v0.0.0-20240401075343-3de72e395564/cmd/bootstrap/main.go (about)

     1  // Copyright 2023 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package main bootstraps the reproxy service.
    16  package main
    17  
    18  import (
    19  	"context"
    20  	"flag"
    21  	"fmt"
    22  	"os"
    23  	"os/exec"
    24  	"path/filepath"
    25  	"strings"
    26  	"syscall"
    27  	"time"
    28  
    29  	lpb "github.com/bazelbuild/reclient/api/log"
    30  	spb "github.com/bazelbuild/reclient/api/stats"
    31  	"github.com/bazelbuild/reclient/internal/pkg/auth"
    32  	"github.com/bazelbuild/reclient/internal/pkg/bootstrap"
    33  	"github.com/bazelbuild/reclient/internal/pkg/logger"
    34  	"github.com/bazelbuild/reclient/internal/pkg/logger/event"
    35  	"github.com/bazelbuild/reclient/internal/pkg/loghttp"
    36  	"github.com/bazelbuild/reclient/internal/pkg/pathtranslator"
    37  	"github.com/bazelbuild/reclient/internal/pkg/rbeflag"
    38  	"github.com/bazelbuild/reclient/internal/pkg/stats"
    39  	"github.com/bazelbuild/reclient/pkg/version"
    40  
    41  	cpb "github.com/bazelbuild/remote-apis-sdks/go/api/command"
    42  
    43  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/command"
    44  	"github.com/bazelbuild/remote-apis-sdks/go/pkg/moreflag"
    45  	log "github.com/golang/glog"
    46  	"google.golang.org/protobuf/proto"
    47  )
    48  
    49  // bootstrapStart saves the start time of the bootstrap binary.
    50  // Must be the first variable to ensure it is the earliest possible timestamp
    51  var bootstrapStart = time.Now()
    52  
    53  var (
    54  	homeDir, _   = os.UserHomeDir()
    55  	gcertErrMsg  = fmt.Sprintf("\nTry restarting the build after running %q\n", "gcert")
    56  	gcloudErrMsg = fmt.Sprintf("\nTry restarting the build after running %q\n", "gcloud auth login")
    57  	logDir       = os.TempDir()
    58  )
    59  
    60  var (
    61  	proxyLogDir                       []string
    62  	serverAddr                        = flag.String("server_address", "127.0.0.1:8000", "The server address in the format of host:port for network, or unix:///file for unix domain sockets.")
    63  	reProxy                           = flag.String("re_proxy", reproxyDefaultPath(), "Location of the reproxy binary")
    64  	waitSeconds                       = flag.Int("reproxy_wait_seconds", 20, "Number of seconds to wait for reproxy to start")
    65  	shutdown                          = flag.Bool("shutdown", false, "Whether to shut down the proxy and dump the stats.")
    66  	shutdownSeconds                   = flag.Int("shutdown_seconds", 60, "Number of seconds to wait for reproxy to shutdown")
    67  	logFormat                         = flag.String("log_format", "text", "Format of proxy log. Currently only text and reducedtext are supported.")
    68  	logPath                           = flag.String("log_path", "", "DEPRECATED. Use proxy_log_dir instead. If provided, the path to a log file of all executed records. The format is e.g. text://full/file/path.")
    69  	fastLogCollection                 = flag.Bool("fast_log_collection", false, "Enable optimized log aggregation pipeline. Does not work for multileg builds")
    70  	asyncReproxyShutdown              = flag.Bool("async_reproxy_termination", false, "Allows reproxy to finish shutdown asyncronously. Only applicable with fast_log_collection=true")
    71  	metricsProject                    = flag.String("metrics_project", "", "If set, action and build metrics are exported to Cloud Monitoring in the specified GCP project")
    72  	outputDir                         = flag.String("output_dir", os.TempDir(), "The location to which stats should be written.")
    73  	useADC                            = flag.Bool(auth.UseAppDefaultCredsFlag, false, "Indicates whether to use application default credentials for authentication")
    74  	useGCE                            = flag.Bool(auth.UseGCECredsFlag, false, "Indicates whether to use GCE VM credentials for authentication")
    75  	useExternalToken                  = flag.Bool(auth.UseExternalTokenFlag, false, "Indicates whether to use an externally provided token for authentication")
    76  	serviceNoAuth                     = flag.Bool(auth.ServiceNoAuthFlag, false, "If true, do not authenticate with RBE.")
    77  	credFile                          = flag.String(auth.CredentialFileFlag, "", "The name of a file that contains service account credentials to use when calling remote execution. Used only if --use_application_default_credentials and --use_gce_credentials are false.")
    78  	remoteDisabled                    = flag.Bool("remote_disabled", false, "Whether to disable all remote operations and run all actions locally.")
    79  	cacheDir                          = flag.String("cache_dir", "", "Directory from which to load the cache files at startup and update at shutdown.")
    80  	metricsUploader                   = flag.String("metrics_uploader", defaultMetricsUploader(), "Path to the metrics uploader binary.")
    81  	logHTTPCalls                      = flag.Bool("log_http_calls", false, "Log all http requests made with the default http client.")
    82  	experimentalCredentialsHelper     = flag.String(auth.CredshelperPathFlag, "", "Path to the credentials helper binary. If given execrel://, looks for the `credshelper` binary in the same folder as bootstrap")
    83  	experimentalCredentialsHelperArgs = flag.String(auth.CredshelperArgsFlag, "", "Arguments for the experimental credentials helper, separated by space.")
    84  )
    85  
    86  func main() {
    87  	defer log.Flush()
    88  	flag.Var((*moreflag.StringListValue)(&proxyLogDir), "proxy_log_dir", "If provided, the directory path to a proxy log file of executed records.")
    89  	rbeflag.Parse()
    90  	rbeflag.LogAllFlags(0)
    91  	log.Flush()
    92  	version.PrintAndExitOnVersionFlag(true)
    93  
    94  	httpProxy := os.Getenv("RBE_HTTP_PROXY")
    95  	if httpProxy != "" {
    96  		os.Setenv("http_proxy", httpProxy)
    97  		os.Setenv("https_proxy", httpProxy)
    98  	}
    99  
   100  	if *logHTTPCalls {
   101  		loghttp.Register()
   102  	}
   103  
   104  	if !*fastLogCollection && *asyncReproxyShutdown {
   105  		*asyncReproxyShutdown = false
   106  		log.Info("--async_reproxy_termination=true is not compatible with --fast_log_collection=false, falling back to synchronous shutdown.")
   107  	}
   108  
   109  	if *outputDir != "" {
   110  		// Check for existence of output directory, and create if needed.
   111  		// Build stats and reproxy_outerr are stored in this directory.
   112  		if _, err := os.Stat(*outputDir); err != nil {
   113  			if !os.IsNotExist(err) {
   114  				log.Exitf("Failed checking for output directory: %v", err)
   115  			}
   116  			os.MkdirAll(*outputDir, 0777)
   117  		}
   118  	}
   119  
   120  	if f := flag.Lookup("log_dir"); f != nil && f.Value.String() != "" {
   121  		logDir = f.Value.String()
   122  		// Check for logging directory and create it if it doesn't exist.
   123  		// Logs from glog are stored in this directory.
   124  		if _, err := os.Stat(logDir); err != nil {
   125  			if !os.IsNotExist(err) {
   126  				log.Exitf("Failed checking for logging output directory: %v", err)
   127  			}
   128  			os.MkdirAll(logDir, 0777)
   129  		}
   130  	}
   131  
   132  	cf, err := credsFilePath()
   133  	if err != nil {
   134  		log.Exitf("Failed to determine the token cache file name: %v", err)
   135  	}
   136  	var creds *auth.Credentials
   137  	if !*remoteDisabled {
   138  		creds = newCreds(cf)
   139  		status, err := creds.UpdateStatus()
   140  		if err != nil {
   141  			log.Errorf("Error obtaining credentials: %v", err)
   142  			os.Exit(status)
   143  		}
   144  		creds.SaveToDisk()
   145  	}
   146  
   147  	if *shutdown {
   148  		spi := &lpb.ProxyInfo{
   149  			EventTimes: map[string]*cpb.TimeInterval{},
   150  			Metrics:    map[string]*lpb.Metric{},
   151  		}
   152  		s, err := shutdownReproxy()
   153  		if err != nil {
   154  			log.Warningf("Error shutting down reproxy: %v", err)
   155  		}
   156  		if *outputDir == "" {
   157  			log.Fatal("Must provide an output directory.")
   158  		}
   159  		// Fallback on reading the rpl file if no stats are returned from reproxy
   160  		if !*fastLogCollection || s == nil {
   161  			if *logPath == "" && len(proxyLogDir) == 0 {
   162  				return
   163  			}
   164  			log.V(3).Infof("Loading rpl file to generate stats proto")
   165  			recs, pInfos := parseLogs()
   166  			start := time.Now()
   167  			s = stats.NewFromRecords(recs, pInfos).ToProto()
   168  			spi.EventTimes[event.PostBuildAggregateRpl] = command.TimeIntervalToProto(&command.TimeInterval{From: start, To: time.Now()})
   169  		}
   170  		down, up := stats.BandwidthStats(s)
   171  		fmt.Printf("RBE Stats: down %v, up %v, %v\n", down, up, stats.CompletionStats(s))
   172  		spi.EventTimes[event.BootstrapShutdown] = command.TimeIntervalToProto(&command.TimeInterval{
   173  			From: bootstrapStart,
   174  			To:   time.Now(),
   175  		})
   176  		s.ProxyInfo = append(s.ProxyInfo, spi)
   177  		s.FatalExit = fatalLogsExist(logDir)
   178  		log.Infof("Writing stats to %v", *outputDir)
   179  		if err := stats.WriteStats(s, *outputDir); err != nil {
   180  			log.Errorf("WriteStats(%s) failed: %v", *outputDir, err)
   181  		} else {
   182  			log.Infof("Stats dumped successfully.")
   183  		}
   184  		if *metricsProject == "" {
   185  			return
   186  		}
   187  
   188  		var uploaderArgs []string
   189  		if cfg := flag.Lookup("cfg"); cfg != nil {
   190  			if cfg.Value.String() != "" {
   191  				uploaderArgs = append(uploaderArgs, "--cfg="+cfg.Value.String())
   192  			}
   193  		}
   194  		if ts := creds.TokenSource(); ts != nil {
   195  			if t, err := ts.Token(); err == nil {
   196  				uploaderArgs = append(uploaderArgs, "--oauth_token="+t.AccessToken, "--use_external_auth_token=true")
   197  			}
   198  		}
   199  
   200  		log.V(2).Infof("Running %v %v", *metricsUploader, uploaderArgs)
   201  
   202  		uploaderCmd := exec.Command(*metricsUploader, uploaderArgs...)
   203  		err = uploaderCmd.Start()
   204  		if err != nil {
   205  			log.Warningf("Failed to start metrics uploader with command line %v %v: %v", *metricsUploader, uploaderArgs, err)
   206  		}
   207  		log.Infof("Stats uploader started successfully")
   208  		log.V(2).Infof("Stats uploader pid: %d", uploaderCmd.Process.Pid)
   209  		return
   210  	}
   211  
   212  	cleanFatalLogs(logDir)
   213  
   214  	args := []string{}
   215  	if cfg := flag.Lookup("cfg"); cfg != nil {
   216  		if cfg.Value.String() != "" {
   217  			args = append(args, "--cfg="+cfg.Value.String())
   218  		}
   219  	}
   220  	args = append(args, "--creds_file="+cf)
   221  
   222  	if *fastLogCollection {
   223  		args = append(args, "--wait_for_shutdown_rpc=true")
   224  	}
   225  
   226  	log.V(3).Infof("Trying to authenticate with %s", creds.Mechanism().String())
   227  	currArgs := args[:]
   228  	if *experimentalCredentialsHelper != "" {
   229  		currArgs = append(currArgs, "--use_external_auth_token=true")
   230  	}
   231  	msg, exitCode := bootstrapReproxy(currArgs, bootstrapStart)
   232  	if exitCode == 0 {
   233  		fmt.Println(msg)
   234  	} else {
   235  		fmt.Fprintf(os.Stderr, "\nReproxy failed to start:%s\nCredentials cache file was deleted. Please try again. If this continues to fail, please file a bug.\n", msg)
   236  		creds.RemoveFromDisk()
   237  	}
   238  	log.Flush()
   239  	os.Exit(exitCode)
   240  }
   241  
   242  var failureFiles = []string{"reproxy.FATAL", "bootstrap.FATAL", "rewrapper.FATAL", "reproxy.exe.FATAL", "bootstrap.exe.FATAL", "rewrapper.exe.FATAL"}
   243  
   244  // cleanLogDir removes stray log files which may cause confusion when bootstrap starts
   245  func cleanFatalLogs(logDir string) {
   246  	for _, f := range failureFiles {
   247  		fp := filepath.Join(logDir, f)
   248  		if err := os.Remove(fp); err != nil && !os.IsNotExist(err) {
   249  			log.Errorf("Failed to remove %v: %v", fp, err)
   250  		}
   251  	}
   252  }
   253  
   254  // fatalLogsExist returns true if any *.FATAL log file exists in
   255  func fatalLogsExist(logDir string) bool {
   256  	for _, f := range failureFiles {
   257  		s, err := os.Stat(filepath.Join(logDir, f))
   258  		if err != nil {
   259  			continue
   260  		}
   261  		if s.Size() > 0 {
   262  			return true
   263  		}
   264  	}
   265  	return false
   266  }
   267  
   268  func createTempRbeMetricsFile(s *spb.Stats) (string, error) {
   269  	temp, err := os.CreateTemp("", "rbe_metrics_*.pb")
   270  	if err != nil {
   271  		return "", err
   272  	}
   273  	defer temp.Close()
   274  	blob, err := proto.Marshal(s)
   275  	if err != nil {
   276  		return "", err
   277  	}
   278  	_, err = temp.Write(blob)
   279  	if err != nil {
   280  		return "", err
   281  	}
   282  	return temp.Name(), nil
   283  }
   284  
   285  func defaultMetricsUploader() string {
   286  	metricsUploader, err := pathtranslator.BinaryRelToAbs("metricsuploader")
   287  	if err != nil {
   288  		log.Warningf("Did not find `metricsuploader` binary in the same directory as `bootstrap`: %v", err)
   289  		return ""
   290  	}
   291  	return metricsUploader
   292  }
   293  
   294  func shutdownReproxy() (*spb.Stats, error) {
   295  	if *asyncReproxyShutdown {
   296  		// On shutdown we may not want to wait for deps cache to finish writing
   297  		// if we get valid stats back in the RPC
   298  		return bootstrap.ShutdownProxyAsync(*serverAddr, *shutdownSeconds)
   299  	}
   300  	s, err := bootstrap.ShutDownProxy(*serverAddr, *shutdownSeconds)
   301  	if err == nil {
   302  		// Log this only here as ShutdownProxyAsync does not guarentee that reproxy is shutdown
   303  		log.Infof("Reproxy shut down successfully")
   304  	}
   305  	return s, err
   306  }
   307  
   308  func bootstrapReproxy(args []string, startTime time.Time) (string, int) {
   309  	if err := bootstrap.StartProxyWithOutput(context.Background(), *serverAddr, *reProxy, *outputDir, *waitSeconds, *shutdownSeconds, startTime, args...); err != nil {
   310  		defaultErr := fmt.Sprintf("Error bootstrapping remote execution proxy: %v", err)
   311  		exiterr, ok := err.(*exec.ExitError)
   312  		if !ok {
   313  			reproxyExecutionError := fmt.Sprintf(
   314  				"%s. \n\033[31m Unable to execute the reproxy binary at %v.\033[0m\n",
   315  				defaultErr, *reProxy,
   316  			)
   317  			log.Exitf(reproxyExecutionError)
   318  		}
   319  
   320  		status, ok := exiterr.Sys().(syscall.WaitStatus)
   321  		if !ok {
   322  			log.Exitf(defaultErr)
   323  		}
   324  		return defaultErr, status.ExitStatus()
   325  	}
   326  	return "Proxy started successfully.", 0
   327  }
   328  
   329  func credsFilePath() (string, error) {
   330  	dir := os.TempDir()
   331  	if *cacheDir != "" {
   332  		dir = *cacheDir
   333  	}
   334  	cf := filepath.Join(dir, "reproxy.creds")
   335  	err := os.MkdirAll(filepath.Dir(cf), 0755)
   336  	if err != nil {
   337  		return "", fmt.Errorf("failed to create dir for credentials file %q: %v", cf, err)
   338  	}
   339  	return cf, nil
   340  }
   341  
   342  func newCreds(cf string) *auth.Credentials {
   343  	if *experimentalCredentialsHelper != "" {
   344  		creds, err := auth.NewExternalCredentials(*experimentalCredentialsHelper, strings.Fields(*experimentalCredentialsHelperArgs), cf)
   345  		if err != nil {
   346  			fmt.Fprintf(os.Stderr, "Experimental credentials helper failed. Please try again or use application default credentials:%v", err)
   347  			os.Exit(auth.ExitCodeExternalTokenAuth)
   348  		}
   349  		return creds
   350  	}
   351  	m, err := auth.MechanismFromFlags()
   352  	if err != nil || m == auth.Unknown {
   353  		log.Errorf("Failed to determine auth mechanism: %v", err)
   354  		os.Exit(auth.ExitCodeNoAuth)
   355  	}
   356  	c, err := auth.NewCredentials(m, cf, 0)
   357  	if err != nil {
   358  		log.Exitf("Failed to initialize credentials: %v", err)
   359  	}
   360  	return c
   361  }
   362  
   363  func parseLogs() ([]*lpb.LogRecord, []*lpb.ProxyInfo) {
   364  	var recs []*lpb.LogRecord
   365  	var pInfos []*lpb.ProxyInfo
   366  	var err error
   367  	if len(proxyLogDir) > 0 {
   368  		log.Infof("Parsing logs from %v...", proxyLogDir)
   369  		format, err := logger.ParseFormat(*logFormat)
   370  		if err != nil {
   371  			log.Errorf("ParseFormat(%v) failed: %v", *logFormat, err)
   372  		} else if recs, pInfos, err = logger.ParseFromLogDirs(format, proxyLogDir); err != nil {
   373  			log.Errorf("ParseFromLogDirs failed: %v", err)
   374  		}
   375  	} else {
   376  		log.Infof("Parsing logs from %v...", *logPath)
   377  		if recs, err = logger.ParseFromFormatFile(*logPath); err != nil {
   378  			log.Errorf("ParseFromFormatFile(%s) failed: %v", *logPath, err)
   379  		}
   380  	}
   381  	return recs, pInfos
   382  }
   383  
   384  func reproxyDefaultPath() string {
   385  	reproxyPath, err := pathtranslator.BinaryRelToAbs("reproxy")
   386  	if err != nil {
   387  		log.Warningf("Did not find `reproxy` binary in the same directory as `bootstrap`: %v", err)
   388  		return ""
   389  	}
   390  	return reproxyPath
   391  }