github.com/adevinta/lava@v0.7.2/internal/engine/engine.go (about)

     1  // Copyright 2023 Adevinta
     2  
     3  // Package engine runs Vulcan checks and retrieves the generated
     4  // reports.
     5  package engine
     6  
     7  import (
     8  	"context"
     9  	"errors"
    10  	"fmt"
    11  	"log/slog"
    12  	"net"
    13  	"strings"
    14  	"time"
    15  
    16  	"github.com/adevinta/vulcan-agent/agent"
    17  	"github.com/adevinta/vulcan-agent/backend"
    18  	"github.com/adevinta/vulcan-agent/backend/docker"
    19  	agentconfig "github.com/adevinta/vulcan-agent/config"
    20  	"github.com/adevinta/vulcan-agent/jobrunner"
    21  	"github.com/adevinta/vulcan-agent/queue"
    22  	"github.com/adevinta/vulcan-agent/queue/chanqueue"
    23  	report "github.com/adevinta/vulcan-report"
    24  	types "github.com/adevinta/vulcan-types"
    25  
    26  	"github.com/adevinta/lava/internal/assettypes"
    27  	"github.com/adevinta/lava/internal/checktypes"
    28  	"github.com/adevinta/lava/internal/config"
    29  	"github.com/adevinta/lava/internal/containers"
    30  	"github.com/adevinta/lava/internal/metrics"
    31  )
    32  
// Report is a collection of reports returned by Vulcan checks and
// indexed by check ID. Each value is the report generated by the
// check whose ID is used as key.
type Report map[string]report.Report
    36  
// Engine represents a Lava engine able to run Vulcan checks and
// retrieve the generated reports. Engines are created with [New] or
// [NewWithCatalog] and must be released with [Engine.Close].
type Engine struct {
	// cli is the dockerd client. It provides the host gateway
	// details and the daemon host, and it is closed by Close.
	cli containers.DockerdClient
	// catalog is the checktype catalog used to generate the jobs
	// of a scan.
	catalog checktypes.Catalog
	// cfg is the configuration handed to the Docker backend and
	// to the Vulcan agent.
	cfg agentconfig.Config
	// runtime is the container runtime read from the environment.
	// It is used to create the target server.
	runtime containers.Runtime
}
    45  
    46  // New returns a new [Engine]. It retrieves and merges the checktype
    47  // catalogs from the provided checktype URLs to generate the catalog
    48  // that will be used to configure the scans.
    49  func New(cfg config.AgentConfig, checktypeURLs []string) (eng Engine, err error) {
    50  	catalog, err := checktypes.NewCatalog(checktypeURLs)
    51  	if err != nil {
    52  		return Engine{}, fmt.Errorf("get checkype catalog: %w", err)
    53  	}
    54  	return NewWithCatalog(cfg, catalog)
    55  }
    56  
    57  // NewWithCatalog returns a new [Engine] from a provided agent
    58  // configuration and checktype catalog.
    59  func NewWithCatalog(cfg config.AgentConfig, catalog checktypes.Catalog) (eng Engine, err error) {
    60  	metrics.Collect("checktypes", catalog)
    61  
    62  	rt, err := containers.GetenvRuntime()
    63  	if err != nil {
    64  		return Engine{}, fmt.Errorf("get env runtime: %w", err)
    65  	}
    66  
    67  	cli, err := containers.NewDockerdClient(rt)
    68  	if err != nil {
    69  		return Engine{}, fmt.Errorf("new dockerd client: %w", err)
    70  	}
    71  
    72  	agentCfg, err := newAgentConfig(cli, cfg)
    73  	if err != nil {
    74  		return Engine{}, fmt.Errorf("get agent config: %w", err)
    75  	}
    76  
    77  	eng = Engine{
    78  		cli:     cli,
    79  		catalog: catalog,
    80  		cfg:     agentCfg,
    81  		runtime: rt,
    82  	}
    83  	return eng, nil
    84  }
    85  
    86  // newAgentConfig creates a new [agentconfig.Config] based on the
    87  // provided Vulcan agent configuration.
    88  func newAgentConfig(cli containers.DockerdClient, cfg config.AgentConfig) (agentconfig.Config, error) {
    89  	listenHost, err := cli.HostGatewayInterfaceAddr()
    90  	if err != nil {
    91  		return agentconfig.Config{}, fmt.Errorf("get gateway interface address: %w", err)
    92  	}
    93  
    94  	parallel := cfg.Parallel
    95  	if parallel == 0 {
    96  		parallel = 1
    97  	}
    98  
    99  	ln, err := net.Listen("tcp", net.JoinHostPort(listenHost, "0"))
   100  	if err != nil {
   101  		return agentconfig.Config{}, fmt.Errorf("listen: %w", err)
   102  	}
   103  
   104  	auths := []agentconfig.Auth{}
   105  	for _, r := range cfg.RegistryAuths {
   106  		auths = append(auths, agentconfig.Auth{
   107  			Server: r.Server,
   108  			User:   r.Username,
   109  			Pass:   r.Password,
   110  		})
   111  	}
   112  
   113  	acfg := agentconfig.Config{
   114  		Agent: agentconfig.AgentConfig{
   115  			ConcurrentJobs:         parallel,
   116  			MaxNoMsgsInterval:      5,   // Low as all the messages will be in the queue before starting the agent.
   117  			MaxProcessMessageTimes: 1,   // No retry.
   118  			Timeout:                180, // Default timeout of 3 minutes.
   119  		},
   120  		API: agentconfig.APIConfig{
   121  			Host:     cli.HostGatewayHostname(),
   122  			Listener: ln,
   123  		},
   124  		Check: agentconfig.CheckConfig{
   125  			Vars: cfg.Vars,
   126  		},
   127  		Runtime: agentconfig.RuntimeConfig{
   128  			Docker: agentconfig.DockerConfig{
   129  				Registry: agentconfig.RegistryConfig{
   130  					PullPolicy:          cfg.PullPolicy,
   131  					BackoffMaxRetries:   5,
   132  					BackoffInterval:     5,
   133  					BackoffJitterFactor: 0.5,
   134  					Auths:               auths,
   135  				},
   136  			},
   137  		},
   138  	}
   139  	return acfg, nil
   140  }
   141  
   142  // Close releases the internal resources used by the Lava engine.
   143  func (eng Engine) Close() error {
   144  	if err := eng.cli.Close(); err != nil {
   145  		return fmt.Errorf("close dockerd client: %w", err)
   146  	}
   147  	return nil
   148  }
   149  
   150  // Run runs vulcan checks and returns the generated report. Before
   151  // running the scan, it checks that all the provided targets are
   152  // reachable and returns an error if any of them is not. The check
   153  // list is based on the configured checktype catalogs and the provided
   154  // targets. These checks are run by a Vulcan agent, which is
   155  // configured using the specified configuration.
   156  func (eng Engine) Run(targets []config.Target) (Report, error) {
   157  	for _, t := range targets {
   158  		err := assettypes.CheckReachable(t.AssetType, t.Identifier)
   159  		if err != nil && !errors.Is(err, assettypes.ErrUnsupported) {
   160  			return nil, fmt.Errorf("unreachable target: %v: %w", t, err)
   161  		}
   162  	}
   163  
   164  	jobs, err := generateJobs(eng.catalog, targets)
   165  	if err != nil {
   166  		return nil, fmt.Errorf("generate jobs: %w", err)
   167  	}
   168  
   169  	if len(jobs) == 0 {
   170  		return nil, nil
   171  	}
   172  
   173  	return eng.runAgent(jobs)
   174  }
   175  
// summaryInterval is the time between summary logs written while
// the agent is running checks.
const summaryInterval = 15 * time.Second
   178  
   179  // runAgent creates a Vulcan agent using the configured Vulcan agent
   180  // config and uses it to run the provided jobs.
   181  func (eng Engine) runAgent(jobs []jobrunner.Job) (Report, error) {
   182  	srv, err := newTargetServer(eng.runtime)
   183  	if err != nil {
   184  		return nil, fmt.Errorf("new target server: %w", err)
   185  	}
   186  	defer srv.Close()
   187  
   188  	alogger := newAgentLogger(slog.Default())
   189  
   190  	br := func(params backend.RunParams, rc *docker.RunConfig) error {
   191  		return eng.beforeRun(params, rc, srv)
   192  	}
   193  
   194  	backend, err := docker.NewBackend(alogger, eng.cfg, br)
   195  	if err != nil {
   196  		return nil, fmt.Errorf("new Docker backend: %w", err)
   197  	}
   198  
   199  	// Create a state queue and discard all messages.
   200  	stateQueue := chanqueue.New(queue.Discard())
   201  	stateQueue.StartReading(context.Background())
   202  
   203  	jobsQueue := chanqueue.New(nil)
   204  	if err := sendJobs(jobs, jobsQueue); err != nil {
   205  		return nil, fmt.Errorf("send jobs: %w", err)
   206  	}
   207  
   208  	rs := &reportStore{}
   209  
   210  	done := make(chan bool)
   211  	go func() {
   212  		for {
   213  			select {
   214  			case <-done:
   215  				return
   216  			case <-time.After(summaryInterval):
   217  				sums := rs.Summary()
   218  				if len(sums) == 0 {
   219  					slog.Info("waiting for updates")
   220  					break
   221  				}
   222  				for _, s := range sums {
   223  					slog.Info(s)
   224  				}
   225  			}
   226  		}
   227  	}()
   228  
   229  	exitCode := agent.RunWithQueues(eng.cfg, rs, backend, stateQueue, jobsQueue, alogger)
   230  	if exitCode != 0 {
   231  		return nil, fmt.Errorf("run agent: exit code %v", exitCode)
   232  	}
   233  
   234  	done <- true
   235  
   236  	return eng.mkReport(srv, rs), nil
   237  }
   238  
   239  // mkReport generates a report from the information stored in the
   240  // provided [reportStore]. It uses the specified [targetServer] to
   241  // replace the targets sent to the checks with the original targets.
   242  func (eng Engine) mkReport(srv *targetServer, rs *reportStore) Report {
   243  	rep := make(Report)
   244  	for checkID, r := range rs.Reports() {
   245  		tm, ok := srv.TargetMap(checkID)
   246  		if !ok {
   247  			rep[checkID] = r
   248  			continue
   249  		}
   250  
   251  		tmAddrs := tm.Addrs()
   252  
   253  		slog.Info("applying target map", "check", checkID, "tm", tm, "tmAddr", tmAddrs)
   254  
   255  		r.Target = tm.OldIdentifier
   256  
   257  		var vulns []report.Vulnerability
   258  		for _, vuln := range r.Vulnerabilities {
   259  			vuln = vulnReplaceAll(vuln, tm.NewIdentifier, tm.OldIdentifier)
   260  			vuln = vulnReplaceAll(vuln, tmAddrs.NewIdentifier, tmAddrs.OldIdentifier)
   261  			vulns = append(vulns, vuln)
   262  		}
   263  		r.Vulnerabilities = vulns
   264  
   265  		rep[checkID] = r
   266  	}
   267  	return rep
   268  }
   269  
   270  // vulnReplaceAll returns a copy of the vulnerability vuln with all
   271  // non-overlapping instances of old replaced by new.
   272  func vulnReplaceAll(vuln report.Vulnerability, old, new string) report.Vulnerability {
   273  	vuln.Summary = strings.ReplaceAll(vuln.Summary, old, new)
   274  	vuln.AffectedResource = strings.ReplaceAll(vuln.AffectedResource, old, new)
   275  	vuln.AffectedResourceString = strings.ReplaceAll(vuln.AffectedResourceString, old, new)
   276  	vuln.Description = strings.ReplaceAll(vuln.Description, old, new)
   277  	vuln.Details = strings.ReplaceAll(vuln.Details, old, new)
   278  	vuln.ImpactDetails = strings.ReplaceAll(vuln.ImpactDetails, old, new)
   279  
   280  	var labels []string
   281  	for _, label := range vuln.Labels {
   282  		labels = append(labels, strings.ReplaceAll(label, old, new))
   283  	}
   284  	vuln.Labels = labels
   285  
   286  	var recs []string
   287  	for _, rec := range vuln.Recommendations {
   288  		recs = append(recs, strings.ReplaceAll(rec, old, new))
   289  	}
   290  	vuln.Recommendations = recs
   291  
   292  	var refs []string
   293  	for _, ref := range vuln.References {
   294  		refs = append(refs, strings.ReplaceAll(ref, old, new))
   295  	}
   296  	vuln.References = refs
   297  
   298  	var rscs []report.ResourcesGroup
   299  	for _, rsc := range vuln.Resources {
   300  		rscs = append(rscs, rscReplaceAll(rsc, old, new))
   301  	}
   302  	vuln.Resources = rscs
   303  
   304  	var vulns []report.Vulnerability
   305  	for _, vuln := range vuln.Vulnerabilities {
   306  		vulns = append(vulns, vulnReplaceAll(vuln, old, new))
   307  	}
   308  	vuln.Vulnerabilities = vulns
   309  
   310  	return vuln
   311  }
   312  
   313  // rscReplaceAll returns a copy of the resource group rsc with all
   314  // non-overlapping instances of old replaced by new.
   315  func rscReplaceAll(rsc report.ResourcesGroup, old, new string) report.ResourcesGroup {
   316  	rsc.Name = strings.ReplaceAll(rsc.Name, old, new)
   317  
   318  	var hdrs []string
   319  	for _, hdr := range rsc.Header {
   320  		hdrs = append(hdrs, strings.ReplaceAll(hdr, old, new))
   321  	}
   322  	rsc.Header = hdrs
   323  
   324  	var rows []map[string]string
   325  	for _, r := range rsc.Rows {
   326  		row := make(map[string]string)
   327  		for k, v := range r {
   328  			k = strings.ReplaceAll(k, old, new)
   329  			v = strings.ReplaceAll(v, old, new)
   330  			row[k] = v
   331  		}
   332  		rows = append(rows, row)
   333  	}
   334  	rsc.Rows = rows
   335  
   336  	return rsc
   337  }
   338  
   339  // beforeRun is called by the agent before creating each check
   340  // container.
   341  func (eng Engine) beforeRun(params backend.RunParams, rc *docker.RunConfig, srv *targetServer) error {
   342  	// Register a host pointing to the host gateway.
   343  	if gwmap := eng.cli.HostGatewayMapping(); gwmap != "" {
   344  		rc.HostConfig.ExtraHosts = []string{gwmap}
   345  	}
   346  
   347  	// Allow all checks to scan local assets.
   348  	rc.ContainerConfig.Env = setenv(rc.ContainerConfig.Env, "VULCAN_ALLOW_PRIVATE_IPS", "true")
   349  
   350  	if params.AssetType == string(types.DockerImage) {
   351  		// Due to how reachability is defined by the Vulcan
   352  		// check SDK, local Docker images would be identified
   353  		// as unreachable. So, we disable reachability checks
   354  		// for this type of assets.
   355  		rc.ContainerConfig.Env = setenv(rc.ContainerConfig.Env, "VULCAN_SKIP_REACHABILITY", "true")
   356  
   357  		// Tools like trivy require access to the Docker
   358  		// daemon to scan local Docker images. So, we share
   359  		// the Docker socket with them.
   360  		dockerHost := eng.cli.DaemonHost()
   361  
   362  		// Remote Docker daemons are not supported.
   363  		if dockerVol, found := strings.CutPrefix(dockerHost, "unix://"); found {
   364  			rc.HostConfig.Binds = append(rc.HostConfig.Binds, dockerVol+":/var/run/docker.sock")
   365  		}
   366  	}
   367  
   368  	// Proxy local targets and serve Git repositories.
   369  	target := config.Target{
   370  		Identifier: params.Target,
   371  		AssetType:  types.AssetType(params.AssetType),
   372  	}
   373  	tm, err := srv.Handle(params.CheckID, target)
   374  	if err != nil {
   375  		return fmt.Errorf("handle target: %w", err)
   376  	}
   377  	if !tm.IsZero() {
   378  		rc.ContainerConfig.Env = setenv(rc.ContainerConfig.Env, "VULCAN_CHECK_TARGET", tm.NewIdentifier)
   379  		rc.ContainerConfig.Env = setenv(rc.ContainerConfig.Env, "VULCAN_CHECK_ASSET_TYPE", string(tm.NewAssetType))
   380  	}
   381  
   382  	return nil
   383  }
   384  
   385  // setenv sets the value of the variable named by the key in the
   386  // provided environment. An environment consists on a slice of strings
   387  // with the format "key=value".
   388  func setenv(env []string, key, value string) []string {
   389  	for i, ev := range env {
   390  		if strings.HasPrefix(ev, key+"=") {
   391  			env[i] = fmt.Sprintf("%s=%s", key, value)
   392  			return env
   393  		}
   394  	}
   395  	return append(env, fmt.Sprintf("%s=%s", key, value))
   396  }