github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/fanal/artifact/image/image.go (about)

     1  package image
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"io"
     7  	"os"
     8  	"reflect"
     9  	"strings"
    10  	"sync"
    11  
    12  	v1 "github.com/google/go-containerregistry/pkg/v1"
    13  	"github.com/samber/lo"
    14  	"golang.org/x/exp/slices"
    15  	"golang.org/x/xerrors"
    16  
    17  	"github.com/devseccon/trivy/pkg/fanal/analyzer"
    18  	"github.com/devseccon/trivy/pkg/fanal/artifact"
    19  	"github.com/devseccon/trivy/pkg/fanal/cache"
    20  	"github.com/devseccon/trivy/pkg/fanal/handler"
    21  	"github.com/devseccon/trivy/pkg/fanal/image"
    22  	"github.com/devseccon/trivy/pkg/fanal/log"
    23  	"github.com/devseccon/trivy/pkg/fanal/types"
    24  	"github.com/devseccon/trivy/pkg/fanal/walker"
    25  	"github.com/devseccon/trivy/pkg/parallel"
    26  	"github.com/devseccon/trivy/pkg/semaphore"
    27  )
    28  
    29  type Artifact struct {
    30  	image          types.Image
    31  	cache          cache.ArtifactCache
    32  	walker         walker.LayerTar
    33  	analyzer       analyzer.AnalyzerGroup       // analyzer for files in container image
    34  	configAnalyzer analyzer.ConfigAnalyzerGroup // analyzer for container image config
    35  	handlerManager handler.Manager
    36  
    37  	artifactOption artifact.Option
    38  }
    39  
    40  type LayerInfo struct {
    41  	DiffID    string
    42  	CreatedBy string // can be empty
    43  }
    44  
    45  func NewArtifact(img types.Image, c cache.ArtifactCache, opt artifact.Option) (artifact.Artifact, error) {
    46  	// Initialize handlers
    47  	handlerManager, err := handler.NewManager(opt)
    48  	if err != nil {
    49  		return nil, xerrors.Errorf("handler init error: %w", err)
    50  	}
    51  
    52  	a, err := analyzer.NewAnalyzerGroup(opt.AnalyzerOptions())
    53  	if err != nil {
    54  		return nil, xerrors.Errorf("analyzer group error: %w", err)
    55  	}
    56  
    57  	ca, err := analyzer.NewConfigAnalyzerGroup(opt.ConfigAnalyzerOptions())
    58  	if err != nil {
    59  		return nil, xerrors.Errorf("config analyzer group error: %w", err)
    60  	}
    61  
    62  	return Artifact{
    63  		image:          img,
    64  		cache:          c,
    65  		walker:         walker.NewLayerTar(opt.SkipFiles, opt.SkipDirs),
    66  		analyzer:       a,
    67  		configAnalyzer: ca,
    68  		handlerManager: handlerManager,
    69  
    70  		artifactOption: opt,
    71  	}, nil
    72  }
    73  
    74  func (a Artifact) Inspect(ctx context.Context) (types.ArtifactReference, error) {
    75  	imageID, err := a.image.ID()
    76  	if err != nil {
    77  		return types.ArtifactReference{}, xerrors.Errorf("unable to get the image ID: %w", err)
    78  	}
    79  
    80  	configFile, err := a.image.ConfigFile()
    81  	if err != nil {
    82  		return types.ArtifactReference{}, xerrors.Errorf("unable to get the image's config file: %w", err)
    83  	}
    84  
    85  	diffIDs := a.diffIDs(configFile)
    86  
    87  	// Debug
    88  	log.Logger.Debugf("Image ID: %s", imageID)
    89  	log.Logger.Debugf("Diff IDs: %v", diffIDs)
    90  
    91  	// Try retrieving a remote SBOM document
    92  	if res, err := a.retrieveRemoteSBOM(ctx); err == nil {
    93  		// Found SBOM
    94  		return res, nil
    95  	} else if !errors.Is(err, errNoSBOMFound) {
    96  		// Fail on unexpected error, otherwise it falls into the usual scanning.
    97  		return types.ArtifactReference{}, xerrors.Errorf("remote SBOM fetching error: %w", err)
    98  	}
    99  
   100  	// Try to detect base layers.
   101  	baseDiffIDs := a.guessBaseLayers(diffIDs, configFile)
   102  	log.Logger.Debugf("Base Layers: %v", baseDiffIDs)
   103  
   104  	// Convert image ID and layer IDs to cache keys
   105  	imageKey, layerKeys, err := a.calcCacheKeys(imageID, diffIDs)
   106  	if err != nil {
   107  		return types.ArtifactReference{}, err
   108  	}
   109  
   110  	// Parse histories and extract a list of "created_by"
   111  	layerKeyMap := a.consolidateCreatedBy(diffIDs, layerKeys, configFile)
   112  
   113  	missingImage, missingLayers, err := a.cache.MissingBlobs(imageKey, layerKeys)
   114  	if err != nil {
   115  		return types.ArtifactReference{}, xerrors.Errorf("unable to get missing layers: %w", err)
   116  	}
   117  
   118  	missingImageKey := imageKey
   119  	if missingImage {
   120  		log.Logger.Debugf("Missing image ID in cache: %s", imageID)
   121  	} else {
   122  		missingImageKey = ""
   123  	}
   124  
   125  	if err = a.inspect(ctx, missingImageKey, missingLayers, baseDiffIDs, layerKeyMap, configFile); err != nil {
   126  		return types.ArtifactReference{}, xerrors.Errorf("analyze error: %w", err)
   127  	}
   128  
   129  	return types.ArtifactReference{
   130  		Name:    a.image.Name(),
   131  		Type:    types.ArtifactContainerImage,
   132  		ID:      imageKey,
   133  		BlobIDs: layerKeys,
   134  		ImageMetadata: types.ImageMetadata{
   135  			ID:          imageID,
   136  			DiffIDs:     diffIDs,
   137  			RepoTags:    a.image.RepoTags(),
   138  			RepoDigests: a.image.RepoDigests(),
   139  			ConfigFile:  *configFile,
   140  		},
   141  	}, nil
   142  }
   143  
   144  func (Artifact) Clean(_ types.ArtifactReference) error {
   145  	return nil
   146  }
   147  
   148  func (a Artifact) calcCacheKeys(imageID string, diffIDs []string) (string, []string, error) {
   149  	// Pass an empty config scanner option so that the cache key can be the same, even when policies are updated.
   150  	imageKey, err := cache.CalcKey(imageID, a.configAnalyzer.AnalyzerVersions(), nil, artifact.Option{})
   151  	if err != nil {
   152  		return "", nil, err
   153  	}
   154  
   155  	hookVersions := a.handlerManager.Versions()
   156  	var layerKeys []string
   157  	for _, diffID := range diffIDs {
   158  		blobKey, err := cache.CalcKey(diffID, a.analyzer.AnalyzerVersions(), hookVersions, a.artifactOption)
   159  		if err != nil {
   160  			return "", nil, err
   161  		}
   162  		layerKeys = append(layerKeys, blobKey)
   163  	}
   164  	return imageKey, layerKeys, nil
   165  }
   166  
   167  func (a Artifact) consolidateCreatedBy(diffIDs, layerKeys []string, configFile *v1.ConfigFile) map[string]LayerInfo {
   168  	// save createdBy fields in order of layers
   169  	var createdBy []string
   170  	for _, h := range configFile.History {
   171  		// skip histories for empty layers
   172  		if h.EmptyLayer {
   173  			continue
   174  		}
   175  		c := strings.TrimPrefix(h.CreatedBy, "/bin/sh -c ")
   176  		c = strings.TrimPrefix(c, "#(nop) ")
   177  		createdBy = append(createdBy, c)
   178  	}
   179  
   180  	// If history detected incorrect - use only diffID
   181  	// TODO: our current logic may not detect empty layers correctly in rare cases.
   182  	validCreatedBy := len(diffIDs) == len(createdBy)
   183  
   184  	layerKeyMap := make(map[string]LayerInfo)
   185  	for i, diffID := range diffIDs {
   186  
   187  		c := ""
   188  		if validCreatedBy {
   189  			c = createdBy[i]
   190  		}
   191  
   192  		layerKey := layerKeys[i]
   193  		layerKeyMap[layerKey] = LayerInfo{
   194  			DiffID:    diffID,
   195  			CreatedBy: c,
   196  		}
   197  	}
   198  	return layerKeyMap
   199  }
   200  
   201  func (a Artifact) inspect(ctx context.Context, missingImage string, layerKeys, baseDiffIDs []string,
   202  	layerKeyMap map[string]LayerInfo, configFile *v1.ConfigFile) error {
   203  
   204  	var osFound types.OS
   205  	p := parallel.NewPipeline(a.artifactOption.Parallel, false, layerKeys, func(ctx context.Context, layerKey string) (any, error) {
   206  		layer := layerKeyMap[layerKey]
   207  
   208  		// If it is a base layer, secret scanning should not be performed.
   209  		var disabledAnalyzers []analyzer.Type
   210  		if slices.Contains(baseDiffIDs, layer.DiffID) {
   211  			disabledAnalyzers = append(disabledAnalyzers, analyzer.TypeSecret)
   212  		}
   213  
   214  		layerInfo, err := a.inspectLayer(ctx, layer, disabledAnalyzers)
   215  		if err != nil {
   216  			return nil, xerrors.Errorf("failed to analyze layer (%s): %w", layer.DiffID, err)
   217  		}
   218  		if err = a.cache.PutBlob(layerKey, layerInfo); err != nil {
   219  			return nil, xerrors.Errorf("failed to store layer: %s in cache: %w", layerKey, err)
   220  		}
   221  		if lo.IsNotEmpty(layerInfo.OS) {
   222  			osFound = layerInfo.OS
   223  		}
   224  		return nil, nil
   225  
   226  	}, nil)
   227  
   228  	if err := p.Do(ctx); err != nil {
   229  		return xerrors.Errorf("pipeline error: %w", err)
   230  	}
   231  
   232  	if missingImage != "" {
   233  		if err := a.inspectConfig(ctx, missingImage, osFound, configFile); err != nil {
   234  			return xerrors.Errorf("unable to analyze config: %w", err)
   235  		}
   236  	}
   237  
   238  	return nil
   239  }
   240  
   241  func (a Artifact) inspectLayer(ctx context.Context, layerInfo LayerInfo, disabled []analyzer.Type) (types.BlobInfo, error) {
   242  	log.Logger.Debugf("Missing diff ID in cache: %s", layerInfo.DiffID)
   243  
   244  	layerDigest, rc, err := a.uncompressedLayer(layerInfo.DiffID)
   245  	if err != nil {
   246  		return types.BlobInfo{}, xerrors.Errorf("unable to get uncompressed layer %s: %w", layerInfo.DiffID, err)
   247  	}
   248  	defer rc.Close()
   249  
   250  	// Prepare variables
   251  	var wg sync.WaitGroup
   252  	opts := analyzer.AnalysisOptions{
   253  		Offline:      a.artifactOption.Offline,
   254  		FileChecksum: a.artifactOption.FileChecksum,
   255  	}
   256  	result := analyzer.NewAnalysisResult()
   257  	limit := semaphore.New(a.artifactOption.Parallel)
   258  
   259  	// Prepare filesystem for post analysis
   260  	composite, err := a.analyzer.PostAnalyzerFS()
   261  	if err != nil {
   262  		return types.BlobInfo{}, xerrors.Errorf("unable to get post analysis filesystem: %w", err)
   263  	}
   264  	defer composite.Cleanup()
   265  
   266  	// Walk a tar layer
   267  	opqDirs, whFiles, err := a.walker.Walk(rc, func(filePath string, info os.FileInfo, opener analyzer.Opener) error {
   268  		if err = a.analyzer.AnalyzeFile(ctx, &wg, limit, result, "", filePath, info, opener, disabled, opts); err != nil {
   269  			return xerrors.Errorf("failed to analyze %s: %w", filePath, err)
   270  		}
   271  
   272  		// Skip post analysis if the file is not required
   273  		analyzerTypes := a.analyzer.RequiredPostAnalyzers(filePath, info)
   274  		if len(analyzerTypes) == 0 {
   275  			return nil
   276  		}
   277  
   278  		// Build filesystem for post analysis
   279  		tmpFilePath, err := composite.CopyFileToTemp(opener, info)
   280  		if err != nil {
   281  			return xerrors.Errorf("failed to copy file to temp: %w", err)
   282  		}
   283  		if err = composite.CreateLink(analyzerTypes, "", filePath, tmpFilePath); err != nil {
   284  			return xerrors.Errorf("failed to write a file: %w", err)
   285  		}
   286  
   287  		return nil
   288  	})
   289  	if err != nil {
   290  		return types.BlobInfo{}, xerrors.Errorf("walk error: %w", err)
   291  	}
   292  
   293  	// Wait for all the goroutine to finish.
   294  	wg.Wait()
   295  
   296  	// Post-analysis
   297  	if err = a.analyzer.PostAnalyze(ctx, composite, result, opts); err != nil {
   298  		return types.BlobInfo{}, xerrors.Errorf("post analysis error: %w", err)
   299  	}
   300  
   301  	// Sort the analysis result for consistent results
   302  	result.Sort()
   303  
   304  	blobInfo := types.BlobInfo{
   305  		SchemaVersion:     types.BlobJSONSchemaVersion,
   306  		Digest:            layerDigest,
   307  		DiffID:            layerInfo.DiffID,
   308  		CreatedBy:         layerInfo.CreatedBy,
   309  		OpaqueDirs:        opqDirs,
   310  		WhiteoutFiles:     whFiles,
   311  		OS:                result.OS,
   312  		Repository:        result.Repository,
   313  		PackageInfos:      result.PackageInfos,
   314  		Applications:      result.Applications,
   315  		Misconfigurations: result.Misconfigurations,
   316  		Secrets:           result.Secrets,
   317  		Licenses:          result.Licenses,
   318  		CustomResources:   result.CustomResources,
   319  
   320  		// For Red Hat
   321  		BuildInfo: result.BuildInfo,
   322  	}
   323  
   324  	// Call post handlers to modify blob info
   325  	if err = a.handlerManager.PostHandle(ctx, result, &blobInfo); err != nil {
   326  		return types.BlobInfo{}, xerrors.Errorf("post handler error: %w", err)
   327  	}
   328  
   329  	return blobInfo, nil
   330  }
   331  
   332  func (a Artifact) diffIDs(configFile *v1.ConfigFile) []string {
   333  	if configFile == nil {
   334  		return nil
   335  	}
   336  	return lo.Map(configFile.RootFS.DiffIDs, func(diffID v1.Hash, _ int) string {
   337  		return diffID.String()
   338  	})
   339  }
   340  
   341  func (a Artifact) uncompressedLayer(diffID string) (string, io.ReadCloser, error) {
   342  	// diffID is a hash of the uncompressed layer
   343  	h, err := v1.NewHash(diffID)
   344  	if err != nil {
   345  		return "", nil, xerrors.Errorf("invalid layer ID (%s): %w", diffID, err)
   346  	}
   347  
   348  	layer, err := a.image.LayerByDiffID(h)
   349  	if err != nil {
   350  		return "", nil, xerrors.Errorf("failed to get the layer (%s): %w", diffID, err)
   351  	}
   352  
   353  	// digest is a hash of the compressed layer
   354  	var digest string
   355  	if a.isCompressed(layer) {
   356  		d, err := layer.Digest()
   357  		if err != nil {
   358  			return "", nil, xerrors.Errorf("failed to get the digest (%s): %w", diffID, err)
   359  		}
   360  		digest = d.String()
   361  	}
   362  
   363  	rc, err := layer.Uncompressed()
   364  	if err != nil {
   365  		return "", nil, xerrors.Errorf("failed to get the layer content (%s): %w", diffID, err)
   366  	}
   367  	return digest, rc, nil
   368  }
   369  
   370  // ref. https://github.com/google/go-containerregistry/issues/701
   371  func (a Artifact) isCompressed(l v1.Layer) bool {
   372  	_, uncompressed := reflect.TypeOf(l).Elem().FieldByName("UncompressedLayer")
   373  	return !uncompressed
   374  }
   375  
   376  func (a Artifact) inspectConfig(ctx context.Context, imageID string, osFound types.OS, config *v1.ConfigFile) error {
   377  	result := lo.FromPtr(a.configAnalyzer.AnalyzeImageConfig(ctx, osFound, config))
   378  
   379  	info := types.ArtifactInfo{
   380  		SchemaVersion:    types.ArtifactJSONSchemaVersion,
   381  		Architecture:     config.Architecture,
   382  		Created:          config.Created.Time,
   383  		DockerVersion:    config.DockerVersion,
   384  		OS:               config.OS,
   385  		Misconfiguration: result.Misconfiguration,
   386  		Secret:           result.Secret,
   387  		HistoryPackages:  result.HistoryPackages,
   388  	}
   389  
   390  	if err := a.cache.PutArtifact(imageID, info); err != nil {
   391  		return xerrors.Errorf("failed to put image info into the cache: %w", err)
   392  	}
   393  
   394  	return nil
   395  }
   396  
   397  // guessBaseLayers guesses layers in base image (call base layers).
   398  func (a Artifact) guessBaseLayers(diffIDs []string, configFile *v1.ConfigFile) []string {
   399  	if configFile == nil {
   400  		return nil
   401  	}
   402  
   403  	baseImageIndex := image.GuessBaseImageIndex(configFile.History)
   404  
   405  	// Diff IDs don't include empty layers, so the index is different from histories
   406  	var diffIDIndex int
   407  	var baseDiffIDs []string
   408  	for i, h := range configFile.History {
   409  		// It is no longer base layer.
   410  		if i > baseImageIndex {
   411  			break
   412  		}
   413  		// Empty layers are not included in diff IDs.
   414  		if h.EmptyLayer {
   415  			continue
   416  		}
   417  
   418  		if diffIDIndex >= len(diffIDs) {
   419  			// something wrong...
   420  			return nil
   421  		}
   422  		baseDiffIDs = append(baseDiffIDs, diffIDs[diffIDIndex])
   423  		diffIDIndex++
   424  	}
   425  	return baseDiffIDs
   426  }