github.com/quay/claircore@v1.5.28/rhel/repositoryscanner.go (about)

     1  package rhel
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/json"
     7  	"errors"
     8  	"fmt"
     9  	"io/fs"
    10  	"net/http"
    11  	"net/url"
    12  	"os"
    13  	"path/filepath"
    14  	"runtime/trace"
    15  	"strings"
    16  	"time"
    17  
    18  	"github.com/quay/zlog"
    19  
    20  	"github.com/quay/claircore"
    21  	"github.com/quay/claircore/indexer"
    22  	"github.com/quay/claircore/internal/zreader"
    23  	"github.com/quay/claircore/rhel/dockerfile"
    24  	"github.com/quay/claircore/rhel/internal/common"
    25  	"github.com/quay/claircore/rhel/internal/containerapi"
    26  	"github.com/quay/claircore/toolkit/types/cpe"
    27  )
    28  
/*
RepositoryScanner implements repository detection logic for RHEL.

The RHEL detection logic needs outside information because the Red Hat build
system does not (and did not, in the past) store the relevant information in the
layer itself. In addition, dnf and yum do not persist provenance information
outside of a cache and rpm considers such information outside its bailiwick.

In the case of the RHEL ecosystem, "repository" is a bit of a misnomer, as
advisories are tracked on the Product level, and so Clair's "repository" data is
used instead to indicate a Product. This mismatch can lead to apparent
duplications in reporting. For example, if an advisory is marked as affecting
"cpe:/a:redhat:enterprise_linux:8" and
"cpe:/a:redhat:enterprise_linux:8::appstream", this results in two advisories
being recorded. (CPEs do not namespace the way this example may imply; that is
to say, the latter is not "contained in" or a "member of" the former.) If a
layer reports that it is both the "cpe:/a:redhat:enterprise_linux:8" and
"cpe:/a:redhat:enterprise_linux:8::appstream" layer, then both advisories match.

A RepositoryScanner must have its Configure method called before Scan is used:
Scan dereferences members that only Configure populates.
*/
type RepositoryScanner struct {
	// These members are created after the Configure call.
	//
	// "Upd" hands out the repository-to-CPE mapping, "apiFetcher" is the
	// Container API client (left nil when the API is disabled), and "client"
	// is the HTTP client passed to Configure.
	upd        *common.Updater
	apiFetcher *containerapi.ContainerAPI
	client     *http.Client

	// Cfg is filled in by the deserializer passed to Configure and then has
	// defaults applied.
	cfg RepositoryScannerConfig
}
    56  
// Compile-time assertions that *RepositoryScanner satisfies the indexer
// interfaces it is documented to implement.
var (
	_ indexer.RepositoryScanner = (*RepositoryScanner)(nil)
	_ indexer.RPCScanner        = (*RepositoryScanner)(nil)
	_ indexer.VersionedScanner  = (*RepositoryScanner)(nil)
)
    62  
// RepositoryScannerConfig is the configuration expected for a
// [RepositoryScanner].
//
// Providing the "URL" and "File" members controls how the RepositoryScanner
// handles updating its mapping file:
//
//   - If the "URL" is provided or no configuration is provided, the mapping file
//     is fetched at construction time and then updated periodically.
//   - If only the "File" is provided, it will be consulted exclusively.
//   - If both the "URL" and "File" are provided, the file will be loaded
//     initially and then updated periodically from the URL.
type RepositoryScannerConfig struct {
	// DisableAPI disables the use of the API.
	//
	// When set, Configure does not construct a Container API client and Scan
	// never falls back to it.
	DisableAPI bool `json:"disable_api" yaml:"disable_api"`
	// API is the URL to talk to the Red Hat Container API.
	//
	// If empty, Configure substitutes [DefaultContainerAPI].
	//
	// See [DefaultContainerAPI] and [containerapi.ContainerAPI].
	API string `json:"api" yaml:"api"`
	// Repo2CPEMappingURL can be used to fetch the repo mapping file.
	// Consulting the mapping file is preferred over the Container API.
	//
	// If both this and Repo2CPEMappingFile are empty, Configure substitutes
	// [DefaultRepo2CPEMappingURL].
	//
	// See [DefaultRepo2CPEMappingURL] and [repo2cpe].
	Repo2CPEMappingURL string `json:"repo2cpe_mapping_url" yaml:"repo2cpe_mapping_url"`
	// Repo2CPEMappingFile, if specified, is consulted instead of the [Repo2CPEMappingURL].
	//
	// Configure reads and decodes this file to seed the mapping. The default
	// URL is not substituted when a file is given, so providing only the file
	// leaves the URL empty.
	//
	// This should be provided to avoid any network traffic.
	Repo2CPEMappingFile string `json:"repo2cpe_mapping_file" yaml:"repo2cpe_mapping_file"`
	// Timeout controls the timeout for any remote calls this package makes.
	//
	// The default is 10 seconds, applied by Configure when the value is zero.
	Timeout time.Duration `json:"timeout" yaml:"timeout"`
}
    95  
const (
	// RepositoryKey marks a repository as being based on a Red Hat CPE.
	//
	// Scan sets it as the Key of every repository it reports.
	repositoryKey = "rhel-cpe-repository"
	// DefaultContainerAPI is the default Red Hat Container API URL.
	//
	// Configure uses it when no API URL is configured.
	//
	//doc:url indexer
	DefaultContainerAPI = "https://catalog.redhat.com/api/containers/"
	// DefaultRepo2CPEMappingURL is default URL with a mapping file provided by Red Hat.
	//
	// Configure uses it when neither a mapping URL nor a mapping file is
	// configured.
	//
	//doc:url indexer
	DefaultRepo2CPEMappingURL = "https://access.redhat.com/security/data/metrics/repository-to-cpe.json"
)
   108  
   109  // Name implements [indexer.VersionedScanner].
   110  func (*RepositoryScanner) Name() string { return "rhel-repository-scanner" }
   111  
   112  // Version implements [indexer.VersionedScanner].
   113  func (*RepositoryScanner) Version() string { return "1.1" }
   114  
   115  // Kind implements [indexer.VersionedScanner].
   116  func (*RepositoryScanner) Kind() string { return "repository" }
   117  
   118  // Configure implements [indexer.RPCScanner].
   119  func (r *RepositoryScanner) Configure(ctx context.Context, f indexer.ConfigDeserializer, c *http.Client) error {
   120  	ctx = zlog.ContextWithValues(ctx,
   121  		"component", "rhel/RepositoryScanner.Configure",
   122  		"version", r.Version())
   123  	r.client = c
   124  	if err := f(&r.cfg); err != nil {
   125  		return err
   126  	}
   127  	// Set defaults if not set via passed function.
   128  	if r.cfg.API == "" {
   129  		r.cfg.API = DefaultContainerAPI
   130  	}
   131  	if r.cfg.Timeout == 0 {
   132  		r.cfg.Timeout = 10 * time.Second
   133  	}
   134  
   135  	var mf *mappingFile
   136  	switch {
   137  	case r.cfg.Repo2CPEMappingURL == "" && r.cfg.Repo2CPEMappingFile == "":
   138  		// defaults
   139  		r.cfg.Repo2CPEMappingURL = DefaultRepo2CPEMappingURL
   140  	case r.cfg.Repo2CPEMappingURL != "" && r.cfg.Repo2CPEMappingFile == "":
   141  		// remote only
   142  	case r.cfg.Repo2CPEMappingFile != "":
   143  		// seed from file
   144  		f, err := os.Open(r.cfg.Repo2CPEMappingFile)
   145  		if err != nil {
   146  			return err
   147  		}
   148  		defer f.Close()
   149  		z, err := zreader.Reader(f)
   150  		if err != nil {
   151  			return err
   152  		}
   153  		defer z.Close()
   154  		mf = &mappingFile{}
   155  		if err := json.NewDecoder(z).Decode(mf); err != nil {
   156  			return err
   157  		}
   158  	}
   159  	r.upd = common.NewUpdater(r.cfg.Repo2CPEMappingURL, mf)
   160  	tctx, done := context.WithTimeout(ctx, r.cfg.Timeout)
   161  	defer done()
   162  	r.upd.Get(tctx, c)
   163  
   164  	if r.cfg.DisableAPI {
   165  		zlog.Debug(ctx).Msg("container API disabled")
   166  	} else {
   167  		// Additional setup
   168  		root, err := url.Parse(r.cfg.API)
   169  		if err != nil {
   170  			return err
   171  		}
   172  
   173  		r.apiFetcher = &containerapi.ContainerAPI{
   174  			Root:   root,
   175  			Client: r.client,
   176  		}
   177  	}
   178  
   179  	return nil
   180  }
   181  
   182  // Scan implements [indexer.RepositoryScanner].
   183  func (r *RepositoryScanner) Scan(ctx context.Context, l *claircore.Layer) (repositories []*claircore.Repository, err error) {
   184  	defer trace.StartRegion(ctx, "Scanner.Scan").End()
   185  	ctx = zlog.ContextWithValues(ctx,
   186  		"component", "rhel/RepositoryScanner.Scan",
   187  		"version", r.Version(),
   188  		"layer", l.Hash.String())
   189  	zlog.Debug(ctx).Msg("start")
   190  	defer zlog.Debug(ctx).Msg("done")
   191  
   192  	sys, err := l.FS()
   193  	if err != nil {
   194  		return nil, fmt.Errorf("rhel: unable to open layer: %w", err)
   195  	}
   196  
   197  	tctx, done := context.WithTimeout(ctx, r.cfg.Timeout)
   198  	defer done()
   199  	cmi, err := r.upd.Get(tctx, r.client)
   200  	if err != nil && cmi == nil {
   201  		return []*claircore.Repository{}, err
   202  	}
   203  	cm, ok := cmi.(*mappingFile)
   204  	if !ok || cm == nil {
   205  		return []*claircore.Repository{}, fmt.Errorf("rhel: unable to create a mappingFile object")
   206  	}
   207  	CPEs, err := mapContentSets(ctx, sys, cm)
   208  	if err != nil {
   209  		return []*claircore.Repository{}, err
   210  	}
   211  	if CPEs == nil && r.apiFetcher != nil {
   212  		// Embedded content-sets are available only for new images.
   213  		// For old images, use fallback option and query Red Hat Container API.
   214  		ctx, done := context.WithTimeout(ctx, r.cfg.Timeout)
   215  		defer done()
   216  		CPEs, err = mapContainerAPI(ctx, sys, r.apiFetcher)
   217  		if err != nil {
   218  			return []*claircore.Repository{}, err
   219  		}
   220  	}
   221  
   222  	for _, cpeID := range CPEs {
   223  		r := &claircore.Repository{
   224  			Name: cpeID,
   225  			Key:  repositoryKey,
   226  		}
   227  		r.CPE, err = cpe.Unbind(cpeID)
   228  		if err != nil {
   229  			zlog.Warn(ctx).
   230  				Err(err).
   231  				Str("url", `https://bugzilla.redhat.com/enter_bug.cgi?product=Container%20Factory`).
   232  				Str("cpeID", cpeID).
   233  				Msg("invalid CPE, please report a bug upstream")
   234  			continue
   235  		}
   236  
   237  		repositories = append(repositories, r)
   238  	}
   239  
   240  	return repositories, nil
   241  }
   242  
   243  // MapContentSets returns a slice of CPEs bound into strings, as discovered by
   244  // examining information contained within the container.
   245  func mapContentSets(ctx context.Context, sys fs.FS, cm *mappingFile) ([]string, error) {
   246  	// Get CPEs using embedded content-set files.
   247  	// The files is be stored in /root/buildinfo/content_manifests/ and will need to
   248  	// be translated using mapping file provided by Red Hat's PST team.
   249  	ms, err := fs.Glob(sys, `root/buildinfo/content_manifests/*.json`)
   250  	if err != nil {
   251  		panic("programmer error: " + err.Error())
   252  	}
   253  	if ms == nil {
   254  		return nil, nil
   255  	}
   256  	p := ms[0]
   257  	zlog.Debug(ctx).
   258  		Str("manifest-path", p).
   259  		Msg("found content manifest file")
   260  	b, err := fs.ReadFile(sys, p)
   261  	if err != nil {
   262  		return nil, fmt.Errorf("rhel: unable to read %q: %w", p, err)
   263  	}
   264  	var m contentManifest
   265  	var syntaxErr *json.SyntaxError
   266  	err = json.Unmarshal(b, &m)
   267  	switch {
   268  	case errors.Is(err, nil):
   269  	case errors.As(err, &syntaxErr):
   270  		zlog.Warn(ctx).
   271  			Str("manifest-path", p).
   272  			Err(err).
   273  			Msg("could not unmarshal content_manifests file")
   274  		return nil, nil
   275  	default:
   276  		return nil, err
   277  	}
   278  	// If the JSON file is malformed and has a 0-length list of content sets,
   279  	// report nil so that the API can be consulted.
   280  	if len(m.ContentSets) == 0 {
   281  		return nil, nil
   282  	}
   283  	return cm.Get(ctx, m.ContentSets)
   284  }
   285  
// MappingFile is a data struct for mapping file between repositories and CPEs
//
// It is decoded from JSON; the top-level "data" object is keyed by repository
// name.
type mappingFile struct {
	Data map[string]repo `json:"data"`
}
   290  
// Repo structure holds information about CPEs for given repo
//
// It is the value type of the "data" object in a [mappingFile].
type repo struct {
	CPEs []string `json:"cpes"`
}
   295  
   296  func (m *mappingFile) Get(ctx context.Context, rs []string) ([]string, error) {
   297  	s := map[string]struct{}{}
   298  	for _, r := range rs {
   299  		cpes, ok := m.Data[r]
   300  		if !ok {
   301  			zlog.Debug(ctx).
   302  				Str("repository", r).
   303  				Msg("repository not present in a mapping file")
   304  			continue
   305  		}
   306  		for _, cpe := range cpes.CPEs {
   307  			s[cpe] = struct{}{}
   308  		}
   309  	}
   310  
   311  	i, r := 0, make([]string, len(s))
   312  	for k := range s {
   313  		r[i] = k
   314  		i++
   315  	}
   316  	return r, nil
   317  }
   318  
// ContentManifest structure is the data provided by OSBS.
//
// It is decoded from a JSON file under root/buildinfo/content_manifests/ in a
// layer. ContentSets holds the names that are looked up in the mapping file.
type contentManifest struct {
	ContentSets []string         `json:"content_sets"`
	Metadata    manifestMetadata `json:"metadata"`
}
   324  
// ManifestMetadata struct holds additional metadata about the build.
//
// NOTE(review): ImageLayerIndex is decoded but not read anywhere in this file;
// presumably it is the index of the layer the manifest describes — confirm.
type manifestMetadata struct {
	ImageLayerIndex int `json:"image_layer_index"`
}
   329  
   330  // MapContainerAPI returns a slice of CPEs bound into strings, as discovered by
   331  // pulling labels from the Dockerfile contained in the layer and submitted to the
   332  // Container API.
   333  func mapContainerAPI(ctx context.Context, sys fs.FS, api *containerapi.ContainerAPI) ([]string, error) {
   334  	ms, err := fs.Glob(sys, "root/buildinfo/Dockerfile-*")
   335  	if err != nil {
   336  		panic("programmer error: " + err.Error())
   337  	}
   338  	if ms == nil {
   339  		return nil, nil
   340  	}
   341  	p := ms[0]
   342  	b, err := fs.ReadFile(sys, p)
   343  	if err != nil {
   344  		return nil, fmt.Errorf("rhel: unable to read %q: %w", p, err)
   345  	}
   346  
   347  	nvr, arch, err := extractBuildNVR(ctx, p, b)
   348  	switch {
   349  	case errors.Is(err, nil):
   350  	case errors.Is(err, errBadDockerfile):
   351  		zlog.Info(ctx).
   352  			AnErr("label_error", err).
   353  			Msg("bad dockerfile")
   354  		return nil, nil
   355  	default:
   356  		return nil, err
   357  	}
   358  
   359  	cpes, err := api.GetCPEs(ctx, nvr, arch)
   360  	if err != nil {
   361  		return nil, err
   362  	}
   363  	zlog.Debug(ctx).
   364  		Str("nvr", nvr).
   365  		Str("arch", arch).
   366  		Strs("cpes", cpes).
   367  		Msg("got CPEs from container API")
   368  	return cpes, nil
   369  }
   370  
   371  // ExtractBuildNVR extracts the build's NVR and arch from the named Dockerfile and its contents.
   372  //
   373  // The `redhat.com.component` label is extracted from the contents and used as the "name."
   374  // "Version" and "release" are extracted from the Dockerfile path.
   375  // "Arch" is extracted from the `architecture` label.
   376  func extractBuildNVR(ctx context.Context, dockerfilePath string, b []byte) (string, string, error) {
   377  	const (
   378  		comp = `com.redhat.component`
   379  		arch = `architecture`
   380  	)
   381  	ls, err := dockerfile.GetLabels(ctx, bytes.NewReader(b))
   382  	if err != nil {
   383  		return "", "", err
   384  	}
   385  	n, ok := ls[comp]
   386  	if !ok {
   387  		return "", "", missingLabel(comp)
   388  	}
   389  	a, ok := ls[arch]
   390  	if !ok {
   391  		return "", "", missingLabel(arch)
   392  	}
   393  	v, r := parseVersionRelease(filepath.Base(dockerfilePath))
   394  	return fmt.Sprintf("%s-%s-%s", n, v, r), a, nil
   395  }
   396  
   397  var errBadDockerfile = errors.New("bad dockerfile")
   398  
   399  // MissingLabel is an error that provides information on which label was missing
   400  // and "Is" errBadDockerfile.
   401  type missingLabel string
   402  
   403  func (e missingLabel) Error() string {
   404  	return fmt.Sprintf("dockerfile missing expected label %q", string(e))
   405  }
   406  
   407  func (e missingLabel) Is(tgt error) bool {
   408  	if oe, ok := tgt.(missingLabel); ok {
   409  		return string(oe) == string(e)
   410  	}
   411  	return errors.Is(tgt, errBadDockerfile)
   412  }
   413  
   414  // ParseVersionRelease reports the version and release from an NVR string.
   415  func parseVersionRelease(nvr string) (version, release string) {
   416  	releaseIndex := strings.LastIndex(nvr, "-")
   417  	release = nvr[releaseIndex+1:]
   418  
   419  	versionIndex := strings.LastIndex(nvr[:releaseIndex], "-")
   420  	version = nvr[versionIndex+1 : releaseIndex]
   421  	return
   422  }