github.com/quay/claircore@v1.5.28/rhel/coalescer.go (about)

     1  package rhel
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/quay/claircore"
     7  	"github.com/quay/claircore/indexer"
     8  )
     9  
/*
Coalescer takes individual layer artifacts and coalesces them into a full report
on the manifest's contents.

Due to the specifics of the RHEL build system, some information needs to be
back-propagated. That is to say, some information discovered in later layers is
also attributed to earlier layers. Both the product and distribution information
work this way.

The type carries no fields; all work happens in the Coalesce method.

A Coalescer is safe for concurrent use.
*/
type Coalescer struct{}

// Compile-time assertion that *Coalescer satisfies indexer.Coalescer.
var _ indexer.Coalescer = (*Coalescer)(nil)
    24  
    25  // Coalesce implements [indexer.Coalescer].
    26  func (*Coalescer) Coalesce(ctx context.Context, artifacts []*indexer.LayerArtifacts) (*claircore.IndexReport, error) {
    27  	// The comments in here have been largely audited to have consistent language, but
    28  	// "CPE," "repository," and "product" may be used interchangably here.
    29  	if ctx.Err() != nil {
    30  		return nil, ctx.Err()
    31  	}
    32  	ir := claircore.IndexReport{
    33  		Environments:  map[string][]*claircore.Environment{},
    34  		Packages:      map[string]*claircore.Package{},
    35  		Distributions: map[string]*claircore.Distribution{},
    36  		Repositories:  map[string]*claircore.Repository{},
    37  	}
    38  
    39  	// User layers built on top of Red Hat images don't have product CPEs associated with them.
    40  	// We need to share the product information forward to all layers where it's missing.
    41  	// This only applies to Red Hat images, obivously.
    42  	var prev []*claircore.Repository
    43  	for i := range artifacts {
    44  		lr := filterRedHatRepos(artifacts[i].Repos)
    45  		if len(lr) != 0 {
    46  			prev = lr
    47  			continue
    48  		}
    49  		artifacts[i].Repos = append(artifacts[i].Repos, prev...)
    50  	}
    51  	// The same thing has to be done in reverse, because the first layer(s) are missing
    52  	// the relevant information.
    53  	for i := len(artifacts) - 1; i >= 0; i-- {
    54  		lr := filterRedHatRepos(artifacts[i].Repos)
    55  		if len(lr) != 0 {
    56  			prev = lr
    57  			continue
    58  		}
    59  		artifacts[i].Repos = append(artifacts[i].Repos, prev...)
    60  	}
    61  	// This dance with copying the product information in both directions means
    62  	// that if Red Hat product information is found, it "taints" all the layers.
    63  
    64  	for _, a := range artifacts {
    65  		for _, repo := range a.Repos {
    66  			ir.Repositories[repo.ID] = repo
    67  		}
    68  	}
    69  	// In our coalescing logic if a Distribution is found in layer "n" all packages found
    70  	// in layers [0-n] will be associated with this layer. This is for the same reasons
    71  	// for the repository tainting, above.
    72  	var curDist *claircore.Distribution
    73  	for _, a := range artifacts {
    74  		if len(a.Dist) != 0 {
    75  			curDist = a.Dist[0]
    76  			ir.Distributions[curDist.ID] = curDist
    77  			break
    78  		}
    79  	}
    80  	// Next lets begin associating packages with their Environment. We must
    81  	// consider each package in a package database as a unique entity for
    82  	// the edge case where a unique package is located in more then one package database.
    83  	// we'll use a struct as a helper and a map to lookup these structs
    84  	type packageDatabase struct {
    85  		packages     map[string]*claircore.Package
    86  		environments map[string]*claircore.Environment
    87  	}
    88  	dbs := map[string]*packageDatabase{}
    89  
    90  	// lets walk each layer forward looking for packages, new distributions, and
    91  	// creating the environments we discover packages in.
    92  	for _, layerArtifacts := range artifacts {
    93  		// check if we need to update our currDist
    94  		if len(layerArtifacts.Dist) != 0 {
    95  			curDist = layerArtifacts.Dist[0]
    96  			ir.Distributions[curDist.ID] = curDist
    97  		}
    98  		// associate packages with their environments
    99  		for _, pkg := range layerArtifacts.Pkgs {
   100  			// if we encounter a package where we haven't recorded a package database,
   101  			// initialize the package database
   102  			var distID string
   103  			if curDist != nil {
   104  				distID = curDist.ID
   105  			}
   106  			db, ok := dbs[pkg.PackageDB]
   107  			if !ok {
   108  				db = &packageDatabase{
   109  					packages:     make(map[string]*claircore.Package),
   110  					environments: make(map[string]*claircore.Environment),
   111  				}
   112  				dbs[pkg.PackageDB] = db
   113  			}
   114  			if _, ok := db.packages[pkg.ID]; !ok {
   115  				environment := &claircore.Environment{
   116  					PackageDB:      pkg.PackageDB,
   117  					IntroducedIn:   layerArtifacts.Hash,
   118  					DistributionID: distID,
   119  					RepositoryIDs:  make([]string, len(layerArtifacts.Repos)),
   120  				}
   121  				for i := range layerArtifacts.Repos {
   122  					environment.RepositoryIDs[i] = layerArtifacts.Repos[i].ID
   123  				}
   124  				db.packages[pkg.ID] = pkg
   125  				db.environments[pkg.ID] = environment
   126  			}
   127  		}
   128  	}
   129  	if ctx.Err() != nil {
   130  		return nil, ctx.Err()
   131  	}
   132  
   133  	// Now let's go through packages and finds out whether each package is still
   134  	// available in package database in higher layers.
   135  	// When package is not available in higher layers it means that package was
   136  	// either updated/downgraded/removed. In such a cases we need to remove it
   137  	// from list of packages
   138  	// If a package is available in all layers it means that it should be added
   139  	// to list of packages and associate an environment for it.
   140  	for i := range artifacts {
   141  		currentLayerArtifacts := artifacts[i]
   142  		if len(currentLayerArtifacts.Pkgs) == 0 {
   143  			continue
   144  		}
   145  		for _, currentPkg := range currentLayerArtifacts.Pkgs {
   146  			if _, ok := ir.Packages[currentPkg.ID]; ok {
   147  				// the package was already processed in previous layers
   148  				continue
   149  			}
   150  			// for each package let's find out if it is also available in other layers dbs
   151  			found := true
   152  			for j := i + 1; j < len(artifacts); j++ {
   153  				nextLayerArtifacts := artifacts[j]
   154  				if len(nextLayerArtifacts.Pkgs) == 0 {
   155  					continue
   156  				}
   157  				found = false
   158  				for _, nextPkg := range nextLayerArtifacts.Pkgs {
   159  					if currentPkg.ID == nextPkg.ID && currentPkg.PackageDB == nextPkg.PackageDB {
   160  						found = true
   161  						break
   162  					}
   163  				}
   164  			}
   165  			if found {
   166  				ir.Packages[currentPkg.ID] = currentPkg
   167  				ir.Environments[currentPkg.ID] = append(ir.Environments[currentPkg.ID], dbs[currentPkg.PackageDB].environments[currentPkg.ID])
   168  			}
   169  		}
   170  	}
   171  	return &ir, nil
   172  }
   173  
   174  // FilterRedHatRepos finds and reports Red Hat's CPE based repositories.
   175  func filterRedHatRepos(in []*claircore.Repository) []*claircore.Repository {
   176  	out := make([]*claircore.Repository, 0, len(in))
   177  	for _, r := range in {
   178  		if r.Key == repositoryKey {
   179  			out = append(out, r)
   180  		}
   181  	}
   182  	return out
   183  }