github.com/anchore/syft@v1.38.2/syft/format/internal/backfill.go (about)

     1  package internal
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"slices"
     7  	"strings"
     8  
     9  	"github.com/anchore/packageurl-go"
    10  	"github.com/anchore/syft/internal/log"
    11  	"github.com/anchore/syft/syft/cpe"
    12  	"github.com/anchore/syft/syft/pkg"
    13  	cataloger "github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
    14  )
    15  
    16  // Backfill takes all information present in the package and attempts to fill in any missing information
    17  // from any available sources, such as the Metadata, PURL, or CPEs.
    18  //
    19  // Backfill does not call p.SetID(), but this needs to be called later to ensure it's up to date
    20  func Backfill(p *pkg.Package) {
    21  	backfillFromPurl(p)
    22  	backfillFromCPE(p)
    23  }
    24  
    25  func backfillFromCPE(p *pkg.Package) {
    26  	if len(p.CPEs) == 0 {
    27  		return
    28  	}
    29  
    30  	c := p.CPEs[0]
    31  
    32  	if p.Type == "" {
    33  		p.Type = cataloger.TargetSoftwareToPackageType(c.Attributes.TargetSW)
    34  	}
    35  }
    36  
    37  func backfillFromPurl(p *pkg.Package) {
    38  	if p.PURL == "" {
    39  		return
    40  	}
    41  
    42  	purl, err := packageurl.FromString(p.PURL)
    43  	if err != nil {
    44  		log.Debugf("unable to parse purl: %s: %w", p.PURL, err)
    45  		return
    46  	}
    47  
    48  	var cpes []cpe.CPE
    49  	epoch := ""
    50  	rpmmod := ""
    51  
    52  	for _, qualifier := range purl.Qualifiers {
    53  		switch qualifier.Key {
    54  		case pkg.PURLQualifierCPES:
    55  			rawCpes := strings.Split(qualifier.Value, ",")
    56  			for _, rawCpe := range rawCpes {
    57  				c, err := cpe.New(rawCpe, cpe.DeclaredSource)
    58  				if err != nil {
    59  					log.Debugf("unable to decode cpe %s in purl %s: %w", rawCpe, p.PURL, err)
    60  					continue
    61  				}
    62  				cpes = append(cpes, c)
    63  			}
    64  		case pkg.PURLQualifierEpoch:
    65  			epoch = qualifier.Value
    66  		case pkg.PURLQualifierRpmModularity:
    67  			rpmmod = qualifier.Value
    68  		}
    69  	}
    70  
    71  	if p.Type == "" {
    72  		p.Type = pkg.TypeFromPURL(p.PURL)
    73  	}
    74  	if p.Language == "" {
    75  		p.Language = pkg.LanguageFromPURL(p.PURL)
    76  	}
    77  	if p.Name == "" {
    78  		p.Name = nameFromPurl(purl)
    79  	}
    80  
    81  	setVersionFromPurl(p, purl, epoch)
    82  
    83  	if p.Language == pkg.Java {
    84  		setJavaMetadataFromPurl(p, purl)
    85  	}
    86  
    87  	if p.Type == pkg.RpmPkg {
    88  		setRpmMetadataFromPurl(p, rpmmod)
    89  	}
    90  
    91  	for _, c := range cpes {
    92  		if slices.Contains(p.CPEs, c) {
    93  			continue
    94  		}
    95  		p.CPEs = append(p.CPEs, c)
    96  	}
    97  }
    98  
    99  func setJavaMetadataFromPurl(p *pkg.Package, _ packageurl.PackageURL) {
   100  	if p.Type != pkg.JavaPkg {
   101  		return
   102  	}
   103  	if p.Metadata == nil {
   104  		// since we don't know if the purl elements directly came from pom properties or the manifest,
   105  		// we can only go as far as to set the type to JavaArchive, but not fill in the group id and artifact id
   106  		p.Metadata = pkg.JavaArchive{}
   107  	}
   108  }
   109  
   110  func setRpmMetadataFromPurl(p *pkg.Package, rpmmod string) {
   111  	if p.Type != pkg.RpmPkg {
   112  		return
   113  	}
   114  	if rpmmod == "" {
   115  		return
   116  	}
   117  
   118  	if p.Metadata == nil {
   119  		p.Metadata = pkg.RpmDBEntry{
   120  			ModularityLabel: &rpmmod,
   121  		}
   122  		return
   123  	}
   124  
   125  	switch m := p.Metadata.(type) {
   126  	case pkg.RpmDBEntry:
   127  		if m.ModularityLabel == nil {
   128  			m.ModularityLabel = &rpmmod
   129  			p.Metadata = m
   130  		}
   131  	case pkg.RpmArchive:
   132  		if m.ModularityLabel == nil {
   133  			m.ModularityLabel = &rpmmod
   134  			p.Metadata = m
   135  		}
   136  	}
   137  }
   138  
   139  func setVersionFromPurl(p *pkg.Package, purl packageurl.PackageURL, epoch string) {
   140  	if p.Version == "" {
   141  		p.Version = purl.Version
   142  	}
   143  
   144  	if epoch != "" && p.Type == pkg.RpmPkg && !epochPrefix.MatchString(p.Version) {
   145  		p.Version = fmt.Sprintf("%s:%s", epoch, p.Version)
   146  	}
   147  }
   148  
   149  var epochPrefix = regexp.MustCompile(`^\d+:`)
   150  
   151  // nameFromPurl returns the syft package name of the package from the purl. If the purl includes a namespace,
   152  // the name is prefixed as appropriate based on the PURL type
   153  func nameFromPurl(purl packageurl.PackageURL) string {
   154  	if !nameExcludesPurlNamespace(purl.Type) && purl.Namespace != "" {
   155  		return fmt.Sprintf("%s/%s", purl.Namespace, purl.Name)
   156  	}
   157  	return purl.Name
   158  }
   159  
   160  func nameExcludesPurlNamespace(purlType string) bool {
   161  	switch purlType {
   162  	case packageurl.TypeAlpine,
   163  		packageurl.TypeAlpm,
   164  		packageurl.TypeConan,
   165  		packageurl.TypeCpan,
   166  		packageurl.TypeDebian,
   167  		packageurl.TypeMaven,
   168  		packageurl.TypeQpkg,
   169  		packageurl.TypeRPM,
   170  		packageurl.TypeSWID:
   171  		return true
   172  	}
   173  	return false
   174  }