github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/debian/package.go (about)

     1  package debian
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"path"
     7  	"path/filepath"
     8  	"sort"
     9  	"strings"
    10  
    11  	"github.com/anchore/packageurl-go"
    12  	"github.com/anchore/syft/internal"
    13  	"github.com/anchore/syft/internal/log"
    14  	"github.com/anchore/syft/syft/file"
    15  	"github.com/anchore/syft/syft/linux"
    16  	"github.com/anchore/syft/syft/pkg"
    17  )
    18  
    19  const (
    20  	md5sumsExt   = ".md5sums"
    21  	conffilesExt = ".conffiles"
    22  	docsPath     = "/usr/share/doc"
    23  )
    24  
    25  func newDpkgPackage(d pkg.DpkgDBEntry, dbLocation file.Location, resolver file.Resolver, release *linux.Release) pkg.Package {
    26  	// TODO: separate pr to license refactor, but explore extracting dpkg-specific license parsing into a separate function
    27  	licenses := make([]pkg.License, 0)
    28  	p := pkg.Package{
    29  		Name:      d.Package,
    30  		Version:   d.Version,
    31  		Licenses:  pkg.NewLicenseSet(licenses...),
    32  		Locations: file.NewLocationSet(dbLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
    33  		PURL:      packageURL(d, release),
    34  		Type:      pkg.DebPkg,
    35  		Metadata:  d,
    36  	}
    37  
    38  	if resolver != nil {
    39  		// the current entry only has what may have been listed in the status file, however, there are additional
    40  		// files that are listed in multiple other locations. We should retrieve them all and merge the file lists
    41  		// together.
    42  		mergeFileListing(resolver, dbLocation, &p)
    43  
    44  		// fetch additional data from the copyright file to derive the license information
    45  		addLicenses(resolver, dbLocation, &p)
    46  	}
    47  
    48  	p.SetID()
    49  
    50  	return p
    51  }
    52  
    53  // PackageURL returns the PURL for the specific Debian package (see https://github.com/package-url/purl-spec)
    54  func packageURL(m pkg.DpkgDBEntry, distro *linux.Release) string {
    55  	if distro == nil {
    56  		return ""
    57  	}
    58  
    59  	if distro.ID != "debian" && !internal.StringInSlice("debian", distro.IDLike) {
    60  		return ""
    61  	}
    62  
    63  	qualifiers := map[string]string{
    64  		pkg.PURLQualifierArch: m.Architecture,
    65  	}
    66  
    67  	if m.Source != "" {
    68  		if m.SourceVersion != "" {
    69  			qualifiers[pkg.PURLQualifierUpstream] = fmt.Sprintf("%s@%s", m.Source, m.SourceVersion)
    70  		} else {
    71  			qualifiers[pkg.PURLQualifierUpstream] = m.Source
    72  		}
    73  	}
    74  
    75  	return packageurl.NewPackageURL(
    76  		packageurl.TypeDebian,
    77  		distro.ID,
    78  		m.Package,
    79  		m.Version,
    80  		pkg.PURLQualifiers(
    81  			qualifiers,
    82  			distro,
    83  		),
    84  		"",
    85  	).ToString()
    86  }
    87  
    88  func addLicenses(resolver file.Resolver, dbLocation file.Location, p *pkg.Package) {
    89  	metadata, ok := p.Metadata.(pkg.DpkgDBEntry)
    90  	if !ok {
    91  		log.WithFields("package", p).Warn("unable to extract DPKG metadata to add licenses")
    92  		return
    93  	}
    94  
    95  	// get license information from the copyright file
    96  	copyrightReader, copyrightLocation := fetchCopyrightContents(resolver, dbLocation, metadata)
    97  
    98  	if copyrightReader != nil && copyrightLocation != nil {
    99  		defer internal.CloseAndLogError(copyrightReader, copyrightLocation.AccessPath)
   100  		// attach the licenses
   101  		licenseStrs := parseLicensesFromCopyright(copyrightReader)
   102  		for _, licenseStr := range licenseStrs {
   103  			p.Licenses.Add(pkg.NewLicenseFromLocations(licenseStr, copyrightLocation.WithoutAnnotations()))
   104  		}
   105  		// keep a record of the file where this was discovered
   106  		p.Locations.Add(*copyrightLocation)
   107  	}
   108  }
   109  
   110  func mergeFileListing(resolver file.Resolver, dbLocation file.Location, p *pkg.Package) {
   111  	metadata, ok := p.Metadata.(pkg.DpkgDBEntry)
   112  	if !ok {
   113  		log.WithFields("package", p).Warn("unable to extract DPKG metadata to file listing")
   114  		return
   115  	}
   116  
   117  	// get file listing (package files + additional config files)
   118  	files, infoLocations := getAdditionalFileListing(resolver, dbLocation, metadata)
   119  loopNewFiles:
   120  	for _, newFile := range files {
   121  		for _, existingFile := range metadata.Files {
   122  			if existingFile.Path == newFile.Path {
   123  				// skip adding this file since it already exists
   124  				continue loopNewFiles
   125  			}
   126  		}
   127  		metadata.Files = append(metadata.Files, newFile)
   128  	}
   129  
   130  	// sort files by path
   131  	sort.SliceStable(metadata.Files, func(i, j int) bool {
   132  		return metadata.Files[i].Path < metadata.Files[j].Path
   133  	})
   134  
   135  	// persist alterations
   136  	p.Metadata = metadata
   137  
   138  	// persist location information from each new source of information
   139  	p.Locations.Add(infoLocations...)
   140  }
   141  
   142  func getAdditionalFileListing(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry) ([]pkg.DpkgFileRecord, []file.Location) {
   143  	// ensure the default value for a collection is never nil since this may be shown as JSON
   144  	var files = make([]pkg.DpkgFileRecord, 0)
   145  	var locations []file.Location
   146  
   147  	md5Reader, md5Location := fetchMd5Contents(resolver, dbLocation, m)
   148  
   149  	if md5Reader != nil && md5Location != nil {
   150  		defer internal.CloseAndLogError(md5Reader, md5Location.AccessPath)
   151  		// attach the file list
   152  		files = append(files, parseDpkgMD5Info(md5Reader)...)
   153  
   154  		// keep a record of the file where this was discovered
   155  		locations = append(locations, *md5Location)
   156  	}
   157  
   158  	conffilesReader, conffilesLocation := fetchConffileContents(resolver, dbLocation, m)
   159  
   160  	if conffilesReader != nil && conffilesLocation != nil {
   161  		defer internal.CloseAndLogError(conffilesReader, conffilesLocation.AccessPath)
   162  		// attach the file list
   163  		files = append(files, parseDpkgConffileInfo(conffilesReader)...)
   164  
   165  		// keep a record of the file where this was discovered
   166  		locations = append(locations, *conffilesLocation)
   167  	}
   168  
   169  	return files, locations
   170  }
   171  
   172  //nolint:dupl
   173  func fetchMd5Contents(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry) (io.ReadCloser, *file.Location) {
   174  	var md5Reader io.ReadCloser
   175  	var err error
   176  
   177  	if resolver == nil {
   178  		return nil, nil
   179  	}
   180  
   181  	// for typical debian-base distributions, the installed package info is at /var/lib/dpkg/status
   182  	// and the md5sum information is under /var/lib/dpkg/info/; however, for distroless the installed
   183  	// package info is across multiple files under /var/lib/dpkg/status.d/ and the md5sums are contained in
   184  	// the same directory
   185  	searchPath := filepath.Dir(dbLocation.RealPath)
   186  
   187  	if !strings.HasSuffix(searchPath, "status.d") {
   188  		searchPath = path.Join(searchPath, "info")
   189  	}
   190  
   191  	// look for /var/lib/dpkg/info/NAME:ARCH.md5sums
   192  	name := md5Key(m)
   193  	location := resolver.RelativeFileByPath(dbLocation, path.Join(searchPath, name+md5sumsExt))
   194  
   195  	if location == nil {
   196  		// the most specific key did not work, fallback to just the name
   197  		// look for /var/lib/dpkg/info/NAME.md5sums
   198  		location = resolver.RelativeFileByPath(dbLocation, path.Join(searchPath, m.Package+md5sumsExt))
   199  	}
   200  
   201  	if location == nil {
   202  		return nil, nil
   203  	}
   204  
   205  	// this is unexpected, but not a show-stopper
   206  	md5Reader, err = resolver.FileContentsByLocation(*location)
   207  	if err != nil {
   208  		log.Warnf("failed to fetch deb md5 contents (package=%s): %+v", m.Package, err)
   209  	}
   210  
   211  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   212  
   213  	return md5Reader, &l
   214  }
   215  
   216  //nolint:dupl
   217  func fetchConffileContents(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry) (io.ReadCloser, *file.Location) {
   218  	var reader io.ReadCloser
   219  	var err error
   220  
   221  	if resolver == nil {
   222  		return nil, nil
   223  	}
   224  
   225  	parentPath := filepath.Dir(dbLocation.RealPath)
   226  
   227  	// look for /var/lib/dpkg/info/NAME:ARCH.conffiles
   228  	name := md5Key(m)
   229  	location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+conffilesExt))
   230  
   231  	if location == nil {
   232  		// the most specific key did not work, fallback to just the name
   233  		// look for /var/lib/dpkg/info/NAME.conffiles
   234  		location = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", m.Package+conffilesExt))
   235  	}
   236  
   237  	if location == nil {
   238  		return nil, nil
   239  	}
   240  
   241  	// this is unexpected, but not a show-stopper
   242  	reader, err = resolver.FileContentsByLocation(*location)
   243  	if err != nil {
   244  		log.Warnf("failed to fetch deb conffiles contents (package=%s): %+v", m.Package, err)
   245  	}
   246  
   247  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   248  
   249  	return reader, &l
   250  }
   251  
   252  func fetchCopyrightContents(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry) (io.ReadCloser, *file.Location) {
   253  	if resolver == nil {
   254  		return nil, nil
   255  	}
   256  
   257  	// look for /usr/share/docs/NAME/copyright files
   258  	copyrightPath := path.Join(docsPath, m.Package, "copyright")
   259  	location := resolver.RelativeFileByPath(dbLocation, copyrightPath)
   260  
   261  	// we may not have a copyright file for each package, ignore missing files
   262  	if location == nil {
   263  		return nil, nil
   264  	}
   265  
   266  	reader, err := resolver.FileContentsByLocation(*location)
   267  	if err != nil {
   268  		log.Warnf("failed to fetch deb copyright contents (package=%s): %s", m.Package, err)
   269  	}
   270  	defer internal.CloseAndLogError(reader, location.RealPath)
   271  
   272  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   273  
   274  	return reader, &l
   275  }
   276  
   277  func md5Key(metadata pkg.DpkgDBEntry) string {
   278  	contentKey := metadata.Package
   279  	if metadata.Architecture != "" && metadata.Architecture != "all" {
   280  		contentKey = contentKey + ":" + metadata.Architecture
   281  	}
   282  	return contentKey
   283  }