github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/deb/package.go (about)

     1  package deb
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"path"
     7  	"path/filepath"
     8  	"sort"
     9  	"strings"
    10  
    11  	"github.com/anchore/packageurl-go"
    12  	"github.com/anchore/syft/internal"
    13  	"github.com/anchore/syft/internal/log"
    14  	"github.com/anchore/syft/syft/file"
    15  	"github.com/anchore/syft/syft/linux"
    16  	"github.com/anchore/syft/syft/pkg"
    17  )
    18  
    19  const (
    20  	md5sumsExt   = ".md5sums"
    21  	conffilesExt = ".conffiles"
    22  	docsPath     = "/usr/share/doc"
    23  )
    24  
    25  func newDpkgPackage(d pkg.DpkgMetadata, dbLocation file.Location, resolver file.Resolver, release *linux.Release) pkg.Package {
    26  	// TODO: separate pr to license refactor, but explore extracting dpkg-specific license parsing into a separate function
    27  	licenses := make([]pkg.License, 0)
    28  	p := pkg.Package{
    29  		Name:         d.Package,
    30  		Version:      d.Version,
    31  		Licenses:     pkg.NewLicenseSet(licenses...),
    32  		Locations:    file.NewLocationSet(dbLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
    33  		PURL:         packageURL(d, release),
    34  		Type:         pkg.DebPkg,
    35  		MetadataType: pkg.DpkgMetadataType,
    36  		Metadata:     d,
    37  	}
    38  
    39  	// the current entry only has what may have been listed in the status file, however, there are additional
    40  	// files that are listed in multiple other locations. We should retrieve them all and merge the file lists
    41  	// together.
    42  	mergeFileListing(resolver, dbLocation, &p)
    43  
    44  	// fetch additional data from the copyright file to derive the license information
    45  	addLicenses(resolver, dbLocation, &p)
    46  
    47  	p.SetID()
    48  
    49  	return p
    50  }
    51  
    52  // PackageURL returns the PURL for the specific Debian package (see https://github.com/package-url/purl-spec)
    53  func packageURL(m pkg.DpkgMetadata, distro *linux.Release) string {
    54  	if distro == nil {
    55  		return ""
    56  	}
    57  
    58  	if distro.ID != "debian" && !internal.StringInSlice("debian", distro.IDLike) {
    59  		return ""
    60  	}
    61  
    62  	qualifiers := map[string]string{
    63  		pkg.PURLQualifierArch: m.Architecture,
    64  	}
    65  
    66  	if m.Source != "" {
    67  		if m.SourceVersion != "" {
    68  			qualifiers[pkg.PURLQualifierUpstream] = fmt.Sprintf("%s@%s", m.Source, m.SourceVersion)
    69  		} else {
    70  			qualifiers[pkg.PURLQualifierUpstream] = m.Source
    71  		}
    72  	}
    73  
    74  	return packageurl.NewPackageURL(
    75  		packageurl.TypeDebian,
    76  		distro.ID,
    77  		m.Package,
    78  		m.Version,
    79  		pkg.PURLQualifiers(
    80  			qualifiers,
    81  			distro,
    82  		),
    83  		"",
    84  	).ToString()
    85  }
    86  
    87  func addLicenses(resolver file.Resolver, dbLocation file.Location, p *pkg.Package) {
    88  	metadata, ok := p.Metadata.(pkg.DpkgMetadata)
    89  	if !ok {
    90  		log.WithFields("package", p).Warn("unable to extract DPKG metadata to add licenses")
    91  		return
    92  	}
    93  
    94  	// get license information from the copyright file
    95  	copyrightReader, copyrightLocation := fetchCopyrightContents(resolver, dbLocation, metadata)
    96  
    97  	if copyrightReader != nil && copyrightLocation != nil {
    98  		defer internal.CloseAndLogError(copyrightReader, copyrightLocation.VirtualPath)
    99  		// attach the licenses
   100  		licenseStrs := parseLicensesFromCopyright(copyrightReader)
   101  		for _, licenseStr := range licenseStrs {
   102  			p.Licenses.Add(pkg.NewLicenseFromLocations(licenseStr, copyrightLocation.WithoutAnnotations()))
   103  		}
   104  		// keep a record of the file where this was discovered
   105  		p.Locations.Add(*copyrightLocation)
   106  	}
   107  }
   108  
   109  func mergeFileListing(resolver file.Resolver, dbLocation file.Location, p *pkg.Package) {
   110  	metadata, ok := p.Metadata.(pkg.DpkgMetadata)
   111  	if !ok {
   112  		log.WithFields("package", p).Warn("unable to extract DPKG metadata to file listing")
   113  		return
   114  	}
   115  
   116  	// get file listing (package files + additional config files)
   117  	files, infoLocations := getAdditionalFileListing(resolver, dbLocation, metadata)
   118  loopNewFiles:
   119  	for _, newFile := range files {
   120  		for _, existingFile := range metadata.Files {
   121  			if existingFile.Path == newFile.Path {
   122  				// skip adding this file since it already exists
   123  				continue loopNewFiles
   124  			}
   125  		}
   126  		metadata.Files = append(metadata.Files, newFile)
   127  	}
   128  
   129  	// sort files by path
   130  	sort.SliceStable(metadata.Files, func(i, j int) bool {
   131  		return metadata.Files[i].Path < metadata.Files[j].Path
   132  	})
   133  
   134  	// persist alterations
   135  	p.Metadata = metadata
   136  
   137  	// persist location information from each new source of information
   138  	p.Locations.Add(infoLocations...)
   139  }
   140  
   141  func getAdditionalFileListing(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgMetadata) ([]pkg.DpkgFileRecord, []file.Location) {
   142  	// ensure the default value for a collection is never nil since this may be shown as JSON
   143  	var files = make([]pkg.DpkgFileRecord, 0)
   144  	var locations []file.Location
   145  
   146  	md5Reader, md5Location := fetchMd5Contents(resolver, dbLocation, m)
   147  
   148  	if md5Reader != nil && md5Location != nil {
   149  		defer internal.CloseAndLogError(md5Reader, md5Location.VirtualPath)
   150  		// attach the file list
   151  		files = append(files, parseDpkgMD5Info(md5Reader)...)
   152  
   153  		// keep a record of the file where this was discovered
   154  		locations = append(locations, *md5Location)
   155  	}
   156  
   157  	conffilesReader, conffilesLocation := fetchConffileContents(resolver, dbLocation, m)
   158  
   159  	if conffilesReader != nil && conffilesLocation != nil {
   160  		defer internal.CloseAndLogError(conffilesReader, conffilesLocation.VirtualPath)
   161  		// attach the file list
   162  		files = append(files, parseDpkgConffileInfo(conffilesReader)...)
   163  
   164  		// keep a record of the file where this was discovered
   165  		locations = append(locations, *conffilesLocation)
   166  	}
   167  
   168  	return files, locations
   169  }
   170  
   171  //nolint:dupl
   172  func fetchMd5Contents(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgMetadata) (io.ReadCloser, *file.Location) {
   173  	var md5Reader io.ReadCloser
   174  	var err error
   175  
   176  	if resolver == nil {
   177  		return nil, nil
   178  	}
   179  
   180  	// for typical debian-base distributions, the installed package info is at /var/lib/dpkg/status
   181  	// and the md5sum information is under /var/lib/dpkg/info/; however, for distroless the installed
   182  	// package info is across multiple files under /var/lib/dpkg/status.d/ and the md5sums are contained in
   183  	// the same directory
   184  	searchPath := filepath.Dir(dbLocation.RealPath)
   185  
   186  	if !strings.HasSuffix(searchPath, "status.d") {
   187  		searchPath = path.Join(searchPath, "info")
   188  	}
   189  
   190  	// look for /var/lib/dpkg/info/NAME:ARCH.md5sums
   191  	name := md5Key(m)
   192  	location := resolver.RelativeFileByPath(dbLocation, path.Join(searchPath, name+md5sumsExt))
   193  
   194  	if location == nil {
   195  		// the most specific key did not work, fallback to just the name
   196  		// look for /var/lib/dpkg/info/NAME.md5sums
   197  		location = resolver.RelativeFileByPath(dbLocation, path.Join(searchPath, m.Package+md5sumsExt))
   198  	}
   199  
   200  	if location == nil {
   201  		return nil, nil
   202  	}
   203  
   204  	// this is unexpected, but not a show-stopper
   205  	md5Reader, err = resolver.FileContentsByLocation(*location)
   206  	if err != nil {
   207  		log.Warnf("failed to fetch deb md5 contents (package=%s): %+v", m.Package, err)
   208  	}
   209  
   210  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   211  
   212  	return md5Reader, &l
   213  }
   214  
   215  //nolint:dupl
   216  func fetchConffileContents(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgMetadata) (io.ReadCloser, *file.Location) {
   217  	var reader io.ReadCloser
   218  	var err error
   219  
   220  	if resolver == nil {
   221  		return nil, nil
   222  	}
   223  
   224  	parentPath := filepath.Dir(dbLocation.RealPath)
   225  
   226  	// look for /var/lib/dpkg/info/NAME:ARCH.conffiles
   227  	name := md5Key(m)
   228  	location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+conffilesExt))
   229  
   230  	if location == nil {
   231  		// the most specific key did not work, fallback to just the name
   232  		// look for /var/lib/dpkg/info/NAME.conffiles
   233  		location = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", m.Package+conffilesExt))
   234  	}
   235  
   236  	if location == nil {
   237  		return nil, nil
   238  	}
   239  
   240  	// this is unexpected, but not a show-stopper
   241  	reader, err = resolver.FileContentsByLocation(*location)
   242  	if err != nil {
   243  		log.Warnf("failed to fetch deb conffiles contents (package=%s): %+v", m.Package, err)
   244  	}
   245  
   246  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   247  
   248  	return reader, &l
   249  }
   250  
   251  func fetchCopyrightContents(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgMetadata) (io.ReadCloser, *file.Location) {
   252  	if resolver == nil {
   253  		return nil, nil
   254  	}
   255  
   256  	// look for /usr/share/docs/NAME/copyright files
   257  	copyrightPath := path.Join(docsPath, m.Package, "copyright")
   258  	location := resolver.RelativeFileByPath(dbLocation, copyrightPath)
   259  
   260  	// we may not have a copyright file for each package, ignore missing files
   261  	if location == nil {
   262  		return nil, nil
   263  	}
   264  
   265  	reader, err := resolver.FileContentsByLocation(*location)
   266  	if err != nil {
   267  		log.Warnf("failed to fetch deb copyright contents (package=%s): %w", m.Package, err)
   268  	}
   269  
   270  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   271  
   272  	return reader, &l
   273  }
   274  
   275  func md5Key(metadata pkg.DpkgMetadata) string {
   276  	contentKey := metadata.Package
   277  	if metadata.Architecture != "" && metadata.Architecture != "all" {
   278  		contentKey = contentKey + ":" + metadata.Architecture
   279  	}
   280  	return contentKey
   281  }