github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/debian/package.go (about)

     1  package debian
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"path"
     7  	"path/filepath"
     8  	"sort"
     9  	"strings"
    10  
    11  	"github.com/anchore/packageurl-go"
    12  	"github.com/anchore/syft/syft/file"
    13  	"github.com/anchore/syft/syft/linux"
    14  	"github.com/anchore/syft/syft/pkg"
    15  	"github.com/lineaje-labs/syft/internal"
    16  	"github.com/lineaje-labs/syft/internal/log"
    17  )
    18  
const (
	// md5sumsExt is the suffix appended to a package key when looking up its md5sums file.
	md5sumsExt = ".md5sums"
	// conffilesExt is the suffix appended to a package key when looking up its conffiles file.
	conffilesExt = ".conffiles"
	// docsPath is the directory under which per-package "copyright" files are looked up.
	docsPath = "/usr/share/doc"
)
    24  
    25  func newDpkgPackage(
    26  	d pkg.DpkgDBEntry, dbLocation file.Location, resolver file.Resolver, release *linux.Release,
    27  ) pkg.Package {
    28  	// TODO: separate pr to license refactor, but explore extracting dpkg-specific license parsing into a separate function
    29  	licenses := make([]pkg.License, 0)
    30  	p := pkg.Package{
    31  		Name:      d.Package,
    32  		Version:   d.Version,
    33  		Licenses:  pkg.NewLicenseSet(licenses...),
    34  		Locations: file.NewLocationSet(dbLocation.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
    35  		PURL:      packageURL(d, release),
    36  		Type:      pkg.DebPkg,
    37  		Metadata:  d,
    38  	}
    39  
    40  	if resolver != nil {
    41  		// the current entry only has what may have been listed in the status file, however, there are additional
    42  		// files that are listed in multiple other locations. We should retrieve them all and merge the file lists
    43  		// together.
    44  		mergeFileListing(resolver, dbLocation, &p)
    45  
    46  		// fetch additional data from the copyright file to derive the license information
    47  		addLicenses(resolver, dbLocation, &p)
    48  	}
    49  
    50  	p.SetID()
    51  
    52  	return p
    53  }
    54  
    55  // PackageURL returns the PURL for the specific Debian package (see https://github.com/package-url/purl-spec)
    56  func packageURL(m pkg.DpkgDBEntry, distro *linux.Release) string {
    57  	if distro == nil {
    58  		return ""
    59  	}
    60  
    61  	if distro.ID != "debian" && !internal.StringInSlice("debian", distro.IDLike) {
    62  		return ""
    63  	}
    64  
    65  	qualifiers := map[string]string{
    66  		pkg.PURLQualifierArch: m.Architecture,
    67  	}
    68  
    69  	if m.Source != "" {
    70  		if m.SourceVersion != "" {
    71  			qualifiers[pkg.PURLQualifierUpstream] = fmt.Sprintf("%s@%s", m.Source, m.SourceVersion)
    72  		} else {
    73  			qualifiers[pkg.PURLQualifierUpstream] = m.Source
    74  		}
    75  	}
    76  
    77  	return packageurl.NewPackageURL(
    78  		packageurl.TypeDebian,
    79  		distro.ID,
    80  		m.Package,
    81  		m.Version,
    82  		pkg.PURLQualifiers(
    83  			qualifiers,
    84  			distro,
    85  		),
    86  		"",
    87  	).ToString()
    88  }
    89  
    90  func addLicenses(resolver file.Resolver, dbLocation file.Location, p *pkg.Package) {
    91  	metadata, ok := p.Metadata.(pkg.DpkgDBEntry)
    92  	if !ok {
    93  		log.WithFields("package", p).Warn("unable to extract DPKG metadata to add licenses")
    94  		return
    95  	}
    96  
    97  	// get license information from the copyright file
    98  	copyrightReader, copyrightLocation := fetchCopyrightContents(resolver, dbLocation, metadata)
    99  
   100  	if copyrightReader != nil && copyrightLocation != nil {
   101  		defer internal.CloseAndLogError(copyrightReader, copyrightLocation.AccessPath)
   102  		// attach the licenses
   103  		licenseStrs := parseLicensesFromCopyright(copyrightReader)
   104  		for _, licenseStr := range licenseStrs {
   105  			p.Licenses.Add(pkg.NewLicenseFromLocations(licenseStr, copyrightLocation.WithoutAnnotations()))
   106  		}
   107  		// keep a record of the file where this was discovered
   108  		p.Locations.Add(*copyrightLocation)
   109  	}
   110  }
   111  
   112  func mergeFileListing(resolver file.Resolver, dbLocation file.Location, p *pkg.Package) {
   113  	metadata, ok := p.Metadata.(pkg.DpkgDBEntry)
   114  	if !ok {
   115  		log.WithFields("package", p).Warn("unable to extract DPKG metadata to file listing")
   116  		return
   117  	}
   118  
   119  	// get file listing (package files + additional config files)
   120  	files, infoLocations := getAdditionalFileListing(resolver, dbLocation, metadata)
   121  loopNewFiles:
   122  	for _, newFile := range files {
   123  		for _, existingFile := range metadata.Files {
   124  			if existingFile.Path == newFile.Path {
   125  				// skip adding this file since it already exists
   126  				continue loopNewFiles
   127  			}
   128  		}
   129  		metadata.Files = append(metadata.Files, newFile)
   130  	}
   131  
   132  	// sort files by path
   133  	sort.SliceStable(metadata.Files, func(i, j int) bool {
   134  		return metadata.Files[i].Path < metadata.Files[j].Path
   135  	})
   136  
   137  	// persist alterations
   138  	p.Metadata = metadata
   139  
   140  	// persist location information from each new source of information
   141  	p.Locations.Add(infoLocations...)
   142  }
   143  
   144  func getAdditionalFileListing(
   145  	resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry,
   146  ) ([]pkg.DpkgFileRecord, []file.Location) {
   147  	// ensure the default value for a collection is never nil since this may be shown as JSON
   148  	var files = make([]pkg.DpkgFileRecord, 0)
   149  	var locations []file.Location
   150  
   151  	md5Reader, md5Location := fetchMd5Contents(resolver, dbLocation, m)
   152  
   153  	if md5Reader != nil && md5Location != nil {
   154  		defer internal.CloseAndLogError(md5Reader, md5Location.AccessPath)
   155  		// attach the file list
   156  		files = append(files, parseDpkgMD5Info(md5Reader)...)
   157  
   158  		// keep a record of the file where this was discovered
   159  		locations = append(locations, *md5Location)
   160  	}
   161  
   162  	conffilesReader, conffilesLocation := fetchConffileContents(resolver, dbLocation, m)
   163  
   164  	if conffilesReader != nil && conffilesLocation != nil {
   165  		defer internal.CloseAndLogError(conffilesReader, conffilesLocation.AccessPath)
   166  		// attach the file list
   167  		files = append(files, parseDpkgConffileInfo(conffilesReader)...)
   168  
   169  		// keep a record of the file where this was discovered
   170  		locations = append(locations, *conffilesLocation)
   171  	}
   172  
   173  	return files, locations
   174  }
   175  
   176  //nolint:dupl
   177  func fetchMd5Contents(
   178  	resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry,
   179  ) (io.ReadCloser, *file.Location) {
   180  	var md5Reader io.ReadCloser
   181  	var err error
   182  
   183  	if resolver == nil {
   184  		return nil, nil
   185  	}
   186  
   187  	// for typical debian-base distributions, the installed package info is at /var/lib/dpkg/status
   188  	// and the md5sum information is under /var/lib/dpkg/info/; however, for distroless the installed
   189  	// package info is across multiple files under /var/lib/dpkg/status.d/ and the md5sums are contained in
   190  	// the same directory
   191  	searchPath := filepath.Dir(dbLocation.RealPath)
   192  
   193  	if !strings.HasSuffix(searchPath, "status.d") {
   194  		searchPath = path.Join(searchPath, "info")
   195  	}
   196  
   197  	// look for /var/lib/dpkg/info/NAME:ARCH.md5sums
   198  	name := md5Key(m)
   199  	location := resolver.RelativeFileByPath(dbLocation, path.Join(searchPath, name+md5sumsExt))
   200  
   201  	if location == nil {
   202  		// the most specific key did not work, fallback to just the name
   203  		// look for /var/lib/dpkg/info/NAME.md5sums
   204  		location = resolver.RelativeFileByPath(dbLocation, path.Join(searchPath, m.Package+md5sumsExt))
   205  	}
   206  
   207  	if location == nil {
   208  		return nil, nil
   209  	}
   210  
   211  	// this is unexpected, but not a show-stopper
   212  	md5Reader, err = resolver.FileContentsByLocation(*location)
   213  	if err != nil {
   214  		log.Warnf("failed to fetch deb md5 contents (package=%s): %+v", m.Package, err)
   215  	}
   216  
   217  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   218  
   219  	return md5Reader, &l
   220  }
   221  
   222  //nolint:dupl
   223  func fetchConffileContents(
   224  	resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry,
   225  ) (io.ReadCloser, *file.Location) {
   226  	var reader io.ReadCloser
   227  	var err error
   228  
   229  	if resolver == nil {
   230  		return nil, nil
   231  	}
   232  
   233  	parentPath := filepath.Dir(dbLocation.RealPath)
   234  
   235  	// look for /var/lib/dpkg/info/NAME:ARCH.conffiles
   236  	name := md5Key(m)
   237  	location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+conffilesExt))
   238  
   239  	if location == nil {
   240  		// the most specific key did not work, fallback to just the name
   241  		// look for /var/lib/dpkg/info/NAME.conffiles
   242  		location = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", m.Package+conffilesExt))
   243  	}
   244  
   245  	if location == nil {
   246  		return nil, nil
   247  	}
   248  
   249  	// this is unexpected, but not a show-stopper
   250  	reader, err = resolver.FileContentsByLocation(*location)
   251  	if err != nil {
   252  		log.Warnf("failed to fetch deb conffiles contents (package=%s): %+v", m.Package, err)
   253  	}
   254  
   255  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   256  
   257  	return reader, &l
   258  }
   259  
   260  func fetchCopyrightContents(
   261  	resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry,
   262  ) (io.ReadCloser, *file.Location) {
   263  	if resolver == nil {
   264  		return nil, nil
   265  	}
   266  
   267  	// look for /usr/share/docs/NAME/copyright files
   268  	copyrightPath := path.Join(docsPath, m.Package, "copyright")
   269  	location := resolver.RelativeFileByPath(dbLocation, copyrightPath)
   270  
   271  	// we may not have a copyright file for each package, ignore missing files
   272  	if location == nil {
   273  		return nil, nil
   274  	}
   275  
   276  	reader, err := resolver.FileContentsByLocation(*location)
   277  	if err != nil {
   278  		log.Warnf("failed to fetch deb copyright contents (package=%s): %w", m.Package, err)
   279  	}
   280  
   281  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   282  
   283  	return reader, &l
   284  }
   285  
   286  func md5Key(metadata pkg.DpkgDBEntry) string {
   287  	contentKey := metadata.Package
   288  	if metadata.Architecture != "" && metadata.Architecture != "all" {
   289  		contentKey = contentKey + ":" + metadata.Architecture
   290  	}
   291  	return contentKey
   292  }