github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/debian/package.go (about)

     1  package debian
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"path"
     8  	"path/filepath"
     9  	"sort"
    10  	"strings"
    11  
    12  	"github.com/anchore/packageurl-go"
    13  	"github.com/anchore/syft/internal"
    14  	"github.com/anchore/syft/internal/log"
    15  	"github.com/anchore/syft/syft/file"
    16  	"github.com/anchore/syft/syft/linux"
    17  	"github.com/anchore/syft/syft/pkg"
    18  )
    19  
    20  const (
    21  	md5sumsExt   = ".md5sums"
    22  	conffilesExt = ".conffiles"
    23  	docsPath     = "/usr/share/doc"
    24  )
    25  
    26  func newDpkgPackage(ctx context.Context, d pkg.DpkgDBEntry, dbLocation file.Location, resolver file.Resolver, release *linux.Release, evidence ...file.Location) pkg.Package {
    27  	// TODO: separate pr to license refactor, but explore extracting dpkg-specific license parsing into a separate function
    28  	var licenses []pkg.License
    29  
    30  	locations := file.NewLocationSet(dbLocation)
    31  	locations.Add(evidence...)
    32  
    33  	p := pkg.Package{
    34  		Name:      d.Package,
    35  		Version:   d.Version,
    36  		Licenses:  pkg.NewLicenseSet(licenses...),
    37  		Locations: locations,
    38  		PURL:      packageURL(d, release),
    39  		Type:      pkg.DebPkg,
    40  		Metadata:  d,
    41  	}
    42  
    43  	if resolver != nil {
    44  		// the current entry only has what may have been listed in the status file, however, there are additional
    45  		// files that are listed in multiple other locations. We should retrieve them all and merge the file lists
    46  		// together.
    47  		mergeFileListing(resolver, dbLocation, &p)
    48  
    49  		// fetch additional data from the copyright file to derive the license information
    50  		addLicenses(ctx, resolver, dbLocation, &p)
    51  	}
    52  
    53  	p.SetID()
    54  
    55  	return p
    56  }
    57  
    58  func newDebArchivePackage(ctx context.Context, location file.Location, metadata pkg.DpkgArchiveEntry, licenseStrings []string) pkg.Package {
    59  	p := pkg.Package{
    60  		Name:     metadata.Package,
    61  		Version:  metadata.Version,
    62  		Licenses: pkg.NewLicenseSet(pkg.NewLicensesFromValuesWithContext(ctx, licenseStrings...)...),
    63  		Type:     pkg.DebPkg,
    64  		PURL: packageURL(
    65  			pkg.DpkgDBEntry(metadata),
    66  			// we don't know the distro information, but since this is a deb file then we can reasonably assume it is a debian-based distro
    67  			&linux.Release{IDLike: []string{"debian"}},
    68  		),
    69  		Metadata:  metadata,
    70  		Locations: file.NewLocationSet(location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
    71  	}
    72  
    73  	p.SetID()
    74  	return p
    75  }
    76  
    77  // PackageURL returns the PURL for the specific Debian package (see https://github.com/package-url/purl-spec)
    78  func packageURL(m pkg.DpkgDBEntry, distro *linux.Release) string {
    79  	if distro == nil {
    80  		return ""
    81  	}
    82  
    83  	if distro.ID != "debian" && !internal.StringInSlice("debian", distro.IDLike) {
    84  		return ""
    85  	}
    86  
    87  	qualifiers := map[string]string{
    88  		pkg.PURLQualifierArch: m.Architecture,
    89  	}
    90  
    91  	if m.Source != "" {
    92  		if m.SourceVersion != "" {
    93  			qualifiers[pkg.PURLQualifierUpstream] = fmt.Sprintf("%s@%s", m.Source, m.SourceVersion)
    94  		} else {
    95  			qualifiers[pkg.PURLQualifierUpstream] = m.Source
    96  		}
    97  	}
    98  
    99  	return packageurl.NewPackageURL(
   100  		packageurl.TypeDebian,
   101  		distro.ID,
   102  		m.Package,
   103  		m.Version,
   104  		pkg.PURLQualifiers(
   105  			qualifiers,
   106  			distro,
   107  		),
   108  		"",
   109  	).ToString()
   110  }
   111  
   112  func addLicenses(ctx context.Context, resolver file.Resolver, dbLocation file.Location, p *pkg.Package) {
   113  	metadata, ok := p.Metadata.(pkg.DpkgDBEntry)
   114  	if !ok {
   115  		log.WithFields("package", p).Trace("unable to extract DPKG metadata to add licenses")
   116  		return
   117  	}
   118  
   119  	// get license information from the copyright file
   120  	copyrightReader, copyrightLocation := fetchCopyrightContents(resolver, dbLocation, metadata)
   121  	var licenseStrs []string
   122  	if copyrightReader != nil && copyrightLocation != nil {
   123  		defer internal.CloseAndLogError(copyrightReader, copyrightLocation.AccessPath)
   124  		// attach the licenses
   125  		licenseStrs = parseLicensesFromCopyright(copyrightReader)
   126  		for _, licenseStr := range licenseStrs {
   127  			p.Licenses.Add(pkg.NewLicenseFromLocationsWithContext(ctx, licenseStr, copyrightLocation.WithoutAnnotations()))
   128  		}
   129  		// keep a record of the file where this was discovered
   130  		p.Locations.Add(*copyrightLocation)
   131  	}
   132  	// try to use the license classifier if parsing the copyright file failed
   133  	if len(licenseStrs) == 0 {
   134  		sr, sl := fetchCopyrightContents(resolver, dbLocation, metadata)
   135  		if sr != nil && sl != nil {
   136  			defer internal.CloseAndLogError(sr, sl.AccessPath)
   137  			p.Licenses.Add(pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(*sl, sr))...)
   138  		}
   139  	}
   140  }
   141  
   142  func mergeFileListing(resolver file.Resolver, dbLocation file.Location, p *pkg.Package) {
   143  	metadata, ok := p.Metadata.(pkg.DpkgDBEntry)
   144  	if !ok {
   145  		log.WithFields("package", p).Trace("unable to extract DPKG metadata to file listing")
   146  		return
   147  	}
   148  
   149  	// get file listing (package files + additional config files)
   150  	files, infoLocations := getAdditionalFileListing(resolver, dbLocation, metadata)
   151  loopNewFiles:
   152  	for _, newFile := range files {
   153  		for _, existingFile := range metadata.Files {
   154  			if existingFile.Path == newFile.Path {
   155  				// skip adding this file since it already exists
   156  				continue loopNewFiles
   157  			}
   158  		}
   159  		metadata.Files = append(metadata.Files, newFile)
   160  	}
   161  
   162  	// sort files by path
   163  	sort.SliceStable(metadata.Files, func(i, j int) bool {
   164  		return metadata.Files[i].Path < metadata.Files[j].Path
   165  	})
   166  
   167  	// persist alterations
   168  	p.Metadata = metadata
   169  
   170  	// persist location information from each new source of information
   171  	p.Locations.Add(infoLocations...)
   172  }
   173  
   174  func getAdditionalFileListing(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry) ([]pkg.DpkgFileRecord, []file.Location) {
   175  	// ensure the default value for a collection is never nil since this may be shown as JSON
   176  	var files = make([]pkg.DpkgFileRecord, 0)
   177  	var locations []file.Location
   178  
   179  	md5Reader, md5Location := fetchMd5Contents(resolver, dbLocation, m)
   180  
   181  	if md5Reader != nil && md5Location != nil {
   182  		defer internal.CloseAndLogError(md5Reader, md5Location.AccessPath)
   183  		// attach the file list
   184  		files = append(files, parseDpkgMD5Info(md5Reader)...)
   185  
   186  		// keep a record of the file where this was discovered
   187  		locations = append(locations, *md5Location)
   188  	}
   189  
   190  	conffilesReader, conffilesLocation := fetchConffileContents(resolver, dbLocation, m)
   191  
   192  	if conffilesReader != nil && conffilesLocation != nil {
   193  		defer internal.CloseAndLogError(conffilesReader, conffilesLocation.AccessPath)
   194  		// attach the file list
   195  		files = append(files, parseDpkgConffileInfo(conffilesReader)...)
   196  
   197  		// keep a record of the file where this was discovered
   198  		locations = append(locations, *conffilesLocation)
   199  	}
   200  
   201  	return files, locations
   202  }
   203  
   204  func fetchMd5Contents(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry) (io.ReadCloser, *file.Location) {
   205  	var md5Reader io.ReadCloser
   206  	var err error
   207  
   208  	if resolver == nil {
   209  		return nil, nil
   210  	}
   211  
   212  	// for typical debian-base distributions, the installed package info is at /var/lib/dpkg/status
   213  	// and the md5sum information is under /var/lib/dpkg/info/; however, for distroless the installed
   214  	// package info is across multiple files under /var/lib/dpkg/status.d/ and the md5sums are contained in
   215  	// the same directory
   216  	searchPath := filepath.Dir(dbLocation.RealPath)
   217  
   218  	if !strings.HasSuffix(searchPath, "status.d") {
   219  		searchPath = path.Join(searchPath, "info")
   220  	}
   221  
   222  	// look for /var/lib/dpkg/info/NAME:ARCH.md5sums
   223  	name := md5Key(m)
   224  	location := resolver.RelativeFileByPath(dbLocation, path.Join(searchPath, name+md5sumsExt))
   225  
   226  	if location == nil {
   227  		// the most specific key did not work, fallback to just the name
   228  		// look for /var/lib/dpkg/info/NAME.md5sums
   229  		location = resolver.RelativeFileByPath(dbLocation, path.Join(searchPath, m.Package+md5sumsExt))
   230  	}
   231  
   232  	if location == nil {
   233  		return nil, nil
   234  	}
   235  
   236  	// this is unexpected, but not a show-stopper
   237  	md5Reader, err = resolver.FileContentsByLocation(*location)
   238  	if err != nil {
   239  		log.Tracef("failed to fetch deb md5 contents (package=%s): %+v", m.Package, err)
   240  	}
   241  
   242  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   243  
   244  	return md5Reader, &l
   245  }
   246  
   247  func fetchConffileContents(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry) (io.ReadCloser, *file.Location) {
   248  	var reader io.ReadCloser
   249  	var err error
   250  
   251  	if resolver == nil {
   252  		return nil, nil
   253  	}
   254  
   255  	parentPath := filepath.Dir(dbLocation.RealPath)
   256  
   257  	// look for /var/lib/dpkg/info/NAME:ARCH.conffiles
   258  	name := md5Key(m)
   259  	location := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+conffilesExt))
   260  
   261  	if location == nil {
   262  		// the most specific key did not work, fallback to just the name
   263  		// look for /var/lib/dpkg/info/NAME.conffiles
   264  		location = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", m.Package+conffilesExt))
   265  	}
   266  
   267  	if location == nil {
   268  		return nil, nil
   269  	}
   270  
   271  	// this is unexpected, but not a show-stopper
   272  	reader, err = resolver.FileContentsByLocation(*location)
   273  	if err != nil {
   274  		log.Tracef("failed to fetch deb conffiles contents (package=%s): %+v", m.Package, err)
   275  	}
   276  
   277  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   278  
   279  	return reader, &l
   280  }
   281  
   282  func fetchCopyrightContents(resolver file.Resolver, dbLocation file.Location, m pkg.DpkgDBEntry) (io.ReadCloser, *file.Location) {
   283  	if resolver == nil {
   284  		return nil, nil
   285  	}
   286  
   287  	// look for /usr/share/docs/NAME/copyright files
   288  	copyrightPath := path.Join(docsPath, m.Package, "copyright")
   289  	location := resolver.RelativeFileByPath(dbLocation, copyrightPath)
   290  
   291  	// we may not have a copyright file for each package, ignore missing files
   292  	if location == nil {
   293  		return nil, nil
   294  	}
   295  
   296  	reader, err := resolver.FileContentsByLocation(*location) //nolint:gocritic // since we're returning the reader, it's up to the caller to close it
   297  	if err != nil {
   298  		log.Tracef("failed to fetch deb copyright contents (package=%s): %s", m.Package, err)
   299  	}
   300  
   301  	l := location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
   302  
   303  	return reader, &l
   304  }
   305  
   306  func md5Key(metadata pkg.DpkgDBEntry) string {
   307  	contentKey := metadata.Package
   308  	if metadata.Architecture != "" && metadata.Architecture != "all" {
   309  		contentKey = contentKey + ":" + metadata.Architecture
   310  	}
   311  	return contentKey
   312  }