github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/debian/parse_dpkg_db.go (about)

     1  package debian
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"path"
    10  	"regexp"
    11  	"strings"
    12  
    13  	"github.com/dustin/go-humanize"
    14  	"github.com/go-viper/mapstructure/v2"
    15  
    16  	"github.com/anchore/go-sync"
    17  	"github.com/anchore/syft/internal"
    18  	"github.com/anchore/syft/internal/log"
    19  	"github.com/anchore/syft/internal/unknown"
    20  	"github.com/anchore/syft/syft/artifact"
    21  	"github.com/anchore/syft/syft/cataloging"
    22  	"github.com/anchore/syft/syft/file"
    23  	"github.com/anchore/syft/syft/pkg"
    24  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    25  )
    26  
    27  const (
    28  	deinstallStatus string = "deinstall"
    29  )
    30  
    31  var (
    32  	errEndOfPackages = fmt.Errorf("no more packages to read")
    33  	sourceRegexp     = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`)
    34  )
    35  
    36  // parseDpkgDB reads a dpkg database "status" file (and surrounding data files) and returns the packages and relationships found.
    37  func parseDpkgDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    38  	metadata, err := parseDpkgStatus(reader)
    39  	if err != nil {
    40  		return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err)
    41  	}
    42  
    43  	dbLoc := reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)
    44  	var pkgs []pkg.Package
    45  	_ = sync.CollectSlice(&ctx, cataloging.ExecutorFile, sync.ToSeq(metadata), func(m pkg.DpkgDBEntry) (pkg.Package, error) {
    46  		return newDpkgPackage(ctx, m, dbLoc, resolver, env.LinuxRelease, findDpkgInfoFiles(m.Package, resolver, reader.Location)...), nil
    47  	}, &pkgs)
    48  
    49  	return pkgs, nil, unknown.IfEmptyf(pkgs, "unable to determine packages")
    50  }
    51  
    52  func findDpkgInfoFiles(name string, resolver file.Resolver, dbLocation file.Location) []file.Location {
    53  	if resolver == nil {
    54  		return nil
    55  	}
    56  	if strings.TrimSpace(name) == "" {
    57  		return nil
    58  	}
    59  
    60  	// for typical debian-base distributions, the installed package info is at /var/lib/dpkg/status
    61  	// and the md5sum information is under /var/lib/dpkg/info/; however, for distroless the installed
    62  	// package info is across multiple files under /var/lib/dpkg/status.d/ and the md5sums are contained in
    63  	// the same directory
    64  	searchPath := path.Dir(dbLocation.RealPath)
    65  
    66  	if !strings.HasSuffix(searchPath, "status.d") {
    67  		searchPath = path.Join(searchPath, "info")
    68  	}
    69  
    70  	// look for /var/lib/dpkg/info/NAME.*
    71  	locations, err := resolver.FilesByGlob(path.Join(searchPath, name+".*"))
    72  	if err != nil {
    73  		log.WithFields("error", err, "pkg", name).Trace("failed to fetch related dpkg info files")
    74  		return nil
    75  	}
    76  
    77  	return locations
    78  }
    79  
    80  // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed.
    81  func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) {
    82  	buffedReader := bufio.NewReader(reader)
    83  	var metadata []pkg.DpkgDBEntry
    84  
    85  	continueProcessing := true
    86  	for continueProcessing {
    87  		entry, err := parseDpkgStatusEntry(buffedReader)
    88  		if err != nil {
    89  			if errors.Is(err, errEndOfPackages) {
    90  				continueProcessing = false
    91  			} else {
    92  				return nil, err
    93  			}
    94  		}
    95  		if entry == nil {
    96  			continue
    97  		}
    98  
    99  		metadata = append(metadata, *entry)
   100  	}
   101  
   102  	return metadata, nil
   103  }
   104  
// dpkgExtractedMetadata is an adapter struct that captures the fields decoded from a single dpkg status entry. It
// exists because the final pkg.DpkgDBEntry struct has different types for some fields (e.g. Provides, Depends, and
// PreDepends are []string there, not a string). The mapstructure names contain no "-" because handleNewKeyValue
// strips it from the keys before decoding. SourceVersion is overwritten by the parser when the Source field carries
// a parenthesized version, and Status is only consulted to skip removed ("deinstall") packages.
type dpkgExtractedMetadata struct {
	Package       string `mapstructure:"Package"`
	Source        string `mapstructure:"Source"`
	Version       string `mapstructure:"Version"`
	SourceVersion string `mapstructure:"SourceVersion"`
	Architecture  string `mapstructure:"Architecture"`
	Maintainer    string `mapstructure:"Maintainer"`
	InstalledSize int    `mapstructure:"InstalledSize"`
	Description   string `mapstructure:"Description"`
	Provides      string `mapstructure:"Provides"`
	Depends       string `mapstructure:"Depends"`
	PreDepends    string `mapstructure:"PreDepends"` // note: the key in the status file is Pre-Depends
	Status        string `mapstructure:"Status"`
}
   121  
   122  // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader.
   123  func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) {
   124  	var retErr error
   125  	dpkgFields, err := extractAllFields(reader)
   126  	if err != nil {
   127  		if !errors.Is(err, errEndOfPackages) {
   128  			return nil, err
   129  		}
   130  		if len(dpkgFields) == 0 {
   131  			return nil, err
   132  		}
   133  		retErr = err
   134  	}
   135  
   136  	raw := dpkgExtractedMetadata{}
   137  	err = mapstructure.Decode(dpkgFields, &raw)
   138  	if err != nil {
   139  		return nil, err
   140  	}
   141  
   142  	// Skip entries which have been removed but not purged, e.g. "rc" status in dpkg -l
   143  	if strings.Contains(raw.Status, deinstallStatus) {
   144  		return nil, nil
   145  	}
   146  
   147  	sourceName, sourceVersion := extractSourceVersion(raw.Source)
   148  	if sourceVersion != "" {
   149  		raw.SourceVersion = sourceVersion
   150  		raw.Source = sourceName
   151  	}
   152  
   153  	if raw.Package == "" {
   154  		return nil, retErr
   155  	}
   156  
   157  	entry := pkg.DpkgDBEntry{
   158  		Package:       raw.Package,
   159  		Source:        raw.Source,
   160  		Version:       raw.Version,
   161  		SourceVersion: raw.SourceVersion,
   162  		Architecture:  raw.Architecture,
   163  		Maintainer:    raw.Maintainer,
   164  		InstalledSize: raw.InstalledSize,
   165  		Description:   raw.Description,
   166  		Provides:      splitPkgList(raw.Provides),
   167  		Depends:       splitPkgList(raw.Depends),
   168  		PreDepends:    splitPkgList(raw.PreDepends),
   169  	}
   170  
   171  	// there may be an optional conffiles section that we should persist as files
   172  	if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil {
   173  		if sectionStr, ok := conffilesSection.(string); ok {
   174  			entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr))
   175  		}
   176  	}
   177  
   178  	if entry.Files == nil {
   179  		// ensure the default value for a collection is never nil since this may be shown as JSON
   180  		entry.Files = make([]pkg.DpkgFileRecord, 0)
   181  	}
   182  
   183  	return &entry, retErr
   184  }
   185  
   186  func splitPkgList(pkgList string) (ret []string) {
   187  	fields := strings.Split(pkgList, ",")
   188  	for _, field := range fields {
   189  		field = strings.TrimSpace(field)
   190  		if field != "" {
   191  			ret = append(ret, field)
   192  		}
   193  	}
   194  	return ret
   195  }
   196  
   197  func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
   198  	dpkgFields := make(map[string]interface{})
   199  	var key string
   200  
   201  	for {
   202  		line, err := reader.ReadString('\n')
   203  		if err != nil {
   204  			if errors.Is(err, io.EOF) {
   205  				return dpkgFields, errEndOfPackages
   206  			}
   207  			return nil, err
   208  		}
   209  
   210  		line = strings.TrimRight(line, "\n")
   211  
   212  		// empty line indicates end of entry
   213  		if len(line) == 0 {
   214  			// if the entry has not started, keep parsing lines
   215  			if len(dpkgFields) == 0 {
   216  				continue
   217  			}
   218  			break
   219  		}
   220  
   221  		switch {
   222  		case strings.HasPrefix(line, " "):
   223  			// a field-body continuation
   224  			if len(key) == 0 {
   225  				return nil, fmt.Errorf("no match for continuation: line: '%s'", line)
   226  			}
   227  
   228  			val, ok := dpkgFields[key]
   229  			if !ok {
   230  				return nil, fmt.Errorf("no previous key exists, expecting: %s", key)
   231  			}
   232  			// concatenate onto previous value
   233  			val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line))
   234  			dpkgFields[key] = val
   235  		default:
   236  			// parse a new key
   237  			var val interface{}
   238  			key, val, err = handleNewKeyValue(line)
   239  			if err != nil {
   240  				log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err)
   241  				continue
   242  			}
   243  
   244  			if _, ok := dpkgFields[key]; ok {
   245  				return nil, fmt.Errorf("duplicate key discovered: %s", key)
   246  			}
   247  			dpkgFields[key] = val
   248  		}
   249  	}
   250  	return dpkgFields, nil
   251  }
   252  
   253  // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if
   254  // of the "<name>" form, then return name and nil
   255  func extractSourceVersion(source string) (string, string) {
   256  	// special handling for the Source field since it has formatted data
   257  	match := internal.MatchNamedCaptureGroups(sourceRegexp, source)
   258  	return match["name"], match["version"]
   259  }
   260  
   261  // handleNewKeyValue parse a new key-value pair from the given unprocessed line
   262  func handleNewKeyValue(line string) (key string, val interface{}, err error) {
   263  	if i := strings.Index(line, ":"); i > 0 {
   264  		key = strings.TrimSpace(line[0:i])
   265  		// mapstruct cant handle "-"
   266  		key = strings.ReplaceAll(key, "-", "")
   267  		val := strings.TrimSpace(line[i+1:])
   268  
   269  		// further processing of values based on the key that was discovered
   270  		switch key {
   271  		case "InstalledSize":
   272  			s, err := humanize.ParseBytes(val)
   273  			if err != nil {
   274  				return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err)
   275  			}
   276  			return key, int(s), nil
   277  		default:
   278  			return key, val, nil
   279  		}
   280  	}
   281  
   282  	return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line)
   283  }