github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/debian/parse_dpkg_db.go (about)

     1  package debian
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"regexp"
    10  	"strings"
    11  
    12  	"github.com/dustin/go-humanize"
    13  	"github.com/mitchellh/mapstructure"
    14  
    15  	"github.com/anchore/syft/internal"
    16  	"github.com/anchore/syft/internal/log"
    17  	"github.com/anchore/syft/syft/artifact"
    18  	"github.com/anchore/syft/syft/file"
    19  	"github.com/anchore/syft/syft/pkg"
    20  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    21  )
    22  
    23  var (
    24  	errEndOfPackages = fmt.Errorf("no more packages to read")
    25  	sourceRegexp     = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`)
    26  )
    27  
    28  // parseDpkgDB reads a dpkg database "status" file (and surrounding data files) and returns the packages and relationships found.
    29  func parseDpkgDB(_ context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    30  	metadata, err := parseDpkgStatus(reader)
    31  	if err != nil {
    32  		return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err)
    33  	}
    34  
    35  	var pkgs []pkg.Package
    36  	for _, m := range metadata {
    37  		pkgs = append(pkgs, newDpkgPackage(m, reader.Location, resolver, env.LinuxRelease))
    38  	}
    39  
    40  	return pkgs, nil, nil
    41  }
    42  
    43  // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed.
    44  func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) {
    45  	buffedReader := bufio.NewReader(reader)
    46  	var metadata []pkg.DpkgDBEntry
    47  
    48  	continueProcessing := true
    49  	for continueProcessing {
    50  		entry, err := parseDpkgStatusEntry(buffedReader)
    51  		if err != nil {
    52  			if errors.Is(err, errEndOfPackages) {
    53  				continueProcessing = false
    54  			} else {
    55  				return nil, err
    56  			}
    57  		}
    58  		if entry == nil {
    59  			continue
    60  		}
    61  
    62  		metadata = append(metadata, *entry)
    63  	}
    64  
    65  	return metadata, nil
    66  }
    67  
// dpkgExtractedMetadata is an adapter struct used as the mapstructure decode target for the raw fields of a
// single dpkg status file entry. The final pkg.DpkgDBEntry struct has different types for some fields
// (e.g. Provides, Depends, and PreDepends are []string there, not a string), so values are decoded here first
// and converted afterwards. Field names match the status file keys with any "-" removed, and InstalledSize
// is expected to already be an int by the time decoding happens.
type dpkgExtractedMetadata struct {
	Package       string `mapstructure:"Package"`
	Source        string `mapstructure:"Source"`
	Version       string `mapstructure:"Version"`
	SourceVersion string `mapstructure:"SourceVersion"`
	Architecture  string `mapstructure:"Architecture"`
	Maintainer    string `mapstructure:"Maintainer"`
	InstalledSize int    `mapstructure:"InstalledSize"`
	Description   string `mapstructure:"Description"`
	Provides      string `mapstructure:"Provides"`
	Depends       string `mapstructure:"Depends"`
	PreDepends    string `mapstructure:"PreDepends"` // note: original doc is Pre-Depends
}
    83  
    84  // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader.
    85  func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) {
    86  	var retErr error
    87  	dpkgFields, err := extractAllFields(reader)
    88  	if err != nil {
    89  		if !errors.Is(err, errEndOfPackages) {
    90  			return nil, err
    91  		}
    92  		if len(dpkgFields) == 0 {
    93  			return nil, err
    94  		}
    95  		retErr = err
    96  	}
    97  
    98  	raw := dpkgExtractedMetadata{}
    99  	err = mapstructure.Decode(dpkgFields, &raw)
   100  	if err != nil {
   101  		return nil, err
   102  	}
   103  
   104  	sourceName, sourceVersion := extractSourceVersion(raw.Source)
   105  	if sourceVersion != "" {
   106  		raw.SourceVersion = sourceVersion
   107  		raw.Source = sourceName
   108  	}
   109  
   110  	if raw.Package == "" {
   111  		return nil, retErr
   112  	}
   113  
   114  	entry := pkg.DpkgDBEntry{
   115  		Package:       raw.Package,
   116  		Source:        raw.Source,
   117  		Version:       raw.Version,
   118  		SourceVersion: raw.SourceVersion,
   119  		Architecture:  raw.Architecture,
   120  		Maintainer:    raw.Maintainer,
   121  		InstalledSize: raw.InstalledSize,
   122  		Description:   raw.Description,
   123  		Provides:      splitPkgList(raw.Provides),
   124  		Depends:       splitPkgList(raw.Depends),
   125  		PreDepends:    splitPkgList(raw.PreDepends),
   126  	}
   127  
   128  	// there may be an optional conffiles section that we should persist as files
   129  	if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil {
   130  		if sectionStr, ok := conffilesSection.(string); ok {
   131  			entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr))
   132  		}
   133  	}
   134  
   135  	if entry.Files == nil {
   136  		// ensure the default value for a collection is never nil since this may be shown as JSON
   137  		entry.Files = make([]pkg.DpkgFileRecord, 0)
   138  	}
   139  
   140  	return &entry, retErr
   141  }
   142  
   143  func splitPkgList(pkgList string) (ret []string) {
   144  	fields := strings.Split(pkgList, ",")
   145  	for _, field := range fields {
   146  		field = strings.TrimSpace(field)
   147  		if field != "" {
   148  			ret = append(ret, field)
   149  		}
   150  	}
   151  	return ret
   152  }
   153  
   154  func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
   155  	dpkgFields := make(map[string]interface{})
   156  	var key string
   157  
   158  	for {
   159  		line, err := reader.ReadString('\n')
   160  		if err != nil {
   161  			if errors.Is(err, io.EOF) {
   162  				return dpkgFields, errEndOfPackages
   163  			}
   164  			return nil, err
   165  		}
   166  
   167  		line = strings.TrimRight(line, "\n")
   168  
   169  		// empty line indicates end of entry
   170  		if len(line) == 0 {
   171  			// if the entry has not started, keep parsing lines
   172  			if len(dpkgFields) == 0 {
   173  				continue
   174  			}
   175  			break
   176  		}
   177  
   178  		switch {
   179  		case strings.HasPrefix(line, " "):
   180  			// a field-body continuation
   181  			if len(key) == 0 {
   182  				return nil, fmt.Errorf("no match for continuation: line: '%s'", line)
   183  			}
   184  
   185  			val, ok := dpkgFields[key]
   186  			if !ok {
   187  				return nil, fmt.Errorf("no previous key exists, expecting: %s", key)
   188  			}
   189  			// concatenate onto previous value
   190  			val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line))
   191  			dpkgFields[key] = val
   192  		default:
   193  			// parse a new key
   194  			var val interface{}
   195  			key, val, err = handleNewKeyValue(line)
   196  			if err != nil {
   197  				log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err)
   198  				continue
   199  			}
   200  
   201  			if _, ok := dpkgFields[key]; ok {
   202  				return nil, fmt.Errorf("duplicate key discovered: %s", key)
   203  			}
   204  			dpkgFields[key] = val
   205  		}
   206  	}
   207  	return dpkgFields, nil
   208  }
   209  
   210  // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if
   211  // of the "<name>" form, then return name and nil
   212  func extractSourceVersion(source string) (string, string) {
   213  	// special handling for the Source field since it has formatted data
   214  	match := internal.MatchNamedCaptureGroups(sourceRegexp, source)
   215  	return match["name"], match["version"]
   216  }
   217  
   218  // handleNewKeyValue parse a new key-value pair from the given unprocessed line
   219  func handleNewKeyValue(line string) (key string, val interface{}, err error) {
   220  	if i := strings.Index(line, ":"); i > 0 {
   221  		key = strings.TrimSpace(line[0:i])
   222  		// mapstruct cant handle "-"
   223  		key = strings.ReplaceAll(key, "-", "")
   224  		val := strings.TrimSpace(line[i+1:])
   225  
   226  		// further processing of values based on the key that was discovered
   227  		switch key {
   228  		case "InstalledSize":
   229  			s, err := humanize.ParseBytes(val)
   230  			if err != nil {
   231  				return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err)
   232  			}
   233  			return key, int(s), nil
   234  		default:
   235  			return key, val, nil
   236  		}
   237  	}
   238  
   239  	return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line)
   240  }