github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/debian/parse_dpkg_db.go (about)

     1  package debian
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"regexp"
     9  	"strings"
    10  
    11  	"github.com/dustin/go-humanize"
    12  	"github.com/mitchellh/mapstructure"
    13  
    14  	"github.com/anchore/syft/syft/artifact"
    15  	"github.com/anchore/syft/syft/file"
    16  	"github.com/anchore/syft/syft/pkg"
    17  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    18  	"github.com/lineaje-labs/syft/internal"
    19  	"github.com/lineaje-labs/syft/internal/log"
    20  )
    21  
    22  var (
    23  	errEndOfPackages = fmt.Errorf("no more packages to read")
    24  	sourceRegexp     = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`)
    25  )
    26  
    27  // parseDpkgDB reads a dpkg database "status" file (and surrounding data files) and returns the packages and relationships found.
    28  func parseDpkgDB(
    29  	resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser,
    30  ) ([]pkg.Package, []artifact.Relationship, error) {
    31  	metadata, err := parseDpkgStatus(reader)
    32  	if err != nil {
    33  		return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err)
    34  	}
    35  
    36  	var pkgs []pkg.Package
    37  	for _, m := range metadata {
    38  		pkgs = append(pkgs, newDpkgPackage(m, reader.Location, resolver, env.LinuxRelease))
    39  	}
    40  
    41  	return pkgs, associateRelationships(pkgs), nil
    42  }
    43  
    44  // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed.
    45  func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) {
    46  	buffedReader := bufio.NewReader(reader)
    47  	var metadata []pkg.DpkgDBEntry
    48  
    49  	continueProcessing := true
    50  	for continueProcessing {
    51  		entry, err := parseDpkgStatusEntry(buffedReader)
    52  		if err != nil {
    53  			if errors.Is(err, errEndOfPackages) {
    54  				continueProcessing = false
    55  			} else {
    56  				return nil, err
    57  			}
    58  		}
    59  		if entry == nil {
    60  			continue
    61  		}
    62  
    63  		metadata = append(metadata, *entry)
    64  	}
    65  
    66  	return metadata, nil
    67  }
    68  
// dpkgExtractedMetadata is an adapter struct that mapstructure decodes the raw dpkg status fields into (see
// parseDpkgStatusEntry). The final pkg.DpkgDBEntry struct has different types for some fields (e.g. Provides,
// Depends, and PreDepends are []string there, not a string), so parseDpkgStatusEntry splits those values with
// splitPkgList when converting.
//
// The tag names are the status-file field names after handleNewKeyValue has removed every "-" from the key.
// InstalledSize is an int because handleNewKeyValue already converts that value before decoding. Source may hold
// "<name> (<version>)"; parseDpkgStatusEntry splits such a value into Source and SourceVersion.
type dpkgExtractedMetadata struct {
	Package       string `mapstructure:"Package"`
	Source        string `mapstructure:"Source"`
	Version       string `mapstructure:"Version"`
	SourceVersion string `mapstructure:"SourceVersion"`
	Architecture  string `mapstructure:"Architecture"`
	Maintainer    string `mapstructure:"Maintainer"`
	InstalledSize int    `mapstructure:"InstalledSize"`
	Description   string `mapstructure:"Description"`
	Provides      string `mapstructure:"Provides"`
	Depends       string `mapstructure:"Depends"`
	PreDepends    string `mapstructure:"PreDepends"` // note: original doc is Pre-Depends
}
    84  
    85  // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader.
    86  func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) {
    87  	var retErr error
    88  	dpkgFields, err := extractAllFields(reader)
    89  	if err != nil {
    90  		if !errors.Is(err, errEndOfPackages) {
    91  			return nil, err
    92  		}
    93  		if len(dpkgFields) == 0 {
    94  			return nil, err
    95  		}
    96  		retErr = err
    97  	}
    98  
    99  	raw := dpkgExtractedMetadata{}
   100  	err = mapstructure.Decode(dpkgFields, &raw)
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  
   105  	sourceName, sourceVersion := extractSourceVersion(raw.Source)
   106  	if sourceVersion != "" {
   107  		raw.SourceVersion = sourceVersion
   108  		raw.Source = sourceName
   109  	}
   110  
   111  	if raw.Package == "" {
   112  		return nil, retErr
   113  	}
   114  
   115  	entry := pkg.DpkgDBEntry{
   116  		Package:       raw.Package,
   117  		Source:        raw.Source,
   118  		Version:       raw.Version,
   119  		SourceVersion: raw.SourceVersion,
   120  		Architecture:  raw.Architecture,
   121  		Maintainer:    raw.Maintainer,
   122  		InstalledSize: raw.InstalledSize,
   123  		Description:   raw.Description,
   124  		Provides:      splitPkgList(raw.Provides),
   125  		Depends:       splitPkgList(raw.Depends),
   126  		PreDepends:    splitPkgList(raw.PreDepends),
   127  	}
   128  
   129  	// there may be an optional conffiles section that we should persist as files
   130  	if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil {
   131  		if sectionStr, ok := conffilesSection.(string); ok {
   132  			entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr))
   133  		}
   134  	}
   135  
   136  	if entry.Files == nil {
   137  		// ensure the default value for a collection is never nil since this may be shown as JSON
   138  		entry.Files = make([]pkg.DpkgFileRecord, 0)
   139  	}
   140  
   141  	return &entry, retErr
   142  }
   143  
   144  func splitPkgList(pkgList string) (ret []string) {
   145  	fields := strings.Split(pkgList, ",")
   146  	for _, field := range fields {
   147  		field = strings.TrimSpace(field)
   148  		if field != "" {
   149  			ret = append(ret, field)
   150  		}
   151  	}
   152  	return ret
   153  }
   154  
   155  func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
   156  	dpkgFields := make(map[string]interface{})
   157  	var key string
   158  
   159  	for {
   160  		line, err := reader.ReadString('\n')
   161  		if err != nil {
   162  			if errors.Is(err, io.EOF) {
   163  				return dpkgFields, errEndOfPackages
   164  			}
   165  			return nil, err
   166  		}
   167  
   168  		line = strings.TrimRight(line, "\n")
   169  
   170  		// empty line indicates end of entry
   171  		if len(line) == 0 {
   172  			// if the entry has not started, keep parsing lines
   173  			if len(dpkgFields) == 0 {
   174  				continue
   175  			}
   176  			break
   177  		}
   178  
   179  		switch {
   180  		case strings.HasPrefix(line, " "):
   181  			// a field-body continuation
   182  			if len(key) == 0 {
   183  				return nil, fmt.Errorf("no match for continuation: line: '%s'", line)
   184  			}
   185  
   186  			val, ok := dpkgFields[key]
   187  			if !ok {
   188  				return nil, fmt.Errorf("no previous key exists, expecting: %s", key)
   189  			}
   190  			// concatenate onto previous value
   191  			val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line))
   192  			dpkgFields[key] = val
   193  		default:
   194  			// parse a new key
   195  			var val interface{}
   196  			key, val, err = handleNewKeyValue(line)
   197  			if err != nil {
   198  				log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err)
   199  				continue
   200  			}
   201  
   202  			if _, ok := dpkgFields[key]; ok {
   203  				return nil, fmt.Errorf("duplicate key discovered: %s", key)
   204  			}
   205  			dpkgFields[key] = val
   206  		}
   207  	}
   208  	return dpkgFields, nil
   209  }
   210  
   211  // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if
   212  // of the "<name>" form, then return name and nil
   213  func extractSourceVersion(source string) (string, string) {
   214  	// special handling for the Source field since it has formatted data
   215  	match := internal.MatchNamedCaptureGroups(sourceRegexp, source)
   216  	return match["name"], match["version"]
   217  }
   218  
   219  // handleNewKeyValue parse a new key-value pair from the given unprocessed line
   220  func handleNewKeyValue(line string) (key string, val interface{}, err error) {
   221  	if i := strings.Index(line, ":"); i > 0 {
   222  		key = strings.TrimSpace(line[0:i])
   223  		// mapstruct cant handle "-"
   224  		key = strings.ReplaceAll(key, "-", "")
   225  		val := strings.TrimSpace(line[i+1:])
   226  
   227  		// further processing of values based on the key that was discovered
   228  		switch key {
   229  		case "InstalledSize":
   230  			s, err := humanize.ParseBytes(val)
   231  			if err != nil {
   232  				return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err)
   233  			}
   234  			return key, int(s), nil
   235  		default:
   236  			return key, val, nil
   237  		}
   238  	}
   239  
   240  	return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line)
   241  }
   242  
   243  // associateRelationships will create relationships between packages based on the "Depends", "Pre-Depends", and "Provides"
   244  // fields for installed packages. if there is an installed package that has a dependency that is (somehow) not installed,
   245  // then that relationship (between the installed and uninstalled package) will NOT be created.
   246  func associateRelationships(pkgs []pkg.Package) (relationships []artifact.Relationship) {
   247  	// map["provides" + "package"] -> packages that provide that package
   248  	lookup := make(map[string][]pkg.Package)
   249  
   250  	// read provided and add as keys for lookup keys as well as package names
   251  	for _, p := range pkgs {
   252  		meta, ok := p.Metadata.(pkg.DpkgDBEntry)
   253  		if !ok {
   254  			log.Warnf("cataloger failed to extract dpkg 'provides' metadata for package %+v", p.Name)
   255  			continue
   256  		}
   257  		lookup[p.Name] = append(lookup[p.Name], p)
   258  		for _, provides := range meta.Provides {
   259  			k := stripVersionSpecifier(provides)
   260  			lookup[k] = append(lookup[k], p)
   261  		}
   262  	}
   263  
   264  	// read "Depends" and "Pre-Depends" and match with keys
   265  	for _, p := range pkgs {
   266  		meta, ok := p.Metadata.(pkg.DpkgDBEntry)
   267  		if !ok {
   268  			log.Warnf("cataloger failed to extract dpkg 'dependency' metadata for package %+v", p.Name)
   269  			continue
   270  		}
   271  
   272  		var allDeps []string
   273  		allDeps = append(allDeps, meta.Depends...)
   274  		allDeps = append(allDeps, meta.PreDepends...)
   275  
   276  		for _, depSpecifier := range allDeps {
   277  			deps := splitPackageChoice(depSpecifier)
   278  			for _, dep := range deps {
   279  				for _, depPkg := range lookup[dep] {
   280  					relationships = append(relationships, artifact.Relationship{
   281  						From: depPkg,
   282  						To:   p,
   283  						Type: artifact.DependencyOfRelationship,
   284  					})
   285  				}
   286  			}
   287  		}
   288  	}
   289  	return relationships
   290  }
   291  
   292  func stripVersionSpecifier(s string) string {
   293  	// examples:
   294  	// libgmp10 (>= 2:6.2.1+dfsg1)         -->  libgmp10
   295  	// libgmp10                            -->  libgmp10
   296  	// foo [i386]                          -->  foo
   297  	// default-mta | mail-transport-agent  -->  default-mta | mail-transport-agent
   298  	// kernel-headers-2.2.10 [!hurd-i386]  -->  kernel-headers-2.2.10
   299  
   300  	items := internal.SplitAny(s, "[(<>=")
   301  	if len(items) == 0 {
   302  		return s
   303  	}
   304  
   305  	return strings.TrimSpace(items[0])
   306  }
   307  
   308  func splitPackageChoice(s string) (ret []string) {
   309  	fields := strings.Split(s, "|")
   310  	for _, field := range fields {
   311  		field = strings.TrimSpace(field)
   312  		if field != "" {
   313  			ret = append(ret, stripVersionSpecifier(field))
   314  		}
   315  	}
   316  	return ret
   317  }