github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/deb/parse_dpkg_db.go (about)

     1  package deb
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"regexp"
     9  	"strings"
    10  
    11  	"github.com/dustin/go-humanize"
    12  	"github.com/mitchellh/mapstructure"
    13  
    14  	"github.com/anchore/syft/internal"
    15  	"github.com/anchore/syft/internal/log"
    16  	"github.com/anchore/syft/syft/artifact"
    17  	"github.com/anchore/syft/syft/file"
    18  	"github.com/anchore/syft/syft/pkg"
    19  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    20  )
    21  
    22  var (
    23  	errEndOfPackages = fmt.Errorf("no more packages to read")
    24  	sourceRegexp     = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`)
    25  )
    26  
    27  func parseDpkgDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    28  	metadata, err := parseDpkgStatus(reader)
    29  	if err != nil {
    30  		return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err)
    31  	}
    32  
    33  	var pkgs []pkg.Package
    34  	for _, m := range metadata {
    35  		pkgs = append(pkgs, newDpkgPackage(m, reader.Location, resolver, env.LinuxRelease))
    36  	}
    37  
    38  	return pkgs, nil, nil
    39  }
    40  
    41  // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed.
    42  func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgMetadata, error) {
    43  	buffedReader := bufio.NewReader(reader)
    44  	var metadata []pkg.DpkgMetadata
    45  
    46  	continueProcessing := true
    47  	for continueProcessing {
    48  		entry, err := parseDpkgStatusEntry(buffedReader)
    49  		if err != nil {
    50  			if errors.Is(err, errEndOfPackages) {
    51  				continueProcessing = false
    52  			} else {
    53  				return nil, err
    54  			}
    55  		}
    56  		if entry == nil {
    57  			continue
    58  		}
    59  
    60  		metadata = append(metadata, *entry)
    61  	}
    62  
    63  	return metadata, nil
    64  }
    65  
    66  // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader.
    67  func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgMetadata, error) {
    68  	var retErr error
    69  	dpkgFields, err := extractAllFields(reader)
    70  	if err != nil {
    71  		if !errors.Is(err, errEndOfPackages) {
    72  			return nil, err
    73  		}
    74  		if len(dpkgFields) == 0 {
    75  			return nil, err
    76  		}
    77  		retErr = err
    78  	}
    79  
    80  	entry := pkg.DpkgMetadata{}
    81  	err = mapstructure.Decode(dpkgFields, &entry)
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	sourceName, sourceVersion := extractSourceVersion(entry.Source)
    87  	if sourceVersion != "" {
    88  		entry.SourceVersion = sourceVersion
    89  		entry.Source = sourceName
    90  	}
    91  
    92  	if entry.Package == "" {
    93  		return nil, retErr
    94  	}
    95  
    96  	// there may be an optional conffiles section that we should persist as files
    97  	if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil {
    98  		if sectionStr, ok := conffilesSection.(string); ok {
    99  			entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr))
   100  		}
   101  	}
   102  
   103  	if entry.Files == nil {
   104  		// ensure the default value for a collection is never nil since this may be shown as JSON
   105  		entry.Files = make([]pkg.DpkgFileRecord, 0)
   106  	}
   107  
   108  	return &entry, retErr
   109  }
   110  
   111  func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
   112  	dpkgFields := make(map[string]interface{})
   113  	var key string
   114  
   115  	for {
   116  		line, err := reader.ReadString('\n')
   117  		if err != nil {
   118  			if errors.Is(err, io.EOF) {
   119  				return dpkgFields, errEndOfPackages
   120  			}
   121  			return nil, err
   122  		}
   123  
   124  		line = strings.TrimRight(line, "\n")
   125  
   126  		// empty line indicates end of entry
   127  		if len(line) == 0 {
   128  			// if the entry has not started, keep parsing lines
   129  			if len(dpkgFields) == 0 {
   130  				continue
   131  			}
   132  			break
   133  		}
   134  
   135  		switch {
   136  		case strings.HasPrefix(line, " "):
   137  			// a field-body continuation
   138  			if len(key) == 0 {
   139  				return nil, fmt.Errorf("no match for continuation: line: '%s'", line)
   140  			}
   141  
   142  			val, ok := dpkgFields[key]
   143  			if !ok {
   144  				return nil, fmt.Errorf("no previous key exists, expecting: %s", key)
   145  			}
   146  			// concatenate onto previous value
   147  			val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line))
   148  			dpkgFields[key] = val
   149  		default:
   150  			// parse a new key
   151  			var val interface{}
   152  			key, val, err = handleNewKeyValue(line)
   153  			if err != nil {
   154  				log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err)
   155  				continue
   156  			}
   157  
   158  			if _, ok := dpkgFields[key]; ok {
   159  				return nil, fmt.Errorf("duplicate key discovered: %s", key)
   160  			}
   161  			dpkgFields[key] = val
   162  		}
   163  	}
   164  	return dpkgFields, nil
   165  }
   166  
   167  // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if
   168  // of the "<name>" form, then return name and nil
   169  func extractSourceVersion(source string) (string, string) {
   170  	// special handling for the Source field since it has formatted data
   171  	match := internal.MatchNamedCaptureGroups(sourceRegexp, source)
   172  	return match["name"], match["version"]
   173  }
   174  
   175  // handleNewKeyValue parse a new key-value pair from the given unprocessed line
   176  func handleNewKeyValue(line string) (key string, val interface{}, err error) {
   177  	if i := strings.Index(line, ":"); i > 0 {
   178  		key = strings.TrimSpace(line[0:i])
   179  		// mapstruct cant handle "-"
   180  		key = strings.ReplaceAll(key, "-", "")
   181  		val := strings.TrimSpace(line[i+1:])
   182  
   183  		// further processing of values based on the key that was discovered
   184  		switch key {
   185  		case "InstalledSize":
   186  			s, err := humanize.ParseBytes(val)
   187  			if err != nil {
   188  				return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err)
   189  			}
   190  			return key, int(s), nil
   191  		default:
   192  			return key, val, nil
   193  		}
   194  	}
   195  
   196  	return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line)
   197  }