github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/deb/parse_dpkg_db.go (about)

     1  package deb
     2  
     3  import (
     4  	"bufio"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"regexp"
     9  	"strings"
    10  
    11  	"github.com/dustin/go-humanize"
    12  	"github.com/mitchellh/mapstructure"
    13  	"github.com/nextlinux/gosbom/gosbom/artifact"
    14  	"github.com/nextlinux/gosbom/gosbom/file"
    15  	"github.com/nextlinux/gosbom/gosbom/pkg"
    16  	"github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic"
    17  	"github.com/nextlinux/gosbom/internal"
    18  	"github.com/nextlinux/gosbom/internal/log"
    19  )
    20  
    21  var (
    22  	errEndOfPackages = fmt.Errorf("no more packages to read")
    23  	sourceRegexp     = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`)
    24  )
    25  
    26  func parseDpkgDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    27  	metadata, err := parseDpkgStatus(reader)
    28  	if err != nil {
    29  		return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err)
    30  	}
    31  
    32  	var pkgs []pkg.Package
    33  	for _, m := range metadata {
    34  		pkgs = append(pkgs, newDpkgPackage(m, reader.Location, resolver, env.LinuxRelease))
    35  	}
    36  
    37  	return pkgs, nil, nil
    38  }
    39  
    40  // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed.
    41  func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgMetadata, error) {
    42  	buffedReader := bufio.NewReader(reader)
    43  	var metadata []pkg.DpkgMetadata
    44  
    45  	continueProcessing := true
    46  	for continueProcessing {
    47  		entry, err := parseDpkgStatusEntry(buffedReader)
    48  		if err != nil {
    49  			if errors.Is(err, errEndOfPackages) {
    50  				continueProcessing = false
    51  			} else {
    52  				return nil, err
    53  			}
    54  		}
    55  		if entry == nil {
    56  			continue
    57  		}
    58  
    59  		metadata = append(metadata, *entry)
    60  	}
    61  
    62  	return metadata, nil
    63  }
    64  
    65  // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader.
    66  func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgMetadata, error) {
    67  	var retErr error
    68  	dpkgFields, err := extractAllFields(reader)
    69  	if err != nil {
    70  		if !errors.Is(err, errEndOfPackages) {
    71  			return nil, err
    72  		}
    73  		if len(dpkgFields) == 0 {
    74  			return nil, err
    75  		}
    76  		retErr = err
    77  	}
    78  
    79  	entry := pkg.DpkgMetadata{}
    80  	err = mapstructure.Decode(dpkgFields, &entry)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  
    85  	sourceName, sourceVersion := extractSourceVersion(entry.Source)
    86  	if sourceVersion != "" {
    87  		entry.SourceVersion = sourceVersion
    88  		entry.Source = sourceName
    89  	}
    90  
    91  	if entry.Package == "" {
    92  		return nil, retErr
    93  	}
    94  
    95  	// there may be an optional conffiles section that we should persist as files
    96  	if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil {
    97  		if sectionStr, ok := conffilesSection.(string); ok {
    98  			entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr))
    99  		}
   100  	}
   101  
   102  	if entry.Files == nil {
   103  		// ensure the default value for a collection is never nil since this may be shown as JSON
   104  		entry.Files = make([]pkg.DpkgFileRecord, 0)
   105  	}
   106  
   107  	return &entry, retErr
   108  }
   109  
   110  func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
   111  	dpkgFields := make(map[string]interface{})
   112  	var key string
   113  
   114  	for {
   115  		line, err := reader.ReadString('\n')
   116  		if err != nil {
   117  			if errors.Is(err, io.EOF) {
   118  				return dpkgFields, errEndOfPackages
   119  			}
   120  			return nil, err
   121  		}
   122  
   123  		line = strings.TrimRight(line, "\n")
   124  
   125  		// empty line indicates end of entry
   126  		if len(line) == 0 {
   127  			// if the entry has not started, keep parsing lines
   128  			if len(dpkgFields) == 0 {
   129  				continue
   130  			}
   131  			break
   132  		}
   133  
   134  		switch {
   135  		case strings.HasPrefix(line, " "):
   136  			// a field-body continuation
   137  			if len(key) == 0 {
   138  				return nil, fmt.Errorf("no match for continuation: line: '%s'", line)
   139  			}
   140  
   141  			val, ok := dpkgFields[key]
   142  			if !ok {
   143  				return nil, fmt.Errorf("no previous key exists, expecting: %s", key)
   144  			}
   145  			// concatenate onto previous value
   146  			val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line))
   147  			dpkgFields[key] = val
   148  		default:
   149  			// parse a new key
   150  			var val interface{}
   151  			key, val, err = handleNewKeyValue(line)
   152  			if err != nil {
   153  				log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err)
   154  				continue
   155  			}
   156  
   157  			if _, ok := dpkgFields[key]; ok {
   158  				return nil, fmt.Errorf("duplicate key discovered: %s", key)
   159  			}
   160  			dpkgFields[key] = val
   161  		}
   162  	}
   163  	return dpkgFields, nil
   164  }
   165  
   166  // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if
   167  // of the "<name>" form, then return name and nil
   168  func extractSourceVersion(source string) (string, string) {
   169  	// special handling for the Source field since it has formatted data
   170  	match := internal.MatchNamedCaptureGroups(sourceRegexp, source)
   171  	return match["name"], match["version"]
   172  }
   173  
   174  // handleNewKeyValue parse a new key-value pair from the given unprocessed line
   175  func handleNewKeyValue(line string) (key string, val interface{}, err error) {
   176  	if i := strings.Index(line, ":"); i > 0 {
   177  		key = strings.TrimSpace(line[0:i])
   178  		// mapstruct cant handle "-"
   179  		key = strings.ReplaceAll(key, "-", "")
   180  		val := strings.TrimSpace(line[i+1:])
   181  
   182  		// further processing of values based on the key that was discovered
   183  		switch key {
   184  		case "InstalledSize":
   185  			s, err := humanize.ParseBytes(val)
   186  			if err != nil {
   187  				return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err)
   188  			}
   189  			return key, int(s), nil
   190  		default:
   191  			return key, val, nil
   192  		}
   193  	}
   194  
   195  	return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line)
   196  }