github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/alpm/parse_alpm_db.go (about)

     1  package alpm
     2  
     3  import (
     4  	"bufio"
     5  	"compress/gzip"
     6  	"fmt"
     7  	"io"
     8  	"path/filepath"
     9  	"strconv"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/mitchellh/mapstructure"
    14  	"github.com/nextlinux/gosbom/gosbom/artifact"
    15  	"github.com/nextlinux/gosbom/gosbom/file"
    16  	"github.com/nextlinux/gosbom/gosbom/pkg"
    17  	"github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic"
    18  	"github.com/vbatts/go-mtree"
    19  )
    20  
// Compile-time assertion that parseAlpmDB can be assigned to a generic.Parser.
var _ generic.Parser = parseAlpmDB
    22  
    23  var (
    24  	ignoredFiles = map[string]bool{
    25  		"/set":       true,
    26  		".BUILDINFO": true,
    27  		".PKGINFO":   true,
    28  		"":           true,
    29  	}
    30  )
    31  
// parsedData is the decode target for the key/value fields collected by
// parseDatabase.
type parsedData struct {
	// Licenses receives the value stored under the "license" key.
	Licenses string `mapstructure:"license"`
	// The embedded metadata is squashed, so its fields are decoded from the
	// same top-level map as Licenses.
	pkg.AlpmMetadata `mapstructure:",squash"`
}
    36  
    37  func parseAlpmDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    38  	data, err := parseAlpmDBEntry(reader)
    39  	if err != nil {
    40  		return nil, nil, err
    41  	}
    42  
    43  	base := filepath.Dir(reader.RealPath)
    44  	r, err := getFileReader(filepath.Join(base, "mtree"), resolver)
    45  	if err != nil {
    46  		return nil, nil, err
    47  	}
    48  
    49  	pkgFiles, err := parseMtree(r)
    50  	if err != nil {
    51  		return nil, nil, err
    52  	}
    53  
    54  	// replace the files found the pacman database with the files from the mtree These contain more metadata and
    55  	// thus more useful.
    56  	// TODO: probably want to use MTREE and PKGINFO here
    57  	data.Files = pkgFiles
    58  
    59  	// We only really do this to get any backup database entries from the files database
    60  	files := filepath.Join(base, "files")
    61  	_, err = getFileReader(files, resolver)
    62  	if err != nil {
    63  		return nil, nil, err
    64  	}
    65  	filesMetadata, err := parseAlpmDBEntry(reader)
    66  	if err != nil {
    67  		return nil, nil, err
    68  	} else if filesMetadata != nil {
    69  		data.Backup = filesMetadata.Backup
    70  	}
    71  
    72  	if data.Package == "" {
    73  		return nil, nil, nil
    74  	}
    75  
    76  	return []pkg.Package{
    77  		newPackage(
    78  			data,
    79  			env.LinuxRelease,
    80  			reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
    81  		),
    82  	}, nil, nil
    83  }
    84  
    85  func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) {
    86  	scanner := newScanner(reader)
    87  	metadata, err := parseDatabase(scanner)
    88  	if err != nil {
    89  		return nil, err
    90  	}
    91  	return metadata, nil
    92  }
    93  
    94  func newScanner(reader io.Reader) *bufio.Scanner {
    95  	// This is taken from the apk parser
    96  	// https://github.com/nextlinux/gosbom/blob/v0.47.0/gosbom/pkg/cataloger/apkdb/parse_apk_db.go#L37
    97  	const maxScannerCapacity = 1024 * 1024
    98  	bufScan := make([]byte, maxScannerCapacity)
    99  	scanner := bufio.NewScanner(reader)
   100  	scanner.Buffer(bufScan, maxScannerCapacity)
   101  	onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
   102  		for i := 0; i < len(data); i++ {
   103  			if i > 0 && data[i-1] == '\n' && data[i] == '\n' {
   104  				return i + 1, data[:i-1], nil
   105  			}
   106  		}
   107  		if !atEOF {
   108  			return 0, nil, nil
   109  		}
   110  		// deliver the last token (which could be an empty string)
   111  		return 0, data, bufio.ErrFinalToken
   112  	}
   113  
   114  	scanner.Split(onDoubleLF)
   115  	return scanner
   116  }
   117  
   118  func getFileReader(path string, resolver file.Resolver) (io.Reader, error) {
   119  	locs, err := resolver.FilesByPath(path)
   120  	if err != nil {
   121  		return nil, err
   122  	}
   123  
   124  	if len(locs) == 0 {
   125  		return nil, fmt.Errorf("could not find file: %s", path)
   126  	}
   127  	// TODO: Should we maybe check if we found the file
   128  	dbContentReader, err := resolver.FileContentsByLocation(locs[0])
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  	return dbContentReader, nil
   133  }
   134  
   135  func parseDatabase(b *bufio.Scanner) (*parsedData, error) {
   136  	var err error
   137  	pkgFields := make(map[string]interface{})
   138  	for b.Scan() {
   139  		fields := strings.SplitN(b.Text(), "\n", 2)
   140  
   141  		// End of File
   142  		if len(fields) == 1 {
   143  			break
   144  		}
   145  
   146  		// The alpm database surrounds the keys with %.
   147  		key := strings.ReplaceAll(fields[0], "%", "")
   148  		key = strings.ToLower(key)
   149  		value := strings.TrimSpace(fields[1])
   150  
   151  		switch key {
   152  		case "files":
   153  			var files []map[string]string
   154  			for _, f := range strings.Split(value, "\n") {
   155  				path := fmt.Sprintf("/%s", f)
   156  				if ok := ignoredFiles[path]; !ok {
   157  					files = append(files, map[string]string{"path": path})
   158  				}
   159  			}
   160  			pkgFields[key] = files
   161  		case "backup":
   162  			var backup []map[string]interface{}
   163  			for _, f := range strings.Split(value, "\n") {
   164  				fields := strings.SplitN(f, "\t", 2)
   165  				path := fmt.Sprintf("/%s", fields[0])
   166  				if ok := ignoredFiles[path]; !ok {
   167  					backup = append(backup, map[string]interface{}{
   168  						"path": path,
   169  						"digests": []file.Digest{{
   170  							Algorithm: "md5",
   171  							Value:     fields[1],
   172  						}}})
   173  				}
   174  			}
   175  			pkgFields[key] = backup
   176  		case "reason":
   177  			fallthrough
   178  		case "size":
   179  			pkgFields[key], err = strconv.ParseInt(value, 10, 64)
   180  			if err != nil {
   181  				return nil, fmt.Errorf("failed to parse %s to integer", value)
   182  			}
   183  		default:
   184  			pkgFields[key] = value
   185  		}
   186  	}
   187  
   188  	return parsePkgFiles(pkgFields)
   189  }
   190  
   191  func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) {
   192  	var entry parsedData
   193  	if err := mapstructure.Decode(pkgFields, &entry); err != nil {
   194  		return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err)
   195  	}
   196  
   197  	if entry.Backup == nil {
   198  		entry.Backup = make([]pkg.AlpmFileRecord, 0)
   199  	}
   200  
   201  	if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 {
   202  		return nil, nil
   203  	}
   204  	return &entry, nil
   205  }
   206  
   207  func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) {
   208  	var err error
   209  	var entries []pkg.AlpmFileRecord
   210  
   211  	r, err = gzip.NewReader(r)
   212  	if err != nil {
   213  		return nil, err
   214  	}
   215  	specDh, err := mtree.ParseSpec(r)
   216  	if err != nil {
   217  		return nil, err
   218  	}
   219  	for _, f := range specDh.Entries {
   220  		var entry pkg.AlpmFileRecord
   221  		entry.Digests = make([]file.Digest, 0)
   222  		fileFields := make(map[string]interface{})
   223  		if ok := ignoredFiles[f.Name]; ok {
   224  			continue
   225  		}
   226  		path := fmt.Sprintf("/%s", f.Name)
   227  		fileFields["path"] = path
   228  		for _, kv := range f.Keywords {
   229  			kw := string(kv.Keyword())
   230  			switch kw {
   231  			case "time":
   232  				// All unix timestamps have a .0 suffixs.
   233  				v := strings.Split(kv.Value(), ".")
   234  				i, _ := strconv.ParseInt(v[0], 10, 64)
   235  				tm := time.Unix(i, 0)
   236  				fileFields[kw] = tm
   237  			case "sha256digest":
   238  				entry.Digests = append(entry.Digests, file.Digest{
   239  					Algorithm: "sha256",
   240  					Value:     kv.Value(),
   241  				})
   242  			case "md5digest":
   243  				entry.Digests = append(entry.Digests, file.Digest{
   244  					Algorithm: "md5",
   245  					Value:     kv.Value(),
   246  				})
   247  			default:
   248  				fileFields[kw] = kv.Value()
   249  			}
   250  		}
   251  		if err := mapstructure.Decode(fileFields, &entry); err != nil {
   252  			return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err)
   253  		}
   254  		entries = append(entries, entry)
   255  	}
   256  	return entries, nil
   257  }