github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/arch/parse_alpm_db.go (about)

     1  package arch
     2  
     3  import (
     4  	"bufio"
     5  	"compress/gzip"
     6  	"context"
     7  	"fmt"
     8  	"io"
     9  	"path"
    10  	"path/filepath"
    11  	"strconv"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/mitchellh/mapstructure"
    16  	"github.com/vbatts/go-mtree"
    17  
    18  	"github.com/anchore/syft/internal"
    19  	"github.com/anchore/syft/internal/log"
    20  	"github.com/anchore/syft/syft/artifact"
    21  	"github.com/anchore/syft/syft/file"
    22  	"github.com/anchore/syft/syft/pkg"
    23  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    24  )
    25  
// Compile-time assertion: the build fails here if parseAlpmDB stops matching the generic.Parser type.
var _ generic.Parser = parseAlpmDB
    27  
    28  var (
    29  	ignoredFiles = map[string]bool{
    30  		"/set":       true,
    31  		".BUILDINFO": true,
    32  		".PKGINFO":   true,
    33  		"":           true,
    34  	}
    35  )
    36  
// parsedData is the mapstructure decode target for one ALPM database entry. The embedded pkg.AlpmDBEntry is
// tagged ",squash", so its fields are decoded from the same top-level map as Licenses, which receives the raw
// string stored under the "license" key.
type parsedData struct {
	Licenses        string `mapstructure:"license"`
	pkg.AlpmDBEntry `mapstructure:",squash"`
}
    41  
    42  // parseAlpmDB parses the arch linux pacman database flat-files and returns the packages and relationships found within.
    43  func parseAlpmDB(_ context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    44  	data, err := parseAlpmDBEntry(reader)
    45  	if err != nil {
    46  		return nil, nil, err
    47  	}
    48  
    49  	if data == nil {
    50  		return nil, nil, nil
    51  	}
    52  
    53  	base := path.Dir(reader.RealPath)
    54  
    55  	// replace the files found the pacman database with the files from the mtree These contain more metadata and
    56  	// thus more useful.
    57  	files, fileLoc := fetchPkgFiles(base, resolver)
    58  	backups, backupLoc := fetchBackupFiles(base, resolver)
    59  
    60  	var locs []file.Location
    61  	if fileLoc != nil {
    62  		locs = append(locs, fileLoc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
    63  		data.Files = files
    64  	}
    65  
    66  	if backupLoc != nil {
    67  		locs = append(locs, backupLoc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
    68  		data.Backup = backups
    69  	}
    70  
    71  	if data.Package == "" {
    72  		return nil, nil, nil
    73  	}
    74  
    75  	return []pkg.Package{
    76  		newPackage(
    77  			data,
    78  			env.LinuxRelease,
    79  			reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
    80  			locs...,
    81  		),
    82  	}, nil, nil
    83  }
    84  
    85  func fetchPkgFiles(base string, resolver file.Resolver) ([]pkg.AlpmFileRecord, *file.Location) {
    86  	// TODO: probably want to use MTREE and PKGINFO here
    87  	target := path.Join(base, "mtree")
    88  
    89  	loc, err := getLocation(target, resolver)
    90  	if err != nil {
    91  		log.WithFields("error", err, "path", target).Trace("failed to find mtree file")
    92  		return []pkg.AlpmFileRecord{}, nil
    93  	}
    94  	if loc == nil {
    95  		return []pkg.AlpmFileRecord{}, nil
    96  	}
    97  
    98  	reader, err := resolver.FileContentsByLocation(*loc)
    99  	if err != nil {
   100  		return []pkg.AlpmFileRecord{}, nil
   101  	}
   102  	defer internal.CloseAndLogError(reader, loc.RealPath)
   103  
   104  	pkgFiles, err := parseMtree(reader)
   105  	if err != nil {
   106  		log.WithFields("error", err, "path", target).Trace("failed to parse mtree file")
   107  		return []pkg.AlpmFileRecord{}, nil
   108  	}
   109  	return pkgFiles, loc
   110  }
   111  
   112  func fetchBackupFiles(base string, resolver file.Resolver) ([]pkg.AlpmFileRecord, *file.Location) {
   113  	// We only really do this to get any backup database entries from the files database
   114  	target := filepath.Join(base, "files")
   115  
   116  	loc, err := getLocation(target, resolver)
   117  	if err != nil {
   118  		log.WithFields("error", err, "path", target).Trace("failed to find alpm files")
   119  		return []pkg.AlpmFileRecord{}, nil
   120  	}
   121  	if loc == nil {
   122  		return []pkg.AlpmFileRecord{}, nil
   123  	}
   124  
   125  	reader, err := resolver.FileContentsByLocation(*loc)
   126  	if err != nil {
   127  		return []pkg.AlpmFileRecord{}, nil
   128  	}
   129  	defer internal.CloseAndLogError(reader, loc.RealPath)
   130  
   131  	filesMetadata, err := parseAlpmDBEntry(reader)
   132  	if err != nil {
   133  		return []pkg.AlpmFileRecord{}, nil
   134  	}
   135  	if filesMetadata != nil {
   136  		return filesMetadata.Backup, loc
   137  	}
   138  	return []pkg.AlpmFileRecord{}, loc
   139  }
   140  
   141  func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) {
   142  	scanner := newScanner(reader)
   143  	metadata, err := parseDatabase(scanner)
   144  	if err != nil {
   145  		return nil, err
   146  	}
   147  	return metadata, nil
   148  }
   149  
   150  func newScanner(reader io.Reader) *bufio.Scanner {
   151  	// This is taken from the apk parser
   152  	// https://github.com/anchore/syft/blob/v0.47.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L37
   153  	const maxScannerCapacity = 1024 * 1024
   154  	bufScan := make([]byte, maxScannerCapacity)
   155  	scanner := bufio.NewScanner(reader)
   156  	scanner.Buffer(bufScan, maxScannerCapacity)
   157  	onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
   158  		for i := 0; i < len(data); i++ {
   159  			if i > 0 && data[i-1] == '\n' && data[i] == '\n' {
   160  				return i + 1, data[:i-1], nil
   161  			}
   162  		}
   163  		if !atEOF {
   164  			return 0, nil, nil
   165  		}
   166  		// deliver the last token (which could be an empty string)
   167  		return 0, data, bufio.ErrFinalToken
   168  	}
   169  
   170  	scanner.Split(onDoubleLF)
   171  	return scanner
   172  }
   173  
   174  func getLocation(path string, resolver file.Resolver) (*file.Location, error) {
   175  	locs, err := resolver.FilesByPath(path)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	if len(locs) == 0 {
   181  		return nil, fmt.Errorf("could not find file: %s", path)
   182  	}
   183  
   184  	if len(locs) > 1 {
   185  		log.WithFields("path", path).Trace("multiple files found for path, using first path")
   186  	}
   187  	return &locs[0], nil
   188  }
   189  
   190  func parseDatabase(b *bufio.Scanner) (*parsedData, error) {
   191  	var err error
   192  	pkgFields := make(map[string]interface{})
   193  	for b.Scan() {
   194  		fields := strings.SplitN(b.Text(), "\n", 2)
   195  
   196  		// End of File
   197  		if len(fields) == 1 {
   198  			break
   199  		}
   200  
   201  		// The alpm database surrounds the keys with %.
   202  		key := strings.ReplaceAll(fields[0], "%", "")
   203  		key = strings.ToLower(key)
   204  		value := strings.TrimSpace(fields[1])
   205  
   206  		switch key {
   207  		case "files":
   208  			var files []map[string]string
   209  			for _, f := range strings.Split(value, "\n") {
   210  				p := fmt.Sprintf("/%s", f)
   211  				if ok := ignoredFiles[p]; !ok {
   212  					files = append(files, map[string]string{"path": p})
   213  				}
   214  			}
   215  			pkgFields[key] = files
   216  		case "backup":
   217  			var backup []map[string]interface{}
   218  			for _, f := range strings.Split(value, "\n") {
   219  				fields := strings.SplitN(f, "\t", 2)
   220  				p := fmt.Sprintf("/%s", fields[0])
   221  				if ok := ignoredFiles[p]; !ok {
   222  					backup = append(backup, map[string]interface{}{
   223  						"path": p,
   224  						"digests": []file.Digest{{
   225  							Algorithm: "md5",
   226  							Value:     fields[1],
   227  						}}})
   228  				}
   229  			}
   230  			pkgFields[key] = backup
   231  		case "depends", "provides":
   232  			pkgFields[key] = processLibrarySpecs(value)
   233  		case "reason":
   234  			fallthrough
   235  		case "size":
   236  			pkgFields[key], err = strconv.ParseInt(value, 10, 64)
   237  			if err != nil {
   238  				return nil, fmt.Errorf("failed to parse %s to integer", value)
   239  			}
   240  		default:
   241  			pkgFields[key] = value
   242  		}
   243  	}
   244  
   245  	return parsePkgFiles(pkgFields)
   246  }
   247  
   248  func processLibrarySpecs(value string) []string {
   249  	lines := strings.Split(value, "\n")
   250  	librarySpecs := make([]string, 0)
   251  	for _, line := range lines {
   252  		line = strings.TrimSpace(line)
   253  		if line == "" {
   254  			continue
   255  		}
   256  		librarySpecs = append(librarySpecs, line)
   257  	}
   258  	return librarySpecs
   259  }
   260  
   261  func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) {
   262  	var entry parsedData
   263  	if err := mapstructure.Decode(pkgFields, &entry); err != nil {
   264  		return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err)
   265  	}
   266  
   267  	if entry.Backup == nil {
   268  		entry.Backup = make([]pkg.AlpmFileRecord, 0)
   269  	}
   270  
   271  	if entry.Files == nil {
   272  		entry.Files = make([]pkg.AlpmFileRecord, 0)
   273  	}
   274  
   275  	if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 {
   276  		return nil, nil
   277  	}
   278  	return &entry, nil
   279  }
   280  
   281  func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) {
   282  	var err error
   283  	var entries []pkg.AlpmFileRecord
   284  
   285  	r, err = gzip.NewReader(r)
   286  	if err != nil {
   287  		return nil, err
   288  	}
   289  	specDh, err := mtree.ParseSpec(r)
   290  	if err != nil {
   291  		return nil, err
   292  	}
   293  	for _, f := range specDh.Entries {
   294  		var entry pkg.AlpmFileRecord
   295  		entry.Digests = make([]file.Digest, 0)
   296  		fileFields := make(map[string]interface{})
   297  		if ok := ignoredFiles[f.Name]; ok {
   298  			continue
   299  		}
   300  		path := fmt.Sprintf("/%s", f.Name)
   301  		fileFields["path"] = path
   302  		for _, kv := range f.Keywords {
   303  			kw := string(kv.Keyword())
   304  			switch kw {
   305  			case "time":
   306  				// All unix timestamps have a .0 suffixs.
   307  				v := strings.Split(kv.Value(), ".")
   308  				i, _ := strconv.ParseInt(v[0], 10, 64)
   309  				tm := time.Unix(i, 0)
   310  				fileFields[kw] = tm
   311  			case "sha256digest":
   312  				entry.Digests = append(entry.Digests, file.Digest{
   313  					Algorithm: "sha256",
   314  					Value:     kv.Value(),
   315  				})
   316  			case "md5digest":
   317  				entry.Digests = append(entry.Digests, file.Digest{
   318  					Algorithm: "md5",
   319  					Value:     kv.Value(),
   320  				})
   321  			default:
   322  				fileFields[kw] = kv.Value()
   323  			}
   324  		}
   325  		if err := mapstructure.Decode(fileFields, &entry); err != nil {
   326  			return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err)
   327  		}
   328  		entries = append(entries, entry)
   329  	}
   330  	return entries, nil
   331  }