github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/arch/parse_alpm_db.go (about)

     1  package arch
     2  
     3  import (
     4  	"bufio"
     5  	"compress/gzip"
     6  	"context"
     7  	"fmt"
     8  	"io"
     9  	"path"
    10  	"path/filepath"
    11  	"strconv"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/go-viper/mapstructure/v2"
    16  	"github.com/vbatts/go-mtree"
    17  
    18  	"github.com/anchore/syft/internal"
    19  	"github.com/anchore/syft/internal/log"
    20  	"github.com/anchore/syft/internal/unknown"
    21  	"github.com/anchore/syft/syft/artifact"
    22  	"github.com/anchore/syft/syft/file"
    23  	"github.com/anchore/syft/syft/pkg"
    24  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    25  )
    26  
// compile-time proof that parseAlpmDB satisfies the generic.Parser interface.
var _ generic.Parser = parseAlpmDB

var (
	// ignoredFiles holds entry names/paths excluded from package file listings:
	// pacman's internal "/set" marker, the build metadata files embedded in
	// packages, and empty entries.
	ignoredFiles = map[string]bool{
		"/set":       true,
		".BUILDINFO": true,
		".PKGINFO":   true,
		"":           true,
	}
)
    37  
// parsedData is the intermediate decode target for a single ALPM database
// entry: the raw "license" value is captured separately, while every other
// field is squashed directly into pkg.AlpmDBEntry by mapstructure.
type parsedData struct {
	Licenses        string `mapstructure:"license"`
	pkg.AlpmDBEntry `mapstructure:",squash"`
}
    42  
    43  // parseAlpmDB parses the arch linux pacman database flat-files and returns the packages and relationships found within.
    44  func parseAlpmDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    45  	var errs error
    46  
    47  	data, err := parseAlpmDBEntry(reader)
    48  	if err != nil {
    49  		return nil, nil, err
    50  	}
    51  
    52  	if data == nil {
    53  		return nil, nil, nil
    54  	}
    55  
    56  	base := path.Dir(reader.RealPath)
    57  
    58  	var locs []file.Location
    59  
    60  	// replace the files found the pacman database with the files from the mtree These contain more metadata and
    61  	// thus more useful.
    62  	files, fileLoc, err := fetchPkgFiles(base, resolver)
    63  	errs = unknown.Join(errs, err)
    64  	if err == nil {
    65  		locs = append(locs, fileLoc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
    66  		data.Files = files
    67  	}
    68  	backups, backupLoc, err := fetchBackupFiles(base, resolver)
    69  	errs = unknown.Join(errs, err)
    70  	if err == nil {
    71  		locs = append(locs, backupLoc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation))
    72  		data.Backup = backups
    73  	}
    74  
    75  	if data.Package == "" {
    76  		return nil, nil, errs
    77  	}
    78  
    79  	return []pkg.Package{
    80  		newPackage(
    81  			ctx,
    82  			data,
    83  			env.LinuxRelease,
    84  			reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
    85  			locs...,
    86  		),
    87  	}, nil, errs
    88  }
    89  
    90  func fetchPkgFiles(base string, resolver file.Resolver) ([]pkg.AlpmFileRecord, file.Location, error) {
    91  	// TODO: probably want to use MTREE and PKGINFO here
    92  	target := path.Join(base, "mtree")
    93  
    94  	loc, err := getLocation(target, resolver)
    95  	if err != nil {
    96  		log.WithFields("error", err, "path", target).Trace("failed to find mtree file")
    97  		return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to find mtree file: %w", err))
    98  	}
    99  	reader, err := resolver.FileContentsByLocation(loc)
   100  	if err != nil {
   101  		return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to get contents: %w", err))
   102  	}
   103  	defer internal.CloseAndLogError(reader, loc.RealPath)
   104  
   105  	pkgFiles, err := parseMtree(reader)
   106  	if err != nil {
   107  		log.WithFields("error", err, "path", target).Trace("failed to parse mtree file")
   108  		return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to parse mtree: %w", err))
   109  	}
   110  	return pkgFiles, loc, nil
   111  }
   112  
   113  func fetchBackupFiles(base string, resolver file.Resolver) ([]pkg.AlpmFileRecord, file.Location, error) {
   114  	// We only really do this to get any backup database entries from the files database
   115  	target := filepath.Join(base, "files")
   116  
   117  	loc, err := getLocation(target, resolver)
   118  	if err != nil {
   119  		log.WithFields("error", err, "path", target).Trace("failed to find alpm files")
   120  		return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to find alpm files: %w", err))
   121  	}
   122  
   123  	reader, err := resolver.FileContentsByLocation(loc)
   124  	if err != nil {
   125  		return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to get contents: %w", err))
   126  	}
   127  	defer internal.CloseAndLogError(reader, loc.RealPath)
   128  
   129  	filesMetadata, err := parseAlpmDBEntry(reader)
   130  	if err != nil {
   131  		return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to parse alpm db entry: %w", err))
   132  	}
   133  	if filesMetadata != nil {
   134  		return filesMetadata.Backup, loc, nil
   135  	}
   136  	return []pkg.AlpmFileRecord{}, loc, nil
   137  }
   138  
   139  func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) {
   140  	scanner := newScanner(reader)
   141  	metadata, err := parseDatabase(scanner)
   142  	if err != nil {
   143  		return nil, err
   144  	}
   145  	return metadata, nil
   146  }
   147  
   148  func newScanner(reader io.Reader) *bufio.Scanner {
   149  	// This is taken from the apk parser
   150  	// https://github.com/anchore/syft/blob/v0.47.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L37
   151  	const maxScannerCapacity = 1024 * 1024
   152  	bufScan := make([]byte, maxScannerCapacity)
   153  	scanner := bufio.NewScanner(reader)
   154  	scanner.Buffer(bufScan, maxScannerCapacity)
   155  	onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
   156  		for i := 0; i < len(data); i++ {
   157  			if i > 0 && data[i-1] == '\n' && data[i] == '\n' {
   158  				return i + 1, data[:i-1], nil
   159  			}
   160  		}
   161  		if !atEOF {
   162  			return 0, nil, nil
   163  		}
   164  		// deliver the last token (which could be an empty string)
   165  		return 0, data, bufio.ErrFinalToken
   166  	}
   167  
   168  	scanner.Split(onDoubleLF)
   169  	return scanner
   170  }
   171  
   172  func getLocation(path string, resolver file.Resolver) (file.Location, error) {
   173  	loc := file.NewLocation(path)
   174  	locs, err := resolver.FilesByPath(path)
   175  	if err != nil {
   176  		return loc, err
   177  	}
   178  
   179  	if len(locs) == 0 {
   180  		return loc, fmt.Errorf("could not find file: %s", path)
   181  	}
   182  
   183  	if len(locs) > 1 {
   184  		log.WithFields("path", path).Trace("multiple files found for path, using first path")
   185  	}
   186  	return locs[0], nil
   187  }
   188  
   189  func parseDatabase(b *bufio.Scanner) (*parsedData, error) {
   190  	var err error
   191  	pkgFields := make(map[string]interface{})
   192  	for b.Scan() {
   193  		fields := strings.SplitN(b.Text(), "\n", 2)
   194  
   195  		// End of File
   196  		if len(fields) == 1 {
   197  			break
   198  		}
   199  
   200  		// The alpm database surrounds the keys with %.
   201  		key := strings.ReplaceAll(fields[0], "%", "")
   202  		key = strings.ToLower(key)
   203  		value := strings.TrimSpace(fields[1])
   204  
   205  		switch key {
   206  		case "files":
   207  			var files []map[string]string
   208  			for _, f := range strings.Split(value, "\n") {
   209  				p := fmt.Sprintf("/%s", f)
   210  				if ok := ignoredFiles[p]; !ok {
   211  					files = append(files, map[string]string{"path": p})
   212  				}
   213  			}
   214  			pkgFields[key] = files
   215  		case "backup":
   216  			var backup []map[string]interface{}
   217  			for _, f := range strings.Split(value, "\n") {
   218  				fields := strings.SplitN(f, "\t", 2)
   219  				p := fmt.Sprintf("/%s", fields[0])
   220  				if ok := ignoredFiles[p]; !ok {
   221  					backup = append(backup, map[string]interface{}{
   222  						"path": p,
   223  						"digests": []file.Digest{{
   224  							Algorithm: "md5",
   225  							Value:     fields[1],
   226  						}}})
   227  				}
   228  			}
   229  			pkgFields[key] = backup
   230  		case "depends", "provides":
   231  			pkgFields[key] = processLibrarySpecs(value)
   232  		case "reason":
   233  			fallthrough
   234  		case "size":
   235  			pkgFields[key], err = strconv.ParseInt(value, 10, 64)
   236  			if err != nil {
   237  				return nil, fmt.Errorf("failed to parse %s to integer", value)
   238  			}
   239  		default:
   240  			pkgFields[key] = value
   241  		}
   242  	}
   243  
   244  	return parsePkgFiles(pkgFields)
   245  }
   246  
   247  func processLibrarySpecs(value string) []string {
   248  	lines := strings.Split(value, "\n")
   249  	librarySpecs := make([]string, 0)
   250  	for _, line := range lines {
   251  		line = strings.TrimSpace(line)
   252  		if line == "" {
   253  			continue
   254  		}
   255  		librarySpecs = append(librarySpecs, line)
   256  	}
   257  	return librarySpecs
   258  }
   259  
   260  func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) {
   261  	var entry parsedData
   262  	if err := mapstructure.Decode(pkgFields, &entry); err != nil {
   263  		return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err)
   264  	}
   265  
   266  	if entry.Backup == nil {
   267  		entry.Backup = make([]pkg.AlpmFileRecord, 0)
   268  	}
   269  
   270  	if entry.Files == nil {
   271  		entry.Files = make([]pkg.AlpmFileRecord, 0)
   272  	}
   273  
   274  	if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 {
   275  		return nil, nil
   276  	}
   277  	return &entry, nil
   278  }
   279  
   280  func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) {
   281  	var err error
   282  	var entries []pkg.AlpmFileRecord
   283  
   284  	r, err = gzip.NewReader(r)
   285  	if err != nil {
   286  		return nil, err
   287  	}
   288  	specDh, err := mtree.ParseSpec(r)
   289  	if err != nil {
   290  		return nil, err
   291  	}
   292  	for _, f := range specDh.Entries {
   293  		var entry pkg.AlpmFileRecord
   294  		entry.Digests = make([]file.Digest, 0)
   295  		fileFields := make(map[string]interface{})
   296  		if ok := ignoredFiles[f.Name]; ok {
   297  			continue
   298  		}
   299  		path := fmt.Sprintf("/%s", f.Name)
   300  		fileFields["path"] = path
   301  		for _, kv := range f.Keywords {
   302  			kw := string(kv.Keyword())
   303  			switch kw {
   304  			case "time":
   305  				// All unix timestamps have a .0 suffixs.
   306  				v := strings.Split(kv.Value(), ".")
   307  				i, _ := strconv.ParseInt(v[0], 10, 64)
   308  				tm := time.Unix(i, 0)
   309  				fileFields[kw] = tm
   310  			case "sha256digest":
   311  				entry.Digests = append(entry.Digests, file.Digest{
   312  					Algorithm: "sha256",
   313  					Value:     kv.Value(),
   314  				})
   315  			case "md5digest":
   316  				entry.Digests = append(entry.Digests, file.Digest{
   317  					Algorithm: "md5",
   318  					Value:     kv.Value(),
   319  				})
   320  			default:
   321  				fileFields[kw] = kv.Value()
   322  			}
   323  		}
   324  		if err := mapstructure.Decode(fileFields, &entry); err != nil {
   325  			return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err)
   326  		}
   327  		entries = append(entries, entry)
   328  	}
   329  	return entries, nil
   330  }