github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/arch/parse_alpm_db.go

package arch

import (
	"bufio"
	"compress/gzip"
	"fmt"
	"io"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/mitchellh/mapstructure"
	"github.com/vbatts/go-mtree"

	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

var _ generic.Parser = parseAlpmDB

// ignoredFiles lists entries (from both the flat-file database records and the mtree spec) that should not be
// treated as package files.
var (
	ignoredFiles = map[string]bool{
		"/set":       true,
		".BUILDINFO": true,
		".PKGINFO":   true,
		"":           true,
	}
)

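// parsedData is an intermediate representation of a single ALPM database entry, combining the raw license field
// with the fields that map directly onto pkg.AlpmDBEntry.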
type parsedData struct {
	Licenses        string `mapstructure:"license"`
	pkg.AlpmDBEntry `mapstructure:",squash"`
}

// parseAlpmDB parses the Arch Linux pacman database flat-files and returns the packages and relationships found within.
func parseAlpmDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	data, err := parseAlpmDBEntry(reader)
	if err != nil {
		return nil, nil, err
	}

	base := filepath.Dir(reader.RealPath)
	r, err := getFileReader(filepath.Join(base, "mtree"), resolver)
	if err != nil {
		return nil, nil, err
	}

	pkgFiles, err := parseMtree(r)
	if err != nil {
		return nil, nil, err
	}

	// replace the files found in the pacman database with the files from the mtree, since these contain more
	// metadata and are thus more useful.
	// TODO: probably want to use MTREE and PKGINFO here
	data.Files = pkgFiles

	// We only really do this to get any backup database entries from the files database
	files := filepath.Join(base, "files")
	filesReader, err := getFileReader(files, resolver)
	if err != nil {
		return nil, nil, err
	}
	filesMetadata, err := parseAlpmDBEntry(filesReader)
	if err != nil {
		return nil, nil, err
	} else if filesMetadata != nil {
		data.Backup = filesMetadata.Backup
	}

	if data.Package == "" {
		return nil, nil, nil
	}

	return []pkg.Package{
		newPackage(
			data,
			env.LinuxRelease,
			reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
		),
	}, nil, nil
}

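// parseAlpmDBEntry reads a single ALPM database flat-file (e.g. desc or files) and returns the parsed metadata.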
func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) {
	scanner := newScanner(reader)
	metadata, err := parseDatabase(scanner)
	if err != nil {
		return nil, err
	}
	return metadata, nil
}

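// newScanner returns a bufio.Scanner that yields one record at a time, where records are separated by blank lines.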
func newScanner(reader io.Reader) *bufio.Scanner {
	// This is taken from the apk parser
	// https://github.com/anchore/syft/blob/v0.47.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L37
	const maxScannerCapacity = 1024 * 1024
	bufScan := make([]byte, maxScannerCapacity)
	scanner := bufio.NewScanner(reader)
	scanner.Buffer(bufScan, maxScannerCapacity)
	// onDoubleLF splits the input on blank lines (two consecutive newlines), which separate records in the database
	onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		for i := 0; i < len(data); i++ {
			if i > 0 && data[i-1] == '\n' && data[i] == '\n' {
				return i + 1, data[:i-1], nil
			}
		}
		if !atEOF {
			return 0, nil, nil
		}
		// deliver the last token (which could be an empty string)
		return 0, data, bufio.ErrFinalToken
	}

	scanner.Split(onDoubleLF)
	return scanner
}

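// getFileReader resolves the given path and returns a reader over the contents of the first matching location.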
func getFileReader(path string, resolver file.Resolver) (io.Reader, error) {
	locs, err := resolver.FilesByPath(path)
	if err != nil {
		return nil, err
	}

	if len(locs) == 0 {
		return nil, fmt.Errorf("could not find file: %s", path)
	}
	// TODO: Should we maybe check if we found the file
	dbContentReader, err := resolver.FileContentsByLocation(locs[0])
	if err != nil {
		return nil, err
	}
	return dbContentReader, nil
}

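// parseDatabase walks the %KEY%/value records produced by the scanner and collects them into a field map before
// decoding them into parsedData.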
func parseDatabase(b *bufio.Scanner) (*parsedData, error) {
	var err error
	pkgFields := make(map[string]interface{})
	for b.Scan() {
		fields := strings.SplitN(b.Text(), "\n", 2)

		// End of File
		if len(fields) == 1 {
			break
		}

		// The alpm database surrounds the keys with %.
		key := strings.ReplaceAll(fields[0], "%", "")
		key = strings.ToLower(key)
		value := strings.TrimSpace(fields[1])

		switch key {
		case "files":
			var files []map[string]string
			for _, f := range strings.Split(value, "\n") {
				path := fmt.Sprintf("/%s", f)
				if ok := ignoredFiles[path]; !ok {
					files = append(files, map[string]string{"path": path})
				}
			}
			pkgFields[key] = files
		case "backup":
			var backup []map[string]interface{}
			for _, f := range strings.Split(value, "\n") {
				fields := strings.SplitN(f, "\t", 2)
				path := fmt.Sprintf("/%s", fields[0])
				if ok := ignoredFiles[path]; !ok {
					backup = append(backup, map[string]interface{}{
						"path": path,
						"digests": []file.Digest{{
							Algorithm: "md5",
							Value:     fields[1],
						}}})
				}
			}
			pkgFields[key] = backup
		case "reason":
			fallthrough
		case "size":
			pkgFields[key], err = strconv.ParseInt(value, 10, 64)
			if err != nil {
				return nil, fmt.Errorf("failed to parse %s to integer: %w", value, err)
			}
		default:
			pkgFields[key] = value
		}
	}

	return parsePkgFiles(pkgFields)
}

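// parsePkgFiles decodes the collected field map into parsedData, returning nil when the entry carries no package
// name, files, or backup records.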
func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) {
	var entry parsedData
	if err := mapstructure.Decode(pkgFields, &entry); err != nil {
		return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err)
	}

	if entry.Backup == nil {
		entry.Backup = make([]pkg.AlpmFileRecord, 0)
	}

	if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 {
		return nil, nil
	}
	return &entry, nil
}

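// parseMtree parses the gzip-compressed mtree spec stored alongside each package entry, returning a file record
// (with digests and timestamps) for every entry that is not in the ignore list.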
func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) {
	var err error
	var entries []pkg.AlpmFileRecord

	r, err = gzip.NewReader(r)
	if err != nil {
		return nil, err
	}
	specDh, err := mtree.ParseSpec(r)
	if err != nil {
		return nil, err
	}
	for _, f := range specDh.Entries {
		var entry pkg.AlpmFileRecord
		entry.Digests = make([]file.Digest, 0)
		fileFields := make(map[string]interface{})
		if ok := ignoredFiles[f.Name]; ok {
			continue
		}
		path := fmt.Sprintf("/%s", f.Name)
		fileFields["path"] = path
		for _, kv := range f.Keywords {
			kw := string(kv.Keyword())
			switch kw {
			case "time":
				// All unix timestamps have a ".0" suffix.
				v := strings.Split(kv.Value(), ".")
				i, _ := strconv.ParseInt(v[0], 10, 64)
				tm := time.Unix(i, 0)
				fileFields[kw] = tm
			case "sha256digest":
				entry.Digests = append(entry.Digests, file.Digest{
					Algorithm: "sha256",
					Value:     kv.Value(),
				})
			case "md5digest":
				entry.Digests = append(entry.Digests, file.Digest{
					Algorithm: "md5",
					Value:     kv.Value(),
				})
			default:
				fileFields[kw] = kv.Value()
			}
		}
		if err := mapstructure.Decode(fileFields, &entry); err != nil {
			return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err)
		}
		entries = append(entries, entry)
	}
	return entries, nil
}