github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/pkg/cataloger/apkdb/parse_apk_db.go (about)

     1  package apkdb
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"io"
     7  	"path"
     8  	"regexp"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"github.com/kastenhq/syft/internal"
    13  	"github.com/kastenhq/syft/internal/log"
    14  	"github.com/kastenhq/syft/syft/artifact"
    15  	"github.com/kastenhq/syft/syft/file"
    16  	"github.com/kastenhq/syft/syft/linux"
    17  	"github.com/kastenhq/syft/syft/pkg"
    18  	"github.com/kastenhq/syft/syft/pkg/cataloger/generic"
    19  )
    20  
    21  // integrity check
    22  var _ generic.Parser = parseApkDB
    23  
    24  var (
    25  	repoRegex = regexp.MustCompile(`(?m)^https://.*\.alpinelinux\.org/alpine/v([^/]+)/([a-zA-Z0-9_]+)$`)
    26  )
    27  
    28  type parsedData struct {
    29  	License string `mapstructure:"L" json:"license"`
    30  	pkg.ApkMetadata
    31  }
    32  
    33  // parseApkDB parses packages from a given APK installed DB file. For more
    34  // information on specific fields, see https://wiki.alpinelinux.org/wiki/Apk_spec.
    35  //
    36  //nolint:funlen,gocognit
    37  func parseApkDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    38  	scanner := bufio.NewScanner(reader)
    39  
    40  	var apks []parsedData
    41  	var currentEntry parsedData
    42  	entryParsingInProgress := false
    43  	fileParsingCtx := newApkFileParsingContext()
    44  
    45  	// creating a dedicated append-like function here instead of using `append(...)`
    46  	// below since there is nontrivial logic to be performed for each finalized apk
    47  	// entry.
    48  	appendApk := func(p parsedData) {
    49  		if files := fileParsingCtx.files; len(files) >= 1 {
    50  			// attached accumulated files to current package
    51  			p.Files = files
    52  
    53  			// reset file parsing for next use
    54  			fileParsingCtx = newApkFileParsingContext()
    55  		}
    56  
    57  		nilFieldsToEmptySlice(&p)
    58  		apks = append(apks, p)
    59  	}
    60  
    61  	for scanner.Scan() {
    62  		line := scanner.Text()
    63  
    64  		if line == "" {
    65  			// i.e. apk entry separator
    66  
    67  			if entryParsingInProgress {
    68  				// current entry is complete
    69  				appendApk(currentEntry)
    70  			}
    71  
    72  			entryParsingInProgress = false
    73  
    74  			// zero-out currentEntry for use by any future entry
    75  			currentEntry = parsedData{}
    76  
    77  			continue
    78  		}
    79  
    80  		field := parseApkField(line)
    81  		if field == nil {
    82  			log.Warnf("unable to parse field data from line %q", line)
    83  			continue
    84  		}
    85  		if len(field.name) == 0 {
    86  			log.Warnf("failed to parse field name from line %q", line)
    87  			continue
    88  		}
    89  		if len(field.value) == 0 {
    90  			log.Debugf("line %q: parsed field %q appears to have an empty value, skipping", line, field.name)
    91  			continue
    92  		}
    93  
    94  		entryParsingInProgress = true
    95  
    96  		field.apply(&currentEntry, fileParsingCtx)
    97  	}
    98  
    99  	if entryParsingInProgress {
   100  		// There was no final empty line, so currentEntry hasn't been added to the
   101  		// collection yet; but we've now reached the end of scanning, so let's be sure to
   102  		// add currentEntry to the collection.
   103  		appendApk(currentEntry)
   104  	}
   105  
   106  	if err := scanner.Err(); err != nil {
   107  		return nil, nil, fmt.Errorf("failed to parse APK installed DB file: %w", err)
   108  	}
   109  
   110  	var r *linux.Release
   111  	if env != nil {
   112  		r = env.LinuxRelease
   113  	}
   114  	// this is somewhat ugly, but better than completely failing when we can't find the release,
   115  	// e.g. embedded deeper in the tree, like containers or chroots.
   116  	// but we now have no way of handling different repository sources. On the other hand,
   117  	// we never could before this. At least now, we can handle some.
   118  	// This should get fixed with https://gitlab.alpinelinux.org/alpine/apk-tools/-/issues/10875
   119  	if r == nil {
   120  		// find the repositories file from the relative directory of the DB file
   121  		releases := findReleases(resolver, reader.Location.RealPath)
   122  
   123  		if len(releases) > 0 {
   124  			r = &releases[0]
   125  		}
   126  	}
   127  
   128  	pkgs := make([]pkg.Package, 0, len(apks))
   129  	for _, apk := range apks {
   130  		pkgs = append(pkgs, newPackage(apk, r, reader.Location))
   131  	}
   132  
   133  	return pkgs, discoverPackageDependencies(pkgs), nil
   134  }
   135  
   136  func findReleases(resolver file.Resolver, dbPath string) []linux.Release {
   137  	if resolver == nil {
   138  		return nil
   139  	}
   140  
   141  	reposLocation := path.Clean(path.Join(path.Dir(dbPath), "../../../etc/apk/repositories"))
   142  	locations, err := resolver.FilesByPath(reposLocation)
   143  	if err != nil {
   144  		log.Tracef("unable to find APK repositories file %q: %+v", reposLocation, err)
   145  		return nil
   146  	}
   147  
   148  	if len(locations) == 0 {
   149  		return nil
   150  	}
   151  	location := locations[0]
   152  
   153  	reposReader, err := resolver.FileContentsByLocation(location)
   154  	if err != nil {
   155  		log.Tracef("unable to fetch contents for APK repositories file %q: %+v", reposLocation, err)
   156  		return nil
   157  	}
   158  
   159  	return parseReleasesFromAPKRepository(file.LocationReadCloser{
   160  		Location:   location,
   161  		ReadCloser: reposReader,
   162  	})
   163  }
   164  
   165  func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release {
   166  	var releases []linux.Release
   167  
   168  	reposB, err := io.ReadAll(reader)
   169  	if err != nil {
   170  		log.Tracef("unable to read APK repositories file %q: %+v", reader.Location.RealPath, err)
   171  		return nil
   172  	}
   173  
   174  	parts := repoRegex.FindAllStringSubmatch(string(reposB), -1)
   175  	for _, part := range parts {
   176  		if len(part) >= 3 {
   177  			releases = append(releases, linux.Release{
   178  				Name:      "Alpine Linux",
   179  				ID:        "alpine",
   180  				VersionID: part[1],
   181  			})
   182  		}
   183  	}
   184  
   185  	return releases
   186  }
   187  
   188  func parseApkField(line string) *apkField {
   189  	parts := strings.SplitN(line, ":", 2)
   190  	if len(parts) != 2 {
   191  		return nil
   192  	}
   193  
   194  	f := apkField{
   195  		name:  parts[0],
   196  		value: parts[1],
   197  	}
   198  
   199  	return &f
   200  }
   201  
   202  type apkField struct {
   203  	name  string
   204  	value string
   205  }
   206  
   207  //nolint:funlen
   208  func (f apkField) apply(p *parsedData, ctx *apkFileParsingContext) {
   209  	switch f.name {
   210  	// APKINDEX field parsing
   211  
   212  	case "P":
   213  		p.Package = f.value
   214  	case "o":
   215  		p.OriginPackage = f.value
   216  	case "m":
   217  		p.Maintainer = f.value
   218  	case "V":
   219  		p.Version = f.value
   220  	case "L":
   221  		p.License = f.value
   222  	case "A":
   223  		p.Architecture = f.value
   224  	case "U":
   225  		p.URL = f.value
   226  	case "T":
   227  		p.Description = f.value
   228  	case "S":
   229  		i, err := strconv.Atoi(f.value)
   230  		if err != nil {
   231  			log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err)
   232  			return
   233  		}
   234  
   235  		p.Size = i
   236  	case "I":
   237  		i, err := strconv.Atoi(f.value)
   238  		if err != nil {
   239  			log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err)
   240  			return
   241  		}
   242  
   243  		p.InstalledSize = i
   244  	case "D":
   245  		deps := parseListValue(f.value)
   246  		p.Dependencies = deps
   247  	case "p":
   248  		provides := parseListValue(f.value)
   249  		p.Provides = provides
   250  	case "C":
   251  		p.Checksum = f.value
   252  	case "c":
   253  		p.GitCommit = f.value
   254  
   255  	// File/directory field parsing:
   256  
   257  	case "F":
   258  		directory := path.Join("/", f.value)
   259  
   260  		ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: directory})
   261  		ctx.indexOfLatestDirectory = len(ctx.files) - 1
   262  	case "M":
   263  		i := ctx.indexOfLatestDirectory
   264  		latest := ctx.files[i]
   265  
   266  		var ok bool
   267  		latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
   268  		if !ok {
   269  			log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value)
   270  			return
   271  		}
   272  
   273  		// save updated directory
   274  		ctx.files[i] = latest
   275  	case "R":
   276  		var regularFile string
   277  
   278  		dirIndex := ctx.indexOfLatestDirectory
   279  		if dirIndex < 0 {
   280  			regularFile = path.Join("/", f.value)
   281  		} else {
   282  			latestDirPath := ctx.files[dirIndex].Path
   283  			regularFile = path.Join(latestDirPath, f.value)
   284  		}
   285  
   286  		ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: regularFile})
   287  		ctx.indexOfLatestRegularFile = len(ctx.files) - 1
   288  	case "a":
   289  		i := ctx.indexOfLatestRegularFile
   290  		latest := ctx.files[i]
   291  
   292  		var ok bool
   293  		latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
   294  		if !ok {
   295  			log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value)
   296  			return
   297  		}
   298  
   299  		// save updated file
   300  		ctx.files[i] = latest
   301  	case "Z":
   302  		i := ctx.indexOfLatestRegularFile
   303  		latest := ctx.files[i]
   304  		latest.Digest = processChecksum(f.value)
   305  
   306  		// save updated file
   307  		ctx.files[i] = latest
   308  	}
   309  }
   310  
   311  func processFileInfo(v string) (uid, gid, perms string, ok bool) {
   312  	ok = false
   313  
   314  	fileInfo := strings.Split(v, ":")
   315  	if len(fileInfo) < 3 {
   316  		return
   317  	}
   318  
   319  	uid = fileInfo[0]
   320  	gid = fileInfo[1]
   321  	perms = fileInfo[2]
   322  
   323  	// note: there are more optional fields available that we are not capturing,
   324  	// e.g.: "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4="
   325  
   326  	ok = true
   327  	return
   328  }
   329  
   330  // apkFileParsingContext helps keep track of what file data has been captured so far for the APK currently being parsed.
   331  type apkFileParsingContext struct {
   332  	files                    []pkg.ApkFileRecord
   333  	indexOfLatestDirectory   int
   334  	indexOfLatestRegularFile int
   335  }
   336  
   337  func newApkFileParsingContext() *apkFileParsingContext {
   338  	return &apkFileParsingContext{
   339  		indexOfLatestDirectory:   -1, // no directories yet
   340  		indexOfLatestRegularFile: -1, // no regular files yet
   341  	}
   342  }
   343  
   344  // parseListValue parses a space-separated list from an apk entry field value.
   345  func parseListValue(value string) []string {
   346  	items := strings.Split(value, " ")
   347  	if len(items) >= 1 {
   348  		return items
   349  	}
   350  
   351  	return nil
   352  }
   353  
   354  func nilFieldsToEmptySlice(p *parsedData) {
   355  	if p.Dependencies == nil {
   356  		p.Dependencies = []string{}
   357  	}
   358  
   359  	if p.Provides == nil {
   360  		p.Provides = []string{}
   361  	}
   362  
   363  	if p.Files == nil {
   364  		p.Files = []pkg.ApkFileRecord{}
   365  	}
   366  }
   367  
   368  func processChecksum(value string) *file.Digest {
   369  	// from: https://wiki.alpinelinux.org/wiki/Apk_spec
   370  	// The package checksum field is the SHA1 hash of the second gzip stream (control stream) in the package. The
   371  	// binary hash digest is base64 encoded. This is prefixed with Q1 to differentiate it from the MD5 hashes
   372  	// used in older index formats. It is not possible to compute this checksum with standard command line tools
   373  	// but the apk-tools can compute it in their index operation.
   374  
   375  	// based on https://github.com/alpinelinux/apk-tools/blob/dd1908f2fc20b4cfe2c15c55fafaa5fadfb599dc/src/blob.c#L379-L393
   376  	// it seems that the old md5 checksum value was only the hex representation (not base64)
   377  	algorithm := "md5"
   378  	if strings.HasPrefix(value, "Q1") {
   379  		algorithm = "'Q1'+base64(sha1)"
   380  	}
   381  
   382  	return &file.Digest{
   383  		Algorithm: algorithm,
   384  		Value:     value,
   385  	}
   386  }
   387  
   388  func discoverPackageDependencies(pkgs []pkg.Package) (relationships []artifact.Relationship) {
   389  	// map["provides" string] -> packages that provide the "p" key
   390  	lookup := make(map[string][]pkg.Package)
   391  	// read "Provides" (p) and add as keys for lookup keys as well as package names
   392  	for _, p := range pkgs {
   393  		apkg, ok := p.Metadata.(pkg.ApkMetadata)
   394  		if !ok {
   395  			log.Warnf("cataloger failed to extract apk 'provides' metadata for package %+v", p.Name)
   396  			continue
   397  		}
   398  		lookup[p.Name] = append(lookup[p.Name], p)
   399  		for _, provides := range apkg.Provides {
   400  			k := stripVersionSpecifier(provides)
   401  			lookup[k] = append(lookup[k], p)
   402  		}
   403  	}
   404  
   405  	// read "Pull Dependencies" (D) and match with keys
   406  	for _, p := range pkgs {
   407  		apkg, ok := p.Metadata.(pkg.ApkMetadata)
   408  		if !ok {
   409  			log.Warnf("cataloger failed to extract apk dependency metadata for package %+v", p.Name)
   410  			continue
   411  		}
   412  
   413  		for _, depSpecifier := range apkg.Dependencies {
   414  			// use the lookup to find what pkg we depend on
   415  			dep := stripVersionSpecifier(depSpecifier)
   416  			for _, depPkg := range lookup[dep] {
   417  				// this is a pkg that package "p" depends on... make a relationship
   418  				relationships = append(relationships, artifact.Relationship{
   419  					From: depPkg,
   420  					To:   p,
   421  					Type: artifact.DependencyOfRelationship,
   422  				})
   423  			}
   424  		}
   425  	}
   426  	return relationships
   427  }
   428  
   429  func stripVersionSpecifier(s string) string {
   430  	// examples:
   431  	// musl>=1                 --> musl
   432  	// cmd:scanelf=1.3.4-r0    --> cmd:scanelf
   433  
   434  	items := internal.SplitAny(s, "<>=")
   435  	if len(items) == 0 {
   436  		return s
   437  	}
   438  
   439  	return items[0]
   440  }