github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/alpine/parse_apk_db.go (about)

     1  package alpine
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"io"
     7  	"path"
     8  	"regexp"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"github.com/anchore/syft/syft/artifact"
    13  	"github.com/anchore/syft/syft/file"
    14  	"github.com/anchore/syft/syft/linux"
    15  	"github.com/anchore/syft/syft/pkg"
    16  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    17  	"github.com/lineaje-labs/syft/internal"
    18  	"github.com/lineaje-labs/syft/internal/log"
    19  )
    20  
// integrity check: compile-time assertion that parseApkDB is assignable to
// generic.Parser; the blank identifier means nothing is kept at runtime.
var _ generic.Parser = parseApkDB
    23  
var (
	// repoRegex matches, one line at a time (multi-line mode), repository URLs of the
	// form https://<anything>.alpinelinux.org/alpine/v<version>/<repo>. Capture group 1
	// is the text after "v" up to the next "/", and capture group 2 is the final path
	// segment (letters, digits and underscores only).
	repoRegex = regexp.MustCompile(`(?m)^https://.*\.alpinelinux\.org/alpine/v([^/]+)/([a-zA-Z0-9_]+)$`)
)
    27  
// parsedData is the intermediate representation of a single entry read from the
// APK installed DB: the value of the "L" (license) field plus every field of the
// embedded pkg.ApkDBEntry.
type parsedData struct {
	License string `mapstructure:"L" json:"license"`
	pkg.ApkDBEntry
}
    32  
    33  // parseApkDB parses packages from a given APK "installed" flat-file DB. For more
    34  // information on specific fields, see https://wiki.alpinelinux.org/wiki/Apk_spec.
    35  //
    36  //nolint:funlen,gocognit
    37  func parseApkDB(
    38  	resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser,
    39  ) ([]pkg.Package, []artifact.Relationship, error) {
    40  	scanner := bufio.NewScanner(reader)
    41  
    42  	var apks []parsedData
    43  	var currentEntry parsedData
    44  	entryParsingInProgress := false
    45  	fileParsingCtx := newApkFileParsingContext()
    46  
    47  	// creating a dedicated append-like function here instead of using `append(...)`
    48  	// below since there is nontrivial logic to be performed for each finalized apk
    49  	// entry.
    50  	appendApk := func(p parsedData) {
    51  		if files := fileParsingCtx.files; len(files) >= 1 {
    52  			// attached accumulated files to current package
    53  			p.Files = files
    54  
    55  			// reset file parsing for next use
    56  			fileParsingCtx = newApkFileParsingContext()
    57  		}
    58  
    59  		nilFieldsToEmptySlice(&p)
    60  		apks = append(apks, p)
    61  	}
    62  
    63  	for scanner.Scan() {
    64  		line := scanner.Text()
    65  
    66  		if line == "" {
    67  			// i.e. apk entry separator
    68  
    69  			if entryParsingInProgress {
    70  				// current entry is complete
    71  				appendApk(currentEntry)
    72  			}
    73  
    74  			entryParsingInProgress = false
    75  
    76  			// zero-out currentEntry for use by any future entry
    77  			currentEntry = parsedData{}
    78  
    79  			continue
    80  		}
    81  
    82  		field := parseApkField(line)
    83  		if field == nil {
    84  			log.Warnf("unable to parse field data from line %q", line)
    85  			continue
    86  		}
    87  		if len(field.name) == 0 {
    88  			log.Warnf("failed to parse field name from line %q", line)
    89  			continue
    90  		}
    91  		if len(field.value) == 0 {
    92  			log.Debugf("line %q: parsed field %q appears to have an empty value, skipping", line, field.name)
    93  			continue
    94  		}
    95  
    96  		entryParsingInProgress = true
    97  
    98  		field.apply(&currentEntry, fileParsingCtx)
    99  	}
   100  
   101  	if entryParsingInProgress {
   102  		// There was no final empty line, so currentEntry hasn't been added to the
   103  		// collection yet; but we've now reached the end of scanning, so let's be sure to
   104  		// add currentEntry to the collection.
   105  		appendApk(currentEntry)
   106  	}
   107  
   108  	if err := scanner.Err(); err != nil {
   109  		return nil, nil, fmt.Errorf("failed to parse APK installed DB file: %w", err)
   110  	}
   111  
   112  	var r *linux.Release
   113  	if env != nil {
   114  		r = env.LinuxRelease
   115  	}
   116  	// this is somewhat ugly, but better than completely failing when we can't find the release,
   117  	// e.g. embedded deeper in the tree, like containers or chroots.
   118  	// but we now have no way of handling different repository sources. On the other hand,
   119  	// we never could before this. At least now, we can handle some.
   120  	// This should get fixed with https://gitlab.alpinelinux.org/alpine/apk-tools/-/issues/10875
   121  	if r == nil {
   122  		// find the repositories file from the relative directory of the DB file
   123  		releases := findReleases(resolver, reader.Location.RealPath)
   124  
   125  		if len(releases) > 0 {
   126  			r = &releases[0]
   127  		}
   128  	}
   129  
   130  	pkgs := make([]pkg.Package, 0, len(apks))
   131  	for _, apk := range apks {
   132  		pkgs = append(pkgs, newPackage(apk, r, reader.Location))
   133  	}
   134  
   135  	return pkgs, discoverPackageDependencies(pkgs), nil
   136  }
   137  
   138  func findReleases(resolver file.Resolver, dbPath string) []linux.Release {
   139  	if resolver == nil {
   140  		return nil
   141  	}
   142  
   143  	reposLocation := path.Clean(path.Join(path.Dir(dbPath), "../../../etc/apk/repositories"))
   144  	locations, err := resolver.FilesByPath(reposLocation)
   145  	if err != nil {
   146  		log.Tracef("unable to find APK repositories file %q: %+v", reposLocation, err)
   147  		return nil
   148  	}
   149  
   150  	if len(locations) == 0 {
   151  		return nil
   152  	}
   153  	location := locations[0]
   154  
   155  	reposReader, err := resolver.FileContentsByLocation(location)
   156  	if err != nil {
   157  		log.Tracef("unable to fetch contents for APK repositories file %q: %+v", reposLocation, err)
   158  		return nil
   159  	}
   160  
   161  	return parseReleasesFromAPKRepository(file.LocationReadCloser{
   162  		Location:   location,
   163  		ReadCloser: reposReader,
   164  	})
   165  }
   166  
   167  func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release {
   168  	var releases []linux.Release
   169  
   170  	reposB, err := io.ReadAll(reader)
   171  	if err != nil {
   172  		log.Tracef("unable to read APK repositories file %q: %+v", reader.Location.RealPath, err)
   173  		return nil
   174  	}
   175  
   176  	parts := repoRegex.FindAllStringSubmatch(string(reposB), -1)
   177  	for _, part := range parts {
   178  		if len(part) >= 3 {
   179  			releases = append(releases, linux.Release{
   180  				Name:      "Alpine Linux",
   181  				ID:        "alpine",
   182  				VersionID: part[1],
   183  			})
   184  		}
   185  	}
   186  
   187  	return releases
   188  }
   189  
   190  func parseApkField(line string) *apkField {
   191  	parts := strings.SplitN(line, ":", 2)
   192  	if len(parts) != 2 {
   193  		return nil
   194  	}
   195  
   196  	f := apkField{
   197  		name:  parts[0],
   198  		value: parts[1],
   199  	}
   200  
   201  	return &f
   202  }
   203  
   204  type apkField struct {
   205  	name  string
   206  	value string
   207  }
   208  
   209  //nolint:funlen
   210  func (f apkField) apply(p *parsedData, ctx *apkFileParsingContext) {
   211  	switch f.name {
   212  	// APKINDEX field parsing
   213  
   214  	case "P":
   215  		p.Package = f.value
   216  	case "o":
   217  		p.OriginPackage = f.value
   218  	case "m":
   219  		p.Maintainer = f.value
   220  	case "V":
   221  		p.Version = f.value
   222  	case "L":
   223  		p.License = f.value
   224  	case "A":
   225  		p.Architecture = f.value
   226  	case "U":
   227  		p.URL = f.value
   228  	case "T":
   229  		p.Description = f.value
   230  	case "S":
   231  		i, err := strconv.Atoi(f.value)
   232  		if err != nil {
   233  			log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err)
   234  			return
   235  		}
   236  
   237  		p.Size = i
   238  	case "I":
   239  		i, err := strconv.Atoi(f.value)
   240  		if err != nil {
   241  			log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err)
   242  			return
   243  		}
   244  
   245  		p.InstalledSize = i
   246  	case "D":
   247  		deps := parseListValue(f.value)
   248  		p.Dependencies = deps
   249  	case "p":
   250  		provides := parseListValue(f.value)
   251  		p.Provides = provides
   252  	case "C":
   253  		p.Checksum = f.value
   254  	case "c":
   255  		p.GitCommit = f.value
   256  
   257  	// File/directory field parsing:
   258  
   259  	case "F":
   260  		directory := path.Join("/", f.value)
   261  
   262  		ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: directory})
   263  		ctx.indexOfLatestDirectory = len(ctx.files) - 1
   264  	case "M":
   265  		i := ctx.indexOfLatestDirectory
   266  		latest := ctx.files[i]
   267  
   268  		var ok bool
   269  		latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
   270  		if !ok {
   271  			log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value)
   272  			return
   273  		}
   274  
   275  		// save updated directory
   276  		ctx.files[i] = latest
   277  	case "R":
   278  		var regularFile string
   279  
   280  		dirIndex := ctx.indexOfLatestDirectory
   281  		if dirIndex < 0 {
   282  			regularFile = path.Join("/", f.value)
   283  		} else {
   284  			latestDirPath := ctx.files[dirIndex].Path
   285  			regularFile = path.Join(latestDirPath, f.value)
   286  		}
   287  
   288  		ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: regularFile})
   289  		ctx.indexOfLatestRegularFile = len(ctx.files) - 1
   290  	case "a":
   291  		i := ctx.indexOfLatestRegularFile
   292  		latest := ctx.files[i]
   293  
   294  		var ok bool
   295  		latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
   296  		if !ok {
   297  			log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value)
   298  			return
   299  		}
   300  
   301  		// save updated file
   302  		ctx.files[i] = latest
   303  	case "Z":
   304  		i := ctx.indexOfLatestRegularFile
   305  		latest := ctx.files[i]
   306  		latest.Digest = processChecksum(f.value)
   307  
   308  		// save updated file
   309  		ctx.files[i] = latest
   310  	}
   311  }
   312  
   313  func processFileInfo(v string) (uid, gid, perms string, ok bool) {
   314  	ok = false
   315  
   316  	fileInfo := strings.Split(v, ":")
   317  	if len(fileInfo) < 3 {
   318  		return
   319  	}
   320  
   321  	uid = fileInfo[0]
   322  	gid = fileInfo[1]
   323  	perms = fileInfo[2]
   324  
   325  	// note: there are more optional fields available that we are not capturing,
   326  	// e.g.: "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4="
   327  
   328  	ok = true
   329  	return
   330  }
   331  
// apkFileParsingContext helps keep track of what file data has been captured so far for the APK currently being parsed.
//
// files holds the directory and regular-file records in the order they were
// appended. indexOfLatestDirectory and indexOfLatestRegularFile are indexes into
// files for the most recently appended record of each kind;
// newApkFileParsingContext initializes both to -1 to mean "none yet".
type apkFileParsingContext struct {
	files                    []pkg.ApkFileRecord
	indexOfLatestDirectory   int
	indexOfLatestRegularFile int
}
   338  
   339  func newApkFileParsingContext() *apkFileParsingContext {
   340  	return &apkFileParsingContext{
   341  		indexOfLatestDirectory:   -1, // no directories yet
   342  		indexOfLatestRegularFile: -1, // no regular files yet
   343  	}
   344  }
   345  
   346  // parseListValue parses a space-separated list from an apk entry field value.
   347  func parseListValue(value string) []string {
   348  	items := strings.Split(value, " ")
   349  	if len(items) >= 1 {
   350  		return items
   351  	}
   352  
   353  	return nil
   354  }
   355  
   356  func nilFieldsToEmptySlice(p *parsedData) {
   357  	if p.Dependencies == nil {
   358  		p.Dependencies = []string{}
   359  	}
   360  
   361  	if p.Provides == nil {
   362  		p.Provides = []string{}
   363  	}
   364  
   365  	if p.Files == nil {
   366  		p.Files = []pkg.ApkFileRecord{}
   367  	}
   368  }
   369  
   370  func processChecksum(value string) *file.Digest {
   371  	// from: https://wiki.alpinelinux.org/wiki/Apk_spec
   372  	// The package checksum field is the SHA1 hash of the second gzip stream (control stream) in the package. The
   373  	// binary hash digest is base64 encoded. This is prefixed with Q1 to differentiate it from the MD5 hashes
   374  	// used in older index formats. It is not possible to compute this checksum with standard command line tools
   375  	// but the apk-tools can compute it in their index operation.
   376  
   377  	// based on https://github.com/alpinelinux/apk-tools/blob/dd1908f2fc20b4cfe2c15c55fafaa5fadfb599dc/src/blob.c#L379-L393
   378  	// it seems that the old md5 checksum value was only the hex representation (not base64)
   379  	algorithm := "md5"
   380  	if strings.HasPrefix(value, "Q1") {
   381  		algorithm = "'Q1'+base64(sha1)"
   382  	}
   383  
   384  	return &file.Digest{
   385  		Algorithm: algorithm,
   386  		Value:     value,
   387  	}
   388  }
   389  
   390  func discoverPackageDependencies(pkgs []pkg.Package) (relationships []artifact.Relationship) {
   391  	// map["provides" string] -> packages that provide the "p" key
   392  	lookup := make(map[string][]pkg.Package)
   393  	// read "Provides" (p) and add as keys for lookup keys as well as package names
   394  	for _, p := range pkgs {
   395  		apkg, ok := p.Metadata.(pkg.ApkDBEntry)
   396  		if !ok {
   397  			log.Warnf("cataloger failed to extract apk 'provides' metadata for package %+v", p.Name)
   398  			continue
   399  		}
   400  		lookup[p.Name] = append(lookup[p.Name], p)
   401  		for _, provides := range apkg.Provides {
   402  			k := stripVersionSpecifier(provides)
   403  			lookup[k] = append(lookup[k], p)
   404  		}
   405  	}
   406  
   407  	// read "Pull Dependencies" (D) and match with keys
   408  	for _, p := range pkgs {
   409  		apkg, ok := p.Metadata.(pkg.ApkDBEntry)
   410  		if !ok {
   411  			log.Warnf("cataloger failed to extract apk dependency metadata for package %+v", p.Name)
   412  			continue
   413  		}
   414  
   415  		for _, depSpecifier := range apkg.Dependencies {
   416  			// use the lookup to find what pkg we depend on
   417  			dep := stripVersionSpecifier(depSpecifier)
   418  			for _, depPkg := range lookup[dep] {
   419  				// this is a pkg that package "p" depends on... make a relationship
   420  				relationships = append(relationships, artifact.Relationship{
   421  					From: depPkg,
   422  					To:   p,
   423  					Type: artifact.DependencyOfRelationship,
   424  				})
   425  			}
   426  		}
   427  	}
   428  	return relationships
   429  }
   430  
   431  func stripVersionSpecifier(s string) string {
   432  	// examples:
   433  	// musl>=1                 --> musl
   434  	// cmd:scanelf=1.3.4-r0    --> cmd:scanelf
   435  
   436  	items := internal.SplitAny(s, "<>=")
   437  	if len(items) == 0 {
   438  		return s
   439  	}
   440  
   441  	return items[0]
   442  }