github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/alpine/parse_apk_db.go (about)

     1  package alpine
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"path"
     9  	"regexp"
    10  	"strconv"
    11  	"strings"
    12  
    13  	"github.com/anchore/syft/internal"
    14  	"github.com/anchore/syft/internal/log"
    15  	"github.com/anchore/syft/internal/unknown"
    16  	"github.com/anchore/syft/syft/artifact"
    17  	"github.com/anchore/syft/syft/file"
    18  	"github.com/anchore/syft/syft/linux"
    19  	"github.com/anchore/syft/syft/pkg"
    20  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    21  )
    22  
// integrity check: compilation fails if parseApkDB is no longer assignable to
// generic.Parser.
var _ generic.Parser = parseApkDB
    25  
var (
	// repoRegex matches, line by line (multi-line mode), repository URLs of the
	// form "https://<prefix>.alpinelinux.org/alpine/v<version>/<repository>".
	// Capture group 1 is the text following "v" up to the next "/"; capture
	// group 2 is the final path segment (letters, digits and underscores only).
	repoRegex = regexp.MustCompile(`(?m)^https://.*\.alpinelinux\.org/alpine/v([^/]+)/([a-zA-Z0-9_]+)$`)
)
    29  
// parsedData is the intermediate record accumulated for a single entry of the
// APK installed DB while it is being parsed: the embedded pkg.ApkDBEntry plus
// the license string, which is kept separately from the entry itself.
type parsedData struct {
	// License holds the value of the "L" field exactly as it appeared in the DB.
	License string `mapstructure:"L" json:"license"`
	pkg.ApkDBEntry
}
    34  
    35  // parseApkDB parses packages from a given APK "installed" flat-file DB. For more
    36  // information on specific fields, see https://wiki.alpinelinux.org/wiki/Apk_spec.
    37  //
    38  //nolint:funlen
    39  func parseApkDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    40  	scanner := bufio.NewScanner(reader)
    41  
    42  	var errs error
    43  	var apks []parsedData
    44  	var currentEntry parsedData
    45  	entryParsingInProgress := false
    46  	fileParsingCtx := newApkFileParsingContext()
    47  
    48  	// creating a dedicated append-like function here instead of using `append(...)`
    49  	// below since there is nontrivial logic to be performed for each finalized apk
    50  	// entry.
    51  	appendApk := func(p parsedData) {
    52  		if files := fileParsingCtx.files; len(files) >= 1 {
    53  			// attached accumulated files to current package
    54  			p.Files = files
    55  
    56  			// reset file parsing for next use
    57  			fileParsingCtx = newApkFileParsingContext()
    58  		}
    59  
    60  		nilFieldsToEmptySlice(&p)
    61  		apks = append(apks, p)
    62  	}
    63  
    64  	for scanner.Scan() {
    65  		line := scanner.Text()
    66  
    67  		if line == "" {
    68  			// i.e. apk entry separator
    69  
    70  			if entryParsingInProgress {
    71  				// current entry is complete
    72  				appendApk(currentEntry)
    73  			}
    74  
    75  			entryParsingInProgress = false
    76  
    77  			// zero-out currentEntry for use by any future entry
    78  			currentEntry = parsedData{}
    79  
    80  			continue
    81  		}
    82  
    83  		field := parseApkField(line)
    84  		if field == nil {
    85  			log.Debugf("unable to parse field data from line %q", line)
    86  			errs = unknown.Appendf(errs, reader, "unable to parse field data from line %q", line)
    87  			continue
    88  		}
    89  		if len(field.name) == 0 {
    90  			log.Debugf("failed to parse field name from line %q", line)
    91  			errs = unknown.Appendf(errs, reader, "failed to parse field name from line %q", line)
    92  			continue
    93  		}
    94  		if len(field.value) == 0 {
    95  			log.Debugf("line %q: parsed field %q appears to have an empty value, skipping", line, field.name)
    96  			continue
    97  		}
    98  
    99  		entryParsingInProgress = true
   100  
   101  		field.apply(&currentEntry, fileParsingCtx)
   102  	}
   103  
   104  	if entryParsingInProgress {
   105  		// There was no final empty line, so currentEntry hasn't been added to the
   106  		// collection yet; but we've now reached the end of scanning, so let's be sure to
   107  		// add currentEntry to the collection.
   108  		appendApk(currentEntry)
   109  	}
   110  
   111  	if err := scanner.Err(); err != nil {
   112  		return nil, nil, fmt.Errorf("failed to parse APK installed DB file: %w", err)
   113  	}
   114  
   115  	var r *linux.Release
   116  	if env != nil {
   117  		r = env.LinuxRelease
   118  	}
   119  	// this is somewhat ugly, but better than completely failing when we can't find the release,
   120  	// e.g. embedded deeper in the tree, like containers or chroots.
   121  	// but we now have no way of handling different repository sources. On the other hand,
   122  	// we never could before this. At least now, we can handle some.
   123  	// This should get fixed with https://gitlab.alpinelinux.org/alpine/apk-tools/-/issues/10875
   124  	if r == nil {
   125  		// find the repositories file from the relative directory of the DB file
   126  		releases := findReleases(resolver, reader.RealPath)
   127  
   128  		if len(releases) > 0 {
   129  			r = &releases[0]
   130  		}
   131  	}
   132  
   133  	pkgs := make([]pkg.Package, 0, len(apks))
   134  	for _, apk := range apks {
   135  		pkgs = append(pkgs, newPackage(ctx, apk, r, reader.Location))
   136  	}
   137  
   138  	return pkgs, nil, errs
   139  }
   140  
   141  func findReleases(resolver file.Resolver, dbPath string) []linux.Release {
   142  	if resolver == nil {
   143  		return nil
   144  	}
   145  
   146  	reposLocation := path.Clean(path.Join(path.Dir(dbPath), "../../../etc/apk/repositories"))
   147  	locations, err := resolver.FilesByPath(reposLocation)
   148  	if err != nil {
   149  		log.Tracef("unable to find APK repositories file %q: %+v", reposLocation, err)
   150  		return nil
   151  	}
   152  
   153  	if len(locations) == 0 {
   154  		return nil
   155  	}
   156  	location := locations[0]
   157  
   158  	reposReader, err := resolver.FileContentsByLocation(location)
   159  	if err != nil {
   160  		log.Tracef("unable to fetch contents for APK repositories file %q: %+v", reposLocation, err)
   161  		return nil
   162  	}
   163  	defer internal.CloseAndLogError(reposReader, location.RealPath)
   164  
   165  	return parseReleasesFromAPKRepository(file.LocationReadCloser{
   166  		Location:   location,
   167  		ReadCloser: reposReader,
   168  	})
   169  }
   170  
   171  func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release {
   172  	var releases []linux.Release
   173  
   174  	reposB, err := io.ReadAll(reader)
   175  	if err != nil {
   176  		log.Tracef("unable to read APK repositories file %q: %+v", reader.RealPath, err)
   177  		return nil
   178  	}
   179  
   180  	parts := repoRegex.FindAllStringSubmatch(string(reposB), -1)
   181  	for _, part := range parts {
   182  		if len(part) >= 3 {
   183  			releases = append(releases, linux.Release{
   184  				Name:      "Alpine Linux",
   185  				ID:        "alpine",
   186  				VersionID: part[1],
   187  			})
   188  		}
   189  	}
   190  
   191  	return releases
   192  }
   193  
   194  func parseApkField(line string) *apkField {
   195  	parts := strings.SplitN(line, ":", 2)
   196  	if len(parts) != 2 {
   197  		return nil
   198  	}
   199  
   200  	f := apkField{
   201  		name:  parts[0],
   202  		value: parts[1],
   203  	}
   204  
   205  	return &f
   206  }
   207  
// apkField is a single "name:value" line of the APK installed DB, as produced
// by parseApkField.
type apkField struct {
	// name is the text before the first ":" of the line.
	name  string
	// value is the text after the first ":" of the line.
	value string
}
   212  
   213  //nolint:funlen
   214  func (f apkField) apply(p *parsedData, ctx *apkFileParsingContext) {
   215  	switch f.name {
   216  	// APKINDEX field parsing
   217  
   218  	case "P":
   219  		p.Package = f.value
   220  	case "o":
   221  		p.OriginPackage = f.value
   222  	case "m":
   223  		p.Maintainer = f.value
   224  	case "V":
   225  		p.Version = f.value
   226  	case "L":
   227  		p.License = f.value
   228  	case "A":
   229  		p.Architecture = f.value
   230  	case "U":
   231  		p.URL = f.value
   232  	case "T":
   233  		p.Description = f.value
   234  	case "S":
   235  		i, err := strconv.Atoi(f.value)
   236  		if err != nil {
   237  			log.Debugf("unable to parse value %q for field %q: %w", f.value, f.name, err)
   238  			return
   239  		}
   240  
   241  		p.Size = i
   242  	case "I":
   243  		i, err := strconv.Atoi(f.value)
   244  		if err != nil {
   245  			log.Debugf("unable to parse value %q for field %q: %w", f.value, f.name, err)
   246  			return
   247  		}
   248  
   249  		p.InstalledSize = i
   250  	case "D":
   251  		deps := parseListValue(f.value)
   252  		p.Dependencies = deps
   253  	case "p":
   254  		provides := parseListValue(f.value)
   255  		p.Provides = provides
   256  	case "C":
   257  		p.Checksum = f.value
   258  	case "c":
   259  		p.GitCommit = f.value
   260  
   261  	// File/directory field parsing:
   262  
   263  	case "F":
   264  		directory := path.Join("/", f.value)
   265  
   266  		ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: directory})
   267  		ctx.indexOfLatestDirectory = len(ctx.files) - 1
   268  	case "M":
   269  		i := ctx.indexOfLatestDirectory
   270  		latest := ctx.files[i]
   271  
   272  		var ok bool
   273  		latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
   274  		if !ok {
   275  			log.Debugf("unexpected value for APK ACL field %q: %q", f.name, f.value)
   276  			return
   277  		}
   278  
   279  		// save updated directory
   280  		ctx.files[i] = latest
   281  	case "R":
   282  		var regularFile string
   283  
   284  		dirIndex := ctx.indexOfLatestDirectory
   285  		if dirIndex < 0 {
   286  			regularFile = path.Join("/", f.value)
   287  		} else {
   288  			latestDirPath := ctx.files[dirIndex].Path
   289  			regularFile = path.Join(latestDirPath, f.value)
   290  		}
   291  
   292  		ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: regularFile})
   293  		ctx.indexOfLatestRegularFile = len(ctx.files) - 1
   294  	case "a":
   295  		i := ctx.indexOfLatestRegularFile
   296  		latest := ctx.files[i]
   297  
   298  		var ok bool
   299  		latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
   300  		if !ok {
   301  			log.Debugf("unexpected value for APK ACL field %q: %q", f.name, f.value)
   302  			return
   303  		}
   304  
   305  		// save updated file
   306  		ctx.files[i] = latest
   307  	case "Z":
   308  		i := ctx.indexOfLatestRegularFile
   309  		latest := ctx.files[i]
   310  		latest.Digest = processChecksum(f.value)
   311  
   312  		// save updated file
   313  		ctx.files[i] = latest
   314  	}
   315  }
   316  
   317  func processFileInfo(v string) (uid, gid, perms string, ok bool) {
   318  	ok = false
   319  
   320  	fileInfo := strings.Split(v, ":")
   321  	if len(fileInfo) < 3 {
   322  		return
   323  	}
   324  
   325  	uid = fileInfo[0]
   326  	gid = fileInfo[1]
   327  	perms = fileInfo[2]
   328  
   329  	// note: there are more optional fields available that we are not capturing,
   330  	// e.g.: "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4="
   331  
   332  	ok = true
   333  	return
   334  }
   335  
// apkFileParsingContext helps keep track of what file data has been captured so far for the APK currently being parsed.
type apkFileParsingContext struct {
	// files holds the directory and regular file records in the order they were
	// encountered.
	files                    []pkg.ApkFileRecord
	// indexOfLatestDirectory is the index into files of the most recently added
	// directory record, or -1 when none has been added yet.
	indexOfLatestDirectory   int
	// indexOfLatestRegularFile is the index into files of the most recently
	// added regular file record, or -1 when none has been added yet.
	indexOfLatestRegularFile int
}
   342  
   343  func newApkFileParsingContext() *apkFileParsingContext {
   344  	return &apkFileParsingContext{
   345  		indexOfLatestDirectory:   -1, // no directories yet
   346  		indexOfLatestRegularFile: -1, // no regular files yet
   347  	}
   348  }
   349  
   350  // parseListValue parses a space-separated list from an apk entry field value.
   351  func parseListValue(value string) []string {
   352  	items := strings.Split(value, " ")
   353  	if len(items) >= 1 {
   354  		return items
   355  	}
   356  
   357  	return nil
   358  }
   359  
   360  func nilFieldsToEmptySlice(p *parsedData) {
   361  	if p.Dependencies == nil {
   362  		p.Dependencies = []string{}
   363  	}
   364  
   365  	if p.Provides == nil {
   366  		p.Provides = []string{}
   367  	}
   368  
   369  	if p.Files == nil {
   370  		p.Files = []pkg.ApkFileRecord{}
   371  	}
   372  }
   373  
   374  func processChecksum(value string) *file.Digest {
   375  	// from: https://wiki.alpinelinux.org/wiki/Apk_spec
   376  	// The package checksum field is the SHA1 hash of the second gzip stream (control stream) in the package. The
   377  	// binary hash digest is base64 encoded. This is prefixed with Q1 to differentiate it from the MD5 hashes
   378  	// used in older index formats. It is not possible to compute this checksum with standard command line tools
   379  	// but the apk-tools can compute it in their index operation.
   380  
   381  	// based on https://github.com/alpinelinux/apk-tools/blob/dd1908f2fc20b4cfe2c15c55fafaa5fadfb599dc/src/blob.c#L379-L393
   382  	// it seems that the old md5 checksum value was only the hex representation (not base64)
   383  	algorithm := "md5"
   384  	if strings.HasPrefix(value, "Q1") {
   385  		algorithm = "'Q1'+base64(sha1)"
   386  	}
   387  
   388  	return &file.Digest{
   389  		Algorithm: algorithm,
   390  		Value:     value,
   391  	}
   392  }