github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/alpine/parse_apk_db.go (about)

     1  package alpine
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"path"
     9  	"regexp"
    10  	"strconv"
    11  	"strings"
    12  
    13  	"github.com/anchore/syft/internal"
    14  	"github.com/anchore/syft/internal/log"
    15  	"github.com/anchore/syft/syft/artifact"
    16  	"github.com/anchore/syft/syft/file"
    17  	"github.com/anchore/syft/syft/linux"
    18  	"github.com/anchore/syft/syft/pkg"
    19  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    20  )
    21  
// integrity check: compile-time assertion that parseApkDB is assignable to
// generic.Parser (the blank identifier means no value is kept at runtime).
var _ generic.Parser = parseApkDB
    24  
    25  var (
    26  	repoRegex = regexp.MustCompile(`(?m)^https://.*\.alpinelinux\.org/alpine/v([^/]+)/([a-zA-Z0-9_]+)$`)
    27  )
    28  
// parsedData accumulates the fields captured for a single APK DB entry while it
// is being parsed: the embedded pkg.ApkDBEntry plus the raw value of the "L"
// (license) field.
type parsedData struct {
	License string `mapstructure:"L" json:"license"`
	pkg.ApkDBEntry
}
    33  
    34  // parseApkDB parses packages from a given APK "installed" flat-file DB. For more
    35  // information on specific fields, see https://wiki.alpinelinux.org/wiki/Apk_spec.
    36  //
    37  //nolint:funlen,gocognit
    38  func parseApkDB(_ context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    39  	scanner := bufio.NewScanner(reader)
    40  
    41  	var apks []parsedData
    42  	var currentEntry parsedData
    43  	entryParsingInProgress := false
    44  	fileParsingCtx := newApkFileParsingContext()
    45  
    46  	// creating a dedicated append-like function here instead of using `append(...)`
    47  	// below since there is nontrivial logic to be performed for each finalized apk
    48  	// entry.
    49  	appendApk := func(p parsedData) {
    50  		if files := fileParsingCtx.files; len(files) >= 1 {
    51  			// attached accumulated files to current package
    52  			p.Files = files
    53  
    54  			// reset file parsing for next use
    55  			fileParsingCtx = newApkFileParsingContext()
    56  		}
    57  
    58  		nilFieldsToEmptySlice(&p)
    59  		apks = append(apks, p)
    60  	}
    61  
    62  	for scanner.Scan() {
    63  		line := scanner.Text()
    64  
    65  		if line == "" {
    66  			// i.e. apk entry separator
    67  
    68  			if entryParsingInProgress {
    69  				// current entry is complete
    70  				appendApk(currentEntry)
    71  			}
    72  
    73  			entryParsingInProgress = false
    74  
    75  			// zero-out currentEntry for use by any future entry
    76  			currentEntry = parsedData{}
    77  
    78  			continue
    79  		}
    80  
    81  		field := parseApkField(line)
    82  		if field == nil {
    83  			log.Warnf("unable to parse field data from line %q", line)
    84  			continue
    85  		}
    86  		if len(field.name) == 0 {
    87  			log.Warnf("failed to parse field name from line %q", line)
    88  			continue
    89  		}
    90  		if len(field.value) == 0 {
    91  			log.Debugf("line %q: parsed field %q appears to have an empty value, skipping", line, field.name)
    92  			continue
    93  		}
    94  
    95  		entryParsingInProgress = true
    96  
    97  		field.apply(&currentEntry, fileParsingCtx)
    98  	}
    99  
   100  	if entryParsingInProgress {
   101  		// There was no final empty line, so currentEntry hasn't been added to the
   102  		// collection yet; but we've now reached the end of scanning, so let's be sure to
   103  		// add currentEntry to the collection.
   104  		appendApk(currentEntry)
   105  	}
   106  
   107  	if err := scanner.Err(); err != nil {
   108  		return nil, nil, fmt.Errorf("failed to parse APK installed DB file: %w", err)
   109  	}
   110  
   111  	var r *linux.Release
   112  	if env != nil {
   113  		r = env.LinuxRelease
   114  	}
   115  	// this is somewhat ugly, but better than completely failing when we can't find the release,
   116  	// e.g. embedded deeper in the tree, like containers or chroots.
   117  	// but we now have no way of handling different repository sources. On the other hand,
   118  	// we never could before this. At least now, we can handle some.
   119  	// This should get fixed with https://gitlab.alpinelinux.org/alpine/apk-tools/-/issues/10875
   120  	if r == nil {
   121  		// find the repositories file from the relative directory of the DB file
   122  		releases := findReleases(resolver, reader.Location.RealPath)
   123  
   124  		if len(releases) > 0 {
   125  			r = &releases[0]
   126  		}
   127  	}
   128  
   129  	pkgs := make([]pkg.Package, 0, len(apks))
   130  	for _, apk := range apks {
   131  		pkgs = append(pkgs, newPackage(apk, r, reader.Location))
   132  	}
   133  
   134  	return pkgs, nil, nil
   135  }
   136  
   137  func findReleases(resolver file.Resolver, dbPath string) []linux.Release {
   138  	if resolver == nil {
   139  		return nil
   140  	}
   141  
   142  	reposLocation := path.Clean(path.Join(path.Dir(dbPath), "../../../etc/apk/repositories"))
   143  	locations, err := resolver.FilesByPath(reposLocation)
   144  	if err != nil {
   145  		log.Tracef("unable to find APK repositories file %q: %+v", reposLocation, err)
   146  		return nil
   147  	}
   148  
   149  	if len(locations) == 0 {
   150  		return nil
   151  	}
   152  	location := locations[0]
   153  
   154  	reposReader, err := resolver.FileContentsByLocation(location)
   155  	if err != nil {
   156  		log.Tracef("unable to fetch contents for APK repositories file %q: %+v", reposLocation, err)
   157  		return nil
   158  	}
   159  	defer internal.CloseAndLogError(reposReader, location.RealPath)
   160  
   161  	return parseReleasesFromAPKRepository(file.LocationReadCloser{
   162  		Location:   location,
   163  		ReadCloser: reposReader,
   164  	})
   165  }
   166  
   167  func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release {
   168  	var releases []linux.Release
   169  
   170  	reposB, err := io.ReadAll(reader)
   171  	if err != nil {
   172  		log.Tracef("unable to read APK repositories file %q: %+v", reader.Location.RealPath, err)
   173  		return nil
   174  	}
   175  
   176  	parts := repoRegex.FindAllStringSubmatch(string(reposB), -1)
   177  	for _, part := range parts {
   178  		if len(part) >= 3 {
   179  			releases = append(releases, linux.Release{
   180  				Name:      "Alpine Linux",
   181  				ID:        "alpine",
   182  				VersionID: part[1],
   183  			})
   184  		}
   185  	}
   186  
   187  	return releases
   188  }
   189  
   190  func parseApkField(line string) *apkField {
   191  	parts := strings.SplitN(line, ":", 2)
   192  	if len(parts) != 2 {
   193  		return nil
   194  	}
   195  
   196  	f := apkField{
   197  		name:  parts[0],
   198  		value: parts[1],
   199  	}
   200  
   201  	return &f
   202  }
   203  
   204  type apkField struct {
   205  	name  string
   206  	value string
   207  }
   208  
   209  //nolint:funlen
   210  func (f apkField) apply(p *parsedData, ctx *apkFileParsingContext) {
   211  	switch f.name {
   212  	// APKINDEX field parsing
   213  
   214  	case "P":
   215  		p.Package = f.value
   216  	case "o":
   217  		p.OriginPackage = f.value
   218  	case "m":
   219  		p.Maintainer = f.value
   220  	case "V":
   221  		p.Version = f.value
   222  	case "L":
   223  		p.License = f.value
   224  	case "A":
   225  		p.Architecture = f.value
   226  	case "U":
   227  		p.URL = f.value
   228  	case "T":
   229  		p.Description = f.value
   230  	case "S":
   231  		i, err := strconv.Atoi(f.value)
   232  		if err != nil {
   233  			log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err)
   234  			return
   235  		}
   236  
   237  		p.Size = i
   238  	case "I":
   239  		i, err := strconv.Atoi(f.value)
   240  		if err != nil {
   241  			log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err)
   242  			return
   243  		}
   244  
   245  		p.InstalledSize = i
   246  	case "D":
   247  		deps := parseListValue(f.value)
   248  		p.Dependencies = deps
   249  	case "p":
   250  		provides := parseListValue(f.value)
   251  		p.Provides = provides
   252  	case "C":
   253  		p.Checksum = f.value
   254  	case "c":
   255  		p.GitCommit = f.value
   256  
   257  	// File/directory field parsing:
   258  
   259  	case "F":
   260  		directory := path.Join("/", f.value)
   261  
   262  		ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: directory})
   263  		ctx.indexOfLatestDirectory = len(ctx.files) - 1
   264  	case "M":
   265  		i := ctx.indexOfLatestDirectory
   266  		latest := ctx.files[i]
   267  
   268  		var ok bool
   269  		latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
   270  		if !ok {
   271  			log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value)
   272  			return
   273  		}
   274  
   275  		// save updated directory
   276  		ctx.files[i] = latest
   277  	case "R":
   278  		var regularFile string
   279  
   280  		dirIndex := ctx.indexOfLatestDirectory
   281  		if dirIndex < 0 {
   282  			regularFile = path.Join("/", f.value)
   283  		} else {
   284  			latestDirPath := ctx.files[dirIndex].Path
   285  			regularFile = path.Join(latestDirPath, f.value)
   286  		}
   287  
   288  		ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: regularFile})
   289  		ctx.indexOfLatestRegularFile = len(ctx.files) - 1
   290  	case "a":
   291  		i := ctx.indexOfLatestRegularFile
   292  		latest := ctx.files[i]
   293  
   294  		var ok bool
   295  		latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
   296  		if !ok {
   297  			log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value)
   298  			return
   299  		}
   300  
   301  		// save updated file
   302  		ctx.files[i] = latest
   303  	case "Z":
   304  		i := ctx.indexOfLatestRegularFile
   305  		latest := ctx.files[i]
   306  		latest.Digest = processChecksum(f.value)
   307  
   308  		// save updated file
   309  		ctx.files[i] = latest
   310  	}
   311  }
   312  
   313  func processFileInfo(v string) (uid, gid, perms string, ok bool) {
   314  	ok = false
   315  
   316  	fileInfo := strings.Split(v, ":")
   317  	if len(fileInfo) < 3 {
   318  		return
   319  	}
   320  
   321  	uid = fileInfo[0]
   322  	gid = fileInfo[1]
   323  	perms = fileInfo[2]
   324  
   325  	// note: there are more optional fields available that we are not capturing,
   326  	// e.g.: "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4="
   327  
   328  	ok = true
   329  	return
   330  }
   331  
// apkFileParsingContext helps keep track of what file data has been captured so far for the APK currently being parsed.
type apkFileParsingContext struct {
	// files holds every directory and regular file record captured so far, in the
	// order they were appended.
	files []pkg.ApkFileRecord
	// indexOfLatestDirectory is the index into files of the most recently appended
	// directory record (-1 when created via newApkFileParsingContext and no
	// directory has been appended yet).
	indexOfLatestDirectory int
	// indexOfLatestRegularFile is the index into files of the most recently
	// appended regular file record (-1 when created via newApkFileParsingContext
	// and no regular file has been appended yet).
	indexOfLatestRegularFile int
}
   338  
   339  func newApkFileParsingContext() *apkFileParsingContext {
   340  	return &apkFileParsingContext{
   341  		indexOfLatestDirectory:   -1, // no directories yet
   342  		indexOfLatestRegularFile: -1, // no regular files yet
   343  	}
   344  }
   345  
   346  // parseListValue parses a space-separated list from an apk entry field value.
   347  func parseListValue(value string) []string {
   348  	items := strings.Split(value, " ")
   349  	if len(items) >= 1 {
   350  		return items
   351  	}
   352  
   353  	return nil
   354  }
   355  
   356  func nilFieldsToEmptySlice(p *parsedData) {
   357  	if p.Dependencies == nil {
   358  		p.Dependencies = []string{}
   359  	}
   360  
   361  	if p.Provides == nil {
   362  		p.Provides = []string{}
   363  	}
   364  
   365  	if p.Files == nil {
   366  		p.Files = []pkg.ApkFileRecord{}
   367  	}
   368  }
   369  
   370  func processChecksum(value string) *file.Digest {
   371  	// from: https://wiki.alpinelinux.org/wiki/Apk_spec
   372  	// The package checksum field is the SHA1 hash of the second gzip stream (control stream) in the package. The
   373  	// binary hash digest is base64 encoded. This is prefixed with Q1 to differentiate it from the MD5 hashes
   374  	// used in older index formats. It is not possible to compute this checksum with standard command line tools
   375  	// but the apk-tools can compute it in their index operation.
   376  
   377  	// based on https://github.com/alpinelinux/apk-tools/blob/dd1908f2fc20b4cfe2c15c55fafaa5fadfb599dc/src/blob.c#L379-L393
   378  	// it seems that the old md5 checksum value was only the hex representation (not base64)
   379  	algorithm := "md5"
   380  	if strings.HasPrefix(value, "Q1") {
   381  		algorithm = "'Q1'+base64(sha1)"
   382  	}
   383  
   384  	return &file.Digest{
   385  		Algorithm: algorithm,
   386  		Value:     value,
   387  	}
   388  }