github.com/quay/claircore@v1.5.28/dpkg/scanner.go (about)

     1  // Package dpkg implements a package indexer for dpkg packages.
     2  package dpkg
     3  
     4  import (
     5  	"bufio"
     6  	"context"
     7  	"crypto/md5"
     8  	"encoding/hex"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"io/fs"
    13  	"net/textproto"
    14  	"path/filepath"
    15  	"runtime/trace"
    16  	"slices"
    17  	"strings"
    18  
    19  	"github.com/quay/zlog"
    20  
    21  	"github.com/quay/claircore"
    22  	"github.com/quay/claircore/indexer"
    23  )
    24  
    25  const (
    26  	name    = "dpkg"
    27  	kind    = "package"
    28  	version = "6"
    29  )
    30  
    31  var (
    32  	_ indexer.VersionedScanner = (*Scanner)(nil)
    33  	_ indexer.PackageScanner   = (*Scanner)(nil)
    34  )
    35  
    36  // Scanner implements the scanner.PackageScanner interface.
    37  //
    38  // This looks for directories that look like dpkg databases and examines the
    39  // "status" file it finds there.
    40  //
    41  // The zero value is ready to use.
    42  type Scanner struct{}
    43  
    44  // Name implements scanner.VersionedScanner.
    45  func (ps *Scanner) Name() string { return name }
    46  
    47  // Version implements scanner.VersionedScanner.
    48  func (ps *Scanner) Version() string { return version }
    49  
    50  // Kind implements scanner.VersionedScanner.
    51  func (ps *Scanner) Kind() string { return kind }
    52  
    53  // Scan attempts to find a dpkg database within the layer and read all of the
    54  // installed packages it can find in the "status" file.
    55  //
    56  // It's expected to return (nil, nil) if there's no dpkg database in the layer.
    57  //
    58  // It does not respect any dpkg configuration files.
    59  func (ps *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*claircore.Package, error) {
    60  	// Preamble
    61  	defer trace.StartRegion(ctx, "Scanner.Scan").End()
    62  	trace.Log(ctx, "layer", layer.Hash.String())
    63  	ctx = zlog.ContextWithValues(ctx,
    64  		"component", "dpkg/Scanner.Scan",
    65  		"version", ps.Version(),
    66  		"layer", layer.Hash.String())
    67  	zlog.Debug(ctx).Msg("start")
    68  	defer zlog.Debug(ctx).Msg("done")
    69  
    70  	sys, err := layer.FS()
    71  	if err != nil {
    72  		return nil, fmt.Errorf("dpkg: opening layer failed: %w", err)
    73  	}
    74  
    75  	// This is a map keyed by directory. A "score" of 2 means this is almost
    76  	// certainly a dpkg database.
    77  	loc := make(map[string]int)
    78  	walk := func(p string, d fs.DirEntry, err error) error {
    79  		if err != nil {
    80  			return err
    81  		}
    82  		switch dir, f := filepath.Split(p); {
    83  		case f == "status" && !d.IsDir():
    84  			loc[dir]++
    85  		case f == "info" && d.IsDir():
    86  			loc[dir]++
    87  		}
    88  		return nil
    89  	}
    90  
    91  	if err := fs.WalkDir(sys, ".", walk); err != nil {
    92  		return nil, err
    93  	}
    94  	zlog.Debug(ctx).Msg("scanned for possible databases")
    95  
    96  	// If we didn't find anything, this loop is completely skipped.
    97  	var pkgs []*claircore.Package
    98  	var found *packages
    99  	for p, x := range loc {
   100  		if x != 2 { // If we didn't find both files, skip this directory.
   101  			continue
   102  		}
   103  		if found == nil {
   104  			found = newPackages()
   105  		} else {
   106  			found.Reset()
   107  		}
   108  		if err := loadDatabase(ctx, sys, p, found, &pkgs); err != nil {
   109  			return nil, err
   110  		}
   111  	}
   112  
   113  	// This shouldn't need to use the "Stable" variant, as Name+Version should
   114  	// be unique.
   115  	slices.SortFunc(pkgs, sortpkg)
   116  
   117  	return pkgs, nil
   118  }
   119  
   120  type packages struct {
   121  	bin map[string]*claircore.Package
   122  	src map[string]*claircore.Package
   123  }
   124  
   125  func newPackages() *packages {
   126  	// Guess at initial sizing.
   127  	return &packages{
   128  		bin: make(map[string]*claircore.Package, 1024),
   129  		src: make(map[string]*claircore.Package, 1024),
   130  	}
   131  }
   132  
   133  func (p *packages) Reset() {
   134  	clear(p.bin)
   135  	clear(p.src)
   136  }
   137  
   138  // Sortpkg is a function for [slices.SortFunc].
   139  // Defined this way to make it usable in tests.
   140  func sortpkg(a, b *claircore.Package) int {
   141  	cmp := strings.Compare(a.Name, b.Name)
   142  	if cmp == 0 {
   143  		return strings.Compare(a.Version, b.Version)
   144  	}
   145  	return cmp
   146  }
   147  
   148  // LoadDatabase loads the "status" and "info" files in the indicated directory.
   149  //
   150  // "Found"is used for scratch space and results are appended to the slice pointed to by "out".
   151  func loadDatabase(ctx context.Context, sys fs.FS, dir string, found *packages, out *[]*claircore.Package) error {
   152  	zlog.Debug(ctx).Msg("examining package database")
   153  
   154  	// We want the "status" file.
   155  	fn := filepath.Join(dir, "status")
   156  	db, err := sys.Open(fn)
   157  	switch {
   158  	case errors.Is(err, nil):
   159  	case errors.Is(err, fs.ErrNotExist):
   160  		zlog.Debug(ctx).
   161  			Str("filename", fn).
   162  			Msg("false positive")
   163  		return err
   164  	default:
   165  		return fmt.Errorf("reading status file from layer failed: %w", err)
   166  	}
   167  
   168  	// The database is actually an RFC822-like message with "\n\n"
   169  	// separators, so don't be alarmed by the usage of the "net/textproto"
   170  	// package here.
   171  	tp := textproto.NewReader(bufio.NewReader(db))
   172  	if err := parseStatus(ctx, found, fn, tp); err != nil {
   173  		return fmt.Errorf("unable to parse status file %q: %w", fn, err)
   174  	}
   175  
   176  	const suffix = ".md5sums"
   177  	ms, err := fs.Glob(sys, filepath.Join(dir, "info", "*"+suffix))
   178  	if err != nil {
   179  		panic(fmt.Sprintf("programmer error: %v", err))
   180  	}
   181  	hash := md5.New()
   182  	for _, n := range ms {
   183  		k := strings.TrimSuffix(filepath.Base(n), suffix)
   184  		if i := strings.IndexRune(k, ':'); i != -1 {
   185  			k = k[:i]
   186  		}
   187  		p, ok := found.bin[k]
   188  		if !ok {
   189  			zlog.Debug(ctx).
   190  				Str("package", k).
   191  				Msg("extra metadata found, ignoring")
   192  			continue
   193  		}
   194  		f, err := sys.Open(n)
   195  		if err != nil {
   196  			return fmt.Errorf("unable to open file %q: %w", n, err)
   197  		}
   198  		hash.Reset()
   199  		_, err = io.Copy(hash, f)
   200  		f.Close()
   201  		if err != nil {
   202  			zlog.Warn(ctx).
   203  				Err(err).
   204  				Str("package", n).
   205  				Msg("unable to read package metadata")
   206  			continue
   207  		}
   208  		p.RepositoryHint = hex.EncodeToString(hash.Sum(nil))
   209  	}
   210  	zlog.Debug(ctx).
   211  		Int("count", len(found.bin)).
   212  		Msg("found packages")
   213  
   214  	for _, pkg := range found.bin {
   215  		*out = append(*out, pkg)
   216  	}
   217  
   218  	return nil
   219  }
   220  
   221  // ParseStatus parses the dpkg "status" file in "tp".
   222  //
   223  // Packages are stored in "found".
   224  func parseStatus(ctx context.Context, found *packages, fn string, tp *textproto.Reader) error {
   225  Restart:
   226  	hdr, err := tp.ReadMIMEHeader()
   227  	for ; err == nil && len(hdr) > 0; hdr, err = tp.ReadMIMEHeader() {
   228  		var ok, installed bool
   229  		for _, s := range strings.Fields(hdr.Get("Status")) {
   230  			switch s {
   231  			case "installed":
   232  				installed = true
   233  			case "ok":
   234  				ok = true
   235  			}
   236  		}
   237  		if !ok || !installed {
   238  			continue
   239  		}
   240  		name := hdr.Get("Package")
   241  		v := hdr.Get("Version")
   242  		p := &claircore.Package{
   243  			Name:      name,
   244  			Version:   v,
   245  			Kind:      claircore.BINARY,
   246  			Arch:      hdr.Get("Architecture"),
   247  			PackageDB: fn,
   248  		}
   249  		if src := hdr.Get("Source"); src != "" {
   250  			// This "Name (Version)" scheme is handled by dpkg-query(1), so we
   251  			// do similar.
   252  			name, ver, ok := strings.Cut(src, " ")
   253  			if ok {
   254  				ver = strings.Trim(ver, "()")
   255  			} else {
   256  				name = src
   257  				ver = v
   258  			}
   259  
   260  			srcpkg, ok := found.src[name]
   261  			if !ok {
   262  				srcpkg = &claircore.Package{
   263  					Name:      name,
   264  					Kind:      claircore.SOURCE,
   265  					Version:   ver,
   266  					PackageDB: fn,
   267  				}
   268  				found.src[name] = srcpkg
   269  			}
   270  			p.Source = srcpkg
   271  		} else {
   272  			// If there's not an explicit source package listed, assume it's a
   273  			// 1-to-1 mapping.
   274  			p.Source = &claircore.Package{
   275  				Name:      name,
   276  				Kind:      claircore.SOURCE,
   277  				Version:   v,
   278  				PackageDB: fn,
   279  			}
   280  		}
   281  
   282  		found.bin[name] = p
   283  	}
   284  	switch {
   285  	case errors.Is(err, io.EOF):
   286  	default:
   287  		zlog.Warn(ctx).Err(err).Msg("unable to read entry")
   288  		goto Restart
   289  	}
   290  	return nil
   291  }