github.com/quay/claircore@v1.5.28/rpm/native_db.go (about)

     1  package rpm
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"path"
     9  	"regexp"
    10  	"runtime/trace"
    11  	"strings"
    12  
    13  	"github.com/quay/zlog"
    14  	"golang.org/x/crypto/openpgp/packet"
    15  
    16  	"github.com/quay/claircore"
    17  	"github.com/quay/claircore/rpm/internal/rpm"
    18  )
    19  
    20  // NativeDB is the interface implemented for in-process RPM database handlers.
    21  type nativeDB interface {
    22  	AllHeaders(context.Context) ([]io.ReaderAt, error)
    23  	Validate(context.Context) error
    24  }
    25  
    26  // PackagesFromDB extracts the packages from the RPM headers provided by
    27  // the database.
    28  func packagesFromDB(ctx context.Context, pkgdb string, db nativeDB) ([]*claircore.Package, error) {
    29  	defer trace.StartRegion(ctx, "packagesFromDB").End()
    30  	rds, err := db.AllHeaders(ctx)
    31  	if err != nil {
    32  		return nil, fmt.Errorf("rpm: error reading headers: %w", err)
    33  	}
    34  	// Bulk allocations:
    35  	ps := make([]claircore.Package, 0, len(rds))
    36  	pkgs := make([]*claircore.Package, 0, len(rds))
    37  	srcs := make([]claircore.Package, 0, len(rds)) // Worst-case size.
    38  	src := make(map[string]*claircore.Package)
    39  	src["(none)"] = nil
    40  	var b strings.Builder
    41  
    42  	for _, rd := range rds {
    43  		var h rpm.Header
    44  		if err := h.Parse(ctx, rd); err != nil {
    45  			return nil, err
    46  		}
    47  		var info Info
    48  		if err := info.Load(ctx, &h); err != nil {
    49  			return nil, err
    50  		}
    51  		if info.Name == "gpg-pubkey" {
    52  			// This is *not* an rpm package. It is just a public key stored in the rpm database.
    53  			// Ignore this "package".
    54  			continue
    55  		}
    56  
    57  		idx := len(ps)
    58  		ps = append(ps, claircore.Package{
    59  			Kind:      claircore.BINARY,
    60  			Name:      info.Name,
    61  			Arch:      info.Arch,
    62  			PackageDB: pkgdb,
    63  		})
    64  		p := &ps[idx]
    65  		var modStream string
    66  		if strings.Count(info.Module, ":") > 1 {
    67  			first := true
    68  			idx := strings.IndexFunc(info.Module, func(r rune) bool {
    69  				if r != ':' {
    70  					return false
    71  				}
    72  				if first {
    73  					first = false
    74  					return false
    75  				}
    76  				return true
    77  			})
    78  			modStream = info.Module[:idx]
    79  		}
    80  		p.Module = modStream
    81  		p.Version = constructEVR(&b, &info)
    82  		p.RepositoryHint = constructHint(&b, &info)
    83  
    84  		if s, ok := src[info.SourceNEVR]; ok {
    85  			p.Source = s
    86  		} else {
    87  			s := strings.TrimSuffix(info.SourceNEVR, ".src.rpm")
    88  			pos := len(s)
    89  			for i := 0; i < 2; i++ {
    90  				pos = strings.LastIndexByte(s[:pos], '-')
    91  				if pos == -1 {
    92  					return nil, fmt.Errorf("malformed NEVR: %q", info.SourceNEVR)
    93  				}
    94  			}
    95  
    96  			idx := len(srcs)
    97  			srcs = append(srcs, claircore.Package{
    98  				Kind:    claircore.SOURCE,
    99  				Name:    s[:pos],
   100  				Version: strings.TrimPrefix(s[pos+1:], "0:"),
   101  			})
   102  			pkg := &srcs[idx]
   103  			src[info.SourceNEVR] = pkg
   104  			p.Source = pkg
   105  			pkg.Module = modStream
   106  		}
   107  
   108  		pkgs = append(pkgs, p)
   109  	}
   110  	zlog.Debug(ctx).
   111  		Int("packages", len(pkgs)).
   112  		Int("sources", len(srcs)).
   113  		Msg("processed rpm db")
   114  	return pkgs, nil
   115  }
   116  
   117  // Info is the package information extracted from the RPM header.
   118  type Info struct {
   119  	Name       string
   120  	Version    string
   121  	Release    string
   122  	SourceNEVR string
   123  	Module     string
   124  	Arch       string
   125  	Digest     string
   126  	Signature  []byte   // This is a PGP signature packet.
   127  	Filenames  []string // Filtered by the [filePatterns] regexp.
   128  	DigestAlgo int
   129  	Epoch      int
   130  }
   131  
   132  // Load populates the receiver with information extracted from the provided
   133  // [rpm.Header].
   134  func (i *Info) Load(ctx context.Context, h *rpm.Header) error {
   135  	var dirname, basename []string
   136  	var dirindex []int32
   137  	for idx := range h.Infos {
   138  		e := &h.Infos[idx]
   139  		if _, ok := wantTags[e.Tag]; !ok {
   140  			continue
   141  		}
   142  		v, err := h.ReadData(ctx, e)
   143  		if err != nil {
   144  			return err
   145  		}
   146  		switch e.Tag {
   147  		case rpm.TagName:
   148  			i.Name = v.(string)
   149  		case rpm.TagEpoch:
   150  			i.Epoch = int(v.([]int32)[0])
   151  		case rpm.TagVersion:
   152  			i.Version = v.(string)
   153  		case rpm.TagRelease:
   154  			i.Release = v.(string)
   155  		case rpm.TagSourceRPM:
   156  			i.SourceNEVR = v.(string)
   157  		case rpm.TagModularityLabel:
   158  			i.Module = v.(string)
   159  		case rpm.TagArch:
   160  			i.Arch = v.(string)
   161  		case rpm.TagPayloadDigestAlgo:
   162  			i.DigestAlgo = int(v.([]int32)[0])
   163  		case rpm.TagPayloadDigest:
   164  			i.Digest = v.([]string)[0]
   165  		case rpm.TagSigPGP:
   166  			i.Signature = v.([]byte)
   167  		case rpm.TagDirnames:
   168  			dirname = v.([]string)
   169  		case rpm.TagDirindexes:
   170  			dirindex = v.([]int32)
   171  		case rpm.TagBasenames:
   172  			basename = v.([]string)
   173  		case rpm.TagFilenames:
   174  			// Filenames is the tag used in rpm4 -- this is a best-effort for
   175  			// supporting it.
   176  			for _, name := range v.([]string) {
   177  				if !filePatterns.MatchString(name) {
   178  					// Record the name as a relative path, as that's what we use
   179  					// everywhere else.
   180  					i.Filenames = append(i.Filenames, name[1:])
   181  				}
   182  			}
   183  		}
   184  	}
   185  
   186  	// Catch panics from malformed headers. Can't think of a better way to
   187  	// handle this.
   188  	defer func() {
   189  		if r := recover(); r == nil {
   190  			return
   191  		}
   192  		zlog.Warn(ctx).
   193  			Str("name", i.Name).
   194  			Strs("basename", basename).
   195  			Strs("dirname", dirname).
   196  			Ints32("dirindex", dirindex).
   197  			Msg("caught panic in filename construction")
   198  		i.Filenames = nil
   199  	}()
   200  	for j := range basename {
   201  		// We only want '/'-separated paths, even if running on some other,
   202  		// weird OS. It seems that RPM assumes '/' throughout.
   203  		name := path.Join(dirname[dirindex[j]], basename[j])
   204  		if filePatterns.MatchString(name) {
   205  			// Record the name as a relative path, as that's what we use
   206  			// everywhere else.
   207  			i.Filenames = append(i.Filenames, name[1:])
   208  		}
   209  	}
   210  	return nil
   211  }
   212  
   213  // FilePatterns is a regular expression for *any* file that may need to be
   214  // recorded alongside a package.
   215  //
   216  // The tested strings are absolute paths.
   217  var filePatterns *regexp.Regexp
   218  
   219  func init() {
   220  	// TODO(hank) The blanket binary pattern is too broad and can miss things.
   221  	// Long-term, we should add pattern matching akin to [yara] or file(1) as a
   222  	// plugin mechanism that all indexers can use. That way, the Go indexer
   223  	// could register a pattern and use a shared filter over the
   224  	// [fs.WalkDirFunc] while this package (and dpkg, etc) can tell that another
   225  	// indexer will find those files relevant.
   226  	//
   227  	// [yara]: https://github.com/VirusTotal/yara
   228  	pat := []string{
   229  		`^.*/[^/]+\.jar$`, // Jar files
   230  		`^.*/site-packages/[^/]+\.egg-info/PKG-INFO$`, // Python packages
   231  		`^.*/package.json$`,                           // npm packages
   232  		`^.*/[^/]+\.gemspec$`,                         // ruby gems
   233  		`^/usr/bin/[^/]+$`,                            // any executable
   234  	}
   235  	filePatterns = regexp.MustCompile(strings.Join(pat, `|`))
   236  }
   237  
   238  var wantTags = map[rpm.Tag]struct{}{
   239  	rpm.TagArch:              {},
   240  	rpm.TagBasenames:         {},
   241  	rpm.TagDirindexes:        {},
   242  	rpm.TagDirnames:          {},
   243  	rpm.TagEpoch:             {},
   244  	rpm.TagFilenames:         {},
   245  	rpm.TagModularityLabel:   {},
   246  	rpm.TagName:              {},
   247  	rpm.TagPayloadDigest:     {},
   248  	rpm.TagPayloadDigestAlgo: {},
   249  	rpm.TagRelease:           {},
   250  	rpm.TagSigPGP:            {},
   251  	rpm.TagSourceRPM:         {},
   252  	rpm.TagVersion:           {},
   253  }
   254  
   255  func constructEVR(b *strings.Builder, info *Info) string {
   256  	b.Reset()
   257  	if info.Epoch != 0 {
   258  		fmt.Fprintf(b, "%d:", info.Epoch)
   259  	}
   260  	b.WriteString(info.Version)
   261  	b.WriteByte('-')
   262  	b.WriteString(info.Release)
   263  	return b.String()
   264  }
   265  
   266  func constructHint(b *strings.Builder, info *Info) string {
   267  	b.Reset()
   268  	if info.Digest != "" {
   269  		b.WriteString("hash:")
   270  		switch info.DigestAlgo {
   271  		case 8:
   272  			b.WriteString("sha256:")
   273  			b.WriteString(info.Digest)
   274  		}
   275  	}
   276  	if len(info.Signature) != 0 {
   277  		prd := packet.NewReader(bytes.NewReader(info.Signature))
   278  		p, err := prd.Next()
   279  		for ; err == nil; p, err = prd.Next() {
   280  			switch p := p.(type) {
   281  			case *packet.SignatureV3:
   282  				if p.SigType != 0 {
   283  					continue
   284  				}
   285  				if b.Len() != 0 {
   286  					b.WriteByte('|')
   287  				}
   288  				fmt.Fprintf(b, "key:%016x", p.IssuerKeyId)
   289  			case *packet.Signature:
   290  				if p.SigType != 0 || p.IssuerKeyId == nil {
   291  					continue
   292  				}
   293  				if b.Len() != 0 {
   294  					b.WriteByte('|')
   295  				}
   296  				fmt.Fprintf(b, "key:%016x", *p.IssuerKeyId)
   297  			}
   298  		}
   299  	}
   300  	return b.String()
   301  }