github.com/quay/claircore@v1.5.28/rpm/native_db.go (about) 1 package rpm 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "io" 8 "path" 9 "regexp" 10 "runtime/trace" 11 "strings" 12 13 "github.com/quay/zlog" 14 "golang.org/x/crypto/openpgp/packet" 15 16 "github.com/quay/claircore" 17 "github.com/quay/claircore/rpm/internal/rpm" 18 ) 19 20 // NativeDB is the interface implemented for in-process RPM database handlers. 21 type nativeDB interface { 22 AllHeaders(context.Context) ([]io.ReaderAt, error) 23 Validate(context.Context) error 24 } 25 26 // PackagesFromDB extracts the packages from the RPM headers provided by 27 // the database. 28 func packagesFromDB(ctx context.Context, pkgdb string, db nativeDB) ([]*claircore.Package, error) { 29 defer trace.StartRegion(ctx, "packagesFromDB").End() 30 rds, err := db.AllHeaders(ctx) 31 if err != nil { 32 return nil, fmt.Errorf("rpm: error reading headers: %w", err) 33 } 34 // Bulk allocations: 35 ps := make([]claircore.Package, 0, len(rds)) 36 pkgs := make([]*claircore.Package, 0, len(rds)) 37 srcs := make([]claircore.Package, 0, len(rds)) // Worst-case size. 38 src := make(map[string]*claircore.Package) 39 src["(none)"] = nil 40 var b strings.Builder 41 42 for _, rd := range rds { 43 var h rpm.Header 44 if err := h.Parse(ctx, rd); err != nil { 45 return nil, err 46 } 47 var info Info 48 if err := info.Load(ctx, &h); err != nil { 49 return nil, err 50 } 51 if info.Name == "gpg-pubkey" { 52 // This is *not* an rpm package. It is just a public key stored in the rpm database. 53 // Ignore this "package". 54 continue 55 } 56 57 idx := len(ps) 58 ps = append(ps, claircore.Package{ 59 Kind: claircore.BINARY, 60 Name: info.Name, 61 Arch: info.Arch, 62 PackageDB: pkgdb, 63 }) 64 p := &ps[idx] 65 var modStream string 66 if strings.Count(info.Module, ":") > 1 { 67 first := true 68 idx := strings.IndexFunc(info.Module, func(r rune) bool { 69 if r != ':' { 70 return false 71 } 72 if first { 73 first = false 74 return false 75 } 76 return true 77 }) 78 modStream = info.Module[:idx] 79 } 80 p.Module = modStream 81 p.Version = constructEVR(&b, &info) 82 p.RepositoryHint = constructHint(&b, &info) 83 84 if s, ok := src[info.SourceNEVR]; ok { 85 p.Source = s 86 } else { 87 s := strings.TrimSuffix(info.SourceNEVR, ".src.rpm") 88 pos := len(s) 89 for i := 0; i < 2; i++ { 90 pos = strings.LastIndexByte(s[:pos], '-') 91 if pos == -1 { 92 return nil, fmt.Errorf("malformed NEVR: %q", info.SourceNEVR) 93 } 94 } 95 96 idx := len(srcs) 97 srcs = append(srcs, claircore.Package{ 98 Kind: claircore.SOURCE, 99 Name: s[:pos], 100 Version: strings.TrimPrefix(s[pos+1:], "0:"), 101 }) 102 pkg := &srcs[idx] 103 src[info.SourceNEVR] = pkg 104 p.Source = pkg 105 pkg.Module = modStream 106 } 107 108 pkgs = append(pkgs, p) 109 } 110 zlog.Debug(ctx). 111 Int("packages", len(pkgs)). 112 Int("sources", len(srcs)). 113 Msg("processed rpm db") 114 return pkgs, nil 115 } 116 117 // Info is the package information extracted from the RPM header. 118 type Info struct { 119 Name string 120 Version string 121 Release string 122 SourceNEVR string 123 Module string 124 Arch string 125 Digest string 126 Signature []byte // This is a PGP signature packet. 127 Filenames []string // Filtered by the [filePatterns] regexp. 128 DigestAlgo int 129 Epoch int 130 } 131 132 // Load populates the receiver with information extracted from the provided 133 // [rpm.Header]. 134 func (i *Info) Load(ctx context.Context, h *rpm.Header) error { 135 var dirname, basename []string 136 var dirindex []int32 137 for idx := range h.Infos { 138 e := &h.Infos[idx] 139 if _, ok := wantTags[e.Tag]; !ok { 140 continue 141 } 142 v, err := h.ReadData(ctx, e) 143 if err != nil { 144 return err 145 } 146 switch e.Tag { 147 case rpm.TagName: 148 i.Name = v.(string) 149 case rpm.TagEpoch: 150 i.Epoch = int(v.([]int32)[0]) 151 case rpm.TagVersion: 152 i.Version = v.(string) 153 case rpm.TagRelease: 154 i.Release = v.(string) 155 case rpm.TagSourceRPM: 156 i.SourceNEVR = v.(string) 157 case rpm.TagModularityLabel: 158 i.Module = v.(string) 159 case rpm.TagArch: 160 i.Arch = v.(string) 161 case rpm.TagPayloadDigestAlgo: 162 i.DigestAlgo = int(v.([]int32)[0]) 163 case rpm.TagPayloadDigest: 164 i.Digest = v.([]string)[0] 165 case rpm.TagSigPGP: 166 i.Signature = v.([]byte) 167 case rpm.TagDirnames: 168 dirname = v.([]string) 169 case rpm.TagDirindexes: 170 dirindex = v.([]int32) 171 case rpm.TagBasenames: 172 basename = v.([]string) 173 case rpm.TagFilenames: 174 // Filenames is the tag used in rpm4 -- this is a best-effort for 175 // supporting it. 176 for _, name := range v.([]string) { 177 if !filePatterns.MatchString(name) { 178 // Record the name as a relative path, as that's what we use 179 // everywhere else. 180 i.Filenames = append(i.Filenames, name[1:]) 181 } 182 } 183 } 184 } 185 186 // Catch panics from malformed headers. Can't think of a better way to 187 // handle this. 188 defer func() { 189 if r := recover(); r == nil { 190 return 191 } 192 zlog.Warn(ctx). 193 Str("name", i.Name). 194 Strs("basename", basename). 195 Strs("dirname", dirname). 196 Ints32("dirindex", dirindex). 197 Msg("caught panic in filename construction") 198 i.Filenames = nil 199 }() 200 for j := range basename { 201 // We only want '/'-separated paths, even if running on some other, 202 // weird OS. It seems that RPM assumes '/' throughout. 203 name := path.Join(dirname[dirindex[j]], basename[j]) 204 if filePatterns.MatchString(name) { 205 // Record the name as a relative path, as that's what we use 206 // everywhere else. 207 i.Filenames = append(i.Filenames, name[1:]) 208 } 209 } 210 return nil 211 } 212 213 // FilePatterns is a regular expression for *any* file that may need to be 214 // recorded alongside a package. 215 // 216 // The tested strings are absolute paths. 217 var filePatterns *regexp.Regexp 218 219 func init() { 220 // TODO(hank) The blanket binary pattern is too broad and can miss things. 221 // Long-term, we should add pattern matching akin to [yara] or file(1) as a 222 // plugin mechanism that all indexers can use. That way, the Go indexer 223 // could register a pattern and use a shared filter over the 224 // [fs.WalkDirFunc] while this package (and dpkg, etc) can tell that another 225 // indexer will find those files relevant. 226 // 227 // [yara]: https://github.com/VirusTotal/yara 228 pat := []string{ 229 `^.*/[^/]+\.jar$`, // Jar files 230 `^.*/site-packages/[^/]+\.egg-info/PKG-INFO$`, // Python packages 231 `^.*/package.json$`, // npm packages 232 `^.*/[^/]+\.gemspec$`, // ruby gems 233 `^/usr/bin/[^/]+$`, // any executable 234 } 235 filePatterns = regexp.MustCompile(strings.Join(pat, `|`)) 236 } 237 238 var wantTags = map[rpm.Tag]struct{}{ 239 rpm.TagArch: {}, 240 rpm.TagBasenames: {}, 241 rpm.TagDirindexes: {}, 242 rpm.TagDirnames: {}, 243 rpm.TagEpoch: {}, 244 rpm.TagFilenames: {}, 245 rpm.TagModularityLabel: {}, 246 rpm.TagName: {}, 247 rpm.TagPayloadDigest: {}, 248 rpm.TagPayloadDigestAlgo: {}, 249 rpm.TagRelease: {}, 250 rpm.TagSigPGP: {}, 251 rpm.TagSourceRPM: {}, 252 rpm.TagVersion: {}, 253 } 254 255 func constructEVR(b *strings.Builder, info *Info) string { 256 b.Reset() 257 if info.Epoch != 0 { 258 fmt.Fprintf(b, "%d:", info.Epoch) 259 } 260 b.WriteString(info.Version) 261 b.WriteByte('-') 262 b.WriteString(info.Release) 263 return b.String() 264 } 265 266 func constructHint(b *strings.Builder, info *Info) string { 267 b.Reset() 268 if info.Digest != "" { 269 b.WriteString("hash:") 270 switch info.DigestAlgo { 271 case 8: 272 b.WriteString("sha256:") 273 b.WriteString(info.Digest) 274 } 275 } 276 if len(info.Signature) != 0 { 277 prd := packet.NewReader(bytes.NewReader(info.Signature)) 278 p, err := prd.Next() 279 for ; err == nil; p, err = prd.Next() { 280 switch p := p.(type) { 281 case *packet.SignatureV3: 282 if p.SigType != 0 { 283 continue 284 } 285 if b.Len() != 0 { 286 b.WriteByte('|') 287 } 288 fmt.Fprintf(b, "key:%016x", p.IssuerKeyId) 289 case *packet.Signature: 290 if p.SigType != 0 || p.IssuerKeyId == nil { 291 continue 292 } 293 if b.Len() != 0 { 294 b.WriteByte('|') 295 } 296 fmt.Fprintf(b, "key:%016x", *p.IssuerKeyId) 297 } 298 } 299 } 300 return b.String() 301 }