github.com/quay/claircore@v1.5.28/dpkg/scanner.go (about) 1 // Package dpkg implements a package indexer for dpkg packages. 2 package dpkg 3 4 import ( 5 "bufio" 6 "context" 7 "crypto/md5" 8 "encoding/hex" 9 "errors" 10 "fmt" 11 "io" 12 "io/fs" 13 "net/textproto" 14 "path/filepath" 15 "runtime/trace" 16 "slices" 17 "strings" 18 19 "github.com/quay/zlog" 20 21 "github.com/quay/claircore" 22 "github.com/quay/claircore/indexer" 23 ) 24 25 const ( 26 name = "dpkg" 27 kind = "package" 28 version = "6" 29 ) 30 31 var ( 32 _ indexer.VersionedScanner = (*Scanner)(nil) 33 _ indexer.PackageScanner = (*Scanner)(nil) 34 ) 35 36 // Scanner implements the scanner.PackageScanner interface. 37 // 38 // This looks for directories that look like dpkg databases and examines the 39 // "status" file it finds there. 40 // 41 // The zero value is ready to use. 42 type Scanner struct{} 43 44 // Name implements scanner.VersionedScanner. 45 func (ps *Scanner) Name() string { return name } 46 47 // Version implements scanner.VersionedScanner. 48 func (ps *Scanner) Version() string { return version } 49 50 // Kind implements scanner.VersionedScanner. 51 func (ps *Scanner) Kind() string { return kind } 52 53 // Scan attempts to find a dpkg database within the layer and read all of the 54 // installed packages it can find in the "status" file. 55 // 56 // It's expected to return (nil, nil) if there's no dpkg database in the layer. 57 // 58 // It does not respect any dpkg configuration files. 59 func (ps *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*claircore.Package, error) { 60 // Preamble 61 defer trace.StartRegion(ctx, "Scanner.Scan").End() 62 trace.Log(ctx, "layer", layer.Hash.String()) 63 ctx = zlog.ContextWithValues(ctx, 64 "component", "dpkg/Scanner.Scan", 65 "version", ps.Version(), 66 "layer", layer.Hash.String()) 67 zlog.Debug(ctx).Msg("start") 68 defer zlog.Debug(ctx).Msg("done") 69 70 sys, err := layer.FS() 71 if err != nil { 72 return nil, fmt.Errorf("dpkg: opening layer failed: %w", err) 73 } 74 75 // This is a map keyed by directory. A "score" of 2 means this is almost 76 // certainly a dpkg database. 77 loc := make(map[string]int) 78 walk := func(p string, d fs.DirEntry, err error) error { 79 if err != nil { 80 return err 81 } 82 switch dir, f := filepath.Split(p); { 83 case f == "status" && !d.IsDir(): 84 loc[dir]++ 85 case f == "info" && d.IsDir(): 86 loc[dir]++ 87 } 88 return nil 89 } 90 91 if err := fs.WalkDir(sys, ".", walk); err != nil { 92 return nil, err 93 } 94 zlog.Debug(ctx).Msg("scanned for possible databases") 95 96 // If we didn't find anything, this loop is completely skipped. 97 var pkgs []*claircore.Package 98 var found *packages 99 for p, x := range loc { 100 if x != 2 { // If we didn't find both files, skip this directory. 101 continue 102 } 103 if found == nil { 104 found = newPackages() 105 } else { 106 found.Reset() 107 } 108 if err := loadDatabase(ctx, sys, p, found, &pkgs); err != nil { 109 return nil, err 110 } 111 } 112 113 // This shouldn't need to use the "Stable" variant, as Name+Version should 114 // be unique. 115 slices.SortFunc(pkgs, sortpkg) 116 117 return pkgs, nil 118 } 119 120 type packages struct { 121 bin map[string]*claircore.Package 122 src map[string]*claircore.Package 123 } 124 125 func newPackages() *packages { 126 // Guess at initial sizing. 127 return &packages{ 128 bin: make(map[string]*claircore.Package, 1024), 129 src: make(map[string]*claircore.Package, 1024), 130 } 131 } 132 133 func (p *packages) Reset() { 134 clear(p.bin) 135 clear(p.src) 136 } 137 138 // Sortpkg is a function for [slices.SortFunc]. 139 // Defined this way to make it usable in tests. 140 func sortpkg(a, b *claircore.Package) int { 141 cmp := strings.Compare(a.Name, b.Name) 142 if cmp == 0 { 143 return strings.Compare(a.Version, b.Version) 144 } 145 return cmp 146 } 147 148 // LoadDatabase loads the "status" and "info" files in the indicated directory. 149 // 150 // "Found"is used for scratch space and results are appended to the slice pointed to by "out". 151 func loadDatabase(ctx context.Context, sys fs.FS, dir string, found *packages, out *[]*claircore.Package) error { 152 zlog.Debug(ctx).Msg("examining package database") 153 154 // We want the "status" file. 155 fn := filepath.Join(dir, "status") 156 db, err := sys.Open(fn) 157 switch { 158 case errors.Is(err, nil): 159 case errors.Is(err, fs.ErrNotExist): 160 zlog.Debug(ctx). 161 Str("filename", fn). 162 Msg("false positive") 163 return err 164 default: 165 return fmt.Errorf("reading status file from layer failed: %w", err) 166 } 167 168 // The database is actually an RFC822-like message with "\n\n" 169 // separators, so don't be alarmed by the usage of the "net/textproto" 170 // package here. 171 tp := textproto.NewReader(bufio.NewReader(db)) 172 if err := parseStatus(ctx, found, fn, tp); err != nil { 173 return fmt.Errorf("unable to parse status file %q: %w", fn, err) 174 } 175 176 const suffix = ".md5sums" 177 ms, err := fs.Glob(sys, filepath.Join(dir, "info", "*"+suffix)) 178 if err != nil { 179 panic(fmt.Sprintf("programmer error: %v", err)) 180 } 181 hash := md5.New() 182 for _, n := range ms { 183 k := strings.TrimSuffix(filepath.Base(n), suffix) 184 if i := strings.IndexRune(k, ':'); i != -1 { 185 k = k[:i] 186 } 187 p, ok := found.bin[k] 188 if !ok { 189 zlog.Debug(ctx). 190 Str("package", k). 191 Msg("extra metadata found, ignoring") 192 continue 193 } 194 f, err := sys.Open(n) 195 if err != nil { 196 return fmt.Errorf("unable to open file %q: %w", n, err) 197 } 198 hash.Reset() 199 _, err = io.Copy(hash, f) 200 f.Close() 201 if err != nil { 202 zlog.Warn(ctx). 203 Err(err). 204 Str("package", n). 205 Msg("unable to read package metadata") 206 continue 207 } 208 p.RepositoryHint = hex.EncodeToString(hash.Sum(nil)) 209 } 210 zlog.Debug(ctx). 211 Int("count", len(found.bin)). 212 Msg("found packages") 213 214 for _, pkg := range found.bin { 215 *out = append(*out, pkg) 216 } 217 218 return nil 219 } 220 221 // ParseStatus parses the dpkg "status" file in "tp". 222 // 223 // Packages are stored in "found". 224 func parseStatus(ctx context.Context, found *packages, fn string, tp *textproto.Reader) error { 225 Restart: 226 hdr, err := tp.ReadMIMEHeader() 227 for ; err == nil && len(hdr) > 0; hdr, err = tp.ReadMIMEHeader() { 228 var ok, installed bool 229 for _, s := range strings.Fields(hdr.Get("Status")) { 230 switch s { 231 case "installed": 232 installed = true 233 case "ok": 234 ok = true 235 } 236 } 237 if !ok || !installed { 238 continue 239 } 240 name := hdr.Get("Package") 241 v := hdr.Get("Version") 242 p := &claircore.Package{ 243 Name: name, 244 Version: v, 245 Kind: claircore.BINARY, 246 Arch: hdr.Get("Architecture"), 247 PackageDB: fn, 248 } 249 if src := hdr.Get("Source"); src != "" { 250 // This "Name (Version)" scheme is handled by dpkg-query(1), so we 251 // do similar. 252 name, ver, ok := strings.Cut(src, " ") 253 if ok { 254 ver = strings.Trim(ver, "()") 255 } else { 256 name = src 257 ver = v 258 } 259 260 srcpkg, ok := found.src[name] 261 if !ok { 262 srcpkg = &claircore.Package{ 263 Name: name, 264 Kind: claircore.SOURCE, 265 Version: ver, 266 PackageDB: fn, 267 } 268 found.src[name] = srcpkg 269 } 270 p.Source = srcpkg 271 } else { 272 // If there's not an explicit source package listed, assume it's a 273 // 1-to-1 mapping. 274 p.Source = &claircore.Package{ 275 Name: name, 276 Kind: claircore.SOURCE, 277 Version: v, 278 PackageDB: fn, 279 } 280 } 281 282 found.bin[name] = p 283 } 284 switch { 285 case errors.Is(err, io.EOF): 286 default: 287 zlog.Warn(ctx).Err(err).Msg("unable to read entry") 288 goto Restart 289 } 290 return nil 291 }