github.com/quay/claircore@v1.5.28/python/packagescanner.go (about) 1 // Package python contains components for interrogating python packages in 2 // container layers. 3 package python 4 5 import ( 6 "bufio" 7 "bytes" 8 "context" 9 "fmt" 10 "io/fs" 11 "net/textproto" 12 "path" 13 "path/filepath" 14 "runtime/trace" 15 "strings" 16 17 "github.com/quay/zlog" 18 19 "github.com/quay/claircore" 20 "github.com/quay/claircore/indexer" 21 "github.com/quay/claircore/pkg/pep440" 22 ) 23 24 var ( 25 _ indexer.VersionedScanner = (*Scanner)(nil) 26 _ indexer.PackageScanner = (*Scanner)(nil) 27 28 Repository = claircore.Repository{ 29 Name: "pypi", 30 URI: "https://pypi.org/simple", 31 } 32 ) 33 34 // Scanner implements the scanner.PackageScanner interface. 35 // 36 // It looks for directories that seem like wheels or eggs, and looks at the 37 // metadata recorded there. This type attempts to follow the specs documented by 38 // the [PyPA], with the newer PEPs being preferred. 39 // 40 // The zero value is ready to use. 41 // 42 // [PyPA]: https://packaging.python.org/en/latest/specifications/recording-installed-packages/ 43 type Scanner struct{} 44 45 // Name implements scanner.VersionedScanner. 46 func (*Scanner) Name() string { return "python" } 47 48 // Version implements scanner.VersionedScanner. 49 func (*Scanner) Version() string { return "4" } 50 51 // Kind implements scanner.VersionedScanner. 52 func (*Scanner) Kind() string { return "package" } 53 54 // Scan attempts to find wheel or egg info directories and record the package 55 // information there. 56 // 57 // A return of (nil, nil) is expected if there's nothing found. 58 func (ps *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*claircore.Package, error) { 59 defer trace.StartRegion(ctx, "Scanner.Scan").End() 60 trace.Log(ctx, "layer", layer.Hash.String()) 61 ctx = zlog.ContextWithValues(ctx, 62 "component", "python/Scanner.Scan", 63 "version", ps.Version(), 64 "layer", layer.Hash.String()) 65 zlog.Debug(ctx).Msg("start") 66 defer zlog.Debug(ctx).Msg("done") 67 if err := ctx.Err(); err != nil { 68 return nil, err 69 } 70 71 sys, err := layer.FS() 72 if err != nil { 73 return nil, fmt.Errorf("python: unable to open layer: %w", err) 74 } 75 76 ms, err := findDeliciousEgg(ctx, sys) 77 if err != nil { 78 return nil, fmt.Errorf("python: failed to find delicious egg: %w", err) 79 } 80 var ret []*claircore.Package 81 for _, n := range ms { 82 b, err := fs.ReadFile(sys, n) 83 if err != nil { 84 return nil, fmt.Errorf("python: unable to read file: %w", err) 85 } 86 // The two files we read are in RFC8288 (email message) format, and the 87 // keys we care about are shared. 88 rd := textproto.NewReader(bufio.NewReader(bytes.NewReader(b))) 89 hdr, err := rd.ReadMIMEHeader() 90 if err != nil && hdr == nil { 91 zlog.Warn(ctx). 92 Err(err). 93 Str("path", n). 94 Msg("unable to read metadata, skipping") 95 continue 96 } 97 v, err := pep440.Parse(hdr.Get("Version")) 98 if err != nil { 99 zlog.Warn(ctx). 100 Err(err). 101 Str("path", n). 102 Msg("couldn't parse the version, skipping") 103 continue 104 } 105 pkgDB := filepath.Join(n, "..", "..") 106 // If the package is .egg-info format 107 // with just the .egg-info file, 108 // only go up one level. 109 if strings.HasSuffix(n, `.egg-info`) { 110 pkgDB = filepath.Join(n, "..") 111 } 112 ret = append(ret, &claircore.Package{ 113 Name: strings.ToLower(hdr.Get("Name")), 114 Version: v.String(), 115 PackageDB: "python:" + pkgDB, 116 Filepath: n, 117 Kind: claircore.BINARY, 118 NormalizedVersion: v.Version(), 119 // TODO Is there some way to pick up on where a wheel or egg was 120 // found? 121 RepositoryHint: "https://pypi.org/simple", 122 }) 123 } 124 return ret, nil 125 } 126 127 // DefaultRepository implements [indexer.DefaultRepoScanner] 128 func (Scanner) DefaultRepository(ctx context.Context) *claircore.Repository { 129 return &Repository 130 } 131 132 // findDeliciousEgg finds eggs and wheels. 133 // 134 // Three formats are supported at this time: 135 // 136 // * .egg - only when .egg is a directory. .egg as a zipfile is not supported at this time. 137 // * .egg-info - both as a standalone file and a directory which contains PKG-INFO. 138 // * wheel - only .dist-info/METADATA is supported. 139 // 140 // See https://setuptools.pypa.io/en/latest/deprecated/python_eggs.html for more information about Python Eggs 141 // and https://peps.python.org/pep-0427/ for more information about Wheel. 142 func findDeliciousEgg(ctx context.Context, sys fs.FS) (out []string, err error) { 143 // Is this layer an rpm layer? 144 // 145 // If so, files in the disto-managed directory can be skipped. 146 var rpm bool 147 for _, p := range []string{ 148 "var/lib/rpm/Packages", 149 "var/lib/rpm/rpmdb.sqlite", 150 "var/lib/rpm/Packages.db", 151 } { 152 if fi, err := fs.Stat(sys, p); err == nil && fi.Mode().IsRegular() { 153 rpm = true 154 break 155 } 156 } 157 // Is this layer a dpkg layer? 158 var dpkg bool 159 if fi, err := fs.Stat(sys, `var/lib/dpkg/status`); err == nil && fi.Mode().IsRegular() { 160 dpkg = true 161 } 162 163 return out, fs.WalkDir(sys, ".", func(p string, d fs.DirEntry, err error) error { 164 ev := zlog.Debug(ctx). 165 Str("file", p) 166 var success bool 167 defer func() { 168 if !success { 169 ev.Discard().Send() 170 } 171 }() 172 switch { 173 case err != nil: 174 return err 175 case (rpm || dpkg) && d.Type().IsDir(): 176 // Skip one level up from the "packages" directory so the walk also 177 // skips the standard library. 178 var pat string 179 switch { 180 case rpm: 181 pat = `usr/lib*/python[23].*` 182 ev = ev.Bool("rpm_dir", true) 183 case dpkg: 184 pat = `usr/lib*/python[23]` 185 ev = ev.Bool("dpkg_dir", true) 186 default: 187 panic("programmer error: unreachable") 188 } 189 if m, _ := path.Match(pat, p); m { 190 ev.Msg("skipping directory") 191 return fs.SkipDir 192 } 193 fallthrough 194 case !d.Type().IsRegular(): 195 // Should we chase symlinks with the correct name? 196 return nil 197 case strings.HasPrefix(filepath.Base(p), ".wh."): 198 return nil 199 case strings.HasSuffix(p, `.egg/EGG-INFO/PKG-INFO`): 200 ev = ev.Str("kind", ".egg") 201 case strings.HasSuffix(p, `.egg-info`): 202 fallthrough 203 case strings.HasSuffix(p, `.egg-info/PKG-INFO`): 204 ev = ev.Str("kind", ".egg-info") 205 case strings.HasSuffix(p, `.dist-info/METADATA`): 206 ev = ev.Str("kind", "wheel") 207 // See if we can discern the installer. 208 var installer string 209 ip := path.Join(path.Dir(p), `INSTALLER`) 210 if ic, err := fs.ReadFile(sys, ip); err == nil { 211 installer = string(bytes.TrimSpace(ic)) 212 ev = ev.Str("installer", installer) 213 } 214 if _, ok := blocklist[installer]; ok { 215 ev.Msg("skipping package") 216 return nil 217 } 218 default: 219 return nil 220 } 221 ev.Msg("found package") 222 success = true 223 out = append(out, p) 224 return nil 225 }) 226 } 227 228 // Blocklist of installers to ignore. 229 // 230 // Currently, rpm is the only known package manager that actually populates this 231 // information. 232 var blocklist = map[string]struct{}{ 233 "rpm": {}, 234 "dpkg": {}, 235 "apk": {}, 236 }