github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/os/rpm/rpm.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build !windows 16 17 // Package rpm extracts packages from rpm database. 18 package rpm 19 20 import ( 21 "context" 22 "fmt" 23 "os" 24 "path/filepath" 25 "slices" 26 "time" 27 28 rpmdb "github.com/erikvarga/go-rpmdb/pkg" 29 "github.com/google/osv-scalibr/extractor" 30 "github.com/google/osv-scalibr/extractor/filesystem" 31 "github.com/google/osv-scalibr/extractor/filesystem/os/osrelease" 32 rpmmeta "github.com/google/osv-scalibr/extractor/filesystem/os/rpm/metadata" 33 "github.com/google/osv-scalibr/inventory" 34 "github.com/google/osv-scalibr/log" 35 "github.com/google/osv-scalibr/plugin" 36 "github.com/google/osv-scalibr/purl" 37 "github.com/google/osv-scalibr/stats" 38 39 // SQLite driver needed for parsing rpmdb.sqlite files. 40 _ "modernc.org/sqlite" 41 ) 42 43 // Name is the name for the RPM extractor 44 const Name = "os/rpm" 45 46 const defaultTimeout = 5 * time.Minute 47 48 var ( 49 requiredDirectory = []string{ 50 "usr/lib/sysimage/rpm/", 51 "var/lib/rpm/", 52 "usr/share/rpm/", 53 } 54 55 requiredFilename = []string{ 56 // Berkley DB (old format) 57 "Packages", 58 // NDB (very rare alternative to sqlite) 59 "Packages.db", 60 // SQLite3 (new format) 61 "rpmdb.sqlite", 62 } 63 ) 64 65 // Config contains RPM specific configuration values 66 type Config struct { 67 // Stats is a stats collector for reporting metrics. 68 Stats stats.Collector 69 // MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If 70 // `FileRequired` gets a bigger file, it will return false, 71 MaxFileSizeBytes int64 72 // Timeout is the timeout duration for parsing the RPM database. 73 Timeout time.Duration 74 } 75 76 // DefaultConfig returns the default configuration values for the RPM extractor. 77 func DefaultConfig() Config { 78 return Config{ 79 Stats: nil, 80 MaxFileSizeBytes: 0, 81 Timeout: defaultTimeout, 82 } 83 } 84 85 // Extractor extracts rpm packages from rpm database. 86 type Extractor struct { 87 stats stats.Collector 88 maxFileSizeBytes int64 89 Timeout time.Duration 90 } 91 92 // New returns an RPM extractor. 93 // 94 // For most use cases, initialize with: 95 // ``` 96 // e := New(DefaultConfig()) 97 // ``` 98 func New(cfg Config) *Extractor { 99 return &Extractor{ 100 stats: cfg.Stats, 101 maxFileSizeBytes: cfg.MaxFileSizeBytes, 102 Timeout: cfg.Timeout, 103 } 104 } 105 106 // NewDefault returns an extractor with the default config settings. 107 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 108 109 // Name of the extractor. 110 func (e Extractor) Name() string { return Name } 111 112 // Version of the extractor. 113 func (e Extractor) Version() int { return 0 } 114 115 // Requirements of the extractor. 116 func (e Extractor) Requirements() *plugin.Capabilities { 117 return &plugin.Capabilities{} 118 } 119 120 // FileRequired returns true if the specified file matches rpm status file pattern. 121 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 122 path := api.Path() 123 dir, filename := filepath.Split(filepath.ToSlash(path)) 124 if !slices.Contains(requiredDirectory, dir) || !slices.Contains(requiredFilename, filename) { 125 return false 126 } 127 128 fileinfo, err := api.Stat() 129 if err != nil { 130 return false 131 } 132 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 133 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded) 134 return false 135 } 136 137 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK) 138 return true 139 } 140 141 func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) { 142 if e.stats == nil { 143 return 144 } 145 e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ 146 Path: path, 147 Result: result, 148 FileSizeBytes: fileSizeBytes, 149 }) 150 } 151 152 // Extract extracts packages from rpm status files passed through the scan input. 153 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 154 pkgs, err := e.extractFromInput(ctx, input) 155 if e.stats != nil { 156 var fileSizeBytes int64 157 if input.Info != nil { 158 fileSizeBytes = input.Info.Size() 159 } 160 e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ 161 Path: input.Path, 162 Result: filesystem.ExtractorErrorToFileExtractedResult(err), 163 FileSizeBytes: fileSizeBytes, 164 }) 165 } 166 return inventory.Inventory{Packages: pkgs}, err 167 } 168 169 func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Package, error) { 170 absPath, err := input.GetRealPath() 171 if err != nil { 172 return nil, fmt.Errorf("GetRealPath(%v): %w", input, err) 173 } 174 if input.Root == "" { 175 // The file got copied to a temporary dir, remove it at the end. 176 defer func() { 177 dir := filepath.Dir(absPath) 178 if err := os.RemoveAll(dir); err != nil { 179 log.Errorf("os.RemoveAll(%q): %v", dir, err) 180 } 181 }() 182 } 183 rpmPkgs, err := e.parseRPMDB(ctx, absPath) 184 if err != nil { 185 return nil, fmt.Errorf("ParseRPMDB(%s): %w", absPath, err) 186 } 187 188 m, err := osrelease.GetOSRelease(input.FS) 189 if err != nil { 190 log.Errorf("osrelease.ParseOsRelease(): %v", err) 191 } 192 193 pkgs := []*extractor.Package{} 194 for _, p := range rpmPkgs { 195 metadata := &rpmmeta.Metadata{ 196 PackageName: p.Name, 197 SourceRPM: p.SourceRPM, 198 Epoch: p.Epoch, 199 OSName: m["NAME"], 200 OSPrettyName: m["PRETTY_NAME"], 201 OSID: m["ID"], 202 OSVersionID: m["VERSION_ID"], 203 OSBuildID: m["BUILD_ID"], 204 Vendor: p.Vendor, 205 Architecture: p.Architecture, 206 } 207 208 pkgs = append(pkgs, &extractor.Package{ 209 Name: p.Name, 210 Version: fmt.Sprintf("%s-%s", p.Version, p.Release), 211 PURLType: purl.TypeRPM, 212 Locations: []string{input.Path}, 213 Metadata: metadata, 214 Licenses: []string{p.License}, 215 }) 216 } 217 218 return pkgs, nil 219 } 220 221 // parseRPMDB returns a slice of OS packages parsed from a RPM DB. 222 func (e Extractor) parseRPMDB(ctx context.Context, path string) ([]rpmPackageInfo, error) { 223 db, err := rpmdb.Open(path) 224 if err != nil { 225 return nil, err 226 } 227 defer db.Close() 228 229 var pkgs []*rpmdb.PackageInfo 230 if e.Timeout == 0 { 231 pkgs, err = db.ListPackages() 232 if err != nil { 233 return nil, err 234 } 235 } else { 236 ctx, cancelFunc := context.WithTimeout(ctx, e.Timeout) 237 defer cancelFunc() 238 239 // The timeout is only for corrupt bdb databases 240 pkgs, err = db.ListPackagesWithContext(ctx) 241 if err != nil { 242 return nil, err 243 } 244 } 245 246 var result []rpmPackageInfo 247 for _, pkg := range pkgs { 248 newPkg := rpmPackageInfo{ 249 Name: pkg.Name, 250 Version: pkg.Version, 251 Release: pkg.Release, 252 Epoch: pkg.EpochNum(), 253 SourceRPM: pkg.SourceRpm, 254 Vendor: pkg.Vendor, 255 Architecture: pkg.Arch, 256 License: pkg.License, 257 } 258 259 result = append(result, newPkg) 260 } 261 262 return result, nil 263 } 264 265 type rpmPackageInfo struct { 266 Name string 267 Version string 268 Release string 269 Epoch int 270 SourceRPM string 271 Maintainer string 272 Vendor string 273 Architecture string 274 License string 275 }