github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/os/dpkg/dpkg.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package dpkg extracts packages from dpkg database. 16 package dpkg 17 18 import ( 19 "bufio" 20 "context" 21 "errors" 22 "fmt" 23 "io" 24 "net/textproto" 25 "path/filepath" 26 "strings" 27 28 "github.com/google/osv-scalibr/extractor" 29 "github.com/google/osv-scalibr/extractor/filesystem" 30 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 31 dpkgmeta "github.com/google/osv-scalibr/extractor/filesystem/os/dpkg/metadata" 32 "github.com/google/osv-scalibr/extractor/filesystem/os/osrelease" 33 "github.com/google/osv-scalibr/inventory" 34 "github.com/google/osv-scalibr/inventory/vex" 35 "github.com/google/osv-scalibr/log" 36 "github.com/google/osv-scalibr/plugin" 37 "github.com/google/osv-scalibr/purl" 38 "github.com/google/osv-scalibr/stats" 39 ) 40 41 const ( 42 // Name is the unique name of this extractor. 43 Name = "os/dpkg" 44 45 // defaultMaxFileSizeBytes is the maximum file size an extractor will unmarshal. 46 // If Extract gets a bigger file, it will return an error. 47 defaultMaxFileSizeBytes = 100 * units.MiB 48 49 // defaultIncludeNotInstalled is the default value for the IncludeNotInstalled option. 50 defaultIncludeNotInstalled = false 51 ) 52 53 // Config is the configuration for the Extractor. 54 type Config struct { 55 // Stats is a stats collector for reporting metrics. 56 Stats stats.Collector 57 // MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If 58 // `FileRequired` gets a bigger file, it will return false, 59 MaxFileSizeBytes int64 60 // IncludeNotInstalled includes packages that are not installed 61 // (e.g. `deinstall`, `purge`, and those missing a status field). 62 IncludeNotInstalled bool 63 } 64 65 // DefaultConfig returns the default configuration for the DPKG extractor. 66 func DefaultConfig() Config { 67 return Config{ 68 MaxFileSizeBytes: defaultMaxFileSizeBytes, 69 IncludeNotInstalled: defaultIncludeNotInstalled, 70 } 71 } 72 73 // Extractor extracts packages from DPKG files. 74 type Extractor struct { 75 stats stats.Collector 76 maxFileSizeBytes int64 77 includeNotInstalled bool 78 } 79 80 // New returns a DPKG extractor. 81 // 82 // For most use cases, initialize with: 83 // ``` 84 // e := New(DefaultConfig()) 85 // ``` 86 func New(cfg Config) *Extractor { 87 return &Extractor{ 88 stats: cfg.Stats, 89 maxFileSizeBytes: cfg.MaxFileSizeBytes, 90 includeNotInstalled: cfg.IncludeNotInstalled, 91 } 92 } 93 94 // NewDefault returns an extractor with the default config settings. 95 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 96 97 // Config returns the configuration of the extractor. 98 func (e Extractor) Config() Config { 99 return Config{ 100 Stats: e.stats, 101 MaxFileSizeBytes: e.maxFileSizeBytes, 102 IncludeNotInstalled: e.includeNotInstalled, 103 } 104 } 105 106 // Name of the extractor. 107 func (e Extractor) Name() string { return Name } 108 109 // Version of the extractor. 110 func (e Extractor) Version() int { return 0 } 111 112 // Requirements of the extractor. 113 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 114 115 // FileRequired returns true if the specified file matches dpkg status file pattern. 116 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 117 path := api.Path() 118 if !fileRequired(path) { 119 return false 120 } 121 122 fileinfo, err := api.Stat() 123 if err != nil { 124 return false 125 } 126 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 127 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded) 128 return false 129 } 130 131 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK) 132 return true 133 } 134 135 func fileRequired(path string) bool { 136 normalized := filepath.ToSlash(path) 137 138 // Normal status file matching DPKG or OPKG format 139 if normalized == "var/lib/dpkg/status" || normalized == "usr/lib/opkg/status" { 140 return true 141 } 142 143 // Should only match status files in status.d directory. 144 return strings.HasPrefix(normalized, "var/lib/dpkg/status.d/") && !strings.HasSuffix(normalized, ".md5sums") 145 } 146 147 func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) { 148 if e.stats == nil { 149 return 150 } 151 e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ 152 Path: path, 153 Result: result, 154 FileSizeBytes: fileSizeBytes, 155 }) 156 } 157 158 // Extract extracts packages from dpkg status files passed through the scan input. 159 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 160 pkgs, err := e.extractFromInput(ctx, input) 161 if e.stats != nil { 162 var fileSizeBytes int64 163 if input.Info != nil { 164 fileSizeBytes = input.Info.Size() 165 } 166 e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ 167 Path: input.Path, 168 Result: filesystem.ExtractorErrorToFileExtractedResult(err), 169 FileSizeBytes: fileSizeBytes, 170 }) 171 } 172 return inventory.Inventory{Packages: pkgs}, err 173 } 174 175 func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Package, error) { 176 m, err := osrelease.GetOSRelease(input.FS) 177 if err != nil { 178 log.Errorf("osrelease.ParseOsRelease(): %v", err) 179 } 180 181 rd := textproto.NewReader(bufio.NewReader(input.Reader)) 182 pkgs := []*extractor.Package{} 183 for eof := false; !eof; { 184 // Return if canceled or exceeding deadline. 185 if err := ctx.Err(); err != nil { 186 return pkgs, fmt.Errorf("%s halted due to context error: %w", e.Name(), err) 187 } 188 189 h, err := rd.ReadMIMEHeader() 190 if err != nil { 191 if errors.Is(err, io.EOF) { 192 // We might still have one more line of data 193 // so return only after it's been parsed. 194 eof = true 195 } else { 196 if strings.Contains(input.Path, "status.d") { 197 log.Warnf("Failed to read MIME header from %q: %v", input.Path, err) 198 return []*extractor.Package{}, nil 199 } 200 return pkgs, err 201 } 202 } 203 204 // Skip empty lines 205 if len(h) == 0 { 206 continue 207 } 208 209 // Distroless distributions have their packages in status.d, which does not contain the Status 210 // value. 211 if !e.includeNotInstalled && (!strings.Contains(input.Path, "status.d") || h.Get("Status") != "") { 212 if h.Get("Status") == "" { 213 log.Warnf("Package %q has no status field", h.Get("Package")) 214 continue 215 } 216 installed, err := statusInstalled(h.Get("Status")) 217 if err != nil { 218 return pkgs, fmt.Errorf("statusInstalled(%q): %w", h.Get("Status"), err) 219 } 220 if !installed { 221 continue 222 } 223 } 224 225 pkgName := h.Get("Package") 226 pkgVersion := h.Get("Version") 227 if pkgName == "" || pkgVersion == "" { 228 if !eof { // Expected when reaching the last line. 229 log.Warnf("DPKG package name or version is empty (name: %q, version: %q)", pkgName, pkgVersion) 230 } 231 continue 232 } 233 234 description := strings.ToLower(h.Get("Description")) 235 var vexes []*vex.PackageExploitabilitySignal 236 if strings.Contains(description, "transitional package") || 237 strings.Contains(description, "transitional dummy package") || 238 strings.Contains(description, "transitional empty package") { 239 vexes = append(vexes, &vex.PackageExploitabilitySignal{ 240 Plugin: Name, 241 Justification: vex.ComponentNotPresent, 242 MatchesAllVulns: true, 243 }) 244 } 245 246 purlType := purl.TypeDebian 247 if input.Path == "usr/lib/opkg/status" { 248 purlType = purl.TypeOpkg 249 } 250 251 p := &extractor.Package{ 252 Name: pkgName, 253 Version: pkgVersion, 254 PURLType: purlType, 255 Metadata: &dpkgmeta.Metadata{ 256 PackageName: pkgName, 257 PackageVersion: pkgVersion, 258 Status: h.Get("Status"), 259 OSID: m["ID"], 260 OSVersionCodename: m["VERSION_CODENAME"], 261 OSVersionID: m["VERSION_ID"], 262 Maintainer: h.Get("Maintainer"), 263 Architecture: h.Get("Architecture"), 264 }, 265 Locations: []string{input.Path}, 266 ExploitabilitySignals: vexes, 267 } 268 sourceName, sourceVersion, err := parseSourceNameVersion(h.Get("Source")) 269 if err != nil { 270 return pkgs, fmt.Errorf("parseSourceNameVersion(%q): %w", h.Get("Source"), err) 271 } 272 if sourceName != "" { 273 p.Metadata.(*dpkgmeta.Metadata).SourceName = sourceName 274 p.Metadata.(*dpkgmeta.Metadata).SourceVersion = sourceVersion 275 } 276 277 pkgs = append(pkgs, p) 278 } 279 return pkgs, nil 280 } 281 282 func statusInstalled(status string) (bool, error) { 283 // Status field format: "want flag status", e.g. "install ok installed" 284 // The package is currently installed if the status field is set to installed. 285 // Other fields just show the intent of the package manager but not the current state. 286 parts := strings.Split(status, " ") 287 if len(parts) != 3 { 288 return false, fmt.Errorf("invalid DPKG Status field %q", status) 289 } 290 return parts[2] == "installed", nil 291 } 292 293 func parseSourceNameVersion(source string) (string, string, error) { 294 if source == "" { 295 return "", "", nil 296 } 297 // Format is either "name" or "name (version)" 298 if idx := strings.Index(source, " ("); idx != -1 { 299 if !strings.HasSuffix(source, ")") { 300 return "", "", fmt.Errorf("invalid DPKG Source field: %q", source) 301 } 302 n := source[:idx] 303 v := source[idx+2 : len(source)-1] 304 return n, v, nil 305 } 306 return source, "", nil 307 }