github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/debian/parse_dpkg_db.go (about) 1 package debian 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "regexp" 9 "strings" 10 11 "github.com/dustin/go-humanize" 12 "github.com/mitchellh/mapstructure" 13 14 "github.com/anchore/syft/syft/artifact" 15 "github.com/anchore/syft/syft/file" 16 "github.com/anchore/syft/syft/pkg" 17 "github.com/anchore/syft/syft/pkg/cataloger/generic" 18 "github.com/lineaje-labs/syft/internal" 19 "github.com/lineaje-labs/syft/internal/log" 20 ) 21 22 var ( 23 errEndOfPackages = fmt.Errorf("no more packages to read") 24 sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`) 25 ) 26 27 // parseDpkgDB reads a dpkg database "status" file (and surrounding data files) and returns the packages and relationships found. 28 func parseDpkgDB( 29 resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser, 30 ) ([]pkg.Package, []artifact.Relationship, error) { 31 metadata, err := parseDpkgStatus(reader) 32 if err != nil { 33 return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err) 34 } 35 36 var pkgs []pkg.Package 37 for _, m := range metadata { 38 pkgs = append(pkgs, newDpkgPackage(m, reader.Location, resolver, env.LinuxRelease)) 39 } 40 41 return pkgs, associateRelationships(pkgs), nil 42 } 43 44 // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. 45 func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) { 46 buffedReader := bufio.NewReader(reader) 47 var metadata []pkg.DpkgDBEntry 48 49 continueProcessing := true 50 for continueProcessing { 51 entry, err := parseDpkgStatusEntry(buffedReader) 52 if err != nil { 53 if errors.Is(err, errEndOfPackages) { 54 continueProcessing = false 55 } else { 56 return nil, err 57 } 58 } 59 if entry == nil { 60 continue 61 } 62 63 metadata = append(metadata, *entry) 64 } 65 66 return metadata, nil 67 } 68 69 // dpkgExtractedMetadata is an adapter struct to capture the fields from the dpkg status file, however, the final 70 // pkg.DpkgMetadata struct has different types for some fields (e.g. Provides, Depends, and PreDepends is []string, not a string). 71 type dpkgExtractedMetadata struct { 72 Package string `mapstructure:"Package"` 73 Source string `mapstructure:"Source"` 74 Version string `mapstructure:"Version"` 75 SourceVersion string `mapstructure:"SourceVersion"` 76 Architecture string `mapstructure:"Architecture"` 77 Maintainer string `mapstructure:"Maintainer"` 78 InstalledSize int `mapstructure:"InstalledSize"` 79 Description string `mapstructure:"Description"` 80 Provides string `mapstructure:"Provides"` 81 Depends string `mapstructure:"Depends"` 82 PreDepends string `mapstructure:"PreDepends"` // note: original doc is Pre-Depends 83 } 84 85 // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. 86 func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) { 87 var retErr error 88 dpkgFields, err := extractAllFields(reader) 89 if err != nil { 90 if !errors.Is(err, errEndOfPackages) { 91 return nil, err 92 } 93 if len(dpkgFields) == 0 { 94 return nil, err 95 } 96 retErr = err 97 } 98 99 raw := dpkgExtractedMetadata{} 100 err = mapstructure.Decode(dpkgFields, &raw) 101 if err != nil { 102 return nil, err 103 } 104 105 sourceName, sourceVersion := extractSourceVersion(raw.Source) 106 if sourceVersion != "" { 107 raw.SourceVersion = sourceVersion 108 raw.Source = sourceName 109 } 110 111 if raw.Package == "" { 112 return nil, retErr 113 } 114 115 entry := pkg.DpkgDBEntry{ 116 Package: raw.Package, 117 Source: raw.Source, 118 Version: raw.Version, 119 SourceVersion: raw.SourceVersion, 120 Architecture: raw.Architecture, 121 Maintainer: raw.Maintainer, 122 InstalledSize: raw.InstalledSize, 123 Description: raw.Description, 124 Provides: splitPkgList(raw.Provides), 125 Depends: splitPkgList(raw.Depends), 126 PreDepends: splitPkgList(raw.PreDepends), 127 } 128 129 // there may be an optional conffiles section that we should persist as files 130 if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil { 131 if sectionStr, ok := conffilesSection.(string); ok { 132 entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr)) 133 } 134 } 135 136 if entry.Files == nil { 137 // ensure the default value for a collection is never nil since this may be shown as JSON 138 entry.Files = make([]pkg.DpkgFileRecord, 0) 139 } 140 141 return &entry, retErr 142 } 143 144 func splitPkgList(pkgList string) (ret []string) { 145 fields := strings.Split(pkgList, ",") 146 for _, field := range fields { 147 field = strings.TrimSpace(field) 148 if field != "" { 149 ret = append(ret, field) 150 } 151 } 152 return ret 153 } 154 155 func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) { 156 dpkgFields := make(map[string]interface{}) 157 var key string 158 159 for { 160 line, err := reader.ReadString('\n') 161 if err != nil { 162 if errors.Is(err, io.EOF) { 163 return dpkgFields, errEndOfPackages 164 } 165 return nil, err 166 } 167 168 line = strings.TrimRight(line, "\n") 169 170 // empty line indicates end of entry 171 if len(line) == 0 { 172 // if the entry has not started, keep parsing lines 173 if len(dpkgFields) == 0 { 174 continue 175 } 176 break 177 } 178 179 switch { 180 case strings.HasPrefix(line, " "): 181 // a field-body continuation 182 if len(key) == 0 { 183 return nil, fmt.Errorf("no match for continuation: line: '%s'", line) 184 } 185 186 val, ok := dpkgFields[key] 187 if !ok { 188 return nil, fmt.Errorf("no previous key exists, expecting: %s", key) 189 } 190 // concatenate onto previous value 191 val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)) 192 dpkgFields[key] = val 193 default: 194 // parse a new key 195 var val interface{} 196 key, val, err = handleNewKeyValue(line) 197 if err != nil { 198 log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err) 199 continue 200 } 201 202 if _, ok := dpkgFields[key]; ok { 203 return nil, fmt.Errorf("duplicate key discovered: %s", key) 204 } 205 dpkgFields[key] = val 206 } 207 } 208 return dpkgFields, nil 209 } 210 211 // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if 212 // of the "<name>" form, then return name and nil 213 func extractSourceVersion(source string) (string, string) { 214 // special handling for the Source field since it has formatted data 215 match := internal.MatchNamedCaptureGroups(sourceRegexp, source) 216 return match["name"], match["version"] 217 } 218 219 // handleNewKeyValue parse a new key-value pair from the given unprocessed line 220 func handleNewKeyValue(line string) (key string, val interface{}, err error) { 221 if i := strings.Index(line, ":"); i > 0 { 222 key = strings.TrimSpace(line[0:i]) 223 // mapstruct cant handle "-" 224 key = strings.ReplaceAll(key, "-", "") 225 val := strings.TrimSpace(line[i+1:]) 226 227 // further processing of values based on the key that was discovered 228 switch key { 229 case "InstalledSize": 230 s, err := humanize.ParseBytes(val) 231 if err != nil { 232 return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err) 233 } 234 return key, int(s), nil 235 default: 236 return key, val, nil 237 } 238 } 239 240 return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line) 241 } 242 243 // associateRelationships will create relationships between packages based on the "Depends", "Pre-Depends", and "Provides" 244 // fields for installed packages. if there is an installed package that has a dependency that is (somehow) not installed, 245 // then that relationship (between the installed and uninstalled package) will NOT be created. 246 func associateRelationships(pkgs []pkg.Package) (relationships []artifact.Relationship) { 247 // map["provides" + "package"] -> packages that provide that package 248 lookup := make(map[string][]pkg.Package) 249 250 // read provided and add as keys for lookup keys as well as package names 251 for _, p := range pkgs { 252 meta, ok := p.Metadata.(pkg.DpkgDBEntry) 253 if !ok { 254 log.Warnf("cataloger failed to extract dpkg 'provides' metadata for package %+v", p.Name) 255 continue 256 } 257 lookup[p.Name] = append(lookup[p.Name], p) 258 for _, provides := range meta.Provides { 259 k := stripVersionSpecifier(provides) 260 lookup[k] = append(lookup[k], p) 261 } 262 } 263 264 // read "Depends" and "Pre-Depends" and match with keys 265 for _, p := range pkgs { 266 meta, ok := p.Metadata.(pkg.DpkgDBEntry) 267 if !ok { 268 log.Warnf("cataloger failed to extract dpkg 'dependency' metadata for package %+v", p.Name) 269 continue 270 } 271 272 var allDeps []string 273 allDeps = append(allDeps, meta.Depends...) 274 allDeps = append(allDeps, meta.PreDepends...) 275 276 for _, depSpecifier := range allDeps { 277 deps := splitPackageChoice(depSpecifier) 278 for _, dep := range deps { 279 for _, depPkg := range lookup[dep] { 280 relationships = append(relationships, artifact.Relationship{ 281 From: depPkg, 282 To: p, 283 Type: artifact.DependencyOfRelationship, 284 }) 285 } 286 } 287 } 288 } 289 return relationships 290 } 291 292 func stripVersionSpecifier(s string) string { 293 // examples: 294 // libgmp10 (>= 2:6.2.1+dfsg1) --> libgmp10 295 // libgmp10 --> libgmp10 296 // foo [i386] --> foo 297 // default-mta | mail-transport-agent --> default-mta | mail-transport-agent 298 // kernel-headers-2.2.10 [!hurd-i386] --> kernel-headers-2.2.10 299 300 items := internal.SplitAny(s, "[(<>=") 301 if len(items) == 0 { 302 return s 303 } 304 305 return strings.TrimSpace(items[0]) 306 } 307 308 func splitPackageChoice(s string) (ret []string) { 309 fields := strings.Split(s, "|") 310 for _, field := range fields { 311 field = strings.TrimSpace(field) 312 if field != "" { 313 ret = append(ret, stripVersionSpecifier(field)) 314 } 315 } 316 return ret 317 }