github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/debian/parse_dpkg_db.go (about) 1 package debian 2 3 import ( 4 "bufio" 5 "context" 6 "errors" 7 "fmt" 8 "io" 9 "path" 10 "regexp" 11 "strings" 12 13 "github.com/dustin/go-humanize" 14 "github.com/go-viper/mapstructure/v2" 15 16 "github.com/anchore/go-sync" 17 "github.com/anchore/syft/internal" 18 "github.com/anchore/syft/internal/log" 19 "github.com/anchore/syft/internal/unknown" 20 "github.com/anchore/syft/syft/artifact" 21 "github.com/anchore/syft/syft/cataloging" 22 "github.com/anchore/syft/syft/file" 23 "github.com/anchore/syft/syft/pkg" 24 "github.com/anchore/syft/syft/pkg/cataloger/generic" 25 ) 26 27 const ( 28 deinstallStatus string = "deinstall" 29 ) 30 31 var ( 32 errEndOfPackages = fmt.Errorf("no more packages to read") 33 sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`) 34 ) 35 36 // parseDpkgDB reads a dpkg database "status" file (and surrounding data files) and returns the packages and relationships found. 37 func parseDpkgDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 38 metadata, err := parseDpkgStatus(reader) 39 if err != nil { 40 return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err) 41 } 42 43 dbLoc := reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation) 44 var pkgs []pkg.Package 45 _ = sync.CollectSlice(&ctx, cataloging.ExecutorFile, sync.ToSeq(metadata), func(m pkg.DpkgDBEntry) (pkg.Package, error) { 46 return newDpkgPackage(ctx, m, dbLoc, resolver, env.LinuxRelease, findDpkgInfoFiles(m.Package, resolver, reader.Location)...), nil 47 }, &pkgs) 48 49 return pkgs, nil, unknown.IfEmptyf(pkgs, "unable to determine packages") 50 } 51 52 func findDpkgInfoFiles(name string, resolver file.Resolver, dbLocation file.Location) []file.Location { 53 if resolver == nil { 54 return nil 55 } 56 if strings.TrimSpace(name) == "" { 57 return nil 58 } 59 60 // for typical debian-base distributions, the installed package info is at /var/lib/dpkg/status 61 // and the md5sum information is under /var/lib/dpkg/info/; however, for distroless the installed 62 // package info is across multiple files under /var/lib/dpkg/status.d/ and the md5sums are contained in 63 // the same directory 64 searchPath := path.Dir(dbLocation.RealPath) 65 66 if !strings.HasSuffix(searchPath, "status.d") { 67 searchPath = path.Join(searchPath, "info") 68 } 69 70 // look for /var/lib/dpkg/info/NAME.* 71 locations, err := resolver.FilesByGlob(path.Join(searchPath, name+".*")) 72 if err != nil { 73 log.WithFields("error", err, "pkg", name).Trace("failed to fetch related dpkg info files") 74 return nil 75 } 76 77 return locations 78 } 79 80 // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. 81 func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) { 82 buffedReader := bufio.NewReader(reader) 83 var metadata []pkg.DpkgDBEntry 84 85 continueProcessing := true 86 for continueProcessing { 87 entry, err := parseDpkgStatusEntry(buffedReader) 88 if err != nil { 89 if errors.Is(err, errEndOfPackages) { 90 continueProcessing = false 91 } else { 92 return nil, err 93 } 94 } 95 if entry == nil { 96 continue 97 } 98 99 metadata = append(metadata, *entry) 100 } 101 102 return metadata, nil 103 } 104 105 // dpkgExtractedMetadata is an adapter struct to capture the fields from the dpkg status file, however, the final 106 // pkg.DpkgMetadata struct has different types for some fields (e.g. Provides, Depends, and PreDepends is []string, not a string). 107 type dpkgExtractedMetadata struct { 108 Package string `mapstructure:"Package"` 109 Source string `mapstructure:"Source"` 110 Version string `mapstructure:"Version"` 111 SourceVersion string `mapstructure:"SourceVersion"` 112 Architecture string `mapstructure:"Architecture"` 113 Maintainer string `mapstructure:"Maintainer"` 114 InstalledSize int `mapstructure:"InstalledSize"` 115 Description string `mapstructure:"Description"` 116 Provides string `mapstructure:"Provides"` 117 Depends string `mapstructure:"Depends"` 118 PreDepends string `mapstructure:"PreDepends"` // note: original doc is Pre-Depends 119 Status string `mapstructure:"Status"` 120 } 121 122 // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. 123 func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) { 124 var retErr error 125 dpkgFields, err := extractAllFields(reader) 126 if err != nil { 127 if !errors.Is(err, errEndOfPackages) { 128 return nil, err 129 } 130 if len(dpkgFields) == 0 { 131 return nil, err 132 } 133 retErr = err 134 } 135 136 raw := dpkgExtractedMetadata{} 137 err = mapstructure.Decode(dpkgFields, &raw) 138 if err != nil { 139 return nil, err 140 } 141 142 // Skip entries which have been removed but not purged, e.g. "rc" status in dpkg -l 143 if strings.Contains(raw.Status, deinstallStatus) { 144 return nil, nil 145 } 146 147 sourceName, sourceVersion := extractSourceVersion(raw.Source) 148 if sourceVersion != "" { 149 raw.SourceVersion = sourceVersion 150 raw.Source = sourceName 151 } 152 153 if raw.Package == "" { 154 return nil, retErr 155 } 156 157 entry := pkg.DpkgDBEntry{ 158 Package: raw.Package, 159 Source: raw.Source, 160 Version: raw.Version, 161 SourceVersion: raw.SourceVersion, 162 Architecture: raw.Architecture, 163 Maintainer: raw.Maintainer, 164 InstalledSize: raw.InstalledSize, 165 Description: raw.Description, 166 Provides: splitPkgList(raw.Provides), 167 Depends: splitPkgList(raw.Depends), 168 PreDepends: splitPkgList(raw.PreDepends), 169 } 170 171 // there may be an optional conffiles section that we should persist as files 172 if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil { 173 if sectionStr, ok := conffilesSection.(string); ok { 174 entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr)) 175 } 176 } 177 178 if entry.Files == nil { 179 // ensure the default value for a collection is never nil since this may be shown as JSON 180 entry.Files = make([]pkg.DpkgFileRecord, 0) 181 } 182 183 return &entry, retErr 184 } 185 186 func splitPkgList(pkgList string) (ret []string) { 187 fields := strings.Split(pkgList, ",") 188 for _, field := range fields { 189 field = strings.TrimSpace(field) 190 if field != "" { 191 ret = append(ret, field) 192 } 193 } 194 return ret 195 } 196 197 func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) { 198 dpkgFields := make(map[string]interface{}) 199 var key string 200 201 for { 202 line, err := reader.ReadString('\n') 203 if err != nil { 204 if errors.Is(err, io.EOF) { 205 return dpkgFields, errEndOfPackages 206 } 207 return nil, err 208 } 209 210 line = strings.TrimRight(line, "\n") 211 212 // empty line indicates end of entry 213 if len(line) == 0 { 214 // if the entry has not started, keep parsing lines 215 if len(dpkgFields) == 0 { 216 continue 217 } 218 break 219 } 220 221 switch { 222 case strings.HasPrefix(line, " "): 223 // a field-body continuation 224 if len(key) == 0 { 225 return nil, fmt.Errorf("no match for continuation: line: '%s'", line) 226 } 227 228 val, ok := dpkgFields[key] 229 if !ok { 230 return nil, fmt.Errorf("no previous key exists, expecting: %s", key) 231 } 232 // concatenate onto previous value 233 val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)) 234 dpkgFields[key] = val 235 default: 236 // parse a new key 237 var val interface{} 238 key, val, err = handleNewKeyValue(line) 239 if err != nil { 240 log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err) 241 continue 242 } 243 244 if _, ok := dpkgFields[key]; ok { 245 return nil, fmt.Errorf("duplicate key discovered: %s", key) 246 } 247 dpkgFields[key] = val 248 } 249 } 250 return dpkgFields, nil 251 } 252 253 // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if 254 // of the "<name>" form, then return name and nil 255 func extractSourceVersion(source string) (string, string) { 256 // special handling for the Source field since it has formatted data 257 match := internal.MatchNamedCaptureGroups(sourceRegexp, source) 258 return match["name"], match["version"] 259 } 260 261 // handleNewKeyValue parse a new key-value pair from the given unprocessed line 262 func handleNewKeyValue(line string) (key string, val interface{}, err error) { 263 if i := strings.Index(line, ":"); i > 0 { 264 key = strings.TrimSpace(line[0:i]) 265 // mapstruct cant handle "-" 266 key = strings.ReplaceAll(key, "-", "") 267 val := strings.TrimSpace(line[i+1:]) 268 269 // further processing of values based on the key that was discovered 270 switch key { 271 case "InstalledSize": 272 s, err := humanize.ParseBytes(val) 273 if err != nil { 274 return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err) 275 } 276 return key, int(s), nil 277 default: 278 return key, val, nil 279 } 280 } 281 282 return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line) 283 }