github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/debian/parse_dpkg_db.go (about) 1 package debian 2 3 import ( 4 "bufio" 5 "context" 6 "errors" 7 "fmt" 8 "io" 9 "regexp" 10 "strings" 11 12 "github.com/dustin/go-humanize" 13 "github.com/mitchellh/mapstructure" 14 15 "github.com/anchore/syft/internal" 16 "github.com/anchore/syft/internal/log" 17 "github.com/anchore/syft/syft/artifact" 18 "github.com/anchore/syft/syft/file" 19 "github.com/anchore/syft/syft/pkg" 20 "github.com/anchore/syft/syft/pkg/cataloger/generic" 21 ) 22 23 var ( 24 errEndOfPackages = fmt.Errorf("no more packages to read") 25 sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`) 26 ) 27 28 // parseDpkgDB reads a dpkg database "status" file (and surrounding data files) and returns the packages and relationships found. 29 func parseDpkgDB(_ context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 30 metadata, err := parseDpkgStatus(reader) 31 if err != nil { 32 return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err) 33 } 34 35 var pkgs []pkg.Package 36 for _, m := range metadata { 37 pkgs = append(pkgs, newDpkgPackage(m, reader.Location, resolver, env.LinuxRelease)) 38 } 39 40 return pkgs, nil, nil 41 } 42 43 // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. 44 func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgDBEntry, error) { 45 buffedReader := bufio.NewReader(reader) 46 var metadata []pkg.DpkgDBEntry 47 48 continueProcessing := true 49 for continueProcessing { 50 entry, err := parseDpkgStatusEntry(buffedReader) 51 if err != nil { 52 if errors.Is(err, errEndOfPackages) { 53 continueProcessing = false 54 } else { 55 return nil, err 56 } 57 } 58 if entry == nil { 59 continue 60 } 61 62 metadata = append(metadata, *entry) 63 } 64 65 return metadata, nil 66 } 67 68 // dpkgExtractedMetadata is an adapter struct to capture the fields from the dpkg status file, however, the final 69 // pkg.DpkgMetadata struct has different types for some fields (e.g. Provides, Depends, and PreDepends is []string, not a string). 70 type dpkgExtractedMetadata struct { 71 Package string `mapstructure:"Package"` 72 Source string `mapstructure:"Source"` 73 Version string `mapstructure:"Version"` 74 SourceVersion string `mapstructure:"SourceVersion"` 75 Architecture string `mapstructure:"Architecture"` 76 Maintainer string `mapstructure:"Maintainer"` 77 InstalledSize int `mapstructure:"InstalledSize"` 78 Description string `mapstructure:"Description"` 79 Provides string `mapstructure:"Provides"` 80 Depends string `mapstructure:"Depends"` 81 PreDepends string `mapstructure:"PreDepends"` // note: original doc is Pre-Depends 82 } 83 84 // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. 85 func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgDBEntry, error) { 86 var retErr error 87 dpkgFields, err := extractAllFields(reader) 88 if err != nil { 89 if !errors.Is(err, errEndOfPackages) { 90 return nil, err 91 } 92 if len(dpkgFields) == 0 { 93 return nil, err 94 } 95 retErr = err 96 } 97 98 raw := dpkgExtractedMetadata{} 99 err = mapstructure.Decode(dpkgFields, &raw) 100 if err != nil { 101 return nil, err 102 } 103 104 sourceName, sourceVersion := extractSourceVersion(raw.Source) 105 if sourceVersion != "" { 106 raw.SourceVersion = sourceVersion 107 raw.Source = sourceName 108 } 109 110 if raw.Package == "" { 111 return nil, retErr 112 } 113 114 entry := pkg.DpkgDBEntry{ 115 Package: raw.Package, 116 Source: raw.Source, 117 Version: raw.Version, 118 SourceVersion: raw.SourceVersion, 119 Architecture: raw.Architecture, 120 Maintainer: raw.Maintainer, 121 InstalledSize: raw.InstalledSize, 122 Description: raw.Description, 123 Provides: splitPkgList(raw.Provides), 124 Depends: splitPkgList(raw.Depends), 125 PreDepends: splitPkgList(raw.PreDepends), 126 } 127 128 // there may be an optional conffiles section that we should persist as files 129 if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil { 130 if sectionStr, ok := conffilesSection.(string); ok { 131 entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr)) 132 } 133 } 134 135 if entry.Files == nil { 136 // ensure the default value for a collection is never nil since this may be shown as JSON 137 entry.Files = make([]pkg.DpkgFileRecord, 0) 138 } 139 140 return &entry, retErr 141 } 142 143 func splitPkgList(pkgList string) (ret []string) { 144 fields := strings.Split(pkgList, ",") 145 for _, field := range fields { 146 field = strings.TrimSpace(field) 147 if field != "" { 148 ret = append(ret, field) 149 } 150 } 151 return ret 152 } 153 154 func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) { 155 dpkgFields := make(map[string]interface{}) 156 var key string 157 158 for { 159 line, err := reader.ReadString('\n') 160 if err != nil { 161 if errors.Is(err, io.EOF) { 162 return dpkgFields, errEndOfPackages 163 } 164 return nil, err 165 } 166 167 line = strings.TrimRight(line, "\n") 168 169 // empty line indicates end of entry 170 if len(line) == 0 { 171 // if the entry has not started, keep parsing lines 172 if len(dpkgFields) == 0 { 173 continue 174 } 175 break 176 } 177 178 switch { 179 case strings.HasPrefix(line, " "): 180 // a field-body continuation 181 if len(key) == 0 { 182 return nil, fmt.Errorf("no match for continuation: line: '%s'", line) 183 } 184 185 val, ok := dpkgFields[key] 186 if !ok { 187 return nil, fmt.Errorf("no previous key exists, expecting: %s", key) 188 } 189 // concatenate onto previous value 190 val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)) 191 dpkgFields[key] = val 192 default: 193 // parse a new key 194 var val interface{} 195 key, val, err = handleNewKeyValue(line) 196 if err != nil { 197 log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err) 198 continue 199 } 200 201 if _, ok := dpkgFields[key]; ok { 202 return nil, fmt.Errorf("duplicate key discovered: %s", key) 203 } 204 dpkgFields[key] = val 205 } 206 } 207 return dpkgFields, nil 208 } 209 210 // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if 211 // of the "<name>" form, then return name and nil 212 func extractSourceVersion(source string) (string, string) { 213 // special handling for the Source field since it has formatted data 214 match := internal.MatchNamedCaptureGroups(sourceRegexp, source) 215 return match["name"], match["version"] 216 } 217 218 // handleNewKeyValue parse a new key-value pair from the given unprocessed line 219 func handleNewKeyValue(line string) (key string, val interface{}, err error) { 220 if i := strings.Index(line, ":"); i > 0 { 221 key = strings.TrimSpace(line[0:i]) 222 // mapstruct cant handle "-" 223 key = strings.ReplaceAll(key, "-", "") 224 val := strings.TrimSpace(line[i+1:]) 225 226 // further processing of values based on the key that was discovered 227 switch key { 228 case "InstalledSize": 229 s, err := humanize.ParseBytes(val) 230 if err != nil { 231 return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err) 232 } 233 return key, int(s), nil 234 default: 235 return key, val, nil 236 } 237 } 238 239 return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line) 240 }