github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/deb/parse_dpkg_db.go (about) 1 package deb 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "regexp" 9 "strings" 10 11 "github.com/dustin/go-humanize" 12 "github.com/mitchellh/mapstructure" 13 14 "github.com/anchore/syft/internal" 15 "github.com/anchore/syft/internal/log" 16 "github.com/anchore/syft/syft/artifact" 17 "github.com/anchore/syft/syft/file" 18 "github.com/anchore/syft/syft/pkg" 19 "github.com/anchore/syft/syft/pkg/cataloger/generic" 20 ) 21 22 var ( 23 errEndOfPackages = fmt.Errorf("no more packages to read") 24 sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`) 25 ) 26 27 func parseDpkgDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 28 metadata, err := parseDpkgStatus(reader) 29 if err != nil { 30 return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err) 31 } 32 33 var pkgs []pkg.Package 34 for _, m := range metadata { 35 pkgs = append(pkgs, newDpkgPackage(m, reader.Location, resolver, env.LinuxRelease)) 36 } 37 38 return pkgs, nil, nil 39 } 40 41 // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. 42 func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgMetadata, error) { 43 buffedReader := bufio.NewReader(reader) 44 var metadata []pkg.DpkgMetadata 45 46 continueProcessing := true 47 for continueProcessing { 48 entry, err := parseDpkgStatusEntry(buffedReader) 49 if err != nil { 50 if errors.Is(err, errEndOfPackages) { 51 continueProcessing = false 52 } else { 53 return nil, err 54 } 55 } 56 if entry == nil { 57 continue 58 } 59 60 metadata = append(metadata, *entry) 61 } 62 63 return metadata, nil 64 } 65 66 // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. 67 func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgMetadata, error) { 68 var retErr error 69 dpkgFields, err := extractAllFields(reader) 70 if err != nil { 71 if !errors.Is(err, errEndOfPackages) { 72 return nil, err 73 } 74 if len(dpkgFields) == 0 { 75 return nil, err 76 } 77 retErr = err 78 } 79 80 entry := pkg.DpkgMetadata{} 81 err = mapstructure.Decode(dpkgFields, &entry) 82 if err != nil { 83 return nil, err 84 } 85 86 sourceName, sourceVersion := extractSourceVersion(entry.Source) 87 if sourceVersion != "" { 88 entry.SourceVersion = sourceVersion 89 entry.Source = sourceName 90 } 91 92 if entry.Package == "" { 93 return nil, retErr 94 } 95 96 // there may be an optional conffiles section that we should persist as files 97 if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil { 98 if sectionStr, ok := conffilesSection.(string); ok { 99 entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr)) 100 } 101 } 102 103 if entry.Files == nil { 104 // ensure the default value for a collection is never nil since this may be shown as JSON 105 entry.Files = make([]pkg.DpkgFileRecord, 0) 106 } 107 108 return &entry, retErr 109 } 110 111 func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) { 112 dpkgFields := make(map[string]interface{}) 113 var key string 114 115 for { 116 line, err := reader.ReadString('\n') 117 if err != nil { 118 if errors.Is(err, io.EOF) { 119 return dpkgFields, errEndOfPackages 120 } 121 return nil, err 122 } 123 124 line = strings.TrimRight(line, "\n") 125 126 // empty line indicates end of entry 127 if len(line) == 0 { 128 // if the entry has not started, keep parsing lines 129 if len(dpkgFields) == 0 { 130 continue 131 } 132 break 133 } 134 135 switch { 136 case strings.HasPrefix(line, " "): 137 // a field-body continuation 138 if len(key) == 0 { 139 return nil, fmt.Errorf("no match for continuation: line: '%s'", line) 140 } 141 142 val, ok := dpkgFields[key] 143 if !ok { 144 return nil, fmt.Errorf("no previous key exists, expecting: %s", key) 145 } 146 // concatenate onto previous value 147 val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)) 148 dpkgFields[key] = val 149 default: 150 // parse a new key 151 var val interface{} 152 key, val, err = handleNewKeyValue(line) 153 if err != nil { 154 log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err) 155 continue 156 } 157 158 if _, ok := dpkgFields[key]; ok { 159 return nil, fmt.Errorf("duplicate key discovered: %s", key) 160 } 161 dpkgFields[key] = val 162 } 163 } 164 return dpkgFields, nil 165 } 166 167 // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if 168 // of the "<name>" form, then return name and nil 169 func extractSourceVersion(source string) (string, string) { 170 // special handling for the Source field since it has formatted data 171 match := internal.MatchNamedCaptureGroups(sourceRegexp, source) 172 return match["name"], match["version"] 173 } 174 175 // handleNewKeyValue parse a new key-value pair from the given unprocessed line 176 func handleNewKeyValue(line string) (key string, val interface{}, err error) { 177 if i := strings.Index(line, ":"); i > 0 { 178 key = strings.TrimSpace(line[0:i]) 179 // mapstruct cant handle "-" 180 key = strings.ReplaceAll(key, "-", "") 181 val := strings.TrimSpace(line[i+1:]) 182 183 // further processing of values based on the key that was discovered 184 switch key { 185 case "InstalledSize": 186 s, err := humanize.ParseBytes(val) 187 if err != nil { 188 return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err) 189 } 190 return key, int(s), nil 191 default: 192 return key, val, nil 193 } 194 } 195 196 return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line) 197 }