github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/deb/parse_dpkg_db.go (about) 1 package deb 2 3 import ( 4 "bufio" 5 "errors" 6 "fmt" 7 "io" 8 "regexp" 9 "strings" 10 11 "github.com/dustin/go-humanize" 12 "github.com/mitchellh/mapstructure" 13 "github.com/nextlinux/gosbom/gosbom/artifact" 14 "github.com/nextlinux/gosbom/gosbom/file" 15 "github.com/nextlinux/gosbom/gosbom/pkg" 16 "github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic" 17 "github.com/nextlinux/gosbom/internal" 18 "github.com/nextlinux/gosbom/internal/log" 19 ) 20 21 var ( 22 errEndOfPackages = fmt.Errorf("no more packages to read") 23 sourceRegexp = regexp.MustCompile(`(?P<name>\S+)( \((?P<version>.*)\))?`) 24 ) 25 26 func parseDpkgDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 27 metadata, err := parseDpkgStatus(reader) 28 if err != nil { 29 return nil, nil, fmt.Errorf("unable to catalog dpkg DB=%q: %w", reader.RealPath, err) 30 } 31 32 var pkgs []pkg.Package 33 for _, m := range metadata { 34 pkgs = append(pkgs, newDpkgPackage(m, reader.Location, resolver, env.LinuxRelease)) 35 } 36 37 return pkgs, nil, nil 38 } 39 40 // parseDpkgStatus is a parser function for Debian DB status contents, returning all Debian packages listed. 41 func parseDpkgStatus(reader io.Reader) ([]pkg.DpkgMetadata, error) { 42 buffedReader := bufio.NewReader(reader) 43 var metadata []pkg.DpkgMetadata 44 45 continueProcessing := true 46 for continueProcessing { 47 entry, err := parseDpkgStatusEntry(buffedReader) 48 if err != nil { 49 if errors.Is(err, errEndOfPackages) { 50 continueProcessing = false 51 } else { 52 return nil, err 53 } 54 } 55 if entry == nil { 56 continue 57 } 58 59 metadata = append(metadata, *entry) 60 } 61 62 return metadata, nil 63 } 64 65 // parseDpkgStatusEntry returns an individual Dpkg entry, or returns errEndOfPackages if there are no more packages to parse from the reader. 66 func parseDpkgStatusEntry(reader *bufio.Reader) (*pkg.DpkgMetadata, error) { 67 var retErr error 68 dpkgFields, err := extractAllFields(reader) 69 if err != nil { 70 if !errors.Is(err, errEndOfPackages) { 71 return nil, err 72 } 73 if len(dpkgFields) == 0 { 74 return nil, err 75 } 76 retErr = err 77 } 78 79 entry := pkg.DpkgMetadata{} 80 err = mapstructure.Decode(dpkgFields, &entry) 81 if err != nil { 82 return nil, err 83 } 84 85 sourceName, sourceVersion := extractSourceVersion(entry.Source) 86 if sourceVersion != "" { 87 entry.SourceVersion = sourceVersion 88 entry.Source = sourceName 89 } 90 91 if entry.Package == "" { 92 return nil, retErr 93 } 94 95 // there may be an optional conffiles section that we should persist as files 96 if conffilesSection, exists := dpkgFields["Conffiles"]; exists && conffilesSection != nil { 97 if sectionStr, ok := conffilesSection.(string); ok { 98 entry.Files = parseDpkgConffileInfo(strings.NewReader(sectionStr)) 99 } 100 } 101 102 if entry.Files == nil { 103 // ensure the default value for a collection is never nil since this may be shown as JSON 104 entry.Files = make([]pkg.DpkgFileRecord, 0) 105 } 106 107 return &entry, retErr 108 } 109 110 func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) { 111 dpkgFields := make(map[string]interface{}) 112 var key string 113 114 for { 115 line, err := reader.ReadString('\n') 116 if err != nil { 117 if errors.Is(err, io.EOF) { 118 return dpkgFields, errEndOfPackages 119 } 120 return nil, err 121 } 122 123 line = strings.TrimRight(line, "\n") 124 125 // empty line indicates end of entry 126 if len(line) == 0 { 127 // if the entry has not started, keep parsing lines 128 if len(dpkgFields) == 0 { 129 continue 130 } 131 break 132 } 133 134 switch { 135 case strings.HasPrefix(line, " "): 136 // a field-body continuation 137 if len(key) == 0 { 138 return nil, fmt.Errorf("no match for continuation: line: '%s'", line) 139 } 140 141 val, ok := dpkgFields[key] 142 if !ok { 143 return nil, fmt.Errorf("no previous key exists, expecting: %s", key) 144 } 145 // concatenate onto previous value 146 val = fmt.Sprintf("%s\n %s", val, strings.TrimSpace(line)) 147 dpkgFields[key] = val 148 default: 149 // parse a new key 150 var val interface{} 151 key, val, err = handleNewKeyValue(line) 152 if err != nil { 153 log.Tracef("parsing dpkg status: extracting key-value from line: %s err: %v", line, err) 154 continue 155 } 156 157 if _, ok := dpkgFields[key]; ok { 158 return nil, fmt.Errorf("duplicate key discovered: %s", key) 159 } 160 dpkgFields[key] = val 161 } 162 } 163 return dpkgFields, nil 164 } 165 166 // If the source entry string is of the form "<name> (<version>)" then parse and return the components, if 167 // of the "<name>" form, then return name and nil 168 func extractSourceVersion(source string) (string, string) { 169 // special handling for the Source field since it has formatted data 170 match := internal.MatchNamedCaptureGroups(sourceRegexp, source) 171 return match["name"], match["version"] 172 } 173 174 // handleNewKeyValue parse a new key-value pair from the given unprocessed line 175 func handleNewKeyValue(line string) (key string, val interface{}, err error) { 176 if i := strings.Index(line, ":"); i > 0 { 177 key = strings.TrimSpace(line[0:i]) 178 // mapstruct cant handle "-" 179 key = strings.ReplaceAll(key, "-", "") 180 val := strings.TrimSpace(line[i+1:]) 181 182 // further processing of values based on the key that was discovered 183 switch key { 184 case "InstalledSize": 185 s, err := humanize.ParseBytes(val) 186 if err != nil { 187 return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err) 188 } 189 return key, int(s), nil 190 default: 191 return key, val, nil 192 } 193 } 194 195 return "", nil, fmt.Errorf("cannot parse field from line: '%s'", line) 196 }