github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/alpm/parse_alpm_db.go (about) 1 package alpm 2 3 import ( 4 "bufio" 5 "compress/gzip" 6 "fmt" 7 "io" 8 "path/filepath" 9 "strconv" 10 "strings" 11 "time" 12 13 "github.com/mitchellh/mapstructure" 14 "github.com/nextlinux/gosbom/gosbom/artifact" 15 "github.com/nextlinux/gosbom/gosbom/file" 16 "github.com/nextlinux/gosbom/gosbom/pkg" 17 "github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic" 18 "github.com/vbatts/go-mtree" 19 ) 20 21 var _ generic.Parser = parseAlpmDB 22 23 var ( 24 ignoredFiles = map[string]bool{ 25 "/set": true, 26 ".BUILDINFO": true, 27 ".PKGINFO": true, 28 "": true, 29 } 30 ) 31 32 type parsedData struct { 33 Licenses string `mapstructure:"license"` 34 pkg.AlpmMetadata `mapstructure:",squash"` 35 } 36 37 func parseAlpmDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 38 data, err := parseAlpmDBEntry(reader) 39 if err != nil { 40 return nil, nil, err 41 } 42 43 base := filepath.Dir(reader.RealPath) 44 r, err := getFileReader(filepath.Join(base, "mtree"), resolver) 45 if err != nil { 46 return nil, nil, err 47 } 48 49 pkgFiles, err := parseMtree(r) 50 if err != nil { 51 return nil, nil, err 52 } 53 54 // replace the files found the pacman database with the files from the mtree These contain more metadata and 55 // thus more useful. 56 // TODO: probably want to use MTREE and PKGINFO here 57 data.Files = pkgFiles 58 59 // We only really do this to get any backup database entries from the files database 60 files := filepath.Join(base, "files") 61 _, err = getFileReader(files, resolver) 62 if err != nil { 63 return nil, nil, err 64 } 65 filesMetadata, err := parseAlpmDBEntry(reader) 66 if err != nil { 67 return nil, nil, err 68 } else if filesMetadata != nil { 69 data.Backup = filesMetadata.Backup 70 } 71 72 if data.Package == "" { 73 return nil, nil, nil 74 } 75 76 return []pkg.Package{ 77 newPackage( 78 data, 79 env.LinuxRelease, 80 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 81 ), 82 }, nil, nil 83 } 84 85 func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) { 86 scanner := newScanner(reader) 87 metadata, err := parseDatabase(scanner) 88 if err != nil { 89 return nil, err 90 } 91 return metadata, nil 92 } 93 94 func newScanner(reader io.Reader) *bufio.Scanner { 95 // This is taken from the apk parser 96 // https://github.com/nextlinux/gosbom/blob/v0.47.0/gosbom/pkg/cataloger/apkdb/parse_apk_db.go#L37 97 const maxScannerCapacity = 1024 * 1024 98 bufScan := make([]byte, maxScannerCapacity) 99 scanner := bufio.NewScanner(reader) 100 scanner.Buffer(bufScan, maxScannerCapacity) 101 onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) { 102 for i := 0; i < len(data); i++ { 103 if i > 0 && data[i-1] == '\n' && data[i] == '\n' { 104 return i + 1, data[:i-1], nil 105 } 106 } 107 if !atEOF { 108 return 0, nil, nil 109 } 110 // deliver the last token (which could be an empty string) 111 return 0, data, bufio.ErrFinalToken 112 } 113 114 scanner.Split(onDoubleLF) 115 return scanner 116 } 117 118 func getFileReader(path string, resolver file.Resolver) (io.Reader, error) { 119 locs, err := resolver.FilesByPath(path) 120 if err != nil { 121 return nil, err 122 } 123 124 if len(locs) == 0 { 125 return nil, fmt.Errorf("could not find file: %s", path) 126 } 127 // TODO: Should we maybe check if we found the file 128 dbContentReader, err := resolver.FileContentsByLocation(locs[0]) 129 if err != nil { 130 return nil, err 131 } 132 return dbContentReader, nil 133 } 134 135 func parseDatabase(b *bufio.Scanner) (*parsedData, error) { 136 var err error 137 pkgFields := make(map[string]interface{}) 138 for b.Scan() { 139 fields := strings.SplitN(b.Text(), "\n", 2) 140 141 // End of File 142 if len(fields) == 1 { 143 break 144 } 145 146 // The alpm database surrounds the keys with %. 147 key := strings.ReplaceAll(fields[0], "%", "") 148 key = strings.ToLower(key) 149 value := strings.TrimSpace(fields[1]) 150 151 switch key { 152 case "files": 153 var files []map[string]string 154 for _, f := range strings.Split(value, "\n") { 155 path := fmt.Sprintf("/%s", f) 156 if ok := ignoredFiles[path]; !ok { 157 files = append(files, map[string]string{"path": path}) 158 } 159 } 160 pkgFields[key] = files 161 case "backup": 162 var backup []map[string]interface{} 163 for _, f := range strings.Split(value, "\n") { 164 fields := strings.SplitN(f, "\t", 2) 165 path := fmt.Sprintf("/%s", fields[0]) 166 if ok := ignoredFiles[path]; !ok { 167 backup = append(backup, map[string]interface{}{ 168 "path": path, 169 "digests": []file.Digest{{ 170 Algorithm: "md5", 171 Value: fields[1], 172 }}}) 173 } 174 } 175 pkgFields[key] = backup 176 case "reason": 177 fallthrough 178 case "size": 179 pkgFields[key], err = strconv.ParseInt(value, 10, 64) 180 if err != nil { 181 return nil, fmt.Errorf("failed to parse %s to integer", value) 182 } 183 default: 184 pkgFields[key] = value 185 } 186 } 187 188 return parsePkgFiles(pkgFields) 189 } 190 191 func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) { 192 var entry parsedData 193 if err := mapstructure.Decode(pkgFields, &entry); err != nil { 194 return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err) 195 } 196 197 if entry.Backup == nil { 198 entry.Backup = make([]pkg.AlpmFileRecord, 0) 199 } 200 201 if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 { 202 return nil, nil 203 } 204 return &entry, nil 205 } 206 207 func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) { 208 var err error 209 var entries []pkg.AlpmFileRecord 210 211 r, err = gzip.NewReader(r) 212 if err != nil { 213 return nil, err 214 } 215 specDh, err := mtree.ParseSpec(r) 216 if err != nil { 217 return nil, err 218 } 219 for _, f := range specDh.Entries { 220 var entry pkg.AlpmFileRecord 221 entry.Digests = make([]file.Digest, 0) 222 fileFields := make(map[string]interface{}) 223 if ok := ignoredFiles[f.Name]; ok { 224 continue 225 } 226 path := fmt.Sprintf("/%s", f.Name) 227 fileFields["path"] = path 228 for _, kv := range f.Keywords { 229 kw := string(kv.Keyword()) 230 switch kw { 231 case "time": 232 // All unix timestamps have a .0 suffixs. 233 v := strings.Split(kv.Value(), ".") 234 i, _ := strconv.ParseInt(v[0], 10, 64) 235 tm := time.Unix(i, 0) 236 fileFields[kw] = tm 237 case "sha256digest": 238 entry.Digests = append(entry.Digests, file.Digest{ 239 Algorithm: "sha256", 240 Value: kv.Value(), 241 }) 242 case "md5digest": 243 entry.Digests = append(entry.Digests, file.Digest{ 244 Algorithm: "md5", 245 Value: kv.Value(), 246 }) 247 default: 248 fileFields[kw] = kv.Value() 249 } 250 } 251 if err := mapstructure.Decode(fileFields, &entry); err != nil { 252 return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err) 253 } 254 entries = append(entries, entry) 255 } 256 return entries, nil 257 }