github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/arch/parse_alpm_db.go (about) 1 package arch 2 3 import ( 4 "bufio" 5 "compress/gzip" 6 "context" 7 "fmt" 8 "io" 9 "path" 10 "path/filepath" 11 "strconv" 12 "strings" 13 "time" 14 15 "github.com/mitchellh/mapstructure" 16 "github.com/vbatts/go-mtree" 17 18 "github.com/anchore/syft/internal" 19 "github.com/anchore/syft/internal/log" 20 "github.com/anchore/syft/syft/artifact" 21 "github.com/anchore/syft/syft/file" 22 "github.com/anchore/syft/syft/pkg" 23 "github.com/anchore/syft/syft/pkg/cataloger/generic" 24 ) 25 26 var _ generic.Parser = parseAlpmDB 27 28 var ( 29 ignoredFiles = map[string]bool{ 30 "/set": true, 31 ".BUILDINFO": true, 32 ".PKGINFO": true, 33 "": true, 34 } 35 ) 36 37 type parsedData struct { 38 Licenses string `mapstructure:"license"` 39 pkg.AlpmDBEntry `mapstructure:",squash"` 40 } 41 42 // parseAlpmDB parses the arch linux pacman database flat-files and returns the packages and relationships found within. 43 func parseAlpmDB(_ context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 44 data, err := parseAlpmDBEntry(reader) 45 if err != nil { 46 return nil, nil, err 47 } 48 49 if data == nil { 50 return nil, nil, nil 51 } 52 53 base := path.Dir(reader.RealPath) 54 55 // replace the files found the pacman database with the files from the mtree These contain more metadata and 56 // thus more useful. 57 files, fileLoc := fetchPkgFiles(base, resolver) 58 backups, backupLoc := fetchBackupFiles(base, resolver) 59 60 var locs []file.Location 61 if fileLoc != nil { 62 locs = append(locs, fileLoc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)) 63 data.Files = files 64 } 65 66 if backupLoc != nil { 67 locs = append(locs, backupLoc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)) 68 data.Backup = backups 69 } 70 71 if data.Package == "" { 72 return nil, nil, nil 73 } 74 75 return []pkg.Package{ 76 newPackage( 77 data, 78 env.LinuxRelease, 79 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 80 locs..., 81 ), 82 }, nil, nil 83 } 84 85 func fetchPkgFiles(base string, resolver file.Resolver) ([]pkg.AlpmFileRecord, *file.Location) { 86 // TODO: probably want to use MTREE and PKGINFO here 87 target := path.Join(base, "mtree") 88 89 loc, err := getLocation(target, resolver) 90 if err != nil { 91 log.WithFields("error", err, "path", target).Trace("failed to find mtree file") 92 return []pkg.AlpmFileRecord{}, nil 93 } 94 if loc == nil { 95 return []pkg.AlpmFileRecord{}, nil 96 } 97 98 reader, err := resolver.FileContentsByLocation(*loc) 99 if err != nil { 100 return []pkg.AlpmFileRecord{}, nil 101 } 102 defer internal.CloseAndLogError(reader, loc.RealPath) 103 104 pkgFiles, err := parseMtree(reader) 105 if err != nil { 106 log.WithFields("error", err, "path", target).Trace("failed to parse mtree file") 107 return []pkg.AlpmFileRecord{}, nil 108 } 109 return pkgFiles, loc 110 } 111 112 func fetchBackupFiles(base string, resolver file.Resolver) ([]pkg.AlpmFileRecord, *file.Location) { 113 // We only really do this to get any backup database entries from the files database 114 target := filepath.Join(base, "files") 115 116 loc, err := getLocation(target, resolver) 117 if err != nil { 118 log.WithFields("error", err, "path", target).Trace("failed to find alpm files") 119 return []pkg.AlpmFileRecord{}, nil 120 } 121 if loc == nil { 122 return []pkg.AlpmFileRecord{}, nil 123 } 124 125 reader, err := resolver.FileContentsByLocation(*loc) 126 if err != nil { 127 return []pkg.AlpmFileRecord{}, nil 128 } 129 defer internal.CloseAndLogError(reader, loc.RealPath) 130 131 filesMetadata, err := parseAlpmDBEntry(reader) 132 if err != nil { 133 return []pkg.AlpmFileRecord{}, nil 134 } 135 if filesMetadata != nil { 136 return filesMetadata.Backup, loc 137 } 138 return []pkg.AlpmFileRecord{}, loc 139 } 140 141 func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) { 142 scanner := newScanner(reader) 143 metadata, err := parseDatabase(scanner) 144 if err != nil { 145 return nil, err 146 } 147 return metadata, nil 148 } 149 150 func newScanner(reader io.Reader) *bufio.Scanner { 151 // This is taken from the apk parser 152 // https://github.com/anchore/syft/blob/v0.47.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L37 153 const maxScannerCapacity = 1024 * 1024 154 bufScan := make([]byte, maxScannerCapacity) 155 scanner := bufio.NewScanner(reader) 156 scanner.Buffer(bufScan, maxScannerCapacity) 157 onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) { 158 for i := 0; i < len(data); i++ { 159 if i > 0 && data[i-1] == '\n' && data[i] == '\n' { 160 return i + 1, data[:i-1], nil 161 } 162 } 163 if !atEOF { 164 return 0, nil, nil 165 } 166 // deliver the last token (which could be an empty string) 167 return 0, data, bufio.ErrFinalToken 168 } 169 170 scanner.Split(onDoubleLF) 171 return scanner 172 } 173 174 func getLocation(path string, resolver file.Resolver) (*file.Location, error) { 175 locs, err := resolver.FilesByPath(path) 176 if err != nil { 177 return nil, err 178 } 179 180 if len(locs) == 0 { 181 return nil, fmt.Errorf("could not find file: %s", path) 182 } 183 184 if len(locs) > 1 { 185 log.WithFields("path", path).Trace("multiple files found for path, using first path") 186 } 187 return &locs[0], nil 188 } 189 190 func parseDatabase(b *bufio.Scanner) (*parsedData, error) { 191 var err error 192 pkgFields := make(map[string]interface{}) 193 for b.Scan() { 194 fields := strings.SplitN(b.Text(), "\n", 2) 195 196 // End of File 197 if len(fields) == 1 { 198 break 199 } 200 201 // The alpm database surrounds the keys with %. 202 key := strings.ReplaceAll(fields[0], "%", "") 203 key = strings.ToLower(key) 204 value := strings.TrimSpace(fields[1]) 205 206 switch key { 207 case "files": 208 var files []map[string]string 209 for _, f := range strings.Split(value, "\n") { 210 p := fmt.Sprintf("/%s", f) 211 if ok := ignoredFiles[p]; !ok { 212 files = append(files, map[string]string{"path": p}) 213 } 214 } 215 pkgFields[key] = files 216 case "backup": 217 var backup []map[string]interface{} 218 for _, f := range strings.Split(value, "\n") { 219 fields := strings.SplitN(f, "\t", 2) 220 p := fmt.Sprintf("/%s", fields[0]) 221 if ok := ignoredFiles[p]; !ok { 222 backup = append(backup, map[string]interface{}{ 223 "path": p, 224 "digests": []file.Digest{{ 225 Algorithm: "md5", 226 Value: fields[1], 227 }}}) 228 } 229 } 230 pkgFields[key] = backup 231 case "depends", "provides": 232 pkgFields[key] = processLibrarySpecs(value) 233 case "reason": 234 fallthrough 235 case "size": 236 pkgFields[key], err = strconv.ParseInt(value, 10, 64) 237 if err != nil { 238 return nil, fmt.Errorf("failed to parse %s to integer", value) 239 } 240 default: 241 pkgFields[key] = value 242 } 243 } 244 245 return parsePkgFiles(pkgFields) 246 } 247 248 func processLibrarySpecs(value string) []string { 249 lines := strings.Split(value, "\n") 250 librarySpecs := make([]string, 0) 251 for _, line := range lines { 252 line = strings.TrimSpace(line) 253 if line == "" { 254 continue 255 } 256 librarySpecs = append(librarySpecs, line) 257 } 258 return librarySpecs 259 } 260 261 func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) { 262 var entry parsedData 263 if err := mapstructure.Decode(pkgFields, &entry); err != nil { 264 return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err) 265 } 266 267 if entry.Backup == nil { 268 entry.Backup = make([]pkg.AlpmFileRecord, 0) 269 } 270 271 if entry.Files == nil { 272 entry.Files = make([]pkg.AlpmFileRecord, 0) 273 } 274 275 if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 { 276 return nil, nil 277 } 278 return &entry, nil 279 } 280 281 func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) { 282 var err error 283 var entries []pkg.AlpmFileRecord 284 285 r, err = gzip.NewReader(r) 286 if err != nil { 287 return nil, err 288 } 289 specDh, err := mtree.ParseSpec(r) 290 if err != nil { 291 return nil, err 292 } 293 for _, f := range specDh.Entries { 294 var entry pkg.AlpmFileRecord 295 entry.Digests = make([]file.Digest, 0) 296 fileFields := make(map[string]interface{}) 297 if ok := ignoredFiles[f.Name]; ok { 298 continue 299 } 300 path := fmt.Sprintf("/%s", f.Name) 301 fileFields["path"] = path 302 for _, kv := range f.Keywords { 303 kw := string(kv.Keyword()) 304 switch kw { 305 case "time": 306 // All unix timestamps have a .0 suffixs. 307 v := strings.Split(kv.Value(), ".") 308 i, _ := strconv.ParseInt(v[0], 10, 64) 309 tm := time.Unix(i, 0) 310 fileFields[kw] = tm 311 case "sha256digest": 312 entry.Digests = append(entry.Digests, file.Digest{ 313 Algorithm: "sha256", 314 Value: kv.Value(), 315 }) 316 case "md5digest": 317 entry.Digests = append(entry.Digests, file.Digest{ 318 Algorithm: "md5", 319 Value: kv.Value(), 320 }) 321 default: 322 fileFields[kw] = kv.Value() 323 } 324 } 325 if err := mapstructure.Decode(fileFields, &entry); err != nil { 326 return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err) 327 } 328 entries = append(entries, entry) 329 } 330 return entries, nil 331 }