github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/arch/parse_alpm_db.go (about) 1 package arch 2 3 import ( 4 "bufio" 5 "compress/gzip" 6 "context" 7 "fmt" 8 "io" 9 "path" 10 "path/filepath" 11 "strconv" 12 "strings" 13 "time" 14 15 "github.com/go-viper/mapstructure/v2" 16 "github.com/vbatts/go-mtree" 17 18 "github.com/anchore/syft/internal" 19 "github.com/anchore/syft/internal/log" 20 "github.com/anchore/syft/internal/unknown" 21 "github.com/anchore/syft/syft/artifact" 22 "github.com/anchore/syft/syft/file" 23 "github.com/anchore/syft/syft/pkg" 24 "github.com/anchore/syft/syft/pkg/cataloger/generic" 25 ) 26 27 var _ generic.Parser = parseAlpmDB 28 29 var ( 30 ignoredFiles = map[string]bool{ 31 "/set": true, 32 ".BUILDINFO": true, 33 ".PKGINFO": true, 34 "": true, 35 } 36 ) 37 38 type parsedData struct { 39 Licenses string `mapstructure:"license"` 40 pkg.AlpmDBEntry `mapstructure:",squash"` 41 } 42 43 // parseAlpmDB parses the arch linux pacman database flat-files and returns the packages and relationships found within. 44 func parseAlpmDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 45 var errs error 46 47 data, err := parseAlpmDBEntry(reader) 48 if err != nil { 49 return nil, nil, err 50 } 51 52 if data == nil { 53 return nil, nil, nil 54 } 55 56 base := path.Dir(reader.RealPath) 57 58 var locs []file.Location 59 60 // replace the files found the pacman database with the files from the mtree These contain more metadata and 61 // thus more useful. 62 files, fileLoc, err := fetchPkgFiles(base, resolver) 63 errs = unknown.Join(errs, err) 64 if err == nil { 65 locs = append(locs, fileLoc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)) 66 data.Files = files 67 } 68 backups, backupLoc, err := fetchBackupFiles(base, resolver) 69 errs = unknown.Join(errs, err) 70 if err == nil { 71 locs = append(locs, backupLoc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)) 72 data.Backup = backups 73 } 74 75 if data.Package == "" { 76 return nil, nil, errs 77 } 78 79 return []pkg.Package{ 80 newPackage( 81 ctx, 82 data, 83 env.LinuxRelease, 84 reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 85 locs..., 86 ), 87 }, nil, errs 88 } 89 90 func fetchPkgFiles(base string, resolver file.Resolver) ([]pkg.AlpmFileRecord, file.Location, error) { 91 // TODO: probably want to use MTREE and PKGINFO here 92 target := path.Join(base, "mtree") 93 94 loc, err := getLocation(target, resolver) 95 if err != nil { 96 log.WithFields("error", err, "path", target).Trace("failed to find mtree file") 97 return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to find mtree file: %w", err)) 98 } 99 reader, err := resolver.FileContentsByLocation(loc) 100 if err != nil { 101 return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to get contents: %w", err)) 102 } 103 defer internal.CloseAndLogError(reader, loc.RealPath) 104 105 pkgFiles, err := parseMtree(reader) 106 if err != nil { 107 log.WithFields("error", err, "path", target).Trace("failed to parse mtree file") 108 return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to parse mtree: %w", err)) 109 } 110 return pkgFiles, loc, nil 111 } 112 113 func fetchBackupFiles(base string, resolver file.Resolver) ([]pkg.AlpmFileRecord, file.Location, error) { 114 // We only really do this to get any backup database entries from the files database 115 target := filepath.Join(base, "files") 116 117 loc, err := getLocation(target, resolver) 118 if err != nil { 119 log.WithFields("error", err, "path", target).Trace("failed to find alpm files") 120 return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to find alpm files: %w", err)) 121 } 122 123 reader, err := resolver.FileContentsByLocation(loc) 124 if err != nil { 125 return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to get contents: %w", err)) 126 } 127 defer internal.CloseAndLogError(reader, loc.RealPath) 128 129 filesMetadata, err := parseAlpmDBEntry(reader) 130 if err != nil { 131 return []pkg.AlpmFileRecord{}, loc, unknown.New(loc, fmt.Errorf("failed to parse alpm db entry: %w", err)) 132 } 133 if filesMetadata != nil { 134 return filesMetadata.Backup, loc, nil 135 } 136 return []pkg.AlpmFileRecord{}, loc, nil 137 } 138 139 func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) { 140 scanner := newScanner(reader) 141 metadata, err := parseDatabase(scanner) 142 if err != nil { 143 return nil, err 144 } 145 return metadata, nil 146 } 147 148 func newScanner(reader io.Reader) *bufio.Scanner { 149 // This is taken from the apk parser 150 // https://github.com/anchore/syft/blob/v0.47.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L37 151 const maxScannerCapacity = 1024 * 1024 152 bufScan := make([]byte, maxScannerCapacity) 153 scanner := bufio.NewScanner(reader) 154 scanner.Buffer(bufScan, maxScannerCapacity) 155 onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) { 156 for i := 0; i < len(data); i++ { 157 if i > 0 && data[i-1] == '\n' && data[i] == '\n' { 158 return i + 1, data[:i-1], nil 159 } 160 } 161 if !atEOF { 162 return 0, nil, nil 163 } 164 // deliver the last token (which could be an empty string) 165 return 0, data, bufio.ErrFinalToken 166 } 167 168 scanner.Split(onDoubleLF) 169 return scanner 170 } 171 172 func getLocation(path string, resolver file.Resolver) (file.Location, error) { 173 loc := file.NewLocation(path) 174 locs, err := resolver.FilesByPath(path) 175 if err != nil { 176 return loc, err 177 } 178 179 if len(locs) == 0 { 180 return loc, fmt.Errorf("could not find file: %s", path) 181 } 182 183 if len(locs) > 1 { 184 log.WithFields("path", path).Trace("multiple files found for path, using first path") 185 } 186 return locs[0], nil 187 } 188 189 func parseDatabase(b *bufio.Scanner) (*parsedData, error) { 190 var err error 191 pkgFields := make(map[string]interface{}) 192 for b.Scan() { 193 fields := strings.SplitN(b.Text(), "\n", 2) 194 195 // End of File 196 if len(fields) == 1 { 197 break 198 } 199 200 // The alpm database surrounds the keys with %. 201 key := strings.ReplaceAll(fields[0], "%", "") 202 key = strings.ToLower(key) 203 value := strings.TrimSpace(fields[1]) 204 205 switch key { 206 case "files": 207 var files []map[string]string 208 for _, f := range strings.Split(value, "\n") { 209 p := fmt.Sprintf("/%s", f) 210 if ok := ignoredFiles[p]; !ok { 211 files = append(files, map[string]string{"path": p}) 212 } 213 } 214 pkgFields[key] = files 215 case "backup": 216 var backup []map[string]interface{} 217 for _, f := range strings.Split(value, "\n") { 218 fields := strings.SplitN(f, "\t", 2) 219 p := fmt.Sprintf("/%s", fields[0]) 220 if ok := ignoredFiles[p]; !ok { 221 backup = append(backup, map[string]interface{}{ 222 "path": p, 223 "digests": []file.Digest{{ 224 Algorithm: "md5", 225 Value: fields[1], 226 }}}) 227 } 228 } 229 pkgFields[key] = backup 230 case "depends", "provides": 231 pkgFields[key] = processLibrarySpecs(value) 232 case "reason": 233 fallthrough 234 case "size": 235 pkgFields[key], err = strconv.ParseInt(value, 10, 64) 236 if err != nil { 237 return nil, fmt.Errorf("failed to parse %s to integer", value) 238 } 239 default: 240 pkgFields[key] = value 241 } 242 } 243 244 return parsePkgFiles(pkgFields) 245 } 246 247 func processLibrarySpecs(value string) []string { 248 lines := strings.Split(value, "\n") 249 librarySpecs := make([]string, 0) 250 for _, line := range lines { 251 line = strings.TrimSpace(line) 252 if line == "" { 253 continue 254 } 255 librarySpecs = append(librarySpecs, line) 256 } 257 return librarySpecs 258 } 259 260 func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) { 261 var entry parsedData 262 if err := mapstructure.Decode(pkgFields, &entry); err != nil { 263 return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err) 264 } 265 266 if entry.Backup == nil { 267 entry.Backup = make([]pkg.AlpmFileRecord, 0) 268 } 269 270 if entry.Files == nil { 271 entry.Files = make([]pkg.AlpmFileRecord, 0) 272 } 273 274 if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 { 275 return nil, nil 276 } 277 return &entry, nil 278 } 279 280 func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) { 281 var err error 282 var entries []pkg.AlpmFileRecord 283 284 r, err = gzip.NewReader(r) 285 if err != nil { 286 return nil, err 287 } 288 specDh, err := mtree.ParseSpec(r) 289 if err != nil { 290 return nil, err 291 } 292 for _, f := range specDh.Entries { 293 var entry pkg.AlpmFileRecord 294 entry.Digests = make([]file.Digest, 0) 295 fileFields := make(map[string]interface{}) 296 if ok := ignoredFiles[f.Name]; ok { 297 continue 298 } 299 path := fmt.Sprintf("/%s", f.Name) 300 fileFields["path"] = path 301 for _, kv := range f.Keywords { 302 kw := string(kv.Keyword()) 303 switch kw { 304 case "time": 305 // All unix timestamps have a .0 suffixs. 306 v := strings.Split(kv.Value(), ".") 307 i, _ := strconv.ParseInt(v[0], 10, 64) 308 tm := time.Unix(i, 0) 309 fileFields[kw] = tm 310 case "sha256digest": 311 entry.Digests = append(entry.Digests, file.Digest{ 312 Algorithm: "sha256", 313 Value: kv.Value(), 314 }) 315 case "md5digest": 316 entry.Digests = append(entry.Digests, file.Digest{ 317 Algorithm: "md5", 318 Value: kv.Value(), 319 }) 320 default: 321 fileFields[kw] = kv.Value() 322 } 323 } 324 if err := mapstructure.Decode(fileFields, &entry); err != nil { 325 return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err) 326 } 327 entries = append(entries, entry) 328 } 329 return entries, nil 330 }