github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/arch/parse_alpm_db.go (about) 1 package arch 2 3 import ( 4 "bufio" 5 "compress/gzip" 6 "fmt" 7 "io" 8 "path/filepath" 9 "strconv" 10 "strings" 11 "time" 12 13 "github.com/mitchellh/mapstructure" 14 "github.com/vbatts/go-mtree" 15 16 "github.com/anchore/syft/syft/artifact" 17 "github.com/anchore/syft/syft/file" 18 "github.com/anchore/syft/syft/pkg" 19 "github.com/anchore/syft/syft/pkg/cataloger/generic" 20 ) 21 22 var _ generic.Parser = parseAlpmDB 23 24 var ( 25 ignoredFiles = map[string]bool{ 26 "/set": true, 27 ".BUILDINFO": true, 28 ".PKGINFO": true, 29 "": true, 30 } 31 ) 32 33 type parsedData struct { 34 Licenses string `mapstructure:"license"` 35 pkg.AlpmDBEntry `mapstructure:",squash"` 36 } 37 38 // parseAlpmDB parses the arch linux pacman database flat-files and returns the packages and relationships found within. 39 func parseAlpmDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 40 data, err := parseAlpmDBEntry(reader) 41 if err != nil { 42 return nil, nil, err 43 } 44 45 base := filepath.Dir(reader.RealPath) 46 r, err := getFileReader(filepath.Join(base, "mtree"), resolver) 47 if err != nil { 48 return nil, nil, err 49 } 50 51 pkgFiles, err := parseMtree(r) 52 if err != nil { 53 return nil, nil, err 54 } 55 56 // replace the files found the pacman database with the files from the mtree These contain more metadata and 57 // thus more useful. 58 // TODO: probably want to use MTREE and PKGINFO here 59 data.Files = pkgFiles 60 61 // We only really do this to get any backup database entries from the files database 62 files := filepath.Join(base, "files") 63 _, err = getFileReader(files, resolver) 64 if err != nil { 65 return nil, nil, err 66 } 67 filesMetadata, err := parseAlpmDBEntry(reader) 68 if err != nil { 69 return nil, nil, err 70 } else if filesMetadata != nil { 71 data.Backup = filesMetadata.Backup 72 } 73 74 if data.Package == "" { 75 return nil, nil, nil 76 } 77 78 return []pkg.Package{ 79 newPackage( 80 data, 81 env.LinuxRelease, 82 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 83 ), 84 }, nil, nil 85 } 86 87 func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) { 88 scanner := newScanner(reader) 89 metadata, err := parseDatabase(scanner) 90 if err != nil { 91 return nil, err 92 } 93 return metadata, nil 94 } 95 96 func newScanner(reader io.Reader) *bufio.Scanner { 97 // This is taken from the apk parser 98 // https://github.com/anchore/syft/blob/v0.47.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L37 99 const maxScannerCapacity = 1024 * 1024 100 bufScan := make([]byte, maxScannerCapacity) 101 scanner := bufio.NewScanner(reader) 102 scanner.Buffer(bufScan, maxScannerCapacity) 103 onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) { 104 for i := 0; i < len(data); i++ { 105 if i > 0 && data[i-1] == '\n' && data[i] == '\n' { 106 return i + 1, data[:i-1], nil 107 } 108 } 109 if !atEOF { 110 return 0, nil, nil 111 } 112 // deliver the last token (which could be an empty string) 113 return 0, data, bufio.ErrFinalToken 114 } 115 116 scanner.Split(onDoubleLF) 117 return scanner 118 } 119 120 func getFileReader(path string, resolver file.Resolver) (io.Reader, error) { 121 locs, err := resolver.FilesByPath(path) 122 if err != nil { 123 return nil, err 124 } 125 126 if len(locs) == 0 { 127 return nil, fmt.Errorf("could not find file: %s", path) 128 } 129 // TODO: Should we maybe check if we found the file 130 dbContentReader, err := resolver.FileContentsByLocation(locs[0]) 131 if err != nil { 132 return nil, err 133 } 134 return dbContentReader, nil 135 } 136 137 func parseDatabase(b *bufio.Scanner) (*parsedData, error) { 138 var err error 139 pkgFields := make(map[string]interface{}) 140 for b.Scan() { 141 fields := strings.SplitN(b.Text(), "\n", 2) 142 143 // End of File 144 if len(fields) == 1 { 145 break 146 } 147 148 // The alpm database surrounds the keys with %. 149 key := strings.ReplaceAll(fields[0], "%", "") 150 key = strings.ToLower(key) 151 value := strings.TrimSpace(fields[1]) 152 153 switch key { 154 case "files": 155 var files []map[string]string 156 for _, f := range strings.Split(value, "\n") { 157 path := fmt.Sprintf("/%s", f) 158 if ok := ignoredFiles[path]; !ok { 159 files = append(files, map[string]string{"path": path}) 160 } 161 } 162 pkgFields[key] = files 163 case "backup": 164 var backup []map[string]interface{} 165 for _, f := range strings.Split(value, "\n") { 166 fields := strings.SplitN(f, "\t", 2) 167 path := fmt.Sprintf("/%s", fields[0]) 168 if ok := ignoredFiles[path]; !ok { 169 backup = append(backup, map[string]interface{}{ 170 "path": path, 171 "digests": []file.Digest{{ 172 Algorithm: "md5", 173 Value: fields[1], 174 }}}) 175 } 176 } 177 pkgFields[key] = backup 178 case "reason": 179 fallthrough 180 case "size": 181 pkgFields[key], err = strconv.ParseInt(value, 10, 64) 182 if err != nil { 183 return nil, fmt.Errorf("failed to parse %s to integer", value) 184 } 185 default: 186 pkgFields[key] = value 187 } 188 } 189 190 return parsePkgFiles(pkgFields) 191 } 192 193 func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) { 194 var entry parsedData 195 if err := mapstructure.Decode(pkgFields, &entry); err != nil { 196 return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err) 197 } 198 199 if entry.Backup == nil { 200 entry.Backup = make([]pkg.AlpmFileRecord, 0) 201 } 202 203 if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 { 204 return nil, nil 205 } 206 return &entry, nil 207 } 208 209 func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) { 210 var err error 211 var entries []pkg.AlpmFileRecord 212 213 r, err = gzip.NewReader(r) 214 if err != nil { 215 return nil, err 216 } 217 specDh, err := mtree.ParseSpec(r) 218 if err != nil { 219 return nil, err 220 } 221 for _, f := range specDh.Entries { 222 var entry pkg.AlpmFileRecord 223 entry.Digests = make([]file.Digest, 0) 224 fileFields := make(map[string]interface{}) 225 if ok := ignoredFiles[f.Name]; ok { 226 continue 227 } 228 path := fmt.Sprintf("/%s", f.Name) 229 fileFields["path"] = path 230 for _, kv := range f.Keywords { 231 kw := string(kv.Keyword()) 232 switch kw { 233 case "time": 234 // All unix timestamps have a .0 suffixs. 235 v := strings.Split(kv.Value(), ".") 236 i, _ := strconv.ParseInt(v[0], 10, 64) 237 tm := time.Unix(i, 0) 238 fileFields[kw] = tm 239 case "sha256digest": 240 entry.Digests = append(entry.Digests, file.Digest{ 241 Algorithm: "sha256", 242 Value: kv.Value(), 243 }) 244 case "md5digest": 245 entry.Digests = append(entry.Digests, file.Digest{ 246 Algorithm: "md5", 247 Value: kv.Value(), 248 }) 249 default: 250 fileFields[kw] = kv.Value() 251 } 252 } 253 if err := mapstructure.Decode(fileFields, &entry); err != nil { 254 return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err) 255 } 256 entries = append(entries, entry) 257 } 258 return entries, nil 259 }