github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/alpm/parse_alpm_db.go (about) 1 package alpm 2 3 import ( 4 "bufio" 5 "compress/gzip" 6 "fmt" 7 "io" 8 "path/filepath" 9 "strconv" 10 "strings" 11 "time" 12 13 "github.com/mitchellh/mapstructure" 14 "github.com/vbatts/go-mtree" 15 16 "github.com/anchore/syft/syft/artifact" 17 "github.com/anchore/syft/syft/file" 18 "github.com/anchore/syft/syft/pkg" 19 "github.com/anchore/syft/syft/pkg/cataloger/generic" 20 ) 21 22 var _ generic.Parser = parseAlpmDB 23 24 var ( 25 ignoredFiles = map[string]bool{ 26 "/set": true, 27 ".BUILDINFO": true, 28 ".PKGINFO": true, 29 "": true, 30 } 31 ) 32 33 type parsedData struct { 34 Licenses string `mapstructure:"license"` 35 pkg.AlpmMetadata `mapstructure:",squash"` 36 } 37 38 func parseAlpmDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 39 data, err := parseAlpmDBEntry(reader) 40 if err != nil { 41 return nil, nil, err 42 } 43 44 base := filepath.Dir(reader.RealPath) 45 r, err := getFileReader(filepath.Join(base, "mtree"), resolver) 46 if err != nil { 47 return nil, nil, err 48 } 49 50 pkgFiles, err := parseMtree(r) 51 if err != nil { 52 return nil, nil, err 53 } 54 55 // replace the files found the pacman database with the files from the mtree These contain more metadata and 56 // thus more useful. 57 // TODO: probably want to use MTREE and PKGINFO here 58 data.Files = pkgFiles 59 60 // We only really do this to get any backup database entries from the files database 61 files := filepath.Join(base, "files") 62 _, err = getFileReader(files, resolver) 63 if err != nil { 64 return nil, nil, err 65 } 66 filesMetadata, err := parseAlpmDBEntry(reader) 67 if err != nil { 68 return nil, nil, err 69 } else if filesMetadata != nil { 70 data.Backup = filesMetadata.Backup 71 } 72 73 if data.Package == "" { 74 return nil, nil, nil 75 } 76 77 return []pkg.Package{ 78 newPackage( 79 data, 80 env.LinuxRelease, 81 reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 82 ), 83 }, nil, nil 84 } 85 86 func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) { 87 scanner := newScanner(reader) 88 metadata, err := parseDatabase(scanner) 89 if err != nil { 90 return nil, err 91 } 92 return metadata, nil 93 } 94 95 func newScanner(reader io.Reader) *bufio.Scanner { 96 // This is taken from the apk parser 97 // https://github.com/anchore/syft/blob/v0.47.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L37 98 const maxScannerCapacity = 1024 * 1024 99 bufScan := make([]byte, maxScannerCapacity) 100 scanner := bufio.NewScanner(reader) 101 scanner.Buffer(bufScan, maxScannerCapacity) 102 onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) { 103 for i := 0; i < len(data); i++ { 104 if i > 0 && data[i-1] == '\n' && data[i] == '\n' { 105 return i + 1, data[:i-1], nil 106 } 107 } 108 if !atEOF { 109 return 0, nil, nil 110 } 111 // deliver the last token (which could be an empty string) 112 return 0, data, bufio.ErrFinalToken 113 } 114 115 scanner.Split(onDoubleLF) 116 return scanner 117 } 118 119 func getFileReader(path string, resolver file.Resolver) (io.Reader, error) { 120 locs, err := resolver.FilesByPath(path) 121 if err != nil { 122 return nil, err 123 } 124 125 if len(locs) == 0 { 126 return nil, fmt.Errorf("could not find file: %s", path) 127 } 128 // TODO: Should we maybe check if we found the file 129 dbContentReader, err := resolver.FileContentsByLocation(locs[0]) 130 if err != nil { 131 return nil, err 132 } 133 return dbContentReader, nil 134 } 135 136 func parseDatabase(b *bufio.Scanner) (*parsedData, error) { 137 var err error 138 pkgFields := make(map[string]interface{}) 139 for b.Scan() { 140 fields := strings.SplitN(b.Text(), "\n", 2) 141 142 // End of File 143 if len(fields) == 1 { 144 break 145 } 146 147 // The alpm database surrounds the keys with %. 148 key := strings.ReplaceAll(fields[0], "%", "") 149 key = strings.ToLower(key) 150 value := strings.TrimSpace(fields[1]) 151 152 switch key { 153 case "files": 154 var files []map[string]string 155 for _, f := range strings.Split(value, "\n") { 156 path := fmt.Sprintf("/%s", f) 157 if ok := ignoredFiles[path]; !ok { 158 files = append(files, map[string]string{"path": path}) 159 } 160 } 161 pkgFields[key] = files 162 case "backup": 163 var backup []map[string]interface{} 164 for _, f := range strings.Split(value, "\n") { 165 fields := strings.SplitN(f, "\t", 2) 166 path := fmt.Sprintf("/%s", fields[0]) 167 if ok := ignoredFiles[path]; !ok { 168 backup = append(backup, map[string]interface{}{ 169 "path": path, 170 "digests": []file.Digest{{ 171 Algorithm: "md5", 172 Value: fields[1], 173 }}}) 174 } 175 } 176 pkgFields[key] = backup 177 case "reason": 178 fallthrough 179 case "size": 180 pkgFields[key], err = strconv.ParseInt(value, 10, 64) 181 if err != nil { 182 return nil, fmt.Errorf("failed to parse %s to integer", value) 183 } 184 default: 185 pkgFields[key] = value 186 } 187 } 188 189 return parsePkgFiles(pkgFields) 190 } 191 192 func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) { 193 var entry parsedData 194 if err := mapstructure.Decode(pkgFields, &entry); err != nil { 195 return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err) 196 } 197 198 if entry.Backup == nil { 199 entry.Backup = make([]pkg.AlpmFileRecord, 0) 200 } 201 202 if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 { 203 return nil, nil 204 } 205 return &entry, nil 206 } 207 208 func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) { 209 var err error 210 var entries []pkg.AlpmFileRecord 211 212 r, err = gzip.NewReader(r) 213 if err != nil { 214 return nil, err 215 } 216 specDh, err := mtree.ParseSpec(r) 217 if err != nil { 218 return nil, err 219 } 220 for _, f := range specDh.Entries { 221 var entry pkg.AlpmFileRecord 222 entry.Digests = make([]file.Digest, 0) 223 fileFields := make(map[string]interface{}) 224 if ok := ignoredFiles[f.Name]; ok { 225 continue 226 } 227 path := fmt.Sprintf("/%s", f.Name) 228 fileFields["path"] = path 229 for _, kv := range f.Keywords { 230 kw := string(kv.Keyword()) 231 switch kw { 232 case "time": 233 // All unix timestamps have a .0 suffixs. 234 v := strings.Split(kv.Value(), ".") 235 i, _ := strconv.ParseInt(v[0], 10, 64) 236 tm := time.Unix(i, 0) 237 fileFields[kw] = tm 238 case "sha256digest": 239 entry.Digests = append(entry.Digests, file.Digest{ 240 Algorithm: "sha256", 241 Value: kv.Value(), 242 }) 243 case "md5digest": 244 entry.Digests = append(entry.Digests, file.Digest{ 245 Algorithm: "md5", 246 Value: kv.Value(), 247 }) 248 default: 249 fileFields[kw] = kv.Value() 250 } 251 } 252 if err := mapstructure.Decode(fileFields, &entry); err != nil { 253 return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err) 254 } 255 entries = append(entries, entry) 256 } 257 return entries, nil 258 }