github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/pkg/cataloger/apkdb/parse_apk_db.go (about) 1 package apkdb 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "path" 8 "regexp" 9 "strconv" 10 "strings" 11 12 "github.com/kastenhq/syft/internal" 13 "github.com/kastenhq/syft/internal/log" 14 "github.com/kastenhq/syft/syft/artifact" 15 "github.com/kastenhq/syft/syft/file" 16 "github.com/kastenhq/syft/syft/linux" 17 "github.com/kastenhq/syft/syft/pkg" 18 "github.com/kastenhq/syft/syft/pkg/cataloger/generic" 19 ) 20 21 // integrity check 22 var _ generic.Parser = parseApkDB 23 24 var ( 25 repoRegex = regexp.MustCompile(`(?m)^https://.*\.alpinelinux\.org/alpine/v([^/]+)/([a-zA-Z0-9_]+)$`) 26 ) 27 28 type parsedData struct { 29 License string `mapstructure:"L" json:"license"` 30 pkg.ApkMetadata 31 } 32 33 // parseApkDB parses packages from a given APK installed DB file. For more 34 // information on specific fields, see https://wiki.alpinelinux.org/wiki/Apk_spec. 35 // 36 //nolint:funlen,gocognit 37 func parseApkDB(resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 38 scanner := bufio.NewScanner(reader) 39 40 var apks []parsedData 41 var currentEntry parsedData 42 entryParsingInProgress := false 43 fileParsingCtx := newApkFileParsingContext() 44 45 // creating a dedicated append-like function here instead of using `append(...)` 46 // below since there is nontrivial logic to be performed for each finalized apk 47 // entry. 48 appendApk := func(p parsedData) { 49 if files := fileParsingCtx.files; len(files) >= 1 { 50 // attached accumulated files to current package 51 p.Files = files 52 53 // reset file parsing for next use 54 fileParsingCtx = newApkFileParsingContext() 55 } 56 57 nilFieldsToEmptySlice(&p) 58 apks = append(apks, p) 59 } 60 61 for scanner.Scan() { 62 line := scanner.Text() 63 64 if line == "" { 65 // i.e. apk entry separator 66 67 if entryParsingInProgress { 68 // current entry is complete 69 appendApk(currentEntry) 70 } 71 72 entryParsingInProgress = false 73 74 // zero-out currentEntry for use by any future entry 75 currentEntry = parsedData{} 76 77 continue 78 } 79 80 field := parseApkField(line) 81 if field == nil { 82 log.Warnf("unable to parse field data from line %q", line) 83 continue 84 } 85 if len(field.name) == 0 { 86 log.Warnf("failed to parse field name from line %q", line) 87 continue 88 } 89 if len(field.value) == 0 { 90 log.Debugf("line %q: parsed field %q appears to have an empty value, skipping", line, field.name) 91 continue 92 } 93 94 entryParsingInProgress = true 95 96 field.apply(¤tEntry, fileParsingCtx) 97 } 98 99 if entryParsingInProgress { 100 // There was no final empty line, so currentEntry hasn't been added to the 101 // collection yet; but we've now reached the end of scanning, so let's be sure to 102 // add currentEntry to the collection. 103 appendApk(currentEntry) 104 } 105 106 if err := scanner.Err(); err != nil { 107 return nil, nil, fmt.Errorf("failed to parse APK installed DB file: %w", err) 108 } 109 110 var r *linux.Release 111 if env != nil { 112 r = env.LinuxRelease 113 } 114 // this is somewhat ugly, but better than completely failing when we can't find the release, 115 // e.g. embedded deeper in the tree, like containers or chroots. 116 // but we now have no way of handling different repository sources. On the other hand, 117 // we never could before this. At least now, we can handle some. 118 // This should get fixed with https://gitlab.alpinelinux.org/alpine/apk-tools/-/issues/10875 119 if r == nil { 120 // find the repositories file from the relative directory of the DB file 121 releases := findReleases(resolver, reader.Location.RealPath) 122 123 if len(releases) > 0 { 124 r = &releases[0] 125 } 126 } 127 128 pkgs := make([]pkg.Package, 0, len(apks)) 129 for _, apk := range apks { 130 pkgs = append(pkgs, newPackage(apk, r, reader.Location)) 131 } 132 133 return pkgs, discoverPackageDependencies(pkgs), nil 134 } 135 136 func findReleases(resolver file.Resolver, dbPath string) []linux.Release { 137 if resolver == nil { 138 return nil 139 } 140 141 reposLocation := path.Clean(path.Join(path.Dir(dbPath), "../../../etc/apk/repositories")) 142 locations, err := resolver.FilesByPath(reposLocation) 143 if err != nil { 144 log.Tracef("unable to find APK repositories file %q: %+v", reposLocation, err) 145 return nil 146 } 147 148 if len(locations) == 0 { 149 return nil 150 } 151 location := locations[0] 152 153 reposReader, err := resolver.FileContentsByLocation(location) 154 if err != nil { 155 log.Tracef("unable to fetch contents for APK repositories file %q: %+v", reposLocation, err) 156 return nil 157 } 158 159 return parseReleasesFromAPKRepository(file.LocationReadCloser{ 160 Location: location, 161 ReadCloser: reposReader, 162 }) 163 } 164 165 func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release { 166 var releases []linux.Release 167 168 reposB, err := io.ReadAll(reader) 169 if err != nil { 170 log.Tracef("unable to read APK repositories file %q: %+v", reader.Location.RealPath, err) 171 return nil 172 } 173 174 parts := repoRegex.FindAllStringSubmatch(string(reposB), -1) 175 for _, part := range parts { 176 if len(part) >= 3 { 177 releases = append(releases, linux.Release{ 178 Name: "Alpine Linux", 179 ID: "alpine", 180 VersionID: part[1], 181 }) 182 } 183 } 184 185 return releases 186 } 187 188 func parseApkField(line string) *apkField { 189 parts := strings.SplitN(line, ":", 2) 190 if len(parts) != 2 { 191 return nil 192 } 193 194 f := apkField{ 195 name: parts[0], 196 value: parts[1], 197 } 198 199 return &f 200 } 201 202 type apkField struct { 203 name string 204 value string 205 } 206 207 //nolint:funlen 208 func (f apkField) apply(p *parsedData, ctx *apkFileParsingContext) { 209 switch f.name { 210 // APKINDEX field parsing 211 212 case "P": 213 p.Package = f.value 214 case "o": 215 p.OriginPackage = f.value 216 case "m": 217 p.Maintainer = f.value 218 case "V": 219 p.Version = f.value 220 case "L": 221 p.License = f.value 222 case "A": 223 p.Architecture = f.value 224 case "U": 225 p.URL = f.value 226 case "T": 227 p.Description = f.value 228 case "S": 229 i, err := strconv.Atoi(f.value) 230 if err != nil { 231 log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err) 232 return 233 } 234 235 p.Size = i 236 case "I": 237 i, err := strconv.Atoi(f.value) 238 if err != nil { 239 log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err) 240 return 241 } 242 243 p.InstalledSize = i 244 case "D": 245 deps := parseListValue(f.value) 246 p.Dependencies = deps 247 case "p": 248 provides := parseListValue(f.value) 249 p.Provides = provides 250 case "C": 251 p.Checksum = f.value 252 case "c": 253 p.GitCommit = f.value 254 255 // File/directory field parsing: 256 257 case "F": 258 directory := path.Join("/", f.value) 259 260 ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: directory}) 261 ctx.indexOfLatestDirectory = len(ctx.files) - 1 262 case "M": 263 i := ctx.indexOfLatestDirectory 264 latest := ctx.files[i] 265 266 var ok bool 267 latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value) 268 if !ok { 269 log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value) 270 return 271 } 272 273 // save updated directory 274 ctx.files[i] = latest 275 case "R": 276 var regularFile string 277 278 dirIndex := ctx.indexOfLatestDirectory 279 if dirIndex < 0 { 280 regularFile = path.Join("/", f.value) 281 } else { 282 latestDirPath := ctx.files[dirIndex].Path 283 regularFile = path.Join(latestDirPath, f.value) 284 } 285 286 ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: regularFile}) 287 ctx.indexOfLatestRegularFile = len(ctx.files) - 1 288 case "a": 289 i := ctx.indexOfLatestRegularFile 290 latest := ctx.files[i] 291 292 var ok bool 293 latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value) 294 if !ok { 295 log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value) 296 return 297 } 298 299 // save updated file 300 ctx.files[i] = latest 301 case "Z": 302 i := ctx.indexOfLatestRegularFile 303 latest := ctx.files[i] 304 latest.Digest = processChecksum(f.value) 305 306 // save updated file 307 ctx.files[i] = latest 308 } 309 } 310 311 func processFileInfo(v string) (uid, gid, perms string, ok bool) { 312 ok = false 313 314 fileInfo := strings.Split(v, ":") 315 if len(fileInfo) < 3 { 316 return 317 } 318 319 uid = fileInfo[0] 320 gid = fileInfo[1] 321 perms = fileInfo[2] 322 323 // note: there are more optional fields available that we are not capturing, 324 // e.g.: "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4=" 325 326 ok = true 327 return 328 } 329 330 // apkFileParsingContext helps keep track of what file data has been captured so far for the APK currently being parsed. 331 type apkFileParsingContext struct { 332 files []pkg.ApkFileRecord 333 indexOfLatestDirectory int 334 indexOfLatestRegularFile int 335 } 336 337 func newApkFileParsingContext() *apkFileParsingContext { 338 return &apkFileParsingContext{ 339 indexOfLatestDirectory: -1, // no directories yet 340 indexOfLatestRegularFile: -1, // no regular files yet 341 } 342 } 343 344 // parseListValue parses a space-separated list from an apk entry field value. 345 func parseListValue(value string) []string { 346 items := strings.Split(value, " ") 347 if len(items) >= 1 { 348 return items 349 } 350 351 return nil 352 } 353 354 func nilFieldsToEmptySlice(p *parsedData) { 355 if p.Dependencies == nil { 356 p.Dependencies = []string{} 357 } 358 359 if p.Provides == nil { 360 p.Provides = []string{} 361 } 362 363 if p.Files == nil { 364 p.Files = []pkg.ApkFileRecord{} 365 } 366 } 367 368 func processChecksum(value string) *file.Digest { 369 // from: https://wiki.alpinelinux.org/wiki/Apk_spec 370 // The package checksum field is the SHA1 hash of the second gzip stream (control stream) in the package. The 371 // binary hash digest is base64 encoded. This is prefixed with Q1 to differentiate it from the MD5 hashes 372 // used in older index formats. It is not possible to compute this checksum with standard command line tools 373 // but the apk-tools can compute it in their index operation. 374 375 // based on https://github.com/alpinelinux/apk-tools/blob/dd1908f2fc20b4cfe2c15c55fafaa5fadfb599dc/src/blob.c#L379-L393 376 // it seems that the old md5 checksum value was only the hex representation (not base64) 377 algorithm := "md5" 378 if strings.HasPrefix(value, "Q1") { 379 algorithm = "'Q1'+base64(sha1)" 380 } 381 382 return &file.Digest{ 383 Algorithm: algorithm, 384 Value: value, 385 } 386 } 387 388 func discoverPackageDependencies(pkgs []pkg.Package) (relationships []artifact.Relationship) { 389 // map["provides" string] -> packages that provide the "p" key 390 lookup := make(map[string][]pkg.Package) 391 // read "Provides" (p) and add as keys for lookup keys as well as package names 392 for _, p := range pkgs { 393 apkg, ok := p.Metadata.(pkg.ApkMetadata) 394 if !ok { 395 log.Warnf("cataloger failed to extract apk 'provides' metadata for package %+v", p.Name) 396 continue 397 } 398 lookup[p.Name] = append(lookup[p.Name], p) 399 for _, provides := range apkg.Provides { 400 k := stripVersionSpecifier(provides) 401 lookup[k] = append(lookup[k], p) 402 } 403 } 404 405 // read "Pull Dependencies" (D) and match with keys 406 for _, p := range pkgs { 407 apkg, ok := p.Metadata.(pkg.ApkMetadata) 408 if !ok { 409 log.Warnf("cataloger failed to extract apk dependency metadata for package %+v", p.Name) 410 continue 411 } 412 413 for _, depSpecifier := range apkg.Dependencies { 414 // use the lookup to find what pkg we depend on 415 dep := stripVersionSpecifier(depSpecifier) 416 for _, depPkg := range lookup[dep] { 417 // this is a pkg that package "p" depends on... make a relationship 418 relationships = append(relationships, artifact.Relationship{ 419 From: depPkg, 420 To: p, 421 Type: artifact.DependencyOfRelationship, 422 }) 423 } 424 } 425 } 426 return relationships 427 } 428 429 func stripVersionSpecifier(s string) string { 430 // examples: 431 // musl>=1 --> musl 432 // cmd:scanelf=1.3.4-r0 --> cmd:scanelf 433 434 items := internal.SplitAny(s, "<>=") 435 if len(items) == 0 { 436 return s 437 } 438 439 return items[0] 440 }