github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/alpine/parse_apk_db.go (about) 1 package alpine 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "path" 8 "regexp" 9 "strconv" 10 "strings" 11 12 "github.com/anchore/syft/syft/artifact" 13 "github.com/anchore/syft/syft/file" 14 "github.com/anchore/syft/syft/linux" 15 "github.com/anchore/syft/syft/pkg" 16 "github.com/anchore/syft/syft/pkg/cataloger/generic" 17 "github.com/lineaje-labs/syft/internal" 18 "github.com/lineaje-labs/syft/internal/log" 19 ) 20 21 // integrity check 22 var _ generic.Parser = parseApkDB 23 24 var ( 25 repoRegex = regexp.MustCompile(`(?m)^https://.*\.alpinelinux\.org/alpine/v([^/]+)/([a-zA-Z0-9_]+)$`) 26 ) 27 28 type parsedData struct { 29 License string `mapstructure:"L" json:"license"` 30 pkg.ApkDBEntry 31 } 32 33 // parseApkDB parses packages from a given APK "installed" flat-file DB. For more 34 // information on specific fields, see https://wiki.alpinelinux.org/wiki/Apk_spec. 35 // 36 //nolint:funlen,gocognit 37 func parseApkDB( 38 resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser, 39 ) ([]pkg.Package, []artifact.Relationship, error) { 40 scanner := bufio.NewScanner(reader) 41 42 var apks []parsedData 43 var currentEntry parsedData 44 entryParsingInProgress := false 45 fileParsingCtx := newApkFileParsingContext() 46 47 // creating a dedicated append-like function here instead of using `append(...)` 48 // below since there is nontrivial logic to be performed for each finalized apk 49 // entry. 50 appendApk := func(p parsedData) { 51 if files := fileParsingCtx.files; len(files) >= 1 { 52 // attached accumulated files to current package 53 p.Files = files 54 55 // reset file parsing for next use 56 fileParsingCtx = newApkFileParsingContext() 57 } 58 59 nilFieldsToEmptySlice(&p) 60 apks = append(apks, p) 61 } 62 63 for scanner.Scan() { 64 line := scanner.Text() 65 66 if line == "" { 67 // i.e. apk entry separator 68 69 if entryParsingInProgress { 70 // current entry is complete 71 appendApk(currentEntry) 72 } 73 74 entryParsingInProgress = false 75 76 // zero-out currentEntry for use by any future entry 77 currentEntry = parsedData{} 78 79 continue 80 } 81 82 field := parseApkField(line) 83 if field == nil { 84 log.Warnf("unable to parse field data from line %q", line) 85 continue 86 } 87 if len(field.name) == 0 { 88 log.Warnf("failed to parse field name from line %q", line) 89 continue 90 } 91 if len(field.value) == 0 { 92 log.Debugf("line %q: parsed field %q appears to have an empty value, skipping", line, field.name) 93 continue 94 } 95 96 entryParsingInProgress = true 97 98 field.apply(¤tEntry, fileParsingCtx) 99 } 100 101 if entryParsingInProgress { 102 // There was no final empty line, so currentEntry hasn't been added to the 103 // collection yet; but we've now reached the end of scanning, so let's be sure to 104 // add currentEntry to the collection. 105 appendApk(currentEntry) 106 } 107 108 if err := scanner.Err(); err != nil { 109 return nil, nil, fmt.Errorf("failed to parse APK installed DB file: %w", err) 110 } 111 112 var r *linux.Release 113 if env != nil { 114 r = env.LinuxRelease 115 } 116 // this is somewhat ugly, but better than completely failing when we can't find the release, 117 // e.g. embedded deeper in the tree, like containers or chroots. 118 // but we now have no way of handling different repository sources. On the other hand, 119 // we never could before this. At least now, we can handle some. 120 // This should get fixed with https://gitlab.alpinelinux.org/alpine/apk-tools/-/issues/10875 121 if r == nil { 122 // find the repositories file from the relative directory of the DB file 123 releases := findReleases(resolver, reader.Location.RealPath) 124 125 if len(releases) > 0 { 126 r = &releases[0] 127 } 128 } 129 130 pkgs := make([]pkg.Package, 0, len(apks)) 131 for _, apk := range apks { 132 pkgs = append(pkgs, newPackage(apk, r, reader.Location)) 133 } 134 135 return pkgs, discoverPackageDependencies(pkgs), nil 136 } 137 138 func findReleases(resolver file.Resolver, dbPath string) []linux.Release { 139 if resolver == nil { 140 return nil 141 } 142 143 reposLocation := path.Clean(path.Join(path.Dir(dbPath), "../../../etc/apk/repositories")) 144 locations, err := resolver.FilesByPath(reposLocation) 145 if err != nil { 146 log.Tracef("unable to find APK repositories file %q: %+v", reposLocation, err) 147 return nil 148 } 149 150 if len(locations) == 0 { 151 return nil 152 } 153 location := locations[0] 154 155 reposReader, err := resolver.FileContentsByLocation(location) 156 if err != nil { 157 log.Tracef("unable to fetch contents for APK repositories file %q: %+v", reposLocation, err) 158 return nil 159 } 160 161 return parseReleasesFromAPKRepository(file.LocationReadCloser{ 162 Location: location, 163 ReadCloser: reposReader, 164 }) 165 } 166 167 func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release { 168 var releases []linux.Release 169 170 reposB, err := io.ReadAll(reader) 171 if err != nil { 172 log.Tracef("unable to read APK repositories file %q: %+v", reader.Location.RealPath, err) 173 return nil 174 } 175 176 parts := repoRegex.FindAllStringSubmatch(string(reposB), -1) 177 for _, part := range parts { 178 if len(part) >= 3 { 179 releases = append(releases, linux.Release{ 180 Name: "Alpine Linux", 181 ID: "alpine", 182 VersionID: part[1], 183 }) 184 } 185 } 186 187 return releases 188 } 189 190 func parseApkField(line string) *apkField { 191 parts := strings.SplitN(line, ":", 2) 192 if len(parts) != 2 { 193 return nil 194 } 195 196 f := apkField{ 197 name: parts[0], 198 value: parts[1], 199 } 200 201 return &f 202 } 203 204 type apkField struct { 205 name string 206 value string 207 } 208 209 //nolint:funlen 210 func (f apkField) apply(p *parsedData, ctx *apkFileParsingContext) { 211 switch f.name { 212 // APKINDEX field parsing 213 214 case "P": 215 p.Package = f.value 216 case "o": 217 p.OriginPackage = f.value 218 case "m": 219 p.Maintainer = f.value 220 case "V": 221 p.Version = f.value 222 case "L": 223 p.License = f.value 224 case "A": 225 p.Architecture = f.value 226 case "U": 227 p.URL = f.value 228 case "T": 229 p.Description = f.value 230 case "S": 231 i, err := strconv.Atoi(f.value) 232 if err != nil { 233 log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err) 234 return 235 } 236 237 p.Size = i 238 case "I": 239 i, err := strconv.Atoi(f.value) 240 if err != nil { 241 log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err) 242 return 243 } 244 245 p.InstalledSize = i 246 case "D": 247 deps := parseListValue(f.value) 248 p.Dependencies = deps 249 case "p": 250 provides := parseListValue(f.value) 251 p.Provides = provides 252 case "C": 253 p.Checksum = f.value 254 case "c": 255 p.GitCommit = f.value 256 257 // File/directory field parsing: 258 259 case "F": 260 directory := path.Join("/", f.value) 261 262 ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: directory}) 263 ctx.indexOfLatestDirectory = len(ctx.files) - 1 264 case "M": 265 i := ctx.indexOfLatestDirectory 266 latest := ctx.files[i] 267 268 var ok bool 269 latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value) 270 if !ok { 271 log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value) 272 return 273 } 274 275 // save updated directory 276 ctx.files[i] = latest 277 case "R": 278 var regularFile string 279 280 dirIndex := ctx.indexOfLatestDirectory 281 if dirIndex < 0 { 282 regularFile = path.Join("/", f.value) 283 } else { 284 latestDirPath := ctx.files[dirIndex].Path 285 regularFile = path.Join(latestDirPath, f.value) 286 } 287 288 ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: regularFile}) 289 ctx.indexOfLatestRegularFile = len(ctx.files) - 1 290 case "a": 291 i := ctx.indexOfLatestRegularFile 292 latest := ctx.files[i] 293 294 var ok bool 295 latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value) 296 if !ok { 297 log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value) 298 return 299 } 300 301 // save updated file 302 ctx.files[i] = latest 303 case "Z": 304 i := ctx.indexOfLatestRegularFile 305 latest := ctx.files[i] 306 latest.Digest = processChecksum(f.value) 307 308 // save updated file 309 ctx.files[i] = latest 310 } 311 } 312 313 func processFileInfo(v string) (uid, gid, perms string, ok bool) { 314 ok = false 315 316 fileInfo := strings.Split(v, ":") 317 if len(fileInfo) < 3 { 318 return 319 } 320 321 uid = fileInfo[0] 322 gid = fileInfo[1] 323 perms = fileInfo[2] 324 325 // note: there are more optional fields available that we are not capturing, 326 // e.g.: "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4=" 327 328 ok = true 329 return 330 } 331 332 // apkFileParsingContext helps keep track of what file data has been captured so far for the APK currently being parsed. 333 type apkFileParsingContext struct { 334 files []pkg.ApkFileRecord 335 indexOfLatestDirectory int 336 indexOfLatestRegularFile int 337 } 338 339 func newApkFileParsingContext() *apkFileParsingContext { 340 return &apkFileParsingContext{ 341 indexOfLatestDirectory: -1, // no directories yet 342 indexOfLatestRegularFile: -1, // no regular files yet 343 } 344 } 345 346 // parseListValue parses a space-separated list from an apk entry field value. 347 func parseListValue(value string) []string { 348 items := strings.Split(value, " ") 349 if len(items) >= 1 { 350 return items 351 } 352 353 return nil 354 } 355 356 func nilFieldsToEmptySlice(p *parsedData) { 357 if p.Dependencies == nil { 358 p.Dependencies = []string{} 359 } 360 361 if p.Provides == nil { 362 p.Provides = []string{} 363 } 364 365 if p.Files == nil { 366 p.Files = []pkg.ApkFileRecord{} 367 } 368 } 369 370 func processChecksum(value string) *file.Digest { 371 // from: https://wiki.alpinelinux.org/wiki/Apk_spec 372 // The package checksum field is the SHA1 hash of the second gzip stream (control stream) in the package. The 373 // binary hash digest is base64 encoded. This is prefixed with Q1 to differentiate it from the MD5 hashes 374 // used in older index formats. It is not possible to compute this checksum with standard command line tools 375 // but the apk-tools can compute it in their index operation. 376 377 // based on https://github.com/alpinelinux/apk-tools/blob/dd1908f2fc20b4cfe2c15c55fafaa5fadfb599dc/src/blob.c#L379-L393 378 // it seems that the old md5 checksum value was only the hex representation (not base64) 379 algorithm := "md5" 380 if strings.HasPrefix(value, "Q1") { 381 algorithm = "'Q1'+base64(sha1)" 382 } 383 384 return &file.Digest{ 385 Algorithm: algorithm, 386 Value: value, 387 } 388 } 389 390 func discoverPackageDependencies(pkgs []pkg.Package) (relationships []artifact.Relationship) { 391 // map["provides" string] -> packages that provide the "p" key 392 lookup := make(map[string][]pkg.Package) 393 // read "Provides" (p) and add as keys for lookup keys as well as package names 394 for _, p := range pkgs { 395 apkg, ok := p.Metadata.(pkg.ApkDBEntry) 396 if !ok { 397 log.Warnf("cataloger failed to extract apk 'provides' metadata for package %+v", p.Name) 398 continue 399 } 400 lookup[p.Name] = append(lookup[p.Name], p) 401 for _, provides := range apkg.Provides { 402 k := stripVersionSpecifier(provides) 403 lookup[k] = append(lookup[k], p) 404 } 405 } 406 407 // read "Pull Dependencies" (D) and match with keys 408 for _, p := range pkgs { 409 apkg, ok := p.Metadata.(pkg.ApkDBEntry) 410 if !ok { 411 log.Warnf("cataloger failed to extract apk dependency metadata for package %+v", p.Name) 412 continue 413 } 414 415 for _, depSpecifier := range apkg.Dependencies { 416 // use the lookup to find what pkg we depend on 417 dep := stripVersionSpecifier(depSpecifier) 418 for _, depPkg := range lookup[dep] { 419 // this is a pkg that package "p" depends on... make a relationship 420 relationships = append(relationships, artifact.Relationship{ 421 From: depPkg, 422 To: p, 423 Type: artifact.DependencyOfRelationship, 424 }) 425 } 426 } 427 } 428 return relationships 429 } 430 431 func stripVersionSpecifier(s string) string { 432 // examples: 433 // musl>=1 --> musl 434 // cmd:scanelf=1.3.4-r0 --> cmd:scanelf 435 436 items := internal.SplitAny(s, "<>=") 437 if len(items) == 0 { 438 return s 439 } 440 441 return items[0] 442 }