github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/alpine/parse_apk_db.go (about) 1 package alpine 2 3 import ( 4 "bufio" 5 "context" 6 "fmt" 7 "io" 8 "path" 9 "regexp" 10 "strconv" 11 "strings" 12 13 "github.com/anchore/syft/internal" 14 "github.com/anchore/syft/internal/log" 15 "github.com/anchore/syft/internal/unknown" 16 "github.com/anchore/syft/syft/artifact" 17 "github.com/anchore/syft/syft/file" 18 "github.com/anchore/syft/syft/linux" 19 "github.com/anchore/syft/syft/pkg" 20 "github.com/anchore/syft/syft/pkg/cataloger/generic" 21 ) 22 23 // integrity check 24 var _ generic.Parser = parseApkDB 25 26 var ( 27 repoRegex = regexp.MustCompile(`(?m)^https://.*\.alpinelinux\.org/alpine/v([^/]+)/([a-zA-Z0-9_]+)$`) 28 ) 29 30 type parsedData struct { 31 License string `mapstructure:"L" json:"license"` 32 pkg.ApkDBEntry 33 } 34 35 // parseApkDB parses packages from a given APK "installed" flat-file DB. For more 36 // information on specific fields, see https://wiki.alpinelinux.org/wiki/Apk_spec. 37 // 38 //nolint:funlen 39 func parseApkDB(ctx context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 40 scanner := bufio.NewScanner(reader) 41 42 var errs error 43 var apks []parsedData 44 var currentEntry parsedData 45 entryParsingInProgress := false 46 fileParsingCtx := newApkFileParsingContext() 47 48 // creating a dedicated append-like function here instead of using `append(...)` 49 // below since there is nontrivial logic to be performed for each finalized apk 50 // entry. 51 appendApk := func(p parsedData) { 52 if files := fileParsingCtx.files; len(files) >= 1 { 53 // attached accumulated files to current package 54 p.Files = files 55 56 // reset file parsing for next use 57 fileParsingCtx = newApkFileParsingContext() 58 } 59 60 nilFieldsToEmptySlice(&p) 61 apks = append(apks, p) 62 } 63 64 for scanner.Scan() { 65 line := scanner.Text() 66 67 if line == "" { 68 // i.e. apk entry separator 69 70 if entryParsingInProgress { 71 // current entry is complete 72 appendApk(currentEntry) 73 } 74 75 entryParsingInProgress = false 76 77 // zero-out currentEntry for use by any future entry 78 currentEntry = parsedData{} 79 80 continue 81 } 82 83 field := parseApkField(line) 84 if field == nil { 85 log.Debugf("unable to parse field data from line %q", line) 86 errs = unknown.Appendf(errs, reader, "unable to parse field data from line %q", line) 87 continue 88 } 89 if len(field.name) == 0 { 90 log.Debugf("failed to parse field name from line %q", line) 91 errs = unknown.Appendf(errs, reader, "failed to parse field name from line %q", line) 92 continue 93 } 94 if len(field.value) == 0 { 95 log.Debugf("line %q: parsed field %q appears to have an empty value, skipping", line, field.name) 96 continue 97 } 98 99 entryParsingInProgress = true 100 101 field.apply(¤tEntry, fileParsingCtx) 102 } 103 104 if entryParsingInProgress { 105 // There was no final empty line, so currentEntry hasn't been added to the 106 // collection yet; but we've now reached the end of scanning, so let's be sure to 107 // add currentEntry to the collection. 108 appendApk(currentEntry) 109 } 110 111 if err := scanner.Err(); err != nil { 112 return nil, nil, fmt.Errorf("failed to parse APK installed DB file: %w", err) 113 } 114 115 var r *linux.Release 116 if env != nil { 117 r = env.LinuxRelease 118 } 119 // this is somewhat ugly, but better than completely failing when we can't find the release, 120 // e.g. embedded deeper in the tree, like containers or chroots. 121 // but we now have no way of handling different repository sources. On the other hand, 122 // we never could before this. At least now, we can handle some. 123 // This should get fixed with https://gitlab.alpinelinux.org/alpine/apk-tools/-/issues/10875 124 if r == nil { 125 // find the repositories file from the relative directory of the DB file 126 releases := findReleases(resolver, reader.RealPath) 127 128 if len(releases) > 0 { 129 r = &releases[0] 130 } 131 } 132 133 pkgs := make([]pkg.Package, 0, len(apks)) 134 for _, apk := range apks { 135 pkgs = append(pkgs, newPackage(ctx, apk, r, reader.Location)) 136 } 137 138 return pkgs, nil, errs 139 } 140 141 func findReleases(resolver file.Resolver, dbPath string) []linux.Release { 142 if resolver == nil { 143 return nil 144 } 145 146 reposLocation := path.Clean(path.Join(path.Dir(dbPath), "../../../etc/apk/repositories")) 147 locations, err := resolver.FilesByPath(reposLocation) 148 if err != nil { 149 log.Tracef("unable to find APK repositories file %q: %+v", reposLocation, err) 150 return nil 151 } 152 153 if len(locations) == 0 { 154 return nil 155 } 156 location := locations[0] 157 158 reposReader, err := resolver.FileContentsByLocation(location) 159 if err != nil { 160 log.Tracef("unable to fetch contents for APK repositories file %q: %+v", reposLocation, err) 161 return nil 162 } 163 defer internal.CloseAndLogError(reposReader, location.RealPath) 164 165 return parseReleasesFromAPKRepository(file.LocationReadCloser{ 166 Location: location, 167 ReadCloser: reposReader, 168 }) 169 } 170 171 func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release { 172 var releases []linux.Release 173 174 reposB, err := io.ReadAll(reader) 175 if err != nil { 176 log.Tracef("unable to read APK repositories file %q: %+v", reader.RealPath, err) 177 return nil 178 } 179 180 parts := repoRegex.FindAllStringSubmatch(string(reposB), -1) 181 for _, part := range parts { 182 if len(part) >= 3 { 183 releases = append(releases, linux.Release{ 184 Name: "Alpine Linux", 185 ID: "alpine", 186 VersionID: part[1], 187 }) 188 } 189 } 190 191 return releases 192 } 193 194 func parseApkField(line string) *apkField { 195 parts := strings.SplitN(line, ":", 2) 196 if len(parts) != 2 { 197 return nil 198 } 199 200 f := apkField{ 201 name: parts[0], 202 value: parts[1], 203 } 204 205 return &f 206 } 207 208 type apkField struct { 209 name string 210 value string 211 } 212 213 //nolint:funlen 214 func (f apkField) apply(p *parsedData, ctx *apkFileParsingContext) { 215 switch f.name { 216 // APKINDEX field parsing 217 218 case "P": 219 p.Package = f.value 220 case "o": 221 p.OriginPackage = f.value 222 case "m": 223 p.Maintainer = f.value 224 case "V": 225 p.Version = f.value 226 case "L": 227 p.License = f.value 228 case "A": 229 p.Architecture = f.value 230 case "U": 231 p.URL = f.value 232 case "T": 233 p.Description = f.value 234 case "S": 235 i, err := strconv.Atoi(f.value) 236 if err != nil { 237 log.Debugf("unable to parse value %q for field %q: %w", f.value, f.name, err) 238 return 239 } 240 241 p.Size = i 242 case "I": 243 i, err := strconv.Atoi(f.value) 244 if err != nil { 245 log.Debugf("unable to parse value %q for field %q: %w", f.value, f.name, err) 246 return 247 } 248 249 p.InstalledSize = i 250 case "D": 251 deps := parseListValue(f.value) 252 p.Dependencies = deps 253 case "p": 254 provides := parseListValue(f.value) 255 p.Provides = provides 256 case "C": 257 p.Checksum = f.value 258 case "c": 259 p.GitCommit = f.value 260 261 // File/directory field parsing: 262 263 case "F": 264 directory := path.Join("/", f.value) 265 266 ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: directory}) 267 ctx.indexOfLatestDirectory = len(ctx.files) - 1 268 case "M": 269 i := ctx.indexOfLatestDirectory 270 latest := ctx.files[i] 271 272 var ok bool 273 latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value) 274 if !ok { 275 log.Debugf("unexpected value for APK ACL field %q: %q", f.name, f.value) 276 return 277 } 278 279 // save updated directory 280 ctx.files[i] = latest 281 case "R": 282 var regularFile string 283 284 dirIndex := ctx.indexOfLatestDirectory 285 if dirIndex < 0 { 286 regularFile = path.Join("/", f.value) 287 } else { 288 latestDirPath := ctx.files[dirIndex].Path 289 regularFile = path.Join(latestDirPath, f.value) 290 } 291 292 ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: regularFile}) 293 ctx.indexOfLatestRegularFile = len(ctx.files) - 1 294 case "a": 295 i := ctx.indexOfLatestRegularFile 296 latest := ctx.files[i] 297 298 var ok bool 299 latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value) 300 if !ok { 301 log.Debugf("unexpected value for APK ACL field %q: %q", f.name, f.value) 302 return 303 } 304 305 // save updated file 306 ctx.files[i] = latest 307 case "Z": 308 i := ctx.indexOfLatestRegularFile 309 latest := ctx.files[i] 310 latest.Digest = processChecksum(f.value) 311 312 // save updated file 313 ctx.files[i] = latest 314 } 315 } 316 317 func processFileInfo(v string) (uid, gid, perms string, ok bool) { 318 ok = false 319 320 fileInfo := strings.Split(v, ":") 321 if len(fileInfo) < 3 { 322 return 323 } 324 325 uid = fileInfo[0] 326 gid = fileInfo[1] 327 perms = fileInfo[2] 328 329 // note: there are more optional fields available that we are not capturing, 330 // e.g.: "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4=" 331 332 ok = true 333 return 334 } 335 336 // apkFileParsingContext helps keep track of what file data has been captured so far for the APK currently being parsed. 337 type apkFileParsingContext struct { 338 files []pkg.ApkFileRecord 339 indexOfLatestDirectory int 340 indexOfLatestRegularFile int 341 } 342 343 func newApkFileParsingContext() *apkFileParsingContext { 344 return &apkFileParsingContext{ 345 indexOfLatestDirectory: -1, // no directories yet 346 indexOfLatestRegularFile: -1, // no regular files yet 347 } 348 } 349 350 // parseListValue parses a space-separated list from an apk entry field value. 351 func parseListValue(value string) []string { 352 items := strings.Split(value, " ") 353 if len(items) >= 1 { 354 return items 355 } 356 357 return nil 358 } 359 360 func nilFieldsToEmptySlice(p *parsedData) { 361 if p.Dependencies == nil { 362 p.Dependencies = []string{} 363 } 364 365 if p.Provides == nil { 366 p.Provides = []string{} 367 } 368 369 if p.Files == nil { 370 p.Files = []pkg.ApkFileRecord{} 371 } 372 } 373 374 func processChecksum(value string) *file.Digest { 375 // from: https://wiki.alpinelinux.org/wiki/Apk_spec 376 // The package checksum field is the SHA1 hash of the second gzip stream (control stream) in the package. The 377 // binary hash digest is base64 encoded. This is prefixed with Q1 to differentiate it from the MD5 hashes 378 // used in older index formats. It is not possible to compute this checksum with standard command line tools 379 // but the apk-tools can compute it in their index operation. 380 381 // based on https://github.com/alpinelinux/apk-tools/blob/dd1908f2fc20b4cfe2c15c55fafaa5fadfb599dc/src/blob.c#L379-L393 382 // it seems that the old md5 checksum value was only the hex representation (not base64) 383 algorithm := "md5" 384 if strings.HasPrefix(value, "Q1") { 385 algorithm = "'Q1'+base64(sha1)" 386 } 387 388 return &file.Digest{ 389 Algorithm: algorithm, 390 Value: value, 391 } 392 }