github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/alpine/parse_apk_db.go (about) 1 package alpine 2 3 import ( 4 "bufio" 5 "context" 6 "fmt" 7 "io" 8 "path" 9 "regexp" 10 "strconv" 11 "strings" 12 13 "github.com/anchore/syft/internal" 14 "github.com/anchore/syft/internal/log" 15 "github.com/anchore/syft/syft/artifact" 16 "github.com/anchore/syft/syft/file" 17 "github.com/anchore/syft/syft/linux" 18 "github.com/anchore/syft/syft/pkg" 19 "github.com/anchore/syft/syft/pkg/cataloger/generic" 20 ) 21 22 // integrity check 23 var _ generic.Parser = parseApkDB 24 25 var ( 26 repoRegex = regexp.MustCompile(`(?m)^https://.*\.alpinelinux\.org/alpine/v([^/]+)/([a-zA-Z0-9_]+)$`) 27 ) 28 29 type parsedData struct { 30 License string `mapstructure:"L" json:"license"` 31 pkg.ApkDBEntry 32 } 33 34 // parseApkDB parses packages from a given APK "installed" flat-file DB. For more 35 // information on specific fields, see https://wiki.alpinelinux.org/wiki/Apk_spec. 36 // 37 //nolint:funlen,gocognit 38 func parseApkDB(_ context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 39 scanner := bufio.NewScanner(reader) 40 41 var apks []parsedData 42 var currentEntry parsedData 43 entryParsingInProgress := false 44 fileParsingCtx := newApkFileParsingContext() 45 46 // creating a dedicated append-like function here instead of using `append(...)` 47 // below since there is nontrivial logic to be performed for each finalized apk 48 // entry. 49 appendApk := func(p parsedData) { 50 if files := fileParsingCtx.files; len(files) >= 1 { 51 // attached accumulated files to current package 52 p.Files = files 53 54 // reset file parsing for next use 55 fileParsingCtx = newApkFileParsingContext() 56 } 57 58 nilFieldsToEmptySlice(&p) 59 apks = append(apks, p) 60 } 61 62 for scanner.Scan() { 63 line := scanner.Text() 64 65 if line == "" { 66 // i.e. apk entry separator 67 68 if entryParsingInProgress { 69 // current entry is complete 70 appendApk(currentEntry) 71 } 72 73 entryParsingInProgress = false 74 75 // zero-out currentEntry for use by any future entry 76 currentEntry = parsedData{} 77 78 continue 79 } 80 81 field := parseApkField(line) 82 if field == nil { 83 log.Warnf("unable to parse field data from line %q", line) 84 continue 85 } 86 if len(field.name) == 0 { 87 log.Warnf("failed to parse field name from line %q", line) 88 continue 89 } 90 if len(field.value) == 0 { 91 log.Debugf("line %q: parsed field %q appears to have an empty value, skipping", line, field.name) 92 continue 93 } 94 95 entryParsingInProgress = true 96 97 field.apply(¤tEntry, fileParsingCtx) 98 } 99 100 if entryParsingInProgress { 101 // There was no final empty line, so currentEntry hasn't been added to the 102 // collection yet; but we've now reached the end of scanning, so let's be sure to 103 // add currentEntry to the collection. 104 appendApk(currentEntry) 105 } 106 107 if err := scanner.Err(); err != nil { 108 return nil, nil, fmt.Errorf("failed to parse APK installed DB file: %w", err) 109 } 110 111 var r *linux.Release 112 if env != nil { 113 r = env.LinuxRelease 114 } 115 // this is somewhat ugly, but better than completely failing when we can't find the release, 116 // e.g. embedded deeper in the tree, like containers or chroots. 117 // but we now have no way of handling different repository sources. On the other hand, 118 // we never could before this. At least now, we can handle some. 119 // This should get fixed with https://gitlab.alpinelinux.org/alpine/apk-tools/-/issues/10875 120 if r == nil { 121 // find the repositories file from the relative directory of the DB file 122 releases := findReleases(resolver, reader.Location.RealPath) 123 124 if len(releases) > 0 { 125 r = &releases[0] 126 } 127 } 128 129 pkgs := make([]pkg.Package, 0, len(apks)) 130 for _, apk := range apks { 131 pkgs = append(pkgs, newPackage(apk, r, reader.Location)) 132 } 133 134 return pkgs, nil, nil 135 } 136 137 func findReleases(resolver file.Resolver, dbPath string) []linux.Release { 138 if resolver == nil { 139 return nil 140 } 141 142 reposLocation := path.Clean(path.Join(path.Dir(dbPath), "../../../etc/apk/repositories")) 143 locations, err := resolver.FilesByPath(reposLocation) 144 if err != nil { 145 log.Tracef("unable to find APK repositories file %q: %+v", reposLocation, err) 146 return nil 147 } 148 149 if len(locations) == 0 { 150 return nil 151 } 152 location := locations[0] 153 154 reposReader, err := resolver.FileContentsByLocation(location) 155 if err != nil { 156 log.Tracef("unable to fetch contents for APK repositories file %q: %+v", reposLocation, err) 157 return nil 158 } 159 defer internal.CloseAndLogError(reposReader, location.RealPath) 160 161 return parseReleasesFromAPKRepository(file.LocationReadCloser{ 162 Location: location, 163 ReadCloser: reposReader, 164 }) 165 } 166 167 func parseReleasesFromAPKRepository(reader file.LocationReadCloser) []linux.Release { 168 var releases []linux.Release 169 170 reposB, err := io.ReadAll(reader) 171 if err != nil { 172 log.Tracef("unable to read APK repositories file %q: %+v", reader.Location.RealPath, err) 173 return nil 174 } 175 176 parts := repoRegex.FindAllStringSubmatch(string(reposB), -1) 177 for _, part := range parts { 178 if len(part) >= 3 { 179 releases = append(releases, linux.Release{ 180 Name: "Alpine Linux", 181 ID: "alpine", 182 VersionID: part[1], 183 }) 184 } 185 } 186 187 return releases 188 } 189 190 func parseApkField(line string) *apkField { 191 parts := strings.SplitN(line, ":", 2) 192 if len(parts) != 2 { 193 return nil 194 } 195 196 f := apkField{ 197 name: parts[0], 198 value: parts[1], 199 } 200 201 return &f 202 } 203 204 type apkField struct { 205 name string 206 value string 207 } 208 209 //nolint:funlen 210 func (f apkField) apply(p *parsedData, ctx *apkFileParsingContext) { 211 switch f.name { 212 // APKINDEX field parsing 213 214 case "P": 215 p.Package = f.value 216 case "o": 217 p.OriginPackage = f.value 218 case "m": 219 p.Maintainer = f.value 220 case "V": 221 p.Version = f.value 222 case "L": 223 p.License = f.value 224 case "A": 225 p.Architecture = f.value 226 case "U": 227 p.URL = f.value 228 case "T": 229 p.Description = f.value 230 case "S": 231 i, err := strconv.Atoi(f.value) 232 if err != nil { 233 log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err) 234 return 235 } 236 237 p.Size = i 238 case "I": 239 i, err := strconv.Atoi(f.value) 240 if err != nil { 241 log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err) 242 return 243 } 244 245 p.InstalledSize = i 246 case "D": 247 deps := parseListValue(f.value) 248 p.Dependencies = deps 249 case "p": 250 provides := parseListValue(f.value) 251 p.Provides = provides 252 case "C": 253 p.Checksum = f.value 254 case "c": 255 p.GitCommit = f.value 256 257 // File/directory field parsing: 258 259 case "F": 260 directory := path.Join("/", f.value) 261 262 ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: directory}) 263 ctx.indexOfLatestDirectory = len(ctx.files) - 1 264 case "M": 265 i := ctx.indexOfLatestDirectory 266 latest := ctx.files[i] 267 268 var ok bool 269 latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value) 270 if !ok { 271 log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value) 272 return 273 } 274 275 // save updated directory 276 ctx.files[i] = latest 277 case "R": 278 var regularFile string 279 280 dirIndex := ctx.indexOfLatestDirectory 281 if dirIndex < 0 { 282 regularFile = path.Join("/", f.value) 283 } else { 284 latestDirPath := ctx.files[dirIndex].Path 285 regularFile = path.Join(latestDirPath, f.value) 286 } 287 288 ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: regularFile}) 289 ctx.indexOfLatestRegularFile = len(ctx.files) - 1 290 case "a": 291 i := ctx.indexOfLatestRegularFile 292 latest := ctx.files[i] 293 294 var ok bool 295 latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value) 296 if !ok { 297 log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value) 298 return 299 } 300 301 // save updated file 302 ctx.files[i] = latest 303 case "Z": 304 i := ctx.indexOfLatestRegularFile 305 latest := ctx.files[i] 306 latest.Digest = processChecksum(f.value) 307 308 // save updated file 309 ctx.files[i] = latest 310 } 311 } 312 313 func processFileInfo(v string) (uid, gid, perms string, ok bool) { 314 ok = false 315 316 fileInfo := strings.Split(v, ":") 317 if len(fileInfo) < 3 { 318 return 319 } 320 321 uid = fileInfo[0] 322 gid = fileInfo[1] 323 perms = fileInfo[2] 324 325 // note: there are more optional fields available that we are not capturing, 326 // e.g.: "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4=" 327 328 ok = true 329 return 330 } 331 332 // apkFileParsingContext helps keep track of what file data has been captured so far for the APK currently being parsed. 333 type apkFileParsingContext struct { 334 files []pkg.ApkFileRecord 335 indexOfLatestDirectory int 336 indexOfLatestRegularFile int 337 } 338 339 func newApkFileParsingContext() *apkFileParsingContext { 340 return &apkFileParsingContext{ 341 indexOfLatestDirectory: -1, // no directories yet 342 indexOfLatestRegularFile: -1, // no regular files yet 343 } 344 } 345 346 // parseListValue parses a space-separated list from an apk entry field value. 347 func parseListValue(value string) []string { 348 items := strings.Split(value, " ") 349 if len(items) >= 1 { 350 return items 351 } 352 353 return nil 354 } 355 356 func nilFieldsToEmptySlice(p *parsedData) { 357 if p.Dependencies == nil { 358 p.Dependencies = []string{} 359 } 360 361 if p.Provides == nil { 362 p.Provides = []string{} 363 } 364 365 if p.Files == nil { 366 p.Files = []pkg.ApkFileRecord{} 367 } 368 } 369 370 func processChecksum(value string) *file.Digest { 371 // from: https://wiki.alpinelinux.org/wiki/Apk_spec 372 // The package checksum field is the SHA1 hash of the second gzip stream (control stream) in the package. The 373 // binary hash digest is base64 encoded. This is prefixed with Q1 to differentiate it from the MD5 hashes 374 // used in older index formats. It is not possible to compute this checksum with standard command line tools 375 // but the apk-tools can compute it in their index operation. 376 377 // based on https://github.com/alpinelinux/apk-tools/blob/dd1908f2fc20b4cfe2c15c55fafaa5fadfb599dc/src/blob.c#L379-L393 378 // it seems that the old md5 checksum value was only the hex representation (not base64) 379 algorithm := "md5" 380 if strings.HasPrefix(value, "Q1") { 381 algorithm = "'Q1'+base64(sha1)" 382 } 383 384 return &file.Digest{ 385 Algorithm: algorithm, 386 Value: value, 387 } 388 }