github.com/quay/claircore@v1.5.28/rhel/repositoryscanner.go (about) 1 package rhel 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/json" 7 "errors" 8 "fmt" 9 "io/fs" 10 "net/http" 11 "net/url" 12 "os" 13 "path/filepath" 14 "runtime/trace" 15 "strings" 16 "time" 17 18 "github.com/quay/zlog" 19 20 "github.com/quay/claircore" 21 "github.com/quay/claircore/indexer" 22 "github.com/quay/claircore/internal/zreader" 23 "github.com/quay/claircore/rhel/dockerfile" 24 "github.com/quay/claircore/rhel/internal/common" 25 "github.com/quay/claircore/rhel/internal/containerapi" 26 "github.com/quay/claircore/toolkit/types/cpe" 27 ) 28 29 /* 30 RepositoryScanner implements repository detection logic for RHEL. 31 32 The RHEL detection logic needs outside information because the Red Hat build 33 system does not (and did not, in the past) store the relevant information in the 34 layer itself. In addition, dnf and yum do not persist provenance information 35 outside of a cache and rpm considers such information outside its baliwick. 36 37 In the case of the RHEL ecosystem, "repository" is a bit of a misnomer, as 38 advisories are tracked on the Product level, and so Clair's "repository" data is 39 used instead to indicate a Product. This mismatch can lead to apparent 40 duplications in reporting. For example, if an advisory is marked as affecting 41 "cpe:/a:redhat:enterprise_linux:8" and 42 "cpe:/a:redhat:enterprise_linux:8::appstream", this results in two advisories 43 being recorded. (CPEs do not namespace the way this example may imply; that is 44 to say, the latter is not "contained in" or a "member of" the former.) If a 45 layer reports that it is both the "cpe:/a:redhat:enterprise_linux:8" and 46 "cpe:/a:redhat:enterprise_linux:8::appstream" layer, then both advisories match. 47 */ 48 type RepositoryScanner struct { 49 // These members are created after the Configure call. 50 upd *common.Updater 51 apiFetcher *containerapi.ContainerAPI 52 client *http.Client 53 54 cfg RepositoryScannerConfig 55 } 56 57 var ( 58 _ indexer.RepositoryScanner = (*RepositoryScanner)(nil) 59 _ indexer.RPCScanner = (*RepositoryScanner)(nil) 60 _ indexer.VersionedScanner = (*RepositoryScanner)(nil) 61 ) 62 63 // RepositoryScannerConfig is the configuration expected for a 64 // [RepositoryScanner]. 65 // 66 // Providing the "URL" and "File" members controls how the RepositoryScanner 67 // handles updating its mapping file: 68 // 69 // - If the "URL" is provided or no configuration is provided, the mapping file 70 // is fetched at construction time and then updated periodically. 71 // - If only the "File" is provided, it will be consulted exclusively. 72 // - If both the "URL" and "File" are provided, the file will be loaded 73 // initially and then updated periodically from the URL. 74 type RepositoryScannerConfig struct { 75 // DisableAPI disables the use of the API. 76 DisableAPI bool `json:"disable_api" yaml:"disable_api"` 77 // API is the URL to talk to the Red Hat Container API. 78 // 79 // See [DefaultContainerAPI] and [containerapi.ContainerAPI]. 80 API string `json:"api" yaml:"api"` 81 // Repo2CPEMappingURL can be used to fetch the repo mapping file. 82 // Consulting the mapping file is preferred over the Container API. 83 // 84 // See [DefaultRepo2CPEMappingURL] and [repo2cpe]. 85 Repo2CPEMappingURL string `json:"repo2cpe_mapping_url" yaml:"repo2cpe_mapping_url"` 86 // Repo2CPEMappingFile, if specified, is consulted instead of the [Repo2CPEMappingURL]. 87 // 88 // This should be provided to avoid any network traffic. 89 Repo2CPEMappingFile string `json:"repo2cpe_mapping_file" yaml:"repo2cpe_mapping_file"` 90 // Timeout controls the timeout for any remote calls this package makes. 91 // 92 // The default is 10 seconds. 93 Timeout time.Duration `json:"timeout" yaml:"timeout"` 94 } 95 96 const ( 97 // RepositoryKey marks a repository as being based on a Red Hat CPE. 98 repositoryKey = "rhel-cpe-repository" 99 // DefaultContainerAPI is the default Red Hat Container API URL. 100 // 101 //doc:url indexer 102 DefaultContainerAPI = "https://catalog.redhat.com/api/containers/" 103 // DefaultRepo2CPEMappingURL is default URL with a mapping file provided by Red Hat. 104 // 105 //doc:url indexer 106 DefaultRepo2CPEMappingURL = "https://access.redhat.com/security/data/metrics/repository-to-cpe.json" 107 ) 108 109 // Name implements [indexer.VersionedScanner]. 110 func (*RepositoryScanner) Name() string { return "rhel-repository-scanner" } 111 112 // Version implements [indexer.VersionedScanner]. 113 func (*RepositoryScanner) Version() string { return "1.1" } 114 115 // Kind implements [indexer.VersionedScanner]. 116 func (*RepositoryScanner) Kind() string { return "repository" } 117 118 // Configure implements [indexer.RPCScanner]. 119 func (r *RepositoryScanner) Configure(ctx context.Context, f indexer.ConfigDeserializer, c *http.Client) error { 120 ctx = zlog.ContextWithValues(ctx, 121 "component", "rhel/RepositoryScanner.Configure", 122 "version", r.Version()) 123 r.client = c 124 if err := f(&r.cfg); err != nil { 125 return err 126 } 127 // Set defaults if not set via passed function. 128 if r.cfg.API == "" { 129 r.cfg.API = DefaultContainerAPI 130 } 131 if r.cfg.Timeout == 0 { 132 r.cfg.Timeout = 10 * time.Second 133 } 134 135 var mf *mappingFile 136 switch { 137 case r.cfg.Repo2CPEMappingURL == "" && r.cfg.Repo2CPEMappingFile == "": 138 // defaults 139 r.cfg.Repo2CPEMappingURL = DefaultRepo2CPEMappingURL 140 case r.cfg.Repo2CPEMappingURL != "" && r.cfg.Repo2CPEMappingFile == "": 141 // remote only 142 case r.cfg.Repo2CPEMappingFile != "": 143 // seed from file 144 f, err := os.Open(r.cfg.Repo2CPEMappingFile) 145 if err != nil { 146 return err 147 } 148 defer f.Close() 149 z, err := zreader.Reader(f) 150 if err != nil { 151 return err 152 } 153 defer z.Close() 154 mf = &mappingFile{} 155 if err := json.NewDecoder(z).Decode(mf); err != nil { 156 return err 157 } 158 } 159 r.upd = common.NewUpdater(r.cfg.Repo2CPEMappingURL, mf) 160 tctx, done := context.WithTimeout(ctx, r.cfg.Timeout) 161 defer done() 162 r.upd.Get(tctx, c) 163 164 if r.cfg.DisableAPI { 165 zlog.Debug(ctx).Msg("container API disabled") 166 } else { 167 // Additional setup 168 root, err := url.Parse(r.cfg.API) 169 if err != nil { 170 return err 171 } 172 173 r.apiFetcher = &containerapi.ContainerAPI{ 174 Root: root, 175 Client: r.client, 176 } 177 } 178 179 return nil 180 } 181 182 // Scan implements [indexer.RepositoryScanner]. 183 func (r *RepositoryScanner) Scan(ctx context.Context, l *claircore.Layer) (repositories []*claircore.Repository, err error) { 184 defer trace.StartRegion(ctx, "Scanner.Scan").End() 185 ctx = zlog.ContextWithValues(ctx, 186 "component", "rhel/RepositoryScanner.Scan", 187 "version", r.Version(), 188 "layer", l.Hash.String()) 189 zlog.Debug(ctx).Msg("start") 190 defer zlog.Debug(ctx).Msg("done") 191 192 sys, err := l.FS() 193 if err != nil { 194 return nil, fmt.Errorf("rhel: unable to open layer: %w", err) 195 } 196 197 tctx, done := context.WithTimeout(ctx, r.cfg.Timeout) 198 defer done() 199 cmi, err := r.upd.Get(tctx, r.client) 200 if err != nil && cmi == nil { 201 return []*claircore.Repository{}, err 202 } 203 cm, ok := cmi.(*mappingFile) 204 if !ok || cm == nil { 205 return []*claircore.Repository{}, fmt.Errorf("rhel: unable to create a mappingFile object") 206 } 207 CPEs, err := mapContentSets(ctx, sys, cm) 208 if err != nil { 209 return []*claircore.Repository{}, err 210 } 211 if CPEs == nil && r.apiFetcher != nil { 212 // Embedded content-sets are available only for new images. 213 // For old images, use fallback option and query Red Hat Container API. 214 ctx, done := context.WithTimeout(ctx, r.cfg.Timeout) 215 defer done() 216 CPEs, err = mapContainerAPI(ctx, sys, r.apiFetcher) 217 if err != nil { 218 return []*claircore.Repository{}, err 219 } 220 } 221 222 for _, cpeID := range CPEs { 223 r := &claircore.Repository{ 224 Name: cpeID, 225 Key: repositoryKey, 226 } 227 r.CPE, err = cpe.Unbind(cpeID) 228 if err != nil { 229 zlog.Warn(ctx). 230 Err(err). 231 Str("url", `https://bugzilla.redhat.com/enter_bug.cgi?product=Container%20Factory`). 232 Str("cpeID", cpeID). 233 Msg("invalid CPE, please report a bug upstream") 234 continue 235 } 236 237 repositories = append(repositories, r) 238 } 239 240 return repositories, nil 241 } 242 243 // MapContentSets returns a slice of CPEs bound into strings, as discovered by 244 // examining information contained within the container. 245 func mapContentSets(ctx context.Context, sys fs.FS, cm *mappingFile) ([]string, error) { 246 // Get CPEs using embedded content-set files. 247 // The files is be stored in /root/buildinfo/content_manifests/ and will need to 248 // be translated using mapping file provided by Red Hat's PST team. 249 ms, err := fs.Glob(sys, `root/buildinfo/content_manifests/*.json`) 250 if err != nil { 251 panic("programmer error: " + err.Error()) 252 } 253 if ms == nil { 254 return nil, nil 255 } 256 p := ms[0] 257 zlog.Debug(ctx). 258 Str("manifest-path", p). 259 Msg("found content manifest file") 260 b, err := fs.ReadFile(sys, p) 261 if err != nil { 262 return nil, fmt.Errorf("rhel: unable to read %q: %w", p, err) 263 } 264 var m contentManifest 265 var syntaxErr *json.SyntaxError 266 err = json.Unmarshal(b, &m) 267 switch { 268 case errors.Is(err, nil): 269 case errors.As(err, &syntaxErr): 270 zlog.Warn(ctx). 271 Str("manifest-path", p). 272 Err(err). 273 Msg("could not unmarshal content_manifests file") 274 return nil, nil 275 default: 276 return nil, err 277 } 278 // If the JSON file is malformed and has a 0-length list of content sets, 279 // report nil so that the API can be consulted. 280 if len(m.ContentSets) == 0 { 281 return nil, nil 282 } 283 return cm.Get(ctx, m.ContentSets) 284 } 285 286 // MappingFile is a data struct for mapping file between repositories and CPEs 287 type mappingFile struct { 288 Data map[string]repo `json:"data"` 289 } 290 291 // Repo structure holds information about CPEs for given repo 292 type repo struct { 293 CPEs []string `json:"cpes"` 294 } 295 296 func (m *mappingFile) Get(ctx context.Context, rs []string) ([]string, error) { 297 s := map[string]struct{}{} 298 for _, r := range rs { 299 cpes, ok := m.Data[r] 300 if !ok { 301 zlog.Debug(ctx). 302 Str("repository", r). 303 Msg("repository not present in a mapping file") 304 continue 305 } 306 for _, cpe := range cpes.CPEs { 307 s[cpe] = struct{}{} 308 } 309 } 310 311 i, r := 0, make([]string, len(s)) 312 for k := range s { 313 r[i] = k 314 i++ 315 } 316 return r, nil 317 } 318 319 // ContentManifest structure is the data provided by OSBS. 320 type contentManifest struct { 321 ContentSets []string `json:"content_sets"` 322 Metadata manifestMetadata `json:"metadata"` 323 } 324 325 // ManifestMetadata struct holds additional metadata about the build. 326 type manifestMetadata struct { 327 ImageLayerIndex int `json:"image_layer_index"` 328 } 329 330 // MapContainerAPI returns a slice of CPEs bound into strings, as discovered by 331 // pulling labels from the Dockerfile contained in the layer and submitted to the 332 // Container API. 333 func mapContainerAPI(ctx context.Context, sys fs.FS, api *containerapi.ContainerAPI) ([]string, error) { 334 ms, err := fs.Glob(sys, "root/buildinfo/Dockerfile-*") 335 if err != nil { 336 panic("programmer error: " + err.Error()) 337 } 338 if ms == nil { 339 return nil, nil 340 } 341 p := ms[0] 342 b, err := fs.ReadFile(sys, p) 343 if err != nil { 344 return nil, fmt.Errorf("rhel: unable to read %q: %w", p, err) 345 } 346 347 nvr, arch, err := extractBuildNVR(ctx, p, b) 348 switch { 349 case errors.Is(err, nil): 350 case errors.Is(err, errBadDockerfile): 351 zlog.Info(ctx). 352 AnErr("label_error", err). 353 Msg("bad dockerfile") 354 return nil, nil 355 default: 356 return nil, err 357 } 358 359 cpes, err := api.GetCPEs(ctx, nvr, arch) 360 if err != nil { 361 return nil, err 362 } 363 zlog.Debug(ctx). 364 Str("nvr", nvr). 365 Str("arch", arch). 366 Strs("cpes", cpes). 367 Msg("got CPEs from container API") 368 return cpes, nil 369 } 370 371 // ExtractBuildNVR extracts the build's NVR and arch from the named Dockerfile and its contents. 372 // 373 // The `redhat.com.component` label is extracted from the contents and used as the "name." 374 // "Version" and "release" are extracted from the Dockerfile path. 375 // "Arch" is extracted from the `architecture` label. 376 func extractBuildNVR(ctx context.Context, dockerfilePath string, b []byte) (string, string, error) { 377 const ( 378 comp = `com.redhat.component` 379 arch = `architecture` 380 ) 381 ls, err := dockerfile.GetLabels(ctx, bytes.NewReader(b)) 382 if err != nil { 383 return "", "", err 384 } 385 n, ok := ls[comp] 386 if !ok { 387 return "", "", missingLabel(comp) 388 } 389 a, ok := ls[arch] 390 if !ok { 391 return "", "", missingLabel(arch) 392 } 393 v, r := parseVersionRelease(filepath.Base(dockerfilePath)) 394 return fmt.Sprintf("%s-%s-%s", n, v, r), a, nil 395 } 396 397 var errBadDockerfile = errors.New("bad dockerfile") 398 399 // MissingLabel is an error that provides information on which label was missing 400 // and "Is" errBadDockerfile. 401 type missingLabel string 402 403 func (e missingLabel) Error() string { 404 return fmt.Sprintf("dockerfile missing expected label %q", string(e)) 405 } 406 407 func (e missingLabel) Is(tgt error) bool { 408 if oe, ok := tgt.(missingLabel); ok { 409 return string(oe) == string(e) 410 } 411 return errors.Is(tgt, errBadDockerfile) 412 } 413 414 // ParseVersionRelease reports the version and release from an NVR string. 415 func parseVersionRelease(nvr string) (version, release string) { 416 releaseIndex := strings.LastIndex(nvr, "-") 417 release = nvr[releaseIndex+1:] 418 419 versionIndex := strings.LastIndex(nvr[:releaseIndex], "-") 420 version = nvr[versionIndex+1 : releaseIndex] 421 return 422 }