github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/internal/pkgtest/metadata_tracker.go (about) 1 // Package pkgtest provides test helpers for cataloger and parser testing, 2 // including automatic observation tracking for capability documentation. 3 package pkgtest 4 5 import ( 6 "encoding/json" 7 "os" 8 "path/filepath" 9 "reflect" 10 "sort" 11 "sync" 12 "time" 13 14 "github.com/anchore/syft/internal/capabilities/pkgtestobservation" 15 "github.com/anchore/syft/syft/artifact" 16 "github.com/anchore/syft/syft/pkg" 17 ) 18 19 var ( 20 globalTracker *MetadataTracker 21 globalTrackerOnce sync.Once 22 23 // commonPackageIntegrityFields are common field names used to store integrity hashes in package metadata. 24 // TODO: this is a best-effort list and may need to be expanded as new package types are added. Don't depend on this list to catch everything - it's only for test validation. 25 commonPackageIntegrityFields = []string{ 26 "Integrity", "Checksum", "H1Digest", 27 "OutputHash", "PkgHash", "ContentHash", 28 "PkgHashExt", "Hash", "IntegrityHash", 29 } 30 ) 31 32 // MetadataTracker collects metadata type and package type usage during test execution 33 type MetadataTracker struct { 34 mu sync.Mutex 35 parserData map[string]map[string]map[string]bool // package -> parser -> metadata types (set) 36 catalogerData map[string]map[string]bool // cataloger -> metadata types (set) 37 parserPackageTypes map[string]map[string]map[string]bool // package -> parser -> package types (set) 38 catalogerPackageTypes map[string]map[string]bool // cataloger -> package types (set) 39 40 // unified observations for the current test package 41 observations *pkgtestobservation.Test 42 } 43 44 // getTracker returns the singleton metadata tracker 45 func getTracker() *MetadataTracker { 46 globalTrackerOnce.Do(func() { 47 globalTracker = &MetadataTracker{ 48 parserData: make(map[string]map[string]map[string]bool), 49 catalogerData: make(map[string]map[string]bool), 50 parserPackageTypes: make(map[string]map[string]map[string]bool), 51 catalogerPackageTypes: make(map[string]map[string]bool), 52 } 53 }) 54 return globalTracker 55 } 56 57 // RecordParser records a metadata type usage for a parser function 58 func (t *MetadataTracker) RecordParser(packageName, parserFunction, metadataType string) { 59 if packageName == "" || parserFunction == "" || metadataType == "" { 60 return 61 } 62 63 // filter out non-metadata types 64 if metadataType == "pkg.Package" || metadataType == "" { 65 return 66 } 67 68 t.mu.Lock() 69 defer t.mu.Unlock() 70 71 if t.parserData[packageName] == nil { 72 t.parserData[packageName] = make(map[string]map[string]bool) 73 } 74 75 if t.parserData[packageName][parserFunction] == nil { 76 t.parserData[packageName][parserFunction] = make(map[string]bool) 77 } 78 79 t.parserData[packageName][parserFunction][metadataType] = true 80 } 81 82 // RecordCataloger records a metadata type usage for a cataloger 83 func (t *MetadataTracker) RecordCataloger(catalogerName, metadataType string) { 84 if catalogerName == "" || metadataType == "" { 85 return 86 } 87 88 // filter out non-metadata types 89 if metadataType == "pkg.Package" || metadataType == "" { 90 return 91 } 92 93 t.mu.Lock() 94 defer t.mu.Unlock() 95 96 if t.catalogerData[catalogerName] == nil { 97 t.catalogerData[catalogerName] = make(map[string]bool) 98 } 99 100 t.catalogerData[catalogerName][metadataType] = true 101 } 102 103 // RecordParserPackageType records a package type usage for a parser function 104 func (t *MetadataTracker) RecordParserPackageType(packageName, parserFunction, pkgType string) { 105 if packageName == "" || parserFunction == "" || pkgType == "" { 106 return 107 } 108 109 // filter out unknown types 110 if pkgType == pkg.UnknownPkg.String() || pkgType == "" { 111 return 112 } 113 114 t.mu.Lock() 115 defer t.mu.Unlock() 116 117 if t.parserPackageTypes[packageName] == nil { 118 t.parserPackageTypes[packageName] = make(map[string]map[string]bool) 119 } 120 121 if t.parserPackageTypes[packageName][parserFunction] == nil { 122 t.parserPackageTypes[packageName][parserFunction] = make(map[string]bool) 123 } 124 125 t.parserPackageTypes[packageName][parserFunction][pkgType] = true 126 } 127 128 // RecordCatalogerPackageType records a package type usage for a cataloger 129 func (t *MetadataTracker) RecordCatalogerPackageType(catalogerName, pkgType string) { 130 if catalogerName == "" || pkgType == "" { 131 return 132 } 133 134 // filter out unknown types 135 if pkgType == pkg.UnknownPkg.String() || pkgType == "" { 136 return 137 } 138 139 t.mu.Lock() 140 defer t.mu.Unlock() 141 142 if t.catalogerPackageTypes[catalogerName] == nil { 143 t.catalogerPackageTypes[catalogerName] = make(map[string]bool) 144 } 145 146 t.catalogerPackageTypes[catalogerName][pkgType] = true 147 } 148 149 // RecordParserPackageMetadata extracts and records metadata type and package type from a package for a parser 150 func (t *MetadataTracker) RecordParserPackageMetadata(packageName, parserFunction string, p pkg.Package) { 151 if p.Metadata != nil { 152 metadataType := getMetadataTypeName(p.Metadata) 153 if metadataType != "" { 154 t.RecordParser(packageName, parserFunction, metadataType) 155 } 156 } 157 158 // record package type 159 t.RecordParserPackageType(packageName, parserFunction, string(p.Type)) 160 } 161 162 // RecordCatalogerPackageMetadata extracts and records metadata type and package type from a package for a cataloger 163 func (t *MetadataTracker) RecordCatalogerPackageMetadata(catalogerName string, p pkg.Package) { 164 if p.Metadata != nil { 165 metadataType := getMetadataTypeName(p.Metadata) 166 if metadataType != "" { 167 t.RecordCataloger(catalogerName, metadataType) 168 } 169 } 170 171 // record package type 172 t.RecordCatalogerPackageType(catalogerName, string(p.Type)) 173 } 174 175 // aggregateObservations aggregates package and relationship observations into metadata types, package types, and observations. 176 // this is used by both parser and cataloger observation recording. 177 func aggregateObservations( 178 metadataTypes *[]string, 179 packageTypes *[]string, 180 obs *pkgtestobservation.Observations, 181 pkgs []pkg.Package, 182 relationships []artifact.Relationship, 183 ) { 184 // aggregate observations from packages 185 for _, p := range pkgs { 186 // metadata types 187 if p.Metadata != nil { 188 metadataType := getMetadataTypeName(p.Metadata) 189 if metadataType != "" && !contains(*metadataTypes, metadataType) { 190 *metadataTypes = append(*metadataTypes, metadataType) 191 } 192 } 193 194 // package types 195 pkgType := string(p.Type) 196 if pkgType != "" && pkgType != pkg.UnknownPkg.String() && !contains(*packageTypes, pkgType) { 197 *packageTypes = append(*packageTypes, pkgType) 198 } 199 200 // license observation 201 if !p.Licenses.Empty() { 202 obs.License = true 203 } 204 205 // file listing observation 206 if fileOwner, ok := p.Metadata.(pkg.FileOwner); ok { 207 files := fileOwner.OwnedFiles() 208 if len(files) > 0 { 209 obs.FileListing.Found = true 210 obs.FileListing.Count += len(files) 211 } 212 } 213 214 // file digests observation 215 if hasFileDigests(p.Metadata) { 216 obs.FileDigests.Found = true 217 obs.FileDigests.Count++ 218 } 219 220 // integrity hash observation 221 if hasIntegrityHash(p.Metadata) { 222 obs.IntegrityHash.Found = true 223 obs.IntegrityHash.Count++ 224 } 225 } 226 227 // relationship observations 228 depCount := countDependencyRelationships(relationships) 229 if depCount > 0 { 230 obs.Relationships.Found = true 231 obs.Relationships.Count = depCount 232 } 233 234 // sort arrays for consistency 235 sort.Strings(*metadataTypes) 236 sort.Strings(*packageTypes) 237 } 238 239 // ensureObservationsInitialized ensures t.observations is initialized and package name is set. 240 // must be called with t.mu locked. 241 func (t *MetadataTracker) ensureObservationsInitialized(packageName string) { 242 if t.observations == nil { 243 t.observations = &pkgtestobservation.Test{ 244 Package: packageName, 245 Catalogers: make(map[string]*pkgtestobservation.Cataloger), 246 Parsers: make(map[string]*pkgtestobservation.Parser), 247 } 248 return 249 } 250 251 // update package name if not set (for the first test) or if it matches (for subsequent tests in same package) 252 if t.observations.Package == "" || t.observations.Package == packageName { 253 t.observations.Package = packageName 254 } 255 } 256 257 // getOrCreateParser gets an existing parser observation or creates a new one. 258 // must be called with t.mu locked. 259 func (t *MetadataTracker) getOrCreateParser(parserFunction string) *pkgtestobservation.Parser { 260 if t.observations.Parsers[parserFunction] == nil { 261 t.observations.Parsers[parserFunction] = &pkgtestobservation.Parser{ 262 MetadataTypes: []string{}, 263 PackageTypes: []string{}, 264 Observations: pkgtestobservation.Observations{}, 265 } 266 } 267 return t.observations.Parsers[parserFunction] 268 } 269 270 // getOrCreateCataloger gets an existing cataloger observation or creates a new one. 271 // must be called with t.mu locked. 272 func (t *MetadataTracker) getOrCreateCataloger(catalogerName string) *pkgtestobservation.Cataloger { 273 if t.observations.Catalogers[catalogerName] == nil { 274 t.observations.Catalogers[catalogerName] = &pkgtestobservation.Cataloger{ 275 MetadataTypes: []string{}, 276 PackageTypes: []string{}, 277 Observations: pkgtestobservation.Observations{}, 278 } 279 } 280 return t.observations.Catalogers[catalogerName] 281 } 282 283 // RecordParserObservations records comprehensive observations for a parser. 284 func (t *MetadataTracker) RecordParserObservations( 285 packageName, parserFunction string, 286 pkgs []pkg.Package, 287 relationships []artifact.Relationship, 288 ) { 289 if packageName == "" || parserFunction == "" { 290 return 291 } 292 293 t.mu.Lock() 294 defer t.mu.Unlock() 295 296 t.ensureObservationsInitialized(packageName) 297 parser := t.getOrCreateParser(parserFunction) 298 aggregateObservations(&parser.MetadataTypes, &parser.PackageTypes, &parser.Observations, pkgs, relationships) 299 } 300 301 // RecordCatalogerObservations records comprehensive observations for a cataloger. 302 func (t *MetadataTracker) RecordCatalogerObservations( 303 packageName, catalogerName string, 304 pkgs []pkg.Package, 305 relationships []artifact.Relationship, 306 ) { 307 if packageName == "" || catalogerName == "" { 308 return 309 } 310 311 t.mu.Lock() 312 defer t.mu.Unlock() 313 314 t.ensureObservationsInitialized(packageName) 315 cataloger := t.getOrCreateCataloger(catalogerName) 316 aggregateObservations(&cataloger.MetadataTypes, &cataloger.PackageTypes, &cataloger.Observations, pkgs, relationships) 317 } 318 319 // ===== Metadata Type and Capability Detection ===== 320 // These functions use reflection to inspect package metadata and detect capabilities. 321 // They are best-effort and may not catch all cases. 322 323 // getMetadataTypeName returns the fully qualified type name of metadata (e.g., "pkg.ApkDBEntry"). 324 // extracts just the last package path segment to keep names concise. 325 func getMetadataTypeName(metadata interface{}) string { 326 if metadata == nil { 327 return "" 328 } 329 330 t := reflect.TypeOf(metadata) 331 if t == nil { 332 return "" 333 } 334 335 // handle pointers 336 if t.Kind() == reflect.Ptr { 337 t = t.Elem() 338 } 339 340 // return pkg path + type name (e.g., "pkg.ApkDBEntry") 341 if t.PkgPath() != "" { 342 // extract just "pkg" from "github.com/anchore/syft/syft/pkg" 343 pkgPath := lastPathSegment(t.PkgPath()) 344 return pkgPath + "." + t.Name() 345 } 346 347 return t.Name() 348 } 349 350 // lastPathSegment extracts the last segment from a package path. 351 // for example: "github.com/anchore/syft/syft/pkg" -> "pkg" 352 func lastPathSegment(path string) string { 353 for i := len(path) - 1; i >= 0; i-- { 354 if path[i] == '/' { 355 return path[i+1:] 356 } 357 } 358 return path 359 } 360 361 // hasIntegrityHash checks if metadata contains an integrity hash field. 362 // note: this uses a best-effort approach checking common field names. 363 // DO NOT depend on these values in auto-generated capabilities definitions - use for test validation only. 364 func hasIntegrityHash(metadata interface{}) bool { 365 v := dereferenceToStruct(metadata) 366 if !v.IsValid() || v.Kind() != reflect.Struct { 367 return false 368 } 369 370 for _, fieldName := range commonPackageIntegrityFields { 371 if hasPopulatedStringField(v, fieldName) { 372 return true 373 } 374 } 375 return false 376 } 377 378 // hasFileDigests checks if metadata contains file records with digests. 379 // note: uses a best-effort approach for detection. 380 // DO NOT depend on these values in auto-generated capabilities definitions - use for test validation only. 381 func hasFileDigests(metadata interface{}) bool { 382 v := dereferenceToStruct(metadata) 383 if !v.IsValid() || v.Kind() != reflect.Struct { 384 return false 385 } 386 387 filesField := v.FieldByName("Files") 388 if !filesField.IsValid() || filesField.Kind() != reflect.Slice { 389 return false 390 } 391 392 // check if any file record has a Digest field populated 393 for i := 0; i < filesField.Len(); i++ { 394 if hasPopulatedDigest(filesField.Index(i)) { 395 return true 396 } 397 } 398 return false 399 } 400 401 // dereferenceToStruct handles pointer dereferencing and returns the underlying value. 402 // returns an invalid value if the input is nil or not convertible to a struct. 403 func dereferenceToStruct(v interface{}) reflect.Value { 404 if v == nil { 405 return reflect.Value{} 406 } 407 408 val := reflect.ValueOf(v) 409 if val.Kind() == reflect.Ptr { 410 if val.IsNil() { 411 return reflect.Value{} 412 } 413 val = val.Elem() 414 } 415 return val 416 } 417 418 // hasPopulatedStringField checks if a struct has a non-empty string field with the given name. 419 func hasPopulatedStringField(v reflect.Value, fieldName string) bool { 420 field := v.FieldByName(fieldName) 421 return field.IsValid() && field.Kind() == reflect.String && field.String() != "" 422 } 423 424 // hasPopulatedDigest checks if a file record has a populated Digest field. 425 func hasPopulatedDigest(fileRecord reflect.Value) bool { 426 fileRecord = dereferenceToStruct(fileRecord.Interface()) 427 if !fileRecord.IsValid() || fileRecord.Kind() != reflect.Struct { 428 return false 429 } 430 431 digestField := fileRecord.FieldByName("Digest") 432 if !digestField.IsValid() { 433 return false 434 } 435 436 // check if digest is a pointer and not nil, or a non-zero value 437 switch digestField.Kind() { 438 case reflect.Ptr: 439 return !digestField.IsNil() 440 case reflect.String: 441 return digestField.String() != "" 442 case reflect.Struct: 443 return !digestField.IsZero() 444 } 445 return false 446 } 447 448 // ===== Utility Functions ===== 449 450 // countDependencyRelationships counts the number of dependency relationships. 451 func countDependencyRelationships(relationships []artifact.Relationship) int { 452 count := 0 453 for _, rel := range relationships { 454 if rel.Type == artifact.DependencyOfRelationship { 455 count++ 456 } 457 } 458 return count 459 } 460 461 // contains checks if a string slice contains a specific string. 462 func contains(slice []string, item string) bool { 463 for _, s := range slice { 464 if s == item { 465 return true 466 } 467 } 468 return false 469 } 470 471 // ===== Result Writing ===== 472 473 // WriteResults writes the collected observation data to test-fixtures/test-observations.json. 474 func (t *MetadataTracker) WriteResults() error { 475 t.mu.Lock() 476 defer t.mu.Unlock() 477 478 if t.observations == nil { 479 // no data to write 480 return nil 481 } 482 483 // create output directory 484 outDir := "test-fixtures" 485 if err := os.MkdirAll(outDir, 0755); err != nil { 486 return err 487 } 488 489 // write unified test-observations.json 490 t.observations.UpdatedAt = time.Now().UTC() 491 492 filename := filepath.Join(outDir, "test-observations.json") 493 return writeJSONFile(filename, t.observations) 494 } 495 496 // writeJSONFile writes data as pretty-printed JSON to the specified path. 497 func writeJSONFile(path string, data interface{}) error { 498 file, err := os.Create(path) 499 if err != nil { 500 return err 501 } 502 defer file.Close() 503 504 encoder := json.NewEncoder(file) 505 encoder.SetIndent("", " ") 506 return encoder.Encode(data) 507 } 508 509 // WriteResultsIfEnabled writes results if tracking is enabled. 510 // this is typically called via t.Cleanup() in tests. 511 func WriteResultsIfEnabled() error { 512 tracker := getTracker() 513 return tracker.WriteResults() 514 }