github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/internal/pkgtest/metadata_tracker.go (about)

     1  // Package pkgtest provides test helpers for cataloger and parser testing,
     2  // including automatic observation tracking for capability documentation.
     3  package pkgtest
     4  
     5  import (
     6  	"encoding/json"
     7  	"os"
     8  	"path/filepath"
     9  	"reflect"
    10  	"sort"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/anchore/syft/internal/capabilities/pkgtestobservation"
    15  	"github.com/anchore/syft/syft/artifact"
    16  	"github.com/anchore/syft/syft/pkg"
    17  )
    18  
    19  var (
    20  	globalTracker     *MetadataTracker
    21  	globalTrackerOnce sync.Once
    22  
    23  	// commonPackageIntegrityFields are common field names used to store integrity hashes in package metadata.
    24  	// TODO: this is a best-effort list and may need to be expanded as new package types are added. Don't depend on this list to catch everything - it's only for test validation.
    25  	commonPackageIntegrityFields = []string{
    26  		"Integrity", "Checksum", "H1Digest",
    27  		"OutputHash", "PkgHash", "ContentHash",
    28  		"PkgHashExt", "Hash", "IntegrityHash",
    29  	}
    30  )
    31  
    32  // MetadataTracker collects metadata type and package type usage during test execution
    33  type MetadataTracker struct {
    34  	mu                    sync.Mutex
    35  	parserData            map[string]map[string]map[string]bool // package -> parser -> metadata types (set)
    36  	catalogerData         map[string]map[string]bool            // cataloger -> metadata types (set)
    37  	parserPackageTypes    map[string]map[string]map[string]bool // package -> parser -> package types (set)
    38  	catalogerPackageTypes map[string]map[string]bool            // cataloger -> package types (set)
    39  
    40  	// unified observations for the current test package
    41  	observations *pkgtestobservation.Test
    42  }
    43  
    44  // getTracker returns the singleton metadata tracker
    45  func getTracker() *MetadataTracker {
    46  	globalTrackerOnce.Do(func() {
    47  		globalTracker = &MetadataTracker{
    48  			parserData:            make(map[string]map[string]map[string]bool),
    49  			catalogerData:         make(map[string]map[string]bool),
    50  			parserPackageTypes:    make(map[string]map[string]map[string]bool),
    51  			catalogerPackageTypes: make(map[string]map[string]bool),
    52  		}
    53  	})
    54  	return globalTracker
    55  }
    56  
    57  // RecordParser records a metadata type usage for a parser function
    58  func (t *MetadataTracker) RecordParser(packageName, parserFunction, metadataType string) {
    59  	if packageName == "" || parserFunction == "" || metadataType == "" {
    60  		return
    61  	}
    62  
    63  	// filter out non-metadata types
    64  	if metadataType == "pkg.Package" || metadataType == "" {
    65  		return
    66  	}
    67  
    68  	t.mu.Lock()
    69  	defer t.mu.Unlock()
    70  
    71  	if t.parserData[packageName] == nil {
    72  		t.parserData[packageName] = make(map[string]map[string]bool)
    73  	}
    74  
    75  	if t.parserData[packageName][parserFunction] == nil {
    76  		t.parserData[packageName][parserFunction] = make(map[string]bool)
    77  	}
    78  
    79  	t.parserData[packageName][parserFunction][metadataType] = true
    80  }
    81  
    82  // RecordCataloger records a metadata type usage for a cataloger
    83  func (t *MetadataTracker) RecordCataloger(catalogerName, metadataType string) {
    84  	if catalogerName == "" || metadataType == "" {
    85  		return
    86  	}
    87  
    88  	// filter out non-metadata types
    89  	if metadataType == "pkg.Package" || metadataType == "" {
    90  		return
    91  	}
    92  
    93  	t.mu.Lock()
    94  	defer t.mu.Unlock()
    95  
    96  	if t.catalogerData[catalogerName] == nil {
    97  		t.catalogerData[catalogerName] = make(map[string]bool)
    98  	}
    99  
   100  	t.catalogerData[catalogerName][metadataType] = true
   101  }
   102  
   103  // RecordParserPackageType records a package type usage for a parser function
   104  func (t *MetadataTracker) RecordParserPackageType(packageName, parserFunction, pkgType string) {
   105  	if packageName == "" || parserFunction == "" || pkgType == "" {
   106  		return
   107  	}
   108  
   109  	// filter out unknown types
   110  	if pkgType == pkg.UnknownPkg.String() || pkgType == "" {
   111  		return
   112  	}
   113  
   114  	t.mu.Lock()
   115  	defer t.mu.Unlock()
   116  
   117  	if t.parserPackageTypes[packageName] == nil {
   118  		t.parserPackageTypes[packageName] = make(map[string]map[string]bool)
   119  	}
   120  
   121  	if t.parserPackageTypes[packageName][parserFunction] == nil {
   122  		t.parserPackageTypes[packageName][parserFunction] = make(map[string]bool)
   123  	}
   124  
   125  	t.parserPackageTypes[packageName][parserFunction][pkgType] = true
   126  }
   127  
   128  // RecordCatalogerPackageType records a package type usage for a cataloger
   129  func (t *MetadataTracker) RecordCatalogerPackageType(catalogerName, pkgType string) {
   130  	if catalogerName == "" || pkgType == "" {
   131  		return
   132  	}
   133  
   134  	// filter out unknown types
   135  	if pkgType == pkg.UnknownPkg.String() || pkgType == "" {
   136  		return
   137  	}
   138  
   139  	t.mu.Lock()
   140  	defer t.mu.Unlock()
   141  
   142  	if t.catalogerPackageTypes[catalogerName] == nil {
   143  		t.catalogerPackageTypes[catalogerName] = make(map[string]bool)
   144  	}
   145  
   146  	t.catalogerPackageTypes[catalogerName][pkgType] = true
   147  }
   148  
   149  // RecordParserPackageMetadata extracts and records metadata type and package type from a package for a parser
   150  func (t *MetadataTracker) RecordParserPackageMetadata(packageName, parserFunction string, p pkg.Package) {
   151  	if p.Metadata != nil {
   152  		metadataType := getMetadataTypeName(p.Metadata)
   153  		if metadataType != "" {
   154  			t.RecordParser(packageName, parserFunction, metadataType)
   155  		}
   156  	}
   157  
   158  	// record package type
   159  	t.RecordParserPackageType(packageName, parserFunction, string(p.Type))
   160  }
   161  
   162  // RecordCatalogerPackageMetadata extracts and records metadata type and package type from a package for a cataloger
   163  func (t *MetadataTracker) RecordCatalogerPackageMetadata(catalogerName string, p pkg.Package) {
   164  	if p.Metadata != nil {
   165  		metadataType := getMetadataTypeName(p.Metadata)
   166  		if metadataType != "" {
   167  			t.RecordCataloger(catalogerName, metadataType)
   168  		}
   169  	}
   170  
   171  	// record package type
   172  	t.RecordCatalogerPackageType(catalogerName, string(p.Type))
   173  }
   174  
   175  // aggregateObservations aggregates package and relationship observations into metadata types, package types, and observations.
   176  // this is used by both parser and cataloger observation recording.
   177  func aggregateObservations(
   178  	metadataTypes *[]string,
   179  	packageTypes *[]string,
   180  	obs *pkgtestobservation.Observations,
   181  	pkgs []pkg.Package,
   182  	relationships []artifact.Relationship,
   183  ) {
   184  	// aggregate observations from packages
   185  	for _, p := range pkgs {
   186  		// metadata types
   187  		if p.Metadata != nil {
   188  			metadataType := getMetadataTypeName(p.Metadata)
   189  			if metadataType != "" && !contains(*metadataTypes, metadataType) {
   190  				*metadataTypes = append(*metadataTypes, metadataType)
   191  			}
   192  		}
   193  
   194  		// package types
   195  		pkgType := string(p.Type)
   196  		if pkgType != "" && pkgType != pkg.UnknownPkg.String() && !contains(*packageTypes, pkgType) {
   197  			*packageTypes = append(*packageTypes, pkgType)
   198  		}
   199  
   200  		// license observation
   201  		if !p.Licenses.Empty() {
   202  			obs.License = true
   203  		}
   204  
   205  		// file listing observation
   206  		if fileOwner, ok := p.Metadata.(pkg.FileOwner); ok {
   207  			files := fileOwner.OwnedFiles()
   208  			if len(files) > 0 {
   209  				obs.FileListing.Found = true
   210  				obs.FileListing.Count += len(files)
   211  			}
   212  		}
   213  
   214  		// file digests observation
   215  		if hasFileDigests(p.Metadata) {
   216  			obs.FileDigests.Found = true
   217  			obs.FileDigests.Count++
   218  		}
   219  
   220  		// integrity hash observation
   221  		if hasIntegrityHash(p.Metadata) {
   222  			obs.IntegrityHash.Found = true
   223  			obs.IntegrityHash.Count++
   224  		}
   225  	}
   226  
   227  	// relationship observations
   228  	depCount := countDependencyRelationships(relationships)
   229  	if depCount > 0 {
   230  		obs.Relationships.Found = true
   231  		obs.Relationships.Count = depCount
   232  	}
   233  
   234  	// sort arrays for consistency
   235  	sort.Strings(*metadataTypes)
   236  	sort.Strings(*packageTypes)
   237  }
   238  
   239  // ensureObservationsInitialized ensures t.observations is initialized and package name is set.
   240  // must be called with t.mu locked.
   241  func (t *MetadataTracker) ensureObservationsInitialized(packageName string) {
   242  	if t.observations == nil {
   243  		t.observations = &pkgtestobservation.Test{
   244  			Package:    packageName,
   245  			Catalogers: make(map[string]*pkgtestobservation.Cataloger),
   246  			Parsers:    make(map[string]*pkgtestobservation.Parser),
   247  		}
   248  		return
   249  	}
   250  
   251  	// update package name if not set (for the first test) or if it matches (for subsequent tests in same package)
   252  	if t.observations.Package == "" || t.observations.Package == packageName {
   253  		t.observations.Package = packageName
   254  	}
   255  }
   256  
   257  // getOrCreateParser gets an existing parser observation or creates a new one.
   258  // must be called with t.mu locked.
   259  func (t *MetadataTracker) getOrCreateParser(parserFunction string) *pkgtestobservation.Parser {
   260  	if t.observations.Parsers[parserFunction] == nil {
   261  		t.observations.Parsers[parserFunction] = &pkgtestobservation.Parser{
   262  			MetadataTypes: []string{},
   263  			PackageTypes:  []string{},
   264  			Observations:  pkgtestobservation.Observations{},
   265  		}
   266  	}
   267  	return t.observations.Parsers[parserFunction]
   268  }
   269  
   270  // getOrCreateCataloger gets an existing cataloger observation or creates a new one.
   271  // must be called with t.mu locked.
   272  func (t *MetadataTracker) getOrCreateCataloger(catalogerName string) *pkgtestobservation.Cataloger {
   273  	if t.observations.Catalogers[catalogerName] == nil {
   274  		t.observations.Catalogers[catalogerName] = &pkgtestobservation.Cataloger{
   275  			MetadataTypes: []string{},
   276  			PackageTypes:  []string{},
   277  			Observations:  pkgtestobservation.Observations{},
   278  		}
   279  	}
   280  	return t.observations.Catalogers[catalogerName]
   281  }
   282  
   283  // RecordParserObservations records comprehensive observations for a parser.
   284  func (t *MetadataTracker) RecordParserObservations(
   285  	packageName, parserFunction string,
   286  	pkgs []pkg.Package,
   287  	relationships []artifact.Relationship,
   288  ) {
   289  	if packageName == "" || parserFunction == "" {
   290  		return
   291  	}
   292  
   293  	t.mu.Lock()
   294  	defer t.mu.Unlock()
   295  
   296  	t.ensureObservationsInitialized(packageName)
   297  	parser := t.getOrCreateParser(parserFunction)
   298  	aggregateObservations(&parser.MetadataTypes, &parser.PackageTypes, &parser.Observations, pkgs, relationships)
   299  }
   300  
   301  // RecordCatalogerObservations records comprehensive observations for a cataloger.
   302  func (t *MetadataTracker) RecordCatalogerObservations(
   303  	packageName, catalogerName string,
   304  	pkgs []pkg.Package,
   305  	relationships []artifact.Relationship,
   306  ) {
   307  	if packageName == "" || catalogerName == "" {
   308  		return
   309  	}
   310  
   311  	t.mu.Lock()
   312  	defer t.mu.Unlock()
   313  
   314  	t.ensureObservationsInitialized(packageName)
   315  	cataloger := t.getOrCreateCataloger(catalogerName)
   316  	aggregateObservations(&cataloger.MetadataTypes, &cataloger.PackageTypes, &cataloger.Observations, pkgs, relationships)
   317  }
   318  
   319  // ===== Metadata Type and Capability Detection =====
   320  // These functions use reflection to inspect package metadata and detect capabilities.
   321  // They are best-effort and may not catch all cases.
   322  
   323  // getMetadataTypeName returns the fully qualified type name of metadata (e.g., "pkg.ApkDBEntry").
   324  // extracts just the last package path segment to keep names concise.
   325  func getMetadataTypeName(metadata interface{}) string {
   326  	if metadata == nil {
   327  		return ""
   328  	}
   329  
   330  	t := reflect.TypeOf(metadata)
   331  	if t == nil {
   332  		return ""
   333  	}
   334  
   335  	// handle pointers
   336  	if t.Kind() == reflect.Ptr {
   337  		t = t.Elem()
   338  	}
   339  
   340  	// return pkg path + type name (e.g., "pkg.ApkDBEntry")
   341  	if t.PkgPath() != "" {
   342  		// extract just "pkg" from "github.com/anchore/syft/syft/pkg"
   343  		pkgPath := lastPathSegment(t.PkgPath())
   344  		return pkgPath + "." + t.Name()
   345  	}
   346  
   347  	return t.Name()
   348  }
   349  
   350  // lastPathSegment extracts the last segment from a package path.
   351  // for example: "github.com/anchore/syft/syft/pkg" -> "pkg"
   352  func lastPathSegment(path string) string {
   353  	for i := len(path) - 1; i >= 0; i-- {
   354  		if path[i] == '/' {
   355  			return path[i+1:]
   356  		}
   357  	}
   358  	return path
   359  }
   360  
   361  // hasIntegrityHash checks if metadata contains an integrity hash field.
   362  // note: this uses a best-effort approach checking common field names.
   363  // DO NOT depend on these values in auto-generated capabilities definitions - use for test validation only.
   364  func hasIntegrityHash(metadata interface{}) bool {
   365  	v := dereferenceToStruct(metadata)
   366  	if !v.IsValid() || v.Kind() != reflect.Struct {
   367  		return false
   368  	}
   369  
   370  	for _, fieldName := range commonPackageIntegrityFields {
   371  		if hasPopulatedStringField(v, fieldName) {
   372  			return true
   373  		}
   374  	}
   375  	return false
   376  }
   377  
   378  // hasFileDigests checks if metadata contains file records with digests.
   379  // note: uses a best-effort approach for detection.
   380  // DO NOT depend on these values in auto-generated capabilities definitions - use for test validation only.
   381  func hasFileDigests(metadata interface{}) bool {
   382  	v := dereferenceToStruct(metadata)
   383  	if !v.IsValid() || v.Kind() != reflect.Struct {
   384  		return false
   385  	}
   386  
   387  	filesField := v.FieldByName("Files")
   388  	if !filesField.IsValid() || filesField.Kind() != reflect.Slice {
   389  		return false
   390  	}
   391  
   392  	// check if any file record has a Digest field populated
   393  	for i := 0; i < filesField.Len(); i++ {
   394  		if hasPopulatedDigest(filesField.Index(i)) {
   395  			return true
   396  		}
   397  	}
   398  	return false
   399  }
   400  
   401  // dereferenceToStruct handles pointer dereferencing and returns the underlying value.
   402  // returns an invalid value if the input is nil or not convertible to a struct.
   403  func dereferenceToStruct(v interface{}) reflect.Value {
   404  	if v == nil {
   405  		return reflect.Value{}
   406  	}
   407  
   408  	val := reflect.ValueOf(v)
   409  	if val.Kind() == reflect.Ptr {
   410  		if val.IsNil() {
   411  			return reflect.Value{}
   412  		}
   413  		val = val.Elem()
   414  	}
   415  	return val
   416  }
   417  
   418  // hasPopulatedStringField checks if a struct has a non-empty string field with the given name.
   419  func hasPopulatedStringField(v reflect.Value, fieldName string) bool {
   420  	field := v.FieldByName(fieldName)
   421  	return field.IsValid() && field.Kind() == reflect.String && field.String() != ""
   422  }
   423  
   424  // hasPopulatedDigest checks if a file record has a populated Digest field.
   425  func hasPopulatedDigest(fileRecord reflect.Value) bool {
   426  	fileRecord = dereferenceToStruct(fileRecord.Interface())
   427  	if !fileRecord.IsValid() || fileRecord.Kind() != reflect.Struct {
   428  		return false
   429  	}
   430  
   431  	digestField := fileRecord.FieldByName("Digest")
   432  	if !digestField.IsValid() {
   433  		return false
   434  	}
   435  
   436  	// check if digest is a pointer and not nil, or a non-zero value
   437  	switch digestField.Kind() {
   438  	case reflect.Ptr:
   439  		return !digestField.IsNil()
   440  	case reflect.String:
   441  		return digestField.String() != ""
   442  	case reflect.Struct:
   443  		return !digestField.IsZero()
   444  	}
   445  	return false
   446  }
   447  
   448  // ===== Utility Functions =====
   449  
   450  // countDependencyRelationships counts the number of dependency relationships.
   451  func countDependencyRelationships(relationships []artifact.Relationship) int {
   452  	count := 0
   453  	for _, rel := range relationships {
   454  		if rel.Type == artifact.DependencyOfRelationship {
   455  			count++
   456  		}
   457  	}
   458  	return count
   459  }
   460  
   461  // contains checks if a string slice contains a specific string.
   462  func contains(slice []string, item string) bool {
   463  	for _, s := range slice {
   464  		if s == item {
   465  			return true
   466  		}
   467  	}
   468  	return false
   469  }
   470  
   471  // ===== Result Writing =====
   472  
   473  // WriteResults writes the collected observation data to test-fixtures/test-observations.json.
   474  func (t *MetadataTracker) WriteResults() error {
   475  	t.mu.Lock()
   476  	defer t.mu.Unlock()
   477  
   478  	if t.observations == nil {
   479  		// no data to write
   480  		return nil
   481  	}
   482  
   483  	// create output directory
   484  	outDir := "test-fixtures"
   485  	if err := os.MkdirAll(outDir, 0755); err != nil {
   486  		return err
   487  	}
   488  
   489  	// write unified test-observations.json
   490  	t.observations.UpdatedAt = time.Now().UTC()
   491  
   492  	filename := filepath.Join(outDir, "test-observations.json")
   493  	return writeJSONFile(filename, t.observations)
   494  }
   495  
   496  // writeJSONFile writes data as pretty-printed JSON to the specified path.
   497  func writeJSONFile(path string, data interface{}) error {
   498  	file, err := os.Create(path)
   499  	if err != nil {
   500  		return err
   501  	}
   502  	defer file.Close()
   503  
   504  	encoder := json.NewEncoder(file)
   505  	encoder.SetIndent("", "  ")
   506  	return encoder.Encode(data)
   507  }
   508  
   509  // WriteResultsIfEnabled writes results if tracking is enabled.
   510  // this is typically called via t.Cleanup() in tests.
   511  func WriteResultsIfEnabled() error {
   512  	tracker := getTracker()
   513  	return tracker.WriteResults()
   514  }