github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/golang/license_finder.go (about)

     1  package golang
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"path/filepath"
     7  	"strings"
     8  
     9  	"github.com/spf13/afero"
    10  
    11  	"github.com/anchore/syft/internal"
    12  	"github.com/anchore/syft/internal/log"
    13  	"github.com/anchore/syft/syft/file"
    14  	"github.com/anchore/syft/syft/pkg"
    15  	"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
    16  )
    17  
    18  // resolveModuleLicenses finds and parses license files for Go modules
    19  func resolveModuleLicenses(ctx context.Context, scanRoot string, pkgInfos []pkgInfo, fs afero.Fs) pkg.LicenseSet {
    20  	out := pkg.NewLicenseSet()
    21  
    22  	for _, info := range pkgInfos {
    23  		modDir, pkgDir, err := getAbsolutePkgPaths(info)
    24  		if err != nil {
    25  			continue
    26  		}
    27  
    28  		licenseFiles, err := findAllLicenseCandidatesUpwards(pkgDir, modDir, fs)
    29  		if err != nil {
    30  			continue
    31  		}
    32  
    33  		for _, f := range licenseFiles {
    34  			out.Add(readLicenses(ctx, scanRoot, fs, f)...)
    35  		}
    36  	}
    37  
    38  	return out
    39  }
    40  
    41  func readLicenses(ctx context.Context, scanRoot string, fs afero.Fs, f string) []pkg.License {
    42  	contents, err := fs.Open(f)
    43  	if err != nil {
    44  		log.WithFields("file", f, "error", err).Debug("unable to read license file")
    45  		return nil
    46  	}
    47  	defer internal.CloseAndLogError(contents, f)
    48  	location := file.Location{}
    49  	if scanRoot != "" && strings.HasPrefix(f, scanRoot) {
    50  		// include location when licenses are found within the scan target
    51  		location = file.NewLocation(strings.TrimPrefix(f, scanRoot))
    52  	}
    53  	return pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(location, contents))
    54  }
    55  
    56  /*
    57  findAllLicenseCandidatesUpwards performs a bubble-up search per package:
    58  1. pkgInfos represents a sparse vertical distribution of packages within modules
    59  2. we get more pkgInfos for free when the build configuration is updated
    60  
    61  The recursion terminates via two conditions:
    62  - When dir is outside stopAt boundary (happy case)
    63  - When reaching filesystem root where parent == dir (edge case)
    64  
    65  Note: The code does NOT follow symlinks. It returns a slice of absolute paths that
    66  represent license file matches that are resolved independently of the bubble-up.
    67  
    68  When we should consider redesign tip to stem:
    69  - Reduced filesystem calls: Single traversal vs multiple per-package
    70  - Path deduplication: Avoids re-scanning common parent directories
    71  - Better for wide module structures: Efficient when many packages share parent paths
    72  - We need to consider the case here where nested modules are visited by accident and licenses
    73  are erroneously associated to a 'parent module'; bubble up currently prevents this
    74  */
    75  func findAllLicenseCandidatesUpwards(dir string, stopAt string, fs afero.Fs) ([]string, error) {
    76  	// Validate that both paths are absolute
    77  	if !filepath.IsAbs(dir) {
    78  		return nil, fmt.Errorf("dir must be an absolute path, got: %s", dir)
    79  	}
    80  	if !filepath.IsAbs(stopAt) {
    81  		return nil, fmt.Errorf("stopAt must be an absolute path, got: %s", stopAt)
    82  	}
    83  
    84  	return findLicenseCandidates(dir, stopAt, fs)
    85  }
    86  
    87  func findLicenseCandidates(dir string, stopAt string, fs afero.Fs) ([]string, error) {
    88  	// stop if we've gone outside the stopAt directory
    89  	if !strings.HasPrefix(dir, stopAt) {
    90  		return []string{}, nil
    91  	}
    92  
    93  	out, err := findLicensesInDir(dir, fs)
    94  	if err != nil {
    95  		return nil, err
    96  	}
    97  
    98  	parent := filepath.Dir(dir)
    99  	// can't go any higher up the directory tree: "/" case
   100  	if parent == dir {
   101  		return out, nil
   102  	}
   103  
   104  	// search parent directory and combine results
   105  	parentLicenses, err := findLicenseCandidates(parent, stopAt, fs)
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  
   110  	// Combine current directory licenses with parent directory licenses
   111  	return append(out, parentLicenses...), nil
   112  }
   113  
   114  func getAbsolutePkgPaths(info pkgInfo) (modDir string, pkgDir string, err error) {
   115  	pkgDir, err = filepath.Abs(info.pkgDir)
   116  	if err != nil {
   117  		return modDir, pkgDir, err
   118  	}
   119  
   120  	modDir, err = filepath.Abs(info.moduleDir)
   121  	if err != nil {
   122  		return modDir, pkgDir, err
   123  	}
   124  
   125  	if !strings.HasPrefix(pkgDir, modDir) {
   126  		return modDir, pkgDir, fmt.Errorf("modDir %s should contain pkgDir %s", modDir, pkgDir)
   127  	}
   128  
   129  	return modDir, pkgDir, nil
   130  }
   131  
   132  func findLicensesInDir(dir string, fs afero.Fs) ([]string, error) {
   133  	var out []string
   134  
   135  	dirContents, err := afero.ReadDir(fs, dir)
   136  	if err != nil {
   137  		return nil, err
   138  	}
   139  
   140  	for _, f := range dirContents {
   141  		if f.IsDir() {
   142  			continue
   143  		}
   144  
   145  		if licenses.IsLicenseFile(f.Name()) {
   146  			path := filepath.Join(dir, f.Name())
   147  			out = append(out, path)
   148  		}
   149  	}
   150  
   151  	return out, nil
   152  }