github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/golang/licenses.go (about)

     1  package golang
     2  
     3  import (
     4  	"archive/zip"
     5  	"bytes"
     6  	"context"
     7  	"fmt"
     8  	"io"
     9  	"io/fs"
    10  	"net/http"
    11  	"net/url"
    12  	"os"
    13  	"path"
    14  	"path/filepath"
    15  	"regexp"
    16  	"strings"
    17  
    18  	"github.com/go-git/go-billy/v5/memfs"
    19  	"github.com/go-git/go-git/v5"
    20  	"github.com/go-git/go-git/v5/plumbing"
    21  	"github.com/go-git/go-git/v5/storage/memory"
    22  
    23  	"github.com/anchore/syft/internal"
    24  	"github.com/anchore/syft/internal/cache"
    25  	"github.com/anchore/syft/internal/log"
    26  	"github.com/anchore/syft/syft/file"
    27  	"github.com/anchore/syft/syft/pkg"
    28  	"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
    29  )
    30  
// goLicenseResolver looks up license information for Go modules. Its methods
// consult, in order, the scanned source, the local module cache, the local
// vendor directory, and finally remote sources; each of the non-source steps
// is gated by a flag in opts.
//
// Fields:
//   - catalogerName: the name handed to newGoLicenseResolver.
//   - opts: the cataloger configuration; its Search* flags, Proxies and
//     NoProxy values are read while resolving licenses.
//   - localModCacheDir: filesystem rooted at opts.LocalModCacheDir; nil when
//     the local module cache search is disabled.
//   - localVendorDir: filesystem rooted at the configured vendor directory
//     (or "<cwd>/vendor" when none is configured); nil when the local vendor
//     search is disabled.
//   - licenseCache: cache wrapped around remote lookups, keyed by
//     "<module>/<version>".
type goLicenseResolver struct {
	catalogerName    string
	opts             CatalogerConfig
	localModCacheDir fs.FS
	localVendorDir   fs.FS
	licenseCache     cache.Resolver[[]pkg.License]
}
    38  
    39  func newGoLicenseResolver(catalogerName string, opts CatalogerConfig) goLicenseResolver {
    40  	var localModCacheDir fs.FS
    41  	if opts.SearchLocalModCacheLicenses {
    42  		localModCacheDir = os.DirFS(opts.LocalModCacheDir)
    43  	}
    44  
    45  	var localVendorDir fs.FS
    46  	if opts.SearchLocalVendorLicenses {
    47  		vendorDir := opts.LocalVendorDir
    48  		if vendorDir == "" {
    49  			wd, err := os.Getwd()
    50  			if err != nil {
    51  				log.Debugf("unable to get CWD while resolving the local go vendor dir: %v", err)
    52  			} else {
    53  				vendorDir = filepath.Join(wd, "vendor")
    54  			}
    55  		}
    56  		localVendorDir = os.DirFS(vendorDir)
    57  	}
    58  
    59  	return goLicenseResolver{
    60  		catalogerName:    catalogerName,
    61  		opts:             opts,
    62  		localModCacheDir: localModCacheDir,
    63  		localVendorDir:   localVendorDir,
    64  		licenseCache:     cache.GetResolverCachingErrors[[]pkg.License]("golang", "v2"),
    65  	}
    66  }
    67  
    68  func remotesForModule(proxies []string, noProxy []string, module string) []string {
    69  	for _, pattern := range noProxy {
    70  		if matched, err := path.Match(pattern, module); err == nil && matched {
    71  			// matched to be direct for this module
    72  			return directProxiesOnly
    73  		}
    74  	}
    75  
    76  	return proxies
    77  }
    78  
    79  func (c *goLicenseResolver) getLicenses(ctx context.Context, resolver file.Resolver, moduleName, moduleVersion string) []pkg.License {
    80  	// search the scan target first, ignoring local and remote sources
    81  	pkgLicenses, err := c.findLicensesInSource(ctx, resolver,
    82  		fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion),
    83  	)
    84  	if err != nil {
    85  		log.WithFields("error", err, "module", moduleName, "version", moduleVersion).Trace("unable to read golang licenses from source")
    86  	}
    87  	if len(pkgLicenses) > 0 {
    88  		return pkgLicenses
    89  	}
    90  
    91  	// look in the local host mod directory...
    92  	if c.opts.SearchLocalModCacheLicenses {
    93  		pkgLicenses, err = c.getLicensesFromLocal(ctx, moduleName, moduleVersion)
    94  		if err != nil {
    95  			log.WithFields("error", err, "module", moduleName, "version", moduleVersion).Trace("unable to read golang licenses local")
    96  		}
    97  		if len(pkgLicenses) > 0 {
    98  			return pkgLicenses
    99  		}
   100  	}
   101  
   102  	// look in the local vendor directory...
   103  	if c.opts.SearchLocalVendorLicenses {
   104  		pkgLicenses, err = c.getLicensesFromLocalVendor(ctx, moduleName)
   105  		if err != nil {
   106  			log.WithFields("error", err, "module", moduleName, "version", moduleVersion).Trace("unable to read golang licenses vendor")
   107  		}
   108  		if len(pkgLicenses) > 0 {
   109  			return pkgLicenses
   110  		}
   111  	}
   112  
   113  	// download from remote sources
   114  	if c.opts.SearchRemoteLicenses {
   115  		pkgLicenses, err = c.getLicensesFromRemote(ctx, moduleName, moduleVersion)
   116  		if err != nil {
   117  			log.WithFields("error", err, "module", moduleName, "version", moduleVersion).Debug("unable to read golang licenses remote")
   118  		}
   119  	}
   120  
   121  	return pkgLicenses
   122  }
   123  
   124  func (c *goLicenseResolver) getLicensesFromLocal(ctx context.Context, moduleName, moduleVersion string) ([]pkg.License, error) {
   125  	if c.localModCacheDir == nil {
   126  		return nil, nil
   127  	}
   128  
   129  	subdir := moduleDir(moduleName, moduleVersion)
   130  
   131  	// get the local subdirectory containing the specific go module
   132  	dir, err := fs.Sub(c.localModCacheDir, subdir)
   133  	if err != nil {
   134  		return nil, err
   135  	}
   136  
   137  	// if we're running against a directory on the filesystem, it may not include the
   138  	// user's homedir / GOPATH, so we defer to using the localModCacheResolver
   139  	// we use $GOPATH/pkg/mod to avoid leaking information about the user's system
   140  	return c.findLicensesInFS(ctx, "file://$GOPATH/pkg/mod/"+subdir+"/", dir)
   141  }
   142  
   143  func (c *goLicenseResolver) getLicensesFromLocalVendor(ctx context.Context, moduleName string) ([]pkg.License, error) {
   144  	if c.localVendorDir == nil {
   145  		return nil, nil
   146  	}
   147  
   148  	subdir := processCaps(moduleName)
   149  
   150  	// get the local subdirectory containing the specific go module
   151  	dir, err := fs.Sub(c.localVendorDir, subdir)
   152  	if err != nil {
   153  		return nil, err
   154  	}
   155  
   156  	// if we're running against a directory on the filesystem, it may not include the
   157  	// user's homedir / GOPATH, so we defer to using the localModCacheResolver
   158  	// we use $GOPATH/pkg/mod to avoid leaking information about the user's system
   159  	return c.findLicensesInFS(ctx, "file://$GO_VENDOR/"+subdir+"/", dir)
   160  }
   161  
   162  func (c *goLicenseResolver) getLicensesFromRemote(ctx context.Context, moduleName, moduleVersion string) ([]pkg.License, error) {
   163  	return c.licenseCache.Resolve(fmt.Sprintf("%s/%s", moduleName, moduleVersion), func() ([]pkg.License, error) {
   164  		proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName)
   165  
   166  		urlPrefix, fsys, err := getModule(proxies, moduleName, moduleVersion)
   167  		if err != nil {
   168  			return nil, err
   169  		}
   170  
   171  		return c.findLicensesInFS(ctx, urlPrefix, fsys)
   172  	})
   173  }
   174  
   175  func (c *goLicenseResolver) findLicensesInFS(ctx context.Context, urlPrefix string, fsys fs.FS) ([]pkg.License, error) {
   176  	var out []pkg.License
   177  	err := fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error {
   178  		if err != nil {
   179  			log.Debugf("error reading %s#%s: %v", urlPrefix, filePath, err)
   180  			return err
   181  		}
   182  		if d == nil {
   183  			log.Debugf("nil entry for %s#%s", urlPrefix, filePath)
   184  			return nil
   185  		}
   186  		if !licenses.IsLicenseFile(d.Name()) {
   187  			return nil
   188  		}
   189  		rdr, err := fsys.Open(filePath)
   190  		if err != nil {
   191  			log.Debugf("error opening license file %s: %v", filePath, err)
   192  			return nil
   193  		}
   194  		defer internal.CloseAndLogError(rdr, filePath)
   195  		foundLicenses := pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(file.NewLocation(filePath), rdr))
   196  		// since these licenses are found in an external fs.FS, not in the scanned source,
   197  		// get rid of the locations but keep information about the where the license was found
   198  		// by prepending the urlPrefix to the internal path for an accurate representation
   199  		for _, l := range foundLicenses {
   200  			l.URLs = []string{urlPrefix + filePath}
   201  			l.Locations = file.NewLocationSet()
   202  			out = append(out, l)
   203  		}
   204  		return nil
   205  	})
   206  	return out, err
   207  }
   208  
   209  func (c *goLicenseResolver) findLicensesInSource(ctx context.Context, resolver file.Resolver, globMatch string) ([]pkg.License, error) {
   210  	var out []pkg.License
   211  	locations, err := resolver.FilesByGlob(globMatch)
   212  	if err != nil {
   213  		return nil, err
   214  	}
   215  
   216  	for _, l := range locations {
   217  		parsed, err := c.parseLicenseFromLocation(ctx, l, resolver)
   218  		if err != nil {
   219  			return nil, err
   220  		}
   221  		out = append(out, parsed...)
   222  	}
   223  
   224  	// if we have a directory but simply don't have any found license files, indicate this so we
   225  	// don't re-download modules continually
   226  	if len(locations) > 0 && len(out) == 0 {
   227  		return nil, noLicensesFound{
   228  			glob: globMatch,
   229  		}
   230  	}
   231  
   232  	return out, nil
   233  }
   234  
   235  func (c *goLicenseResolver) parseLicenseFromLocation(ctx context.Context, l file.Location, resolver file.Resolver) ([]pkg.License, error) {
   236  	var out []pkg.License
   237  	fileName := path.Base(l.RealPath)
   238  	if licenses.IsLicenseFile(fileName) {
   239  		contents, err := resolver.FileContentsByLocation(l)
   240  		if err != nil {
   241  			return nil, err
   242  		}
   243  		defer internal.CloseAndLogError(contents, l.RealPath)
   244  		out = pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(l, contents))
   245  	}
   246  	return out, nil
   247  }
   248  
   249  func moduleDir(moduleName, moduleVersion string) string {
   250  	return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion)
   251  }
   252  
   253  var capReplacer = regexp.MustCompile("[A-Z]")
   254  
   255  func processCaps(s string) string {
   256  	return capReplacer.ReplaceAllStringFunc(s, func(s string) string {
   257  		return "!" + strings.ToLower(s)
   258  	})
   259  }
   260  
   261  func getModule(proxies []string, moduleName, moduleVersion string) (urlPrefix string, fsys fs.FS, err error) {
   262  	for _, proxy := range proxies {
   263  		u, _ := url.Parse(proxy)
   264  		if proxy == "direct" {
   265  			urlPrefix, fsys, err = getModuleRepository(moduleName, moduleVersion)
   266  			continue
   267  		}
   268  		switch u.Scheme {
   269  		case "https", "http":
   270  			urlPrefix, fsys, err = getModuleProxy(proxy, moduleName, moduleVersion)
   271  		case "file":
   272  			p := filepath.Join(u.Path, moduleName, "@v", moduleVersion)
   273  			urlPrefix = path.Join("file://", p) + "/"
   274  			log.WithFields("path", p).Info("looking for go module in filesystem")
   275  			fsys = os.DirFS(p)
   276  		}
   277  		if fsys != nil {
   278  			break
   279  		}
   280  	}
   281  	return
   282  }
   283  
   284  func getModuleProxy(proxy string, moduleName string, moduleVersion string) (moduleURL string, out fs.FS, _ error) {
   285  	u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion)
   286  
   287  	// get the module zip
   288  	log.WithFields("url", u).Info("downloading go module from proxy")
   289  	resp, err := http.Get(u) //nolint:gosec
   290  	if err != nil {
   291  		return "", nil, err
   292  	}
   293  	defer func() { _ = resp.Body.Close() }()
   294  
   295  	if resp.StatusCode != http.StatusOK {
   296  		u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion)
   297  
   298  		// try lowercasing it; some packages have mixed casing that really messes up the proxy
   299  		resp, err = http.Get(u) //nolint:gosec
   300  		if err != nil {
   301  			return "", nil, err
   302  		}
   303  		defer func() { _ = resp.Body.Close() }()
   304  		if resp.StatusCode != http.StatusOK {
   305  			return "", nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
   306  		}
   307  	}
   308  
   309  	// read the zip
   310  	b, err := io.ReadAll(resp.Body)
   311  	if err != nil {
   312  		return "", nil, err
   313  	}
   314  
   315  	out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength)
   316  	versionPath := findVersionPath(out, ".")
   317  	out = getSubFS(out, versionPath)
   318  
   319  	return u + "#" + versionPath + "/", out, err
   320  }
   321  
   322  func findVersionPath(f fs.FS, dir string) string {
   323  	list, _ := fs.ReadDir(f, dir)
   324  
   325  	for _, entry := range list {
   326  		name := entry.Name()
   327  		if strings.Contains(name, "@") {
   328  			return name
   329  		}
   330  		found := findVersionPath(f, path.Join(dir, name))
   331  		if found != "" {
   332  			return path.Join(name, found)
   333  		}
   334  	}
   335  
   336  	return ""
   337  }
   338  
   339  func getModuleRepository(moduleName string, moduleVersion string) (string, fs.FS, error) {
   340  	repoName := moduleName
   341  	parts := strings.Split(moduleName, "/")
   342  	if len(parts) > 2 {
   343  		repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2])
   344  	}
   345  
   346  	// see if there's a hash and use that if so, otherwise use a tag
   347  	splitVersion := strings.Split(moduleVersion, "-")
   348  	var cloneRefName plumbing.ReferenceName
   349  	refPath := ""
   350  	if len(splitVersion) < 3 {
   351  		tagName := splitVersion[0]
   352  		cloneRefName = plumbing.NewTagReferenceName(tagName)
   353  		refPath = "/tags/" + tagName
   354  	}
   355  
   356  	f := memfs.New()
   357  	buf := &bytes.Buffer{}
   358  	repoURL := fmt.Sprintf("https://%s", repoName)
   359  
   360  	log.WithFields("repoURL", repoURL, "ref", cloneRefName).Info("cloning go module repository")
   361  	r, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{
   362  		URL:           repoURL,
   363  		ReferenceName: cloneRefName,
   364  		SingleBranch:  true,
   365  		Depth:         1,
   366  		Progress:      buf,
   367  	})
   368  	if err != nil {
   369  		return "", nil, fmt.Errorf("%w -- %s", err, buf.String())
   370  	}
   371  
   372  	if len(splitVersion) > 2 {
   373  		sha := splitVersion[len(splitVersion)-1]
   374  		hash, err := r.ResolveRevision(plumbing.Revision(sha))
   375  		if err != nil || hash == nil {
   376  			log.Tracef("unable to resolve hash %s: %v", sha, err)
   377  		} else {
   378  			w, err := r.Worktree()
   379  			if err != nil {
   380  				log.Tracef("unable to get worktree, using default: %v", err)
   381  			}
   382  			err = w.Checkout(&git.CheckoutOptions{
   383  				Hash: *hash,
   384  			})
   385  			if err != nil {
   386  				log.Tracef("unable to checkout commit, using default: %v", err)
   387  			} else {
   388  				refPath = "/refs/" + hash.String()
   389  			}
   390  		}
   391  	}
   392  
   393  	return repoURL + refPath + "/", billyFSAdapter{fs: f}, err
   394  }
   395  
   396  type noLicensesFound struct {
   397  	glob string
   398  }
   399  
   400  func (l noLicensesFound) Error() string {
   401  	return fmt.Sprintf("unable to find license information matching: %s", l.glob)
   402  }
   403  
   404  var _ error = (*noLicensesFound)(nil)