github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/golang/licenses.go (about)

     1  package golang
     2  
     3  import (
     4  	"archive/zip"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/fs"
     9  	"net/http"
    10  	"net/url"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"regexp"
    15  	"strings"
    16  
    17  	"github.com/go-git/go-billy/v5/memfs"
    18  	"github.com/go-git/go-git/v5"
    19  	"github.com/go-git/go-git/v5/plumbing"
    20  	"github.com/go-git/go-git/v5/storage/memory"
    21  	"github.com/scylladb/go-set/strset"
    22  
    23  	"github.com/anchore/syft/internal"
    24  	"github.com/anchore/syft/internal/licenses"
    25  	"github.com/anchore/syft/internal/log"
    26  	"github.com/anchore/syft/syft/file"
    27  	"github.com/anchore/syft/syft/internal/fileresolver"
    28  	"github.com/anchore/syft/syft/pkg"
    29  )
    30  
// goLicenses bundles the state needed to look up license files for Go modules:
// the cataloger configuration, a writable resolver over the local module cache,
// and the set of file names that are treated as license files.
type goLicenses struct {
	// catalogerName is the name handed to newGoLicenses by the owning cataloger.
	catalogerName         string
	// opts is the cataloger configuration; the methods in this file read its
	// SearchLocalModCacheLicenses, SearchRemoteLicenses, Proxies and NoProxy fields.
	opts                  CatalogerConfig
	// localModCacheResolver reads from (and, after a remote fetch, is written to)
	// the directory named by opts.LocalModCacheDir.
	localModCacheResolver file.WritableResolver
	// lowerLicenseFileNames holds the lower-cased names from licenses.FileNames(),
	// allowing case-insensitive matching of candidate file names.
	lowerLicenseFileNames *strset.Set
}
    37  
    38  func newGoLicenses(catalogerName string, opts CatalogerConfig) goLicenses {
    39  	return goLicenses{
    40  		catalogerName:         catalogerName,
    41  		opts:                  opts,
    42  		localModCacheResolver: modCacheResolver(opts.LocalModCacheDir),
    43  		lowerLicenseFileNames: strset.New(lowercaseLicenseFiles()...),
    44  	}
    45  }
    46  
    47  func lowercaseLicenseFiles() []string {
    48  	fileNames := licenses.FileNames()
    49  	for i := range fileNames {
    50  		fileNames[i] = strings.ToLower(fileNames[i])
    51  	}
    52  	return fileNames
    53  }
    54  
    55  func remotesForModule(proxies []string, noProxy []string, module string) []string {
    56  	for _, pattern := range noProxy {
    57  		if matched, err := path.Match(pattern, module); err == nil && matched {
    58  			// matched to be direct for this module
    59  			return directProxiesOnly
    60  		}
    61  	}
    62  
    63  	return proxies
    64  }
    65  
    66  func modCacheResolver(modCacheDir string) file.WritableResolver {
    67  	var r file.WritableResolver
    68  
    69  	if modCacheDir == "" {
    70  		log.Trace("unable to determine mod cache directory, skipping mod cache resolver")
    71  		r = fileresolver.Empty{}
    72  	} else {
    73  		stat, err := os.Stat(modCacheDir)
    74  
    75  		if os.IsNotExist(err) || stat == nil || !stat.IsDir() {
    76  			log.Tracef("unable to open mod cache directory: %s, skipping mod cache resolver", modCacheDir)
    77  			r = fileresolver.Empty{}
    78  		} else {
    79  			r = fileresolver.NewFromUnindexedDirectory(modCacheDir)
    80  		}
    81  	}
    82  
    83  	return r
    84  }
    85  
    86  func (c *goLicenses) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) (licenses []pkg.License, err error) {
    87  	licenses, err = c.findLicenses(resolver,
    88  		fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion),
    89  	)
    90  	if err != nil || len(licenses) > 0 {
    91  		return requireCollection(licenses), err
    92  	}
    93  
    94  	// look in the local host mod cache...
    95  	licenses, err = c.getLicensesFromLocal(moduleName, moduleVersion)
    96  	if err != nil || len(licenses) > 0 {
    97  		return requireCollection(licenses), err
    98  	}
    99  
   100  	// we did not find it yet and remote searching was enabled
   101  	licenses, err = c.getLicensesFromRemote(moduleName, moduleVersion)
   102  	return requireCollection(licenses), err
   103  }
   104  
   105  func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]pkg.License, error) {
   106  	if !c.opts.SearchLocalModCacheLicenses {
   107  		return nil, nil
   108  	}
   109  
   110  	// if we're running against a directory on the filesystem, it may not include the
   111  	// user's homedir / GOPATH, so we defer to using the localModCacheResolver
   112  	return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
   113  }
   114  
   115  func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) {
   116  	if !c.opts.SearchRemoteLicenses {
   117  		return nil, nil
   118  	}
   119  
   120  	proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName)
   121  
   122  	fsys, err := getModule(proxies, moduleName, moduleVersion)
   123  	if err != nil {
   124  		return nil, err
   125  	}
   126  
   127  	dir := moduleDir(moduleName, moduleVersion)
   128  
   129  	// populate the mod cache with the results
   130  	err = fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error {
   131  		if err != nil {
   132  			log.Debug(err)
   133  			return nil
   134  		}
   135  		if d.IsDir() {
   136  			return nil
   137  		}
   138  		f, err := fsys.Open(filePath)
   139  		if err != nil {
   140  			return err
   141  		}
   142  		return c.localModCacheResolver.Write(file.NewLocation(path.Join(dir, filePath)), f)
   143  	})
   144  
   145  	if err != nil {
   146  		log.Tracef("remote proxy walk failed for: %s", moduleName)
   147  	}
   148  
   149  	return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
   150  }
   151  
   152  func (c *goLicenses) findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) {
   153  	out = make([]pkg.License, 0)
   154  	if resolver == nil {
   155  		return
   156  	}
   157  
   158  	locations, err := resolver.FilesByGlob(globMatch)
   159  	if err != nil {
   160  		return nil, err
   161  	}
   162  
   163  	for _, l := range locations {
   164  		parsed, err := c.parseLicenseFromLocation(l, resolver)
   165  		if err != nil {
   166  			return nil, err
   167  		}
   168  		out = append(out, parsed...)
   169  	}
   170  
   171  	return
   172  }
   173  
   174  func (c *goLicenses) parseLicenseFromLocation(l file.Location, resolver file.Resolver) ([]pkg.License, error) {
   175  	var out []pkg.License
   176  	fileName := path.Base(l.RealPath)
   177  	if c.lowerLicenseFileNames.Has(strings.ToLower(fileName)) {
   178  		contents, err := resolver.FileContentsByLocation(l)
   179  		if err != nil {
   180  			return nil, err
   181  		}
   182  		defer internal.CloseAndLogError(contents, l.RealPath)
   183  		parsed, err := licenses.Parse(contents, l)
   184  		if err != nil {
   185  			return nil, err
   186  		}
   187  
   188  		out = append(out, parsed...)
   189  	}
   190  	return out, nil
   191  }
   192  
   193  func moduleDir(moduleName, moduleVersion string) string {
   194  	return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion)
   195  }
   196  
   197  func moduleSearchGlob(moduleName, moduleVersion string) string {
   198  	return fmt.Sprintf("%s/*", moduleDir(moduleName, moduleVersion))
   199  }
   200  
   201  func requireCollection(licenses []pkg.License) []pkg.License {
   202  	if licenses == nil {
   203  		return make([]pkg.License, 0)
   204  	}
   205  	return licenses
   206  }
   207  
   208  var capReplacer = regexp.MustCompile("[A-Z]")
   209  
   210  func processCaps(s string) string {
   211  	return capReplacer.ReplaceAllStringFunc(s, func(s string) string {
   212  		return "!" + strings.ToLower(s)
   213  	})
   214  }
   215  
   216  func getModule(proxies []string, moduleName, moduleVersion string) (fsys fs.FS, err error) {
   217  	for _, proxy := range proxies {
   218  		u, _ := url.Parse(proxy)
   219  		if proxy == "direct" {
   220  			fsys, err = getModuleRepository(moduleName, moduleVersion)
   221  			continue
   222  		}
   223  		switch u.Scheme {
   224  		case "https", "http":
   225  			fsys, err = getModuleProxy(proxy, moduleName, moduleVersion)
   226  		case "file":
   227  			p := filepath.Join(u.Path, moduleName, "@v", moduleVersion)
   228  			fsys = os.DirFS(p)
   229  		}
   230  		if fsys != nil {
   231  			break
   232  		}
   233  	}
   234  	return
   235  }
   236  
   237  func getModuleProxy(proxy string, moduleName string, moduleVersion string) (out fs.FS, _ error) {
   238  	u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion)
   239  
   240  	// get the module zip
   241  	resp, err := http.Get(u) //nolint:gosec
   242  	if err != nil {
   243  		return nil, err
   244  	}
   245  	defer func() { _ = resp.Body.Close() }()
   246  
   247  	if resp.StatusCode != http.StatusOK {
   248  		u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion)
   249  
   250  		// try lowercasing it; some packages have mixed casing that really messes up the proxy
   251  		resp, err = http.Get(u) //nolint:gosec
   252  		if err != nil {
   253  			return nil, err
   254  		}
   255  		defer func() { _ = resp.Body.Close() }()
   256  		if resp.StatusCode != http.StatusOK {
   257  			return nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
   258  		}
   259  	}
   260  
   261  	// read the zip
   262  	b, err := io.ReadAll(resp.Body)
   263  	if err != nil {
   264  		return nil, err
   265  	}
   266  
   267  	out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength)
   268  	versionPath := findVersionPath(out, ".")
   269  	out = getSubFS(out, versionPath)
   270  
   271  	return out, err
   272  }
   273  
   274  func findVersionPath(f fs.FS, dir string) string {
   275  	list, _ := fs.ReadDir(f, dir)
   276  
   277  	for _, entry := range list {
   278  		name := entry.Name()
   279  		if strings.Contains(name, "@") {
   280  			return name
   281  		}
   282  		found := findVersionPath(f, path.Join(dir, name))
   283  		if found != "" {
   284  			return path.Join(name, found)
   285  		}
   286  	}
   287  
   288  	return ""
   289  }
   290  
   291  func getModuleRepository(moduleName string, moduleVersion string) (fs.FS, error) {
   292  	repoName := moduleName
   293  	parts := strings.Split(moduleName, "/")
   294  	if len(parts) > 2 {
   295  		repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2])
   296  	}
   297  
   298  	f := memfs.New()
   299  	buf := &bytes.Buffer{}
   300  	_, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{
   301  		URL:           fmt.Sprintf("https://%s", repoName),
   302  		ReferenceName: plumbing.NewTagReferenceName(moduleVersion), // FIXME version might be a SHA
   303  		SingleBranch:  true,
   304  		Depth:         1,
   305  		Progress:      buf,
   306  	})
   307  
   308  	if err != nil {
   309  		return nil, fmt.Errorf("%w -- %s", err, buf.String())
   310  	}
   311  
   312  	return billyFSAdapter{fs: f}, nil
   313  }