github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/golang/licenses.go (about)

     1  package golang
     2  
     3  import (
     4  	"archive/zip"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/fs"
     9  	"net/http"
    10  	"net/url"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"regexp"
    15  	"strings"
    16  
    17  	"github.com/go-git/go-billy/v5/memfs"
    18  	"github.com/go-git/go-git/v5"
    19  	"github.com/go-git/go-git/v5/plumbing"
    20  	"github.com/go-git/go-git/v5/storage/memory"
    21  
    22  	"github.com/anchore/syft/internal/licenses"
    23  	"github.com/anchore/syft/internal/log"
    24  	"github.com/anchore/syft/syft/event/monitor"
    25  	"github.com/anchore/syft/syft/file"
    26  	"github.com/anchore/syft/syft/internal/fileresolver"
    27  	"github.com/anchore/syft/syft/pkg"
    28  )
    29  
// goLicenses bundles the state used to look up license files for Go modules:
// the cataloger options, a resolver over the local module cache, and a
// progress task that is updated while modules are fetched.
type goLicenses struct {
	// opts holds the cataloger configuration (local/remote search toggles,
	// proxy lists and the local mod cache directory are read from it).
	opts GoCatalogerOpts
	// localModCacheResolver is searched for cached module licenses and is
	// written to when a module is fetched from a remote source.
	localModCacheResolver file.WritableResolver
	// progress receives the location currently being fetched.
	progress *monitor.CatalogerTask
}
    35  
    36  func newGoLicenses(opts GoCatalogerOpts) goLicenses {
    37  	return goLicenses{
    38  		opts:                  opts,
    39  		localModCacheResolver: modCacheResolver(opts.localModCacheDir),
    40  		progress: &monitor.CatalogerTask{
    41  			SubStatus:          true,
    42  			RemoveOnCompletion: true,
    43  			Title:              "Downloading go mod",
    44  		},
    45  	}
    46  }
    47  
    48  func remotesForModule(proxies []string, noProxy []string, module string) []string {
    49  	for _, pattern := range noProxy {
    50  		if matched, err := path.Match(pattern, module); err == nil && matched {
    51  			// matched to be direct for this module
    52  			return directProxiesOnly
    53  		}
    54  	}
    55  
    56  	return proxies
    57  }
    58  
    59  func modCacheResolver(modCacheDir string) file.WritableResolver {
    60  	var r file.WritableResolver
    61  
    62  	if modCacheDir == "" {
    63  		log.Trace("unable to determine mod cache directory, skipping mod cache resolver")
    64  		r = fileresolver.Empty{}
    65  	} else {
    66  		stat, err := os.Stat(modCacheDir)
    67  
    68  		if os.IsNotExist(err) || stat == nil || !stat.IsDir() {
    69  			log.Tracef("unable to open mod cache directory: %s, skipping mod cache resolver", modCacheDir)
    70  			r = fileresolver.Empty{}
    71  		} else {
    72  			r = fileresolver.NewFromUnindexedDirectory(modCacheDir)
    73  		}
    74  	}
    75  
    76  	return r
    77  }
    78  
    79  func (c *goLicenses) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) (licenses []pkg.License, err error) {
    80  	licenses, err = findLicenses(resolver,
    81  		fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion),
    82  	)
    83  	if err != nil || len(licenses) > 0 {
    84  		return requireCollection(licenses), err
    85  	}
    86  
    87  	// look in the local host mod cache...
    88  	licenses, err = c.getLicensesFromLocal(moduleName, moduleVersion)
    89  	if err != nil || len(licenses) > 0 {
    90  		return requireCollection(licenses), err
    91  	}
    92  
    93  	// we did not find it yet and remote searching was enabled
    94  	licenses, err = c.getLicensesFromRemote(moduleName, moduleVersion)
    95  	return requireCollection(licenses), err
    96  }
    97  
    98  func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]pkg.License, error) {
    99  	if !c.opts.searchLocalModCacheLicenses {
   100  		return nil, nil
   101  	}
   102  
   103  	// if we're running against a directory on the filesystem, it may not include the
   104  	// user's homedir / GOPATH, so we defer to using the localModCacheResolver
   105  	return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
   106  }
   107  
   108  func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) {
   109  	if !c.opts.searchRemoteLicenses {
   110  		return nil, nil
   111  	}
   112  
   113  	proxies := remotesForModule(c.opts.proxies, c.opts.noProxy, moduleName)
   114  
   115  	fsys, err := getModule(c.progress, proxies, moduleName, moduleVersion)
   116  	if err != nil {
   117  		return nil, err
   118  	}
   119  
   120  	dir := moduleDir(moduleName, moduleVersion)
   121  
   122  	// populate the mod cache with the results
   123  	err = fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error {
   124  		if err != nil {
   125  			log.Debug(err)
   126  			return nil
   127  		}
   128  		if d.IsDir() {
   129  			return nil
   130  		}
   131  		f, err := fsys.Open(filePath)
   132  		if err != nil {
   133  			return err
   134  		}
   135  		return c.localModCacheResolver.Write(file.NewLocation(path.Join(dir, filePath)), f)
   136  	})
   137  
   138  	if err != nil {
   139  		log.Tracef("remote proxy walk failed for: %s", moduleName)
   140  	}
   141  
   142  	return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
   143  }
   144  
   145  func moduleDir(moduleName, moduleVersion string) string {
   146  	return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion)
   147  }
   148  
   149  func moduleSearchGlob(moduleName, moduleVersion string) string {
   150  	return fmt.Sprintf("%s/*", moduleDir(moduleName, moduleVersion))
   151  }
   152  
   153  func requireCollection(licenses []pkg.License) []pkg.License {
   154  	if licenses == nil {
   155  		return make([]pkg.License, 0)
   156  	}
   157  	return licenses
   158  }
   159  
   160  func findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) {
   161  	out = make([]pkg.License, 0)
   162  	if resolver == nil {
   163  		return
   164  	}
   165  
   166  	locations, err := resolver.FilesByGlob(globMatch)
   167  	if err != nil {
   168  		return nil, err
   169  	}
   170  
   171  	for _, l := range locations {
   172  		fileName := path.Base(l.RealPath)
   173  		if licenses.FileNameSet.Contains(fileName) {
   174  			contents, err := resolver.FileContentsByLocation(l)
   175  			if err != nil {
   176  				return nil, err
   177  			}
   178  			parsed, err := licenses.Parse(contents, l)
   179  			if err != nil {
   180  				return nil, err
   181  			}
   182  
   183  			out = append(out, parsed...)
   184  		}
   185  	}
   186  
   187  	return
   188  }
   189  
   190  var capReplacer = regexp.MustCompile("[A-Z]")
   191  
   192  func processCaps(s string) string {
   193  	return capReplacer.ReplaceAllStringFunc(s, func(s string) string {
   194  		return "!" + strings.ToLower(s)
   195  	})
   196  }
   197  
   198  func getModule(progress *monitor.CatalogerTask, proxies []string, moduleName, moduleVersion string) (fsys fs.FS, err error) {
   199  	for _, proxy := range proxies {
   200  		u, _ := url.Parse(proxy)
   201  		if proxy == "direct" {
   202  			fsys, err = getModuleRepository(progress, moduleName, moduleVersion)
   203  			continue
   204  		}
   205  		switch u.Scheme {
   206  		case "https", "http":
   207  			fsys, err = getModuleProxy(progress, proxy, moduleName, moduleVersion)
   208  		case "file":
   209  			p := filepath.Join(u.Path, moduleName, "@v", moduleVersion)
   210  			progress.SetValue(fmt.Sprintf("file: %s", p))
   211  			fsys = os.DirFS(p)
   212  		}
   213  		if fsys != nil {
   214  			break
   215  		}
   216  	}
   217  	return
   218  }
   219  
   220  func getModuleProxy(progress *monitor.CatalogerTask, proxy string, moduleName string, moduleVersion string) (out fs.FS, _ error) {
   221  	u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion)
   222  	progress.SetValue(u)
   223  	// get the module zip
   224  	resp, err := http.Get(u) //nolint:gosec
   225  	if err != nil {
   226  		return nil, err
   227  	}
   228  	defer func() { _ = resp.Body.Close() }()
   229  	if resp.StatusCode != http.StatusOK {
   230  		u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion)
   231  		progress.SetValue(u)
   232  		// try lowercasing it; some packages have mixed casing that really messes up the proxy
   233  		resp, err = http.Get(u) //nolint:gosec
   234  		if err != nil {
   235  			return nil, err
   236  		}
   237  		defer func() { _ = resp.Body.Close() }()
   238  		if resp.StatusCode != http.StatusOK {
   239  			return nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
   240  		}
   241  	}
   242  	// read the zip
   243  	b, err := io.ReadAll(resp.Body)
   244  	if err != nil {
   245  		return nil, err
   246  	}
   247  	out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength)
   248  	versionPath := findVersionPath(out, ".")
   249  	out = getSubFS(out, versionPath)
   250  	return out, err
   251  }
   252  
   253  func findVersionPath(f fs.FS, dir string) string {
   254  	list, _ := fs.ReadDir(f, dir)
   255  	for _, entry := range list {
   256  		name := entry.Name()
   257  		if strings.Contains(name, "@") {
   258  			return name
   259  		}
   260  		found := findVersionPath(f, path.Join(dir, name))
   261  		if found != "" {
   262  			return path.Join(name, found)
   263  		}
   264  	}
   265  	return ""
   266  }
   267  
   268  func getModuleRepository(progress *monitor.CatalogerTask, moduleName string, moduleVersion string) (fs.FS, error) {
   269  	repoName := moduleName
   270  	parts := strings.Split(moduleName, "/")
   271  	if len(parts) > 2 {
   272  		repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2])
   273  	}
   274  	progress.SetValue(fmt.Sprintf("git: %s", repoName))
   275  	f := memfs.New()
   276  	buf := &bytes.Buffer{}
   277  	_, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{
   278  		URL:           fmt.Sprintf("https://%s", repoName),
   279  		ReferenceName: plumbing.NewTagReferenceName(moduleVersion), // FIXME version might be a SHA
   280  		SingleBranch:  true,
   281  		Depth:         1,
   282  		Progress:      buf,
   283  	})
   284  	if err != nil {
   285  		return nil, fmt.Errorf("%w -- %s", err, buf.String())
   286  	}
   287  
   288  	return billyFSAdapter{fs: f}, nil
   289  }