github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/golang/licenses.go (about)

     1  package golang
     2  
     3  import (
     4  	"archive/zip"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/fs"
     9  	"net/http"
    10  	"net/url"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"regexp"
    15  	"strings"
    16  
    17  	"github.com/go-git/go-billy/v5/memfs"
    18  	"github.com/go-git/go-git/v5"
    19  	"github.com/go-git/go-git/v5/plumbing"
    20  	"github.com/go-git/go-git/v5/storage/memory"
    21  	"github.com/scylladb/go-set/strset"
    22  
    23  	"github.com/anchore/syft/syft/event/monitor"
    24  	"github.com/anchore/syft/syft/file"
    25  	"github.com/anchore/syft/syft/pkg"
    26  	"github.com/lineaje-labs/syft/internal/licenses"
    27  	"github.com/lineaje-labs/syft/internal/log"
    28  	"github.com/lineaje-labs/syft/syft/internal/fileresolver"
    29  )
    30  
    31  type goLicenses struct {
    32  	opts                  CatalogerConfig
    33  	localModCacheResolver file.WritableResolver
    34  	progress              *monitor.CatalogerTask
    35  	lowerLicenseFileNames *strset.Set
    36  }
    37  
    38  func newGoLicenses(opts CatalogerConfig) goLicenses {
    39  	return goLicenses{
    40  		opts:                  opts,
    41  		localModCacheResolver: modCacheResolver(opts.LocalModCacheDir),
    42  		progress: &monitor.CatalogerTask{
    43  			SubStatus:          true,
    44  			RemoveOnCompletion: true,
    45  			Title:              "Downloading go mod",
    46  		},
    47  		lowerLicenseFileNames: strset.New(lowercaseLicenseFiles()...),
    48  	}
    49  }
    50  
    51  func lowercaseLicenseFiles() []string {
    52  	fileNames := licenses.FileNames()
    53  	for i := range fileNames {
    54  		fileNames[i] = strings.ToLower(fileNames[i])
    55  	}
    56  	return fileNames
    57  }
    58  
    59  func remotesForModule(proxies []string, noProxy []string, module string) []string {
    60  	for _, pattern := range noProxy {
    61  		if matched, err := path.Match(pattern, module); err == nil && matched {
    62  			// matched to be direct for this module
    63  			return directProxiesOnly
    64  		}
    65  	}
    66  
    67  	return proxies
    68  }
    69  
    70  func modCacheResolver(modCacheDir string) file.WritableResolver {
    71  	var r file.WritableResolver
    72  
    73  	if modCacheDir == "" {
    74  		log.Trace("unable to determine mod cache directory, skipping mod cache resolver")
    75  		r = fileresolver.Empty{}
    76  	} else {
    77  		stat, err := os.Stat(modCacheDir)
    78  
    79  		if os.IsNotExist(err) || stat == nil || !stat.IsDir() {
    80  			log.Tracef("unable to open mod cache directory: %s, skipping mod cache resolver", modCacheDir)
    81  			r = fileresolver.Empty{}
    82  		} else {
    83  			r = fileresolver.NewFromUnindexedDirectory(modCacheDir)
    84  		}
    85  	}
    86  
    87  	return r
    88  }
    89  
    90  func (c *goLicenses) getLicenses(
    91  	resolver file.Resolver, moduleName, moduleVersion string,
    92  ) (licenses []pkg.License, err error) {
    93  	licenses, err = c.findLicenses(resolver,
    94  		fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion),
    95  	)
    96  	if err != nil || len(licenses) > 0 {
    97  		return requireCollection(licenses), err
    98  	}
    99  
   100  	// look in the local host mod cache...
   101  	licenses, err = c.getLicensesFromLocal(moduleName, moduleVersion)
   102  	if err != nil || len(licenses) > 0 {
   103  		return requireCollection(licenses), err
   104  	}
   105  
   106  	// we did not find it yet and remote searching was enabled
   107  	licenses, err = c.getLicensesFromRemote(moduleName, moduleVersion)
   108  	return requireCollection(licenses), err
   109  }
   110  
   111  func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]pkg.License, error) {
   112  	if !c.opts.SearchLocalModCacheLicenses {
   113  		return nil, nil
   114  	}
   115  
   116  	// if we're running against a directory on the filesystem, it may not include the
   117  	// user's homedir / GOPATH, so we defer to using the localModCacheResolver
   118  	return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
   119  }
   120  
   121  func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) {
   122  	if !c.opts.SearchRemoteLicenses {
   123  		return nil, nil
   124  	}
   125  
   126  	proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName)
   127  
   128  	fsys, err := getModule(c.progress, proxies, moduleName, moduleVersion)
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  
   133  	dir := moduleDir(moduleName, moduleVersion)
   134  
   135  	// populate the mod cache with the results
   136  	err = fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error {
   137  		if err != nil {
   138  			log.Debug(err)
   139  			return nil
   140  		}
   141  		if d.IsDir() {
   142  			return nil
   143  		}
   144  		f, err := fsys.Open(filePath)
   145  		if err != nil {
   146  			return err
   147  		}
   148  		return c.localModCacheResolver.Write(file.NewLocation(path.Join(dir, filePath)), f)
   149  	})
   150  
   151  	if err != nil {
   152  		log.Tracef("remote proxy walk failed for: %s", moduleName)
   153  	}
   154  
   155  	return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
   156  }
   157  
   158  func (c *goLicenses) findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) {
   159  	out = make([]pkg.License, 0)
   160  	if resolver == nil {
   161  		return
   162  	}
   163  
   164  	locations, err := resolver.FilesByGlob(globMatch)
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  
   169  	for _, l := range locations {
   170  		fileName := path.Base(l.RealPath)
   171  		if c.lowerLicenseFileNames.Has(strings.ToLower(fileName)) {
   172  			contents, err := resolver.FileContentsByLocation(l)
   173  			if err != nil {
   174  				return nil, err
   175  			}
   176  			parsed, err := licenses.Parse(contents, l)
   177  			if err != nil {
   178  				return nil, err
   179  			}
   180  
   181  			out = append(out, parsed...)
   182  		}
   183  	}
   184  
   185  	return
   186  }
   187  
   188  func moduleDir(moduleName, moduleVersion string) string {
   189  	return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion)
   190  }
   191  
   192  func moduleSearchGlob(moduleName, moduleVersion string) string {
   193  	return fmt.Sprintf("%s/*", moduleDir(moduleName, moduleVersion))
   194  }
   195  
   196  func requireCollection(licenses []pkg.License) []pkg.License {
   197  	if licenses == nil {
   198  		return make([]pkg.License, 0)
   199  	}
   200  	return licenses
   201  }
   202  
   203  var capReplacer = regexp.MustCompile("[A-Z]")
   204  
   205  func processCaps(s string) string {
   206  	return capReplacer.ReplaceAllStringFunc(s, func(s string) string {
   207  		return "!" + strings.ToLower(s)
   208  	})
   209  }
   210  
   211  func getModule(
   212  	progress *monitor.CatalogerTask, proxies []string, moduleName, moduleVersion string,
   213  ) (fsys fs.FS, err error) {
   214  	for _, proxy := range proxies {
   215  		u, _ := url.Parse(proxy)
   216  		if proxy == "direct" {
   217  			fsys, err = getModuleRepository(progress, moduleName, moduleVersion)
   218  			continue
   219  		}
   220  		switch u.Scheme {
   221  		case "https", "http":
   222  			fsys, err = getModuleProxy(progress, proxy, moduleName, moduleVersion)
   223  		case "file":
   224  			p := filepath.Join(u.Path, moduleName, "@v", moduleVersion)
   225  			progress.SetValue(fmt.Sprintf("file: %s", p))
   226  			fsys = os.DirFS(p)
   227  		}
   228  		if fsys != nil {
   229  			break
   230  		}
   231  	}
   232  	return
   233  }
   234  
   235  func getModuleProxy(
   236  	progress *monitor.CatalogerTask, proxy string, moduleName string, moduleVersion string,
   237  ) (out fs.FS, _ error) {
   238  	u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion)
   239  	progress.SetValue(u)
   240  
   241  	// get the module zip
   242  	resp, err := http.Get(u) //nolint:gosec
   243  	if err != nil {
   244  		return nil, err
   245  	}
   246  	defer func() { _ = resp.Body.Close() }()
   247  
   248  	if resp.StatusCode != http.StatusOK {
   249  		u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion)
   250  		progress.SetValue(u)
   251  
   252  		// try lowercasing it; some packages have mixed casing that really messes up the proxy
   253  		resp, err = http.Get(u) //nolint:gosec
   254  		if err != nil {
   255  			return nil, err
   256  		}
   257  		defer func() { _ = resp.Body.Close() }()
   258  		if resp.StatusCode != http.StatusOK {
   259  			return nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
   260  		}
   261  	}
   262  
   263  	// read the zip
   264  	b, err := io.ReadAll(resp.Body)
   265  	if err != nil {
   266  		return nil, err
   267  	}
   268  
   269  	out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength)
   270  	versionPath := findVersionPath(out, ".")
   271  	out = getSubFS(out, versionPath)
   272  
   273  	return out, err
   274  }
   275  
   276  func findVersionPath(f fs.FS, dir string) string {
   277  	list, _ := fs.ReadDir(f, dir)
   278  
   279  	for _, entry := range list {
   280  		name := entry.Name()
   281  		if strings.Contains(name, "@") {
   282  			return name
   283  		}
   284  		found := findVersionPath(f, path.Join(dir, name))
   285  		if found != "" {
   286  			return path.Join(name, found)
   287  		}
   288  	}
   289  
   290  	return ""
   291  }
   292  
   293  func getModuleRepository(progress *monitor.CatalogerTask, moduleName string, moduleVersion string) (fs.FS, error) {
   294  	repoName := moduleName
   295  	parts := strings.Split(moduleName, "/")
   296  	if len(parts) > 2 {
   297  		repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2])
   298  	}
   299  
   300  	progress.SetValue(fmt.Sprintf("git: %s", repoName))
   301  
   302  	f := memfs.New()
   303  	buf := &bytes.Buffer{}
   304  	_, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{
   305  		URL:           fmt.Sprintf("https://%s", repoName),
   306  		ReferenceName: plumbing.NewTagReferenceName(moduleVersion), // FIXME version might be a SHA
   307  		SingleBranch:  true,
   308  		Depth:         1,
   309  		Progress:      buf,
   310  	})
   311  
   312  	if err != nil {
   313  		return nil, fmt.Errorf("%w -- %s", err, buf.String())
   314  	}
   315  
   316  	return billyFSAdapter{fs: f}, nil
   317  }