github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/golang/licenses.go (about)

     1  package golang
     2  
     3  import (
     4  	"archive/zip"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"io/fs"
     9  	"net/http"
    10  	"net/url"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"regexp"
    15  	"strings"
    16  
    17  	"github.com/go-git/go-billy/v5/memfs"
    18  	"github.com/go-git/go-git/v5"
    19  	"github.com/go-git/go-git/v5/plumbing"
    20  	"github.com/go-git/go-git/v5/storage/memory"
    21  	"github.com/nextlinux/gosbom/gosbom/event"
    22  	"github.com/nextlinux/gosbom/gosbom/file"
    23  	"github.com/nextlinux/gosbom/gosbom/internal/fileresolver"
    24  	"github.com/nextlinux/gosbom/gosbom/pkg"
    25  	"github.com/nextlinux/gosbom/internal/licenses"
    26  	"github.com/nextlinux/gosbom/internal/log"
    27  )
    28  
// goLicenses looks up license information for Go modules, consulting the scanned
// source first, then the local module cache, then remote proxies / repositories.
type goLicenses struct {
	// opts carries the cataloger options that enable local / remote searching and
	// list the proxies and no-proxy patterns to use
	opts                  GoCatalogerOpts
	// localModCacheResolver is built from opts.localModCacheDir; it is searched for
	// license files and is also where remotely fetched module files are written
	localModCacheResolver file.WritableResolver
	// progress is handed to the module fetchers, which set its value to the source
	// (URL, file path, or git repository) currently being retrieved
	progress              *event.CatalogerTask
}
    34  
    35  func newGoLicenses(opts GoCatalogerOpts) goLicenses {
    36  	return goLicenses{
    37  		opts:                  opts,
    38  		localModCacheResolver: modCacheResolver(opts.localModCacheDir),
    39  		progress: &event.CatalogerTask{
    40  			SubStatus:          true,
    41  			RemoveOnCompletion: true,
    42  			Title:              "Downloading go mod",
    43  		},
    44  	}
    45  }
    46  
    47  func remotesForModule(proxies []string, noProxy []string, module string) []string {
    48  	for _, pattern := range noProxy {
    49  		if matched, err := path.Match(pattern, module); err == nil && matched {
    50  			// matched to be direct for this module
    51  			return directProxiesOnly
    52  		}
    53  	}
    54  
    55  	return proxies
    56  }
    57  
    58  func modCacheResolver(modCacheDir string) file.WritableResolver {
    59  	var r file.WritableResolver
    60  
    61  	if modCacheDir == "" {
    62  		log.Trace("unable to determine mod cache directory, skipping mod cache resolver")
    63  		r = fileresolver.Empty{}
    64  	} else {
    65  		stat, err := os.Stat(modCacheDir)
    66  
    67  		if os.IsNotExist(err) || stat == nil || !stat.IsDir() {
    68  			log.Tracef("unable to open mod cache directory: %s, skipping mod cache resolver", modCacheDir)
    69  			r = fileresolver.Empty{}
    70  		} else {
    71  			r = fileresolver.NewFromUnindexedDirectory(modCacheDir)
    72  		}
    73  	}
    74  
    75  	return r
    76  }
    77  
    78  func (c *goLicenses) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) (licenses []pkg.License, err error) {
    79  	licenses, err = findLicenses(resolver,
    80  		fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion),
    81  	)
    82  	if err != nil || len(licenses) > 0 {
    83  		return requireCollection(licenses), err
    84  	}
    85  
    86  	// look in the local host mod cache...
    87  	licenses, err = c.getLicensesFromLocal(moduleName, moduleVersion)
    88  	if err != nil || len(licenses) > 0 {
    89  		return requireCollection(licenses), err
    90  	}
    91  
    92  	// we did not find it yet and remote searching was enabled
    93  	licenses, err = c.getLicensesFromRemote(moduleName, moduleVersion)
    94  	return requireCollection(licenses), err
    95  }
    96  
    97  func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]pkg.License, error) {
    98  	if !c.opts.searchLocalModCacheLicenses {
    99  		return nil, nil
   100  	}
   101  
   102  	// if we're running against a directory on the filesystem, it may not include the
   103  	// user's homedir / GOPATH, so we defer to using the localModCacheResolver
   104  	return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
   105  }
   106  
   107  func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) {
   108  	if !c.opts.searchRemoteLicenses {
   109  		return nil, nil
   110  	}
   111  
   112  	proxies := remotesForModule(c.opts.proxies, c.opts.noProxy, moduleName)
   113  
   114  	fsys, err := getModule(c.progress, proxies, moduleName, moduleVersion)
   115  	if err != nil {
   116  		return nil, err
   117  	}
   118  
   119  	dir := moduleDir(moduleName, moduleVersion)
   120  
   121  	// populate the mod cache with the results
   122  	err = fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error {
   123  		if err != nil {
   124  			log.Debug(err)
   125  			return nil
   126  		}
   127  		if d.IsDir() {
   128  			return nil
   129  		}
   130  		f, err := fsys.Open(filePath)
   131  		if err != nil {
   132  			return err
   133  		}
   134  		return c.localModCacheResolver.Write(file.NewLocation(path.Join(dir, filePath)), f)
   135  	})
   136  
   137  	if err != nil {
   138  		log.Tracef("remote proxy walk failed for: %s", moduleName)
   139  	}
   140  
   141  	return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion))
   142  }
   143  
   144  func moduleDir(moduleName, moduleVersion string) string {
   145  	return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion)
   146  }
   147  
   148  func moduleSearchGlob(moduleName, moduleVersion string) string {
   149  	return fmt.Sprintf("%s/*", moduleDir(moduleName, moduleVersion))
   150  }
   151  
   152  func requireCollection(licenses []pkg.License) []pkg.License {
   153  	if licenses == nil {
   154  		return make([]pkg.License, 0)
   155  	}
   156  	return licenses
   157  }
   158  
   159  func findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) {
   160  	out = make([]pkg.License, 0)
   161  	if resolver == nil {
   162  		return
   163  	}
   164  
   165  	locations, err := resolver.FilesByGlob(globMatch)
   166  	if err != nil {
   167  		return nil, err
   168  	}
   169  
   170  	for _, l := range locations {
   171  		fileName := path.Base(l.RealPath)
   172  		if licenses.FileNameSet.Contains(fileName) {
   173  			contents, err := resolver.FileContentsByLocation(l)
   174  			if err != nil {
   175  				return nil, err
   176  			}
   177  			parsed, err := licenses.Parse(contents, l)
   178  			if err != nil {
   179  				return nil, err
   180  			}
   181  
   182  			out = append(out, parsed...)
   183  		}
   184  	}
   185  
   186  	return
   187  }
   188  
   189  var capReplacer = regexp.MustCompile("[A-Z]")
   190  
   191  func processCaps(s string) string {
   192  	return capReplacer.ReplaceAllStringFunc(s, func(s string) string {
   193  		return "!" + strings.ToLower(s)
   194  	})
   195  }
   196  
   197  func getModule(progress *event.CatalogerTask, proxies []string, moduleName, moduleVersion string) (fsys fs.FS, err error) {
   198  	for _, proxy := range proxies {
   199  		u, _ := url.Parse(proxy)
   200  		if proxy == "direct" {
   201  			fsys, err = getModuleRepository(progress, moduleName, moduleVersion)
   202  			continue
   203  		}
   204  		switch u.Scheme {
   205  		case "https", "http":
   206  			fsys, err = getModuleProxy(progress, proxy, moduleName, moduleVersion)
   207  		case "file":
   208  			p := filepath.Join(u.Path, moduleName, "@v", moduleVersion)
   209  			progress.SetValue(fmt.Sprintf("file: %s", p))
   210  			fsys = os.DirFS(p)
   211  		}
   212  		if fsys != nil {
   213  			break
   214  		}
   215  	}
   216  	return
   217  }
   218  
   219  func getModuleProxy(progress *event.CatalogerTask, proxy string, moduleName string, moduleVersion string) (out fs.FS, _ error) {
   220  	u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion)
   221  	progress.SetValue(u)
   222  	// get the module zip
   223  	resp, err := http.Get(u) //nolint:gosec
   224  	if err != nil {
   225  		return nil, err
   226  	}
   227  	defer func() { _ = resp.Body.Close() }()
   228  	if resp.StatusCode != http.StatusOK {
   229  		u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion)
   230  		progress.SetValue(u)
   231  		// try lowercasing it; some packages have mixed casing that really messes up the proxy
   232  		resp, err = http.Get(u) //nolint:gosec
   233  		if err != nil {
   234  			return nil, err
   235  		}
   236  		defer func() { _ = resp.Body.Close() }()
   237  		if resp.StatusCode != http.StatusOK {
   238  			return nil, fmt.Errorf("failed to get module zip: %s", resp.Status)
   239  		}
   240  	}
   241  	// read the zip
   242  	b, err := io.ReadAll(resp.Body)
   243  	if err != nil {
   244  		return nil, err
   245  	}
   246  	out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength)
   247  	versionPath := findVersionPath(out, ".")
   248  	out = getSubFS(out, versionPath)
   249  	return out, err
   250  }
   251  
   252  func findVersionPath(f fs.FS, dir string) string {
   253  	list, _ := fs.ReadDir(f, dir)
   254  	for _, entry := range list {
   255  		name := entry.Name()
   256  		if strings.Contains(name, "@") {
   257  			return name
   258  		}
   259  		found := findVersionPath(f, path.Join(dir, name))
   260  		if found != "" {
   261  			return path.Join(name, found)
   262  		}
   263  	}
   264  	return ""
   265  }
   266  
   267  func getModuleRepository(progress *event.CatalogerTask, moduleName string, moduleVersion string) (fs.FS, error) {
   268  	repoName := moduleName
   269  	parts := strings.Split(moduleName, "/")
   270  	if len(parts) > 2 {
   271  		repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2])
   272  	}
   273  	progress.SetValue(fmt.Sprintf("git: %s", repoName))
   274  	f := memfs.New()
   275  	buf := &bytes.Buffer{}
   276  	_, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{
   277  		URL:           fmt.Sprintf("https://%s", repoName),
   278  		ReferenceName: plumbing.NewTagReferenceName(moduleVersion), // FIXME version might be a SHA
   279  		SingleBranch:  true,
   280  		Depth:         1,
   281  		Progress:      buf,
   282  	})
   283  	if err != nil {
   284  		return nil, fmt.Errorf("%w -- %s", err, buf.String())
   285  	}
   286  
   287  	return billyFSAdapter{fs: f}, nil
   288  }