github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/golang/licenses.go (about) 1 package golang 2 3 import ( 4 "archive/zip" 5 "bytes" 6 "context" 7 "fmt" 8 "io" 9 "io/fs" 10 "net/http" 11 "net/url" 12 "os" 13 "path" 14 "path/filepath" 15 "regexp" 16 "strings" 17 18 "github.com/go-git/go-billy/v5/memfs" 19 "github.com/go-git/go-git/v5" 20 "github.com/go-git/go-git/v5/plumbing" 21 "github.com/go-git/go-git/v5/storage/memory" 22 23 "github.com/anchore/syft/internal" 24 "github.com/anchore/syft/internal/cache" 25 "github.com/anchore/syft/internal/log" 26 "github.com/anchore/syft/syft/file" 27 "github.com/anchore/syft/syft/pkg" 28 "github.com/anchore/syft/syft/pkg/cataloger/internal/licenses" 29 ) 30 31 type goLicenseResolver struct { 32 catalogerName string 33 opts CatalogerConfig 34 localModCacheDir fs.FS 35 localVendorDir fs.FS 36 licenseCache cache.Resolver[[]pkg.License] 37 } 38 39 func newGoLicenseResolver(catalogerName string, opts CatalogerConfig) goLicenseResolver { 40 var localModCacheDir fs.FS 41 if opts.SearchLocalModCacheLicenses { 42 localModCacheDir = os.DirFS(opts.LocalModCacheDir) 43 } 44 45 var localVendorDir fs.FS 46 if opts.SearchLocalVendorLicenses { 47 vendorDir := opts.LocalVendorDir 48 if vendorDir == "" { 49 wd, err := os.Getwd() 50 if err != nil { 51 log.Debugf("unable to get CWD while resolving the local go vendor dir: %v", err) 52 } else { 53 vendorDir = filepath.Join(wd, "vendor") 54 } 55 } 56 localVendorDir = os.DirFS(vendorDir) 57 } 58 59 return goLicenseResolver{ 60 catalogerName: catalogerName, 61 opts: opts, 62 localModCacheDir: localModCacheDir, 63 localVendorDir: localVendorDir, 64 licenseCache: cache.GetResolverCachingErrors[[]pkg.License]("golang", "v2"), 65 } 66 } 67 68 func remotesForModule(proxies []string, noProxy []string, module string) []string { 69 for _, pattern := range noProxy { 70 if matched, err := path.Match(pattern, module); err == nil && matched { 71 // matched to be direct for this module 72 return directProxiesOnly 73 } 74 } 75 76 return proxies 77 } 78 79 func (c *goLicenseResolver) getLicenses(ctx context.Context, resolver file.Resolver, moduleName, moduleVersion string) []pkg.License { 80 // search the scan target first, ignoring local and remote sources 81 pkgLicenses, err := c.findLicensesInSource(ctx, resolver, 82 fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion), 83 ) 84 if err != nil { 85 log.WithFields("error", err, "module", moduleName, "version", moduleVersion).Trace("unable to read golang licenses from source") 86 } 87 if len(pkgLicenses) > 0 { 88 return pkgLicenses 89 } 90 91 // look in the local host mod directory... 92 if c.opts.SearchLocalModCacheLicenses { 93 pkgLicenses, err = c.getLicensesFromLocal(ctx, moduleName, moduleVersion) 94 if err != nil { 95 log.WithFields("error", err, "module", moduleName, "version", moduleVersion).Trace("unable to read golang licenses local") 96 } 97 if len(pkgLicenses) > 0 { 98 return pkgLicenses 99 } 100 } 101 102 // look in the local vendor directory... 103 if c.opts.SearchLocalVendorLicenses { 104 pkgLicenses, err = c.getLicensesFromLocalVendor(ctx, moduleName) 105 if err != nil { 106 log.WithFields("error", err, "module", moduleName, "version", moduleVersion).Trace("unable to read golang licenses vendor") 107 } 108 if len(pkgLicenses) > 0 { 109 return pkgLicenses 110 } 111 } 112 113 // download from remote sources 114 if c.opts.SearchRemoteLicenses { 115 pkgLicenses, err = c.getLicensesFromRemote(ctx, moduleName, moduleVersion) 116 if err != nil { 117 log.WithFields("error", err, "module", moduleName, "version", moduleVersion).Debug("unable to read golang licenses remote") 118 } 119 } 120 121 return pkgLicenses 122 } 123 124 func (c *goLicenseResolver) getLicensesFromLocal(ctx context.Context, moduleName, moduleVersion string) ([]pkg.License, error) { 125 if c.localModCacheDir == nil { 126 return nil, nil 127 } 128 129 subdir := moduleDir(moduleName, moduleVersion) 130 131 // get the local subdirectory containing the specific go module 132 dir, err := fs.Sub(c.localModCacheDir, subdir) 133 if err != nil { 134 return nil, err 135 } 136 137 // if we're running against a directory on the filesystem, it may not include the 138 // user's homedir / GOPATH, so we defer to using the localModCacheResolver 139 // we use $GOPATH/pkg/mod to avoid leaking information about the user's system 140 return c.findLicensesInFS(ctx, "file://$GOPATH/pkg/mod/"+subdir+"/", dir) 141 } 142 143 func (c *goLicenseResolver) getLicensesFromLocalVendor(ctx context.Context, moduleName string) ([]pkg.License, error) { 144 if c.localVendorDir == nil { 145 return nil, nil 146 } 147 148 subdir := processCaps(moduleName) 149 150 // get the local subdirectory containing the specific go module 151 dir, err := fs.Sub(c.localVendorDir, subdir) 152 if err != nil { 153 return nil, err 154 } 155 156 // if we're running against a directory on the filesystem, it may not include the 157 // user's homedir / GOPATH, so we defer to using the localModCacheResolver 158 // we use $GOPATH/pkg/mod to avoid leaking information about the user's system 159 return c.findLicensesInFS(ctx, "file://$GO_VENDOR/"+subdir+"/", dir) 160 } 161 162 func (c *goLicenseResolver) getLicensesFromRemote(ctx context.Context, moduleName, moduleVersion string) ([]pkg.License, error) { 163 return c.licenseCache.Resolve(fmt.Sprintf("%s/%s", moduleName, moduleVersion), func() ([]pkg.License, error) { 164 proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName) 165 166 urlPrefix, fsys, err := getModule(proxies, moduleName, moduleVersion) 167 if err != nil { 168 return nil, err 169 } 170 171 return c.findLicensesInFS(ctx, urlPrefix, fsys) 172 }) 173 } 174 175 func (c *goLicenseResolver) findLicensesInFS(ctx context.Context, urlPrefix string, fsys fs.FS) ([]pkg.License, error) { 176 var out []pkg.License 177 err := fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error { 178 if err != nil { 179 log.Debugf("error reading %s#%s: %v", urlPrefix, filePath, err) 180 return err 181 } 182 if d == nil { 183 log.Debugf("nil entry for %s#%s", urlPrefix, filePath) 184 return nil 185 } 186 if !licenses.IsLicenseFile(d.Name()) { 187 return nil 188 } 189 rdr, err := fsys.Open(filePath) 190 if err != nil { 191 log.Debugf("error opening license file %s: %v", filePath, err) 192 return nil 193 } 194 defer internal.CloseAndLogError(rdr, filePath) 195 foundLicenses := pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(file.NewLocation(filePath), rdr)) 196 // since these licenses are found in an external fs.FS, not in the scanned source, 197 // get rid of the locations but keep information about the where the license was found 198 // by prepending the urlPrefix to the internal path for an accurate representation 199 for _, l := range foundLicenses { 200 l.URLs = []string{urlPrefix + filePath} 201 l.Locations = file.NewLocationSet() 202 out = append(out, l) 203 } 204 return nil 205 }) 206 return out, err 207 } 208 209 func (c *goLicenseResolver) findLicensesInSource(ctx context.Context, resolver file.Resolver, globMatch string) ([]pkg.License, error) { 210 var out []pkg.License 211 locations, err := resolver.FilesByGlob(globMatch) 212 if err != nil { 213 return nil, err 214 } 215 216 for _, l := range locations { 217 parsed, err := c.parseLicenseFromLocation(ctx, l, resolver) 218 if err != nil { 219 return nil, err 220 } 221 out = append(out, parsed...) 222 } 223 224 // if we have a directory but simply don't have any found license files, indicate this so we 225 // don't re-download modules continually 226 if len(locations) > 0 && len(out) == 0 { 227 return nil, noLicensesFound{ 228 glob: globMatch, 229 } 230 } 231 232 return out, nil 233 } 234 235 func (c *goLicenseResolver) parseLicenseFromLocation(ctx context.Context, l file.Location, resolver file.Resolver) ([]pkg.License, error) { 236 var out []pkg.License 237 fileName := path.Base(l.RealPath) 238 if licenses.IsLicenseFile(fileName) { 239 contents, err := resolver.FileContentsByLocation(l) 240 if err != nil { 241 return nil, err 242 } 243 defer internal.CloseAndLogError(contents, l.RealPath) 244 out = pkg.NewLicensesFromReadCloserWithContext(ctx, file.NewLocationReadCloser(l, contents)) 245 } 246 return out, nil 247 } 248 249 func moduleDir(moduleName, moduleVersion string) string { 250 return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion) 251 } 252 253 var capReplacer = regexp.MustCompile("[A-Z]") 254 255 func processCaps(s string) string { 256 return capReplacer.ReplaceAllStringFunc(s, func(s string) string { 257 return "!" + strings.ToLower(s) 258 }) 259 } 260 261 func getModule(proxies []string, moduleName, moduleVersion string) (urlPrefix string, fsys fs.FS, err error) { 262 for _, proxy := range proxies { 263 u, _ := url.Parse(proxy) 264 if proxy == "direct" { 265 urlPrefix, fsys, err = getModuleRepository(moduleName, moduleVersion) 266 continue 267 } 268 switch u.Scheme { 269 case "https", "http": 270 urlPrefix, fsys, err = getModuleProxy(proxy, moduleName, moduleVersion) 271 case "file": 272 p := filepath.Join(u.Path, moduleName, "@v", moduleVersion) 273 urlPrefix = path.Join("file://", p) + "/" 274 log.WithFields("path", p).Info("looking for go module in filesystem") 275 fsys = os.DirFS(p) 276 } 277 if fsys != nil { 278 break 279 } 280 } 281 return 282 } 283 284 func getModuleProxy(proxy string, moduleName string, moduleVersion string) (moduleURL string, out fs.FS, _ error) { 285 u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion) 286 287 // get the module zip 288 log.WithFields("url", u).Info("downloading go module from proxy") 289 resp, err := http.Get(u) //nolint:gosec 290 if err != nil { 291 return "", nil, err 292 } 293 defer func() { _ = resp.Body.Close() }() 294 295 if resp.StatusCode != http.StatusOK { 296 u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion) 297 298 // try lowercasing it; some packages have mixed casing that really messes up the proxy 299 resp, err = http.Get(u) //nolint:gosec 300 if err != nil { 301 return "", nil, err 302 } 303 defer func() { _ = resp.Body.Close() }() 304 if resp.StatusCode != http.StatusOK { 305 return "", nil, fmt.Errorf("failed to get module zip: %s", resp.Status) 306 } 307 } 308 309 // read the zip 310 b, err := io.ReadAll(resp.Body) 311 if err != nil { 312 return "", nil, err 313 } 314 315 out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength) 316 versionPath := findVersionPath(out, ".") 317 out = getSubFS(out, versionPath) 318 319 return u + "#" + versionPath + "/", out, err 320 } 321 322 func findVersionPath(f fs.FS, dir string) string { 323 list, _ := fs.ReadDir(f, dir) 324 325 for _, entry := range list { 326 name := entry.Name() 327 if strings.Contains(name, "@") { 328 return name 329 } 330 found := findVersionPath(f, path.Join(dir, name)) 331 if found != "" { 332 return path.Join(name, found) 333 } 334 } 335 336 return "" 337 } 338 339 func getModuleRepository(moduleName string, moduleVersion string) (string, fs.FS, error) { 340 repoName := moduleName 341 parts := strings.Split(moduleName, "/") 342 if len(parts) > 2 { 343 repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2]) 344 } 345 346 // see if there's a hash and use that if so, otherwise use a tag 347 splitVersion := strings.Split(moduleVersion, "-") 348 var cloneRefName plumbing.ReferenceName 349 refPath := "" 350 if len(splitVersion) < 3 { 351 tagName := splitVersion[0] 352 cloneRefName = plumbing.NewTagReferenceName(tagName) 353 refPath = "/tags/" + tagName 354 } 355 356 f := memfs.New() 357 buf := &bytes.Buffer{} 358 repoURL := fmt.Sprintf("https://%s", repoName) 359 360 log.WithFields("repoURL", repoURL, "ref", cloneRefName).Info("cloning go module repository") 361 r, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{ 362 URL: repoURL, 363 ReferenceName: cloneRefName, 364 SingleBranch: true, 365 Depth: 1, 366 Progress: buf, 367 }) 368 if err != nil { 369 return "", nil, fmt.Errorf("%w -- %s", err, buf.String()) 370 } 371 372 if len(splitVersion) > 2 { 373 sha := splitVersion[len(splitVersion)-1] 374 hash, err := r.ResolveRevision(plumbing.Revision(sha)) 375 if err != nil || hash == nil { 376 log.Tracef("unable to resolve hash %s: %v", sha, err) 377 } else { 378 w, err := r.Worktree() 379 if err != nil { 380 log.Tracef("unable to get worktree, using default: %v", err) 381 } 382 err = w.Checkout(&git.CheckoutOptions{ 383 Hash: *hash, 384 }) 385 if err != nil { 386 log.Tracef("unable to checkout commit, using default: %v", err) 387 } else { 388 refPath = "/refs/" + hash.String() 389 } 390 } 391 } 392 393 return repoURL + refPath + "/", billyFSAdapter{fs: f}, err 394 } 395 396 type noLicensesFound struct { 397 glob string 398 } 399 400 func (l noLicensesFound) Error() string { 401 return fmt.Sprintf("unable to find license information matching: %s", l.glob) 402 } 403 404 var _ error = (*noLicensesFound)(nil)