github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/golang/licenses.go (about) 1 package golang 2 3 import ( 4 "archive/zip" 5 "bytes" 6 "fmt" 7 "io" 8 "io/fs" 9 "net/http" 10 "net/url" 11 "os" 12 "path" 13 "path/filepath" 14 "regexp" 15 "strings" 16 17 "github.com/go-git/go-billy/v5/memfs" 18 "github.com/go-git/go-git/v5" 19 "github.com/go-git/go-git/v5/plumbing" 20 "github.com/go-git/go-git/v5/storage/memory" 21 "github.com/scylladb/go-set/strset" 22 23 "github.com/anchore/syft/internal" 24 "github.com/anchore/syft/internal/licenses" 25 "github.com/anchore/syft/internal/log" 26 "github.com/anchore/syft/syft/file" 27 "github.com/anchore/syft/syft/internal/fileresolver" 28 "github.com/anchore/syft/syft/pkg" 29 ) 30 31 type goLicenses struct { 32 catalogerName string 33 opts CatalogerConfig 34 localModCacheResolver file.WritableResolver 35 lowerLicenseFileNames *strset.Set 36 } 37 38 func newGoLicenses(catalogerName string, opts CatalogerConfig) goLicenses { 39 return goLicenses{ 40 catalogerName: catalogerName, 41 opts: opts, 42 localModCacheResolver: modCacheResolver(opts.LocalModCacheDir), 43 lowerLicenseFileNames: strset.New(lowercaseLicenseFiles()...), 44 } 45 } 46 47 func lowercaseLicenseFiles() []string { 48 fileNames := licenses.FileNames() 49 for i := range fileNames { 50 fileNames[i] = strings.ToLower(fileNames[i]) 51 } 52 return fileNames 53 } 54 55 func remotesForModule(proxies []string, noProxy []string, module string) []string { 56 for _, pattern := range noProxy { 57 if matched, err := path.Match(pattern, module); err == nil && matched { 58 // matched to be direct for this module 59 return directProxiesOnly 60 } 61 } 62 63 return proxies 64 } 65 66 func modCacheResolver(modCacheDir string) file.WritableResolver { 67 var r file.WritableResolver 68 69 if modCacheDir == "" { 70 log.Trace("unable to determine mod cache directory, skipping mod cache resolver") 71 r = fileresolver.Empty{} 72 } else { 73 stat, err := os.Stat(modCacheDir) 74 75 if os.IsNotExist(err) || stat == nil || !stat.IsDir() { 76 log.Tracef("unable to open mod cache directory: %s, skipping mod cache resolver", modCacheDir) 77 r = fileresolver.Empty{} 78 } else { 79 r = fileresolver.NewFromUnindexedDirectory(modCacheDir) 80 } 81 } 82 83 return r 84 } 85 86 func (c *goLicenses) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) (licenses []pkg.License, err error) { 87 licenses, err = c.findLicenses(resolver, 88 fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion), 89 ) 90 if err != nil || len(licenses) > 0 { 91 return requireCollection(licenses), err 92 } 93 94 // look in the local host mod cache... 95 licenses, err = c.getLicensesFromLocal(moduleName, moduleVersion) 96 if err != nil || len(licenses) > 0 { 97 return requireCollection(licenses), err 98 } 99 100 // we did not find it yet and remote searching was enabled 101 licenses, err = c.getLicensesFromRemote(moduleName, moduleVersion) 102 return requireCollection(licenses), err 103 } 104 105 func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]pkg.License, error) { 106 if !c.opts.SearchLocalModCacheLicenses { 107 return nil, nil 108 } 109 110 // if we're running against a directory on the filesystem, it may not include the 111 // user's homedir / GOPATH, so we defer to using the localModCacheResolver 112 return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion)) 113 } 114 115 func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) { 116 if !c.opts.SearchRemoteLicenses { 117 return nil, nil 118 } 119 120 proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName) 121 122 fsys, err := getModule(proxies, moduleName, moduleVersion) 123 if err != nil { 124 return nil, err 125 } 126 127 dir := moduleDir(moduleName, moduleVersion) 128 129 // populate the mod cache with the results 130 err = fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error { 131 if err != nil { 132 log.Debug(err) 133 return nil 134 } 135 if d.IsDir() { 136 return nil 137 } 138 f, err := fsys.Open(filePath) 139 if err != nil { 140 return err 141 } 142 return c.localModCacheResolver.Write(file.NewLocation(path.Join(dir, filePath)), f) 143 }) 144 145 if err != nil { 146 log.Tracef("remote proxy walk failed for: %s", moduleName) 147 } 148 149 return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion)) 150 } 151 152 func (c *goLicenses) findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) { 153 out = make([]pkg.License, 0) 154 if resolver == nil { 155 return 156 } 157 158 locations, err := resolver.FilesByGlob(globMatch) 159 if err != nil { 160 return nil, err 161 } 162 163 for _, l := range locations { 164 parsed, err := c.parseLicenseFromLocation(l, resolver) 165 if err != nil { 166 return nil, err 167 } 168 out = append(out, parsed...) 169 } 170 171 return 172 } 173 174 func (c *goLicenses) parseLicenseFromLocation(l file.Location, resolver file.Resolver) ([]pkg.License, error) { 175 var out []pkg.License 176 fileName := path.Base(l.RealPath) 177 if c.lowerLicenseFileNames.Has(strings.ToLower(fileName)) { 178 contents, err := resolver.FileContentsByLocation(l) 179 if err != nil { 180 return nil, err 181 } 182 defer internal.CloseAndLogError(contents, l.RealPath) 183 parsed, err := licenses.Parse(contents, l) 184 if err != nil { 185 return nil, err 186 } 187 188 out = append(out, parsed...) 189 } 190 return out, nil 191 } 192 193 func moduleDir(moduleName, moduleVersion string) string { 194 return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion) 195 } 196 197 func moduleSearchGlob(moduleName, moduleVersion string) string { 198 return fmt.Sprintf("%s/*", moduleDir(moduleName, moduleVersion)) 199 } 200 201 func requireCollection(licenses []pkg.License) []pkg.License { 202 if licenses == nil { 203 return make([]pkg.License, 0) 204 } 205 return licenses 206 } 207 208 var capReplacer = regexp.MustCompile("[A-Z]") 209 210 func processCaps(s string) string { 211 return capReplacer.ReplaceAllStringFunc(s, func(s string) string { 212 return "!" + strings.ToLower(s) 213 }) 214 } 215 216 func getModule(proxies []string, moduleName, moduleVersion string) (fsys fs.FS, err error) { 217 for _, proxy := range proxies { 218 u, _ := url.Parse(proxy) 219 if proxy == "direct" { 220 fsys, err = getModuleRepository(moduleName, moduleVersion) 221 continue 222 } 223 switch u.Scheme { 224 case "https", "http": 225 fsys, err = getModuleProxy(proxy, moduleName, moduleVersion) 226 case "file": 227 p := filepath.Join(u.Path, moduleName, "@v", moduleVersion) 228 fsys = os.DirFS(p) 229 } 230 if fsys != nil { 231 break 232 } 233 } 234 return 235 } 236 237 func getModuleProxy(proxy string, moduleName string, moduleVersion string) (out fs.FS, _ error) { 238 u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion) 239 240 // get the module zip 241 resp, err := http.Get(u) //nolint:gosec 242 if err != nil { 243 return nil, err 244 } 245 defer func() { _ = resp.Body.Close() }() 246 247 if resp.StatusCode != http.StatusOK { 248 u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion) 249 250 // try lowercasing it; some packages have mixed casing that really messes up the proxy 251 resp, err = http.Get(u) //nolint:gosec 252 if err != nil { 253 return nil, err 254 } 255 defer func() { _ = resp.Body.Close() }() 256 if resp.StatusCode != http.StatusOK { 257 return nil, fmt.Errorf("failed to get module zip: %s", resp.Status) 258 } 259 } 260 261 // read the zip 262 b, err := io.ReadAll(resp.Body) 263 if err != nil { 264 return nil, err 265 } 266 267 out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength) 268 versionPath := findVersionPath(out, ".") 269 out = getSubFS(out, versionPath) 270 271 return out, err 272 } 273 274 func findVersionPath(f fs.FS, dir string) string { 275 list, _ := fs.ReadDir(f, dir) 276 277 for _, entry := range list { 278 name := entry.Name() 279 if strings.Contains(name, "@") { 280 return name 281 } 282 found := findVersionPath(f, path.Join(dir, name)) 283 if found != "" { 284 return path.Join(name, found) 285 } 286 } 287 288 return "" 289 } 290 291 func getModuleRepository(moduleName string, moduleVersion string) (fs.FS, error) { 292 repoName := moduleName 293 parts := strings.Split(moduleName, "/") 294 if len(parts) > 2 { 295 repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2]) 296 } 297 298 f := memfs.New() 299 buf := &bytes.Buffer{} 300 _, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{ 301 URL: fmt.Sprintf("https://%s", repoName), 302 ReferenceName: plumbing.NewTagReferenceName(moduleVersion), // FIXME version might be a SHA 303 SingleBranch: true, 304 Depth: 1, 305 Progress: buf, 306 }) 307 308 if err != nil { 309 return nil, fmt.Errorf("%w -- %s", err, buf.String()) 310 } 311 312 return billyFSAdapter{fs: f}, nil 313 }