github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/golang/licenses.go (about) 1 package golang 2 3 import ( 4 "archive/zip" 5 "bytes" 6 "fmt" 7 "io" 8 "io/fs" 9 "net/http" 10 "net/url" 11 "os" 12 "path" 13 "path/filepath" 14 "regexp" 15 "strings" 16 17 "github.com/go-git/go-billy/v5/memfs" 18 "github.com/go-git/go-git/v5" 19 "github.com/go-git/go-git/v5/plumbing" 20 "github.com/go-git/go-git/v5/storage/memory" 21 22 "github.com/anchore/syft/internal/licenses" 23 "github.com/anchore/syft/internal/log" 24 "github.com/anchore/syft/syft/event/monitor" 25 "github.com/anchore/syft/syft/file" 26 "github.com/anchore/syft/syft/internal/fileresolver" 27 "github.com/anchore/syft/syft/pkg" 28 ) 29 30 type goLicenses struct { 31 opts GoCatalogerOpts 32 localModCacheResolver file.WritableResolver 33 progress *monitor.CatalogerTask 34 } 35 36 func newGoLicenses(opts GoCatalogerOpts) goLicenses { 37 return goLicenses{ 38 opts: opts, 39 localModCacheResolver: modCacheResolver(opts.localModCacheDir), 40 progress: &monitor.CatalogerTask{ 41 SubStatus: true, 42 RemoveOnCompletion: true, 43 Title: "Downloading go mod", 44 }, 45 } 46 } 47 48 func remotesForModule(proxies []string, noProxy []string, module string) []string { 49 for _, pattern := range noProxy { 50 if matched, err := path.Match(pattern, module); err == nil && matched { 51 // matched to be direct for this module 52 return directProxiesOnly 53 } 54 } 55 56 return proxies 57 } 58 59 func modCacheResolver(modCacheDir string) file.WritableResolver { 60 var r file.WritableResolver 61 62 if modCacheDir == "" { 63 log.Trace("unable to determine mod cache directory, skipping mod cache resolver") 64 r = fileresolver.Empty{} 65 } else { 66 stat, err := os.Stat(modCacheDir) 67 68 if os.IsNotExist(err) || stat == nil || !stat.IsDir() { 69 log.Tracef("unable to open mod cache directory: %s, skipping mod cache resolver", modCacheDir) 70 r = fileresolver.Empty{} 71 } else { 72 r = fileresolver.NewFromUnindexedDirectory(modCacheDir) 73 } 74 } 75 76 return r 77 } 78 79 func (c *goLicenses) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) (licenses []pkg.License, err error) { 80 licenses, err = findLicenses(resolver, 81 fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion), 82 ) 83 if err != nil || len(licenses) > 0 { 84 return requireCollection(licenses), err 85 } 86 87 // look in the local host mod cache... 88 licenses, err = c.getLicensesFromLocal(moduleName, moduleVersion) 89 if err != nil || len(licenses) > 0 { 90 return requireCollection(licenses), err 91 } 92 93 // we did not find it yet and remote searching was enabled 94 licenses, err = c.getLicensesFromRemote(moduleName, moduleVersion) 95 return requireCollection(licenses), err 96 } 97 98 func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]pkg.License, error) { 99 if !c.opts.searchLocalModCacheLicenses { 100 return nil, nil 101 } 102 103 // if we're running against a directory on the filesystem, it may not include the 104 // user's homedir / GOPATH, so we defer to using the localModCacheResolver 105 return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion)) 106 } 107 108 func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) { 109 if !c.opts.searchRemoteLicenses { 110 return nil, nil 111 } 112 113 proxies := remotesForModule(c.opts.proxies, c.opts.noProxy, moduleName) 114 115 fsys, err := getModule(c.progress, proxies, moduleName, moduleVersion) 116 if err != nil { 117 return nil, err 118 } 119 120 dir := moduleDir(moduleName, moduleVersion) 121 122 // populate the mod cache with the results 123 err = fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error { 124 if err != nil { 125 log.Debug(err) 126 return nil 127 } 128 if d.IsDir() { 129 return nil 130 } 131 f, err := fsys.Open(filePath) 132 if err != nil { 133 return err 134 } 135 return c.localModCacheResolver.Write(file.NewLocation(path.Join(dir, filePath)), f) 136 }) 137 138 if err != nil { 139 log.Tracef("remote proxy walk failed for: %s", moduleName) 140 } 141 142 return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion)) 143 } 144 145 func moduleDir(moduleName, moduleVersion string) string { 146 return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion) 147 } 148 149 func moduleSearchGlob(moduleName, moduleVersion string) string { 150 return fmt.Sprintf("%s/*", moduleDir(moduleName, moduleVersion)) 151 } 152 153 func requireCollection(licenses []pkg.License) []pkg.License { 154 if licenses == nil { 155 return make([]pkg.License, 0) 156 } 157 return licenses 158 } 159 160 func findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) { 161 out = make([]pkg.License, 0) 162 if resolver == nil { 163 return 164 } 165 166 locations, err := resolver.FilesByGlob(globMatch) 167 if err != nil { 168 return nil, err 169 } 170 171 for _, l := range locations { 172 fileName := path.Base(l.RealPath) 173 if licenses.FileNameSet.Contains(fileName) { 174 contents, err := resolver.FileContentsByLocation(l) 175 if err != nil { 176 return nil, err 177 } 178 parsed, err := licenses.Parse(contents, l) 179 if err != nil { 180 return nil, err 181 } 182 183 out = append(out, parsed...) 184 } 185 } 186 187 return 188 } 189 190 var capReplacer = regexp.MustCompile("[A-Z]") 191 192 func processCaps(s string) string { 193 return capReplacer.ReplaceAllStringFunc(s, func(s string) string { 194 return "!" + strings.ToLower(s) 195 }) 196 } 197 198 func getModule(progress *monitor.CatalogerTask, proxies []string, moduleName, moduleVersion string) (fsys fs.FS, err error) { 199 for _, proxy := range proxies { 200 u, _ := url.Parse(proxy) 201 if proxy == "direct" { 202 fsys, err = getModuleRepository(progress, moduleName, moduleVersion) 203 continue 204 } 205 switch u.Scheme { 206 case "https", "http": 207 fsys, err = getModuleProxy(progress, proxy, moduleName, moduleVersion) 208 case "file": 209 p := filepath.Join(u.Path, moduleName, "@v", moduleVersion) 210 progress.SetValue(fmt.Sprintf("file: %s", p)) 211 fsys = os.DirFS(p) 212 } 213 if fsys != nil { 214 break 215 } 216 } 217 return 218 } 219 220 func getModuleProxy(progress *monitor.CatalogerTask, proxy string, moduleName string, moduleVersion string) (out fs.FS, _ error) { 221 u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion) 222 progress.SetValue(u) 223 // get the module zip 224 resp, err := http.Get(u) //nolint:gosec 225 if err != nil { 226 return nil, err 227 } 228 defer func() { _ = resp.Body.Close() }() 229 if resp.StatusCode != http.StatusOK { 230 u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion) 231 progress.SetValue(u) 232 // try lowercasing it; some packages have mixed casing that really messes up the proxy 233 resp, err = http.Get(u) //nolint:gosec 234 if err != nil { 235 return nil, err 236 } 237 defer func() { _ = resp.Body.Close() }() 238 if resp.StatusCode != http.StatusOK { 239 return nil, fmt.Errorf("failed to get module zip: %s", resp.Status) 240 } 241 } 242 // read the zip 243 b, err := io.ReadAll(resp.Body) 244 if err != nil { 245 return nil, err 246 } 247 out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength) 248 versionPath := findVersionPath(out, ".") 249 out = getSubFS(out, versionPath) 250 return out, err 251 } 252 253 func findVersionPath(f fs.FS, dir string) string { 254 list, _ := fs.ReadDir(f, dir) 255 for _, entry := range list { 256 name := entry.Name() 257 if strings.Contains(name, "@") { 258 return name 259 } 260 found := findVersionPath(f, path.Join(dir, name)) 261 if found != "" { 262 return path.Join(name, found) 263 } 264 } 265 return "" 266 } 267 268 func getModuleRepository(progress *monitor.CatalogerTask, moduleName string, moduleVersion string) (fs.FS, error) { 269 repoName := moduleName 270 parts := strings.Split(moduleName, "/") 271 if len(parts) > 2 { 272 repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2]) 273 } 274 progress.SetValue(fmt.Sprintf("git: %s", repoName)) 275 f := memfs.New() 276 buf := &bytes.Buffer{} 277 _, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{ 278 URL: fmt.Sprintf("https://%s", repoName), 279 ReferenceName: plumbing.NewTagReferenceName(moduleVersion), // FIXME version might be a SHA 280 SingleBranch: true, 281 Depth: 1, 282 Progress: buf, 283 }) 284 if err != nil { 285 return nil, fmt.Errorf("%w -- %s", err, buf.String()) 286 } 287 288 return billyFSAdapter{fs: f}, nil 289 }