github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/golang/licenses.go (about) 1 package golang 2 3 import ( 4 "archive/zip" 5 "bytes" 6 "fmt" 7 "io" 8 "io/fs" 9 "net/http" 10 "net/url" 11 "os" 12 "path" 13 "path/filepath" 14 "regexp" 15 "strings" 16 17 "github.com/go-git/go-billy/v5/memfs" 18 "github.com/go-git/go-git/v5" 19 "github.com/go-git/go-git/v5/plumbing" 20 "github.com/go-git/go-git/v5/storage/memory" 21 "github.com/scylladb/go-set/strset" 22 23 "github.com/anchore/syft/syft/event/monitor" 24 "github.com/anchore/syft/syft/file" 25 "github.com/anchore/syft/syft/pkg" 26 "github.com/lineaje-labs/syft/internal/licenses" 27 "github.com/lineaje-labs/syft/internal/log" 28 "github.com/lineaje-labs/syft/syft/internal/fileresolver" 29 ) 30 31 type goLicenses struct { 32 opts CatalogerConfig 33 localModCacheResolver file.WritableResolver 34 progress *monitor.CatalogerTask 35 lowerLicenseFileNames *strset.Set 36 } 37 38 func newGoLicenses(opts CatalogerConfig) goLicenses { 39 return goLicenses{ 40 opts: opts, 41 localModCacheResolver: modCacheResolver(opts.LocalModCacheDir), 42 progress: &monitor.CatalogerTask{ 43 SubStatus: true, 44 RemoveOnCompletion: true, 45 Title: "Downloading go mod", 46 }, 47 lowerLicenseFileNames: strset.New(lowercaseLicenseFiles()...), 48 } 49 } 50 51 func lowercaseLicenseFiles() []string { 52 fileNames := licenses.FileNames() 53 for i := range fileNames { 54 fileNames[i] = strings.ToLower(fileNames[i]) 55 } 56 return fileNames 57 } 58 59 func remotesForModule(proxies []string, noProxy []string, module string) []string { 60 for _, pattern := range noProxy { 61 if matched, err := path.Match(pattern, module); err == nil && matched { 62 // matched to be direct for this module 63 return directProxiesOnly 64 } 65 } 66 67 return proxies 68 } 69 70 func modCacheResolver(modCacheDir string) file.WritableResolver { 71 var r file.WritableResolver 72 73 if modCacheDir == "" { 74 log.Trace("unable to determine mod cache directory, skipping mod cache resolver") 75 r = fileresolver.Empty{} 76 } else { 77 stat, err := os.Stat(modCacheDir) 78 79 if os.IsNotExist(err) || stat == nil || !stat.IsDir() { 80 log.Tracef("unable to open mod cache directory: %s, skipping mod cache resolver", modCacheDir) 81 r = fileresolver.Empty{} 82 } else { 83 r = fileresolver.NewFromUnindexedDirectory(modCacheDir) 84 } 85 } 86 87 return r 88 } 89 90 func (c *goLicenses) getLicenses( 91 resolver file.Resolver, moduleName, moduleVersion string, 92 ) (licenses []pkg.License, err error) { 93 licenses, err = c.findLicenses(resolver, 94 fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion), 95 ) 96 if err != nil || len(licenses) > 0 { 97 return requireCollection(licenses), err 98 } 99 100 // look in the local host mod cache... 101 licenses, err = c.getLicensesFromLocal(moduleName, moduleVersion) 102 if err != nil || len(licenses) > 0 { 103 return requireCollection(licenses), err 104 } 105 106 // we did not find it yet and remote searching was enabled 107 licenses, err = c.getLicensesFromRemote(moduleName, moduleVersion) 108 return requireCollection(licenses), err 109 } 110 111 func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]pkg.License, error) { 112 if !c.opts.SearchLocalModCacheLicenses { 113 return nil, nil 114 } 115 116 // if we're running against a directory on the filesystem, it may not include the 117 // user's homedir / GOPATH, so we defer to using the localModCacheResolver 118 return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion)) 119 } 120 121 func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) { 122 if !c.opts.SearchRemoteLicenses { 123 return nil, nil 124 } 125 126 proxies := remotesForModule(c.opts.Proxies, c.opts.NoProxy, moduleName) 127 128 fsys, err := getModule(c.progress, proxies, moduleName, moduleVersion) 129 if err != nil { 130 return nil, err 131 } 132 133 dir := moduleDir(moduleName, moduleVersion) 134 135 // populate the mod cache with the results 136 err = fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error { 137 if err != nil { 138 log.Debug(err) 139 return nil 140 } 141 if d.IsDir() { 142 return nil 143 } 144 f, err := fsys.Open(filePath) 145 if err != nil { 146 return err 147 } 148 return c.localModCacheResolver.Write(file.NewLocation(path.Join(dir, filePath)), f) 149 }) 150 151 if err != nil { 152 log.Tracef("remote proxy walk failed for: %s", moduleName) 153 } 154 155 return c.findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion)) 156 } 157 158 func (c *goLicenses) findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) { 159 out = make([]pkg.License, 0) 160 if resolver == nil { 161 return 162 } 163 164 locations, err := resolver.FilesByGlob(globMatch) 165 if err != nil { 166 return nil, err 167 } 168 169 for _, l := range locations { 170 fileName := path.Base(l.RealPath) 171 if c.lowerLicenseFileNames.Has(strings.ToLower(fileName)) { 172 contents, err := resolver.FileContentsByLocation(l) 173 if err != nil { 174 return nil, err 175 } 176 parsed, err := licenses.Parse(contents, l) 177 if err != nil { 178 return nil, err 179 } 180 181 out = append(out, parsed...) 182 } 183 } 184 185 return 186 } 187 188 func moduleDir(moduleName, moduleVersion string) string { 189 return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion) 190 } 191 192 func moduleSearchGlob(moduleName, moduleVersion string) string { 193 return fmt.Sprintf("%s/*", moduleDir(moduleName, moduleVersion)) 194 } 195 196 func requireCollection(licenses []pkg.License) []pkg.License { 197 if licenses == nil { 198 return make([]pkg.License, 0) 199 } 200 return licenses 201 } 202 203 var capReplacer = regexp.MustCompile("[A-Z]") 204 205 func processCaps(s string) string { 206 return capReplacer.ReplaceAllStringFunc(s, func(s string) string { 207 return "!" + strings.ToLower(s) 208 }) 209 } 210 211 func getModule( 212 progress *monitor.CatalogerTask, proxies []string, moduleName, moduleVersion string, 213 ) (fsys fs.FS, err error) { 214 for _, proxy := range proxies { 215 u, _ := url.Parse(proxy) 216 if proxy == "direct" { 217 fsys, err = getModuleRepository(progress, moduleName, moduleVersion) 218 continue 219 } 220 switch u.Scheme { 221 case "https", "http": 222 fsys, err = getModuleProxy(progress, proxy, moduleName, moduleVersion) 223 case "file": 224 p := filepath.Join(u.Path, moduleName, "@v", moduleVersion) 225 progress.SetValue(fmt.Sprintf("file: %s", p)) 226 fsys = os.DirFS(p) 227 } 228 if fsys != nil { 229 break 230 } 231 } 232 return 233 } 234 235 func getModuleProxy( 236 progress *monitor.CatalogerTask, proxy string, moduleName string, moduleVersion string, 237 ) (out fs.FS, _ error) { 238 u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion) 239 progress.SetValue(u) 240 241 // get the module zip 242 resp, err := http.Get(u) //nolint:gosec 243 if err != nil { 244 return nil, err 245 } 246 defer func() { _ = resp.Body.Close() }() 247 248 if resp.StatusCode != http.StatusOK { 249 u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion) 250 progress.SetValue(u) 251 252 // try lowercasing it; some packages have mixed casing that really messes up the proxy 253 resp, err = http.Get(u) //nolint:gosec 254 if err != nil { 255 return nil, err 256 } 257 defer func() { _ = resp.Body.Close() }() 258 if resp.StatusCode != http.StatusOK { 259 return nil, fmt.Errorf("failed to get module zip: %s", resp.Status) 260 } 261 } 262 263 // read the zip 264 b, err := io.ReadAll(resp.Body) 265 if err != nil { 266 return nil, err 267 } 268 269 out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength) 270 versionPath := findVersionPath(out, ".") 271 out = getSubFS(out, versionPath) 272 273 return out, err 274 } 275 276 func findVersionPath(f fs.FS, dir string) string { 277 list, _ := fs.ReadDir(f, dir) 278 279 for _, entry := range list { 280 name := entry.Name() 281 if strings.Contains(name, "@") { 282 return name 283 } 284 found := findVersionPath(f, path.Join(dir, name)) 285 if found != "" { 286 return path.Join(name, found) 287 } 288 } 289 290 return "" 291 } 292 293 func getModuleRepository(progress *monitor.CatalogerTask, moduleName string, moduleVersion string) (fs.FS, error) { 294 repoName := moduleName 295 parts := strings.Split(moduleName, "/") 296 if len(parts) > 2 { 297 repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2]) 298 } 299 300 progress.SetValue(fmt.Sprintf("git: %s", repoName)) 301 302 f := memfs.New() 303 buf := &bytes.Buffer{} 304 _, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{ 305 URL: fmt.Sprintf("https://%s", repoName), 306 ReferenceName: plumbing.NewTagReferenceName(moduleVersion), // FIXME version might be a SHA 307 SingleBranch: true, 308 Depth: 1, 309 Progress: buf, 310 }) 311 312 if err != nil { 313 return nil, fmt.Errorf("%w -- %s", err, buf.String()) 314 } 315 316 return billyFSAdapter{fs: f}, nil 317 }