github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/golang/licenses.go (about) 1 package golang 2 3 import ( 4 "archive/zip" 5 "bytes" 6 "fmt" 7 "io" 8 "io/fs" 9 "net/http" 10 "net/url" 11 "os" 12 "path" 13 "path/filepath" 14 "regexp" 15 "strings" 16 17 "github.com/go-git/go-billy/v5/memfs" 18 "github.com/go-git/go-git/v5" 19 "github.com/go-git/go-git/v5/plumbing" 20 "github.com/go-git/go-git/v5/storage/memory" 21 "github.com/nextlinux/gosbom/gosbom/event" 22 "github.com/nextlinux/gosbom/gosbom/file" 23 "github.com/nextlinux/gosbom/gosbom/internal/fileresolver" 24 "github.com/nextlinux/gosbom/gosbom/pkg" 25 "github.com/nextlinux/gosbom/internal/licenses" 26 "github.com/nextlinux/gosbom/internal/log" 27 ) 28 29 type goLicenses struct { 30 opts GoCatalogerOpts 31 localModCacheResolver file.WritableResolver 32 progress *event.CatalogerTask 33 } 34 35 func newGoLicenses(opts GoCatalogerOpts) goLicenses { 36 return goLicenses{ 37 opts: opts, 38 localModCacheResolver: modCacheResolver(opts.localModCacheDir), 39 progress: &event.CatalogerTask{ 40 SubStatus: true, 41 RemoveOnCompletion: true, 42 Title: "Downloading go mod", 43 }, 44 } 45 } 46 47 func remotesForModule(proxies []string, noProxy []string, module string) []string { 48 for _, pattern := range noProxy { 49 if matched, err := path.Match(pattern, module); err == nil && matched { 50 // matched to be direct for this module 51 return directProxiesOnly 52 } 53 } 54 55 return proxies 56 } 57 58 func modCacheResolver(modCacheDir string) file.WritableResolver { 59 var r file.WritableResolver 60 61 if modCacheDir == "" { 62 log.Trace("unable to determine mod cache directory, skipping mod cache resolver") 63 r = fileresolver.Empty{} 64 } else { 65 stat, err := os.Stat(modCacheDir) 66 67 if os.IsNotExist(err) || stat == nil || !stat.IsDir() { 68 log.Tracef("unable to open mod cache directory: %s, skipping mod cache resolver", modCacheDir) 69 r = fileresolver.Empty{} 70 } else { 71 r = fileresolver.NewFromUnindexedDirectory(modCacheDir) 72 } 73 } 74 75 return r 76 } 77 78 func (c *goLicenses) getLicenses(resolver file.Resolver, moduleName, moduleVersion string) (licenses []pkg.License, err error) { 79 licenses, err = findLicenses(resolver, 80 fmt.Sprintf(`**/go/pkg/mod/%s@%s/*`, processCaps(moduleName), moduleVersion), 81 ) 82 if err != nil || len(licenses) > 0 { 83 return requireCollection(licenses), err 84 } 85 86 // look in the local host mod cache... 87 licenses, err = c.getLicensesFromLocal(moduleName, moduleVersion) 88 if err != nil || len(licenses) > 0 { 89 return requireCollection(licenses), err 90 } 91 92 // we did not find it yet and remote searching was enabled 93 licenses, err = c.getLicensesFromRemote(moduleName, moduleVersion) 94 return requireCollection(licenses), err 95 } 96 97 func (c *goLicenses) getLicensesFromLocal(moduleName, moduleVersion string) ([]pkg.License, error) { 98 if !c.opts.searchLocalModCacheLicenses { 99 return nil, nil 100 } 101 102 // if we're running against a directory on the filesystem, it may not include the 103 // user's homedir / GOPATH, so we defer to using the localModCacheResolver 104 return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion)) 105 } 106 107 func (c *goLicenses) getLicensesFromRemote(moduleName, moduleVersion string) ([]pkg.License, error) { 108 if !c.opts.searchRemoteLicenses { 109 return nil, nil 110 } 111 112 proxies := remotesForModule(c.opts.proxies, c.opts.noProxy, moduleName) 113 114 fsys, err := getModule(c.progress, proxies, moduleName, moduleVersion) 115 if err != nil { 116 return nil, err 117 } 118 119 dir := moduleDir(moduleName, moduleVersion) 120 121 // populate the mod cache with the results 122 err = fs.WalkDir(fsys, ".", func(filePath string, d fs.DirEntry, err error) error { 123 if err != nil { 124 log.Debug(err) 125 return nil 126 } 127 if d.IsDir() { 128 return nil 129 } 130 f, err := fsys.Open(filePath) 131 if err != nil { 132 return err 133 } 134 return c.localModCacheResolver.Write(file.NewLocation(path.Join(dir, filePath)), f) 135 }) 136 137 if err != nil { 138 log.Tracef("remote proxy walk failed for: %s", moduleName) 139 } 140 141 return findLicenses(c.localModCacheResolver, moduleSearchGlob(moduleName, moduleVersion)) 142 } 143 144 func moduleDir(moduleName, moduleVersion string) string { 145 return fmt.Sprintf("%s@%s", processCaps(moduleName), moduleVersion) 146 } 147 148 func moduleSearchGlob(moduleName, moduleVersion string) string { 149 return fmt.Sprintf("%s/*", moduleDir(moduleName, moduleVersion)) 150 } 151 152 func requireCollection(licenses []pkg.License) []pkg.License { 153 if licenses == nil { 154 return make([]pkg.License, 0) 155 } 156 return licenses 157 } 158 159 func findLicenses(resolver file.Resolver, globMatch string) (out []pkg.License, err error) { 160 out = make([]pkg.License, 0) 161 if resolver == nil { 162 return 163 } 164 165 locations, err := resolver.FilesByGlob(globMatch) 166 if err != nil { 167 return nil, err 168 } 169 170 for _, l := range locations { 171 fileName := path.Base(l.RealPath) 172 if licenses.FileNameSet.Contains(fileName) { 173 contents, err := resolver.FileContentsByLocation(l) 174 if err != nil { 175 return nil, err 176 } 177 parsed, err := licenses.Parse(contents, l) 178 if err != nil { 179 return nil, err 180 } 181 182 out = append(out, parsed...) 183 } 184 } 185 186 return 187 } 188 189 var capReplacer = regexp.MustCompile("[A-Z]") 190 191 func processCaps(s string) string { 192 return capReplacer.ReplaceAllStringFunc(s, func(s string) string { 193 return "!" + strings.ToLower(s) 194 }) 195 } 196 197 func getModule(progress *event.CatalogerTask, proxies []string, moduleName, moduleVersion string) (fsys fs.FS, err error) { 198 for _, proxy := range proxies { 199 u, _ := url.Parse(proxy) 200 if proxy == "direct" { 201 fsys, err = getModuleRepository(progress, moduleName, moduleVersion) 202 continue 203 } 204 switch u.Scheme { 205 case "https", "http": 206 fsys, err = getModuleProxy(progress, proxy, moduleName, moduleVersion) 207 case "file": 208 p := filepath.Join(u.Path, moduleName, "@v", moduleVersion) 209 progress.SetValue(fmt.Sprintf("file: %s", p)) 210 fsys = os.DirFS(p) 211 } 212 if fsys != nil { 213 break 214 } 215 } 216 return 217 } 218 219 func getModuleProxy(progress *event.CatalogerTask, proxy string, moduleName string, moduleVersion string) (out fs.FS, _ error) { 220 u := fmt.Sprintf("%s/%s/@v/%s.zip", proxy, moduleName, moduleVersion) 221 progress.SetValue(u) 222 // get the module zip 223 resp, err := http.Get(u) //nolint:gosec 224 if err != nil { 225 return nil, err 226 } 227 defer func() { _ = resp.Body.Close() }() 228 if resp.StatusCode != http.StatusOK { 229 u = fmt.Sprintf("%s/%s/@v/%s.zip", proxy, strings.ToLower(moduleName), moduleVersion) 230 progress.SetValue(u) 231 // try lowercasing it; some packages have mixed casing that really messes up the proxy 232 resp, err = http.Get(u) //nolint:gosec 233 if err != nil { 234 return nil, err 235 } 236 defer func() { _ = resp.Body.Close() }() 237 if resp.StatusCode != http.StatusOK { 238 return nil, fmt.Errorf("failed to get module zip: %s", resp.Status) 239 } 240 } 241 // read the zip 242 b, err := io.ReadAll(resp.Body) 243 if err != nil { 244 return nil, err 245 } 246 out, err = zip.NewReader(bytes.NewReader(b), resp.ContentLength) 247 versionPath := findVersionPath(out, ".") 248 out = getSubFS(out, versionPath) 249 return out, err 250 } 251 252 func findVersionPath(f fs.FS, dir string) string { 253 list, _ := fs.ReadDir(f, dir) 254 for _, entry := range list { 255 name := entry.Name() 256 if strings.Contains(name, "@") { 257 return name 258 } 259 found := findVersionPath(f, path.Join(dir, name)) 260 if found != "" { 261 return path.Join(name, found) 262 } 263 } 264 return "" 265 } 266 267 func getModuleRepository(progress *event.CatalogerTask, moduleName string, moduleVersion string) (fs.FS, error) { 268 repoName := moduleName 269 parts := strings.Split(moduleName, "/") 270 if len(parts) > 2 { 271 repoName = fmt.Sprintf("%s/%s/%s", parts[0], parts[1], parts[2]) 272 } 273 progress.SetValue(fmt.Sprintf("git: %s", repoName)) 274 f := memfs.New() 275 buf := &bytes.Buffer{} 276 _, err := git.Clone(memory.NewStorage(), f, &git.CloneOptions{ 277 URL: fmt.Sprintf("https://%s", repoName), 278 ReferenceName: plumbing.NewTagReferenceName(moduleVersion), // FIXME version might be a SHA 279 SingleBranch: true, 280 Depth: 1, 281 Progress: buf, 282 }) 283 if err != nil { 284 return nil, fmt.Errorf("%w -- %s", err, buf.String()) 285 } 286 287 return billyFSAdapter{fs: f}, nil 288 }