github.com/grafana/pyroscope@v1.18.0/pkg/frontend/vcs/source/golang/modules.go (about) 1 package golang 2 3 import ( 4 "context" 5 "fmt" 6 "net/http" 7 "path/filepath" 8 "regexp" 9 "strings" 10 11 "connectrpc.com/connect" 12 "github.com/PuerkitoBio/goquery" 13 "golang.org/x/mod/modfile" 14 "golang.org/x/mod/module" 15 "golang.org/x/mod/semver" 16 ) 17 18 const ( 19 GoMod = "go.mod" 20 21 GitHubPath = "github.com/" 22 GooglePath = "go.googlesource.com/" 23 GoPkgInPath = "gopkg.in/" 24 ) 25 26 var versionSuffixRE = regexp.MustCompile(`/v[0-9]+[/]*`) 27 28 // Module represents a go module with a file path in that module 29 type Module struct { 30 module.Version 31 FilePath string 32 } 33 34 // ParseModuleFromPath parses the module from the given path. 35 func ParseModuleFromPath(path string) (Module, bool) { 36 parts := strings.Split(path, "@v") 37 if len(parts) != 2 { 38 return Module{}, false 39 } 40 first := strings.Index(parts[1], "/") 41 if first < 0 { 42 return Module{}, false 43 } 44 filePath := parts[1][first+1:] 45 modulePath := parts[0] 46 47 // The go mod folder typically starts with "pkg/mod". If that segment can be found, shorten the module path, so no other folders with dots get accidentally picked up. 48 if pos := strings.Index(modulePath, "/pkg/mod/"); pos > 0 { 49 modulePath = modulePath[pos:] 50 } 51 52 // searching for the first domain name 53 domainParts := strings.Split(modulePath, "/") 54 for i, part := range domainParts { 55 if strings.Contains(part, ".") { 56 return Module{ 57 Version: module.Version{ 58 Path: strings.Join(domainParts[i:], "/"), 59 Version: "v" + parts[1][:first], 60 }, 61 FilePath: filePath, 62 }, true 63 } 64 } 65 return Module{}, false 66 } 67 68 func (m Module) IsGitHub() bool { 69 return strings.HasPrefix(m.Path, GitHubPath) 70 } 71 72 func (m Module) IsGoogleSource() bool { 73 return strings.HasPrefix(m.Path, GooglePath) 74 } 75 76 func (m Module) IsGoPkgIn() bool { 77 return strings.HasPrefix(m.Path, GoPkgInPath) 78 } 79 80 func (m Module) String() string { 81 return fmt.Sprintf("%s@%s", m.Path, m.Version) 82 } 83 84 type HttpClient interface { 85 Do(req *http.Request) (*http.Response, error) 86 } 87 88 // Resolve resolves the module path to a canonical path. 89 func (module *Module) Resolve(ctx context.Context, mainModule module.Version, modfile *modfile.File, httpClient HttpClient) error { 90 if modfile != nil { 91 mainModule.Path = modfile.Module.Mod.Path 92 module.applyGoMod(mainModule, modfile) 93 } 94 if err := module.resolveVanityURL(ctx, httpClient); err != nil { 95 return err 96 } 97 // remove version suffix such as /v2 or /v11 ... 98 module.Path = versionSuffixRE.ReplaceAllString(module.Path, "") 99 return nil 100 } 101 102 func (module *Module) resolveVanityURL(ctx context.Context, httpClient HttpClient) error { 103 switch { 104 // no need to resolve vanity URL 105 case module.IsGitHub(): 106 return nil 107 case module.IsGoPkgIn(): 108 return module.resolveGoPkgIn() 109 default: 110 return module.resolveGoGet(ctx, httpClient) 111 } 112 } 113 114 // resolveGoGet resolves the module path using go-get meta tags. 115 // normally go-import meta tag should be used to resolve vanity. 116 // 117 // curl -v 'https://google.golang.org/protobuf?go-get=1' 118 // 119 // careful follow redirect see: curl -L -v 'connectrpc.com/connect?go-get=1' 120 // if go-source meta tag is present prefer it over go-import. 121 // see https://go.dev/ref/mod#vcs-find 122 func (module *Module) resolveGoGet(ctx context.Context, httpClient HttpClient) error { 123 req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("https://%s?go-get=1", strings.TrimRight(module.Path, "/")), nil) 124 if err != nil { 125 return err 126 } 127 resp, err := httpClient.Do(req) 128 if err != nil { 129 return err 130 } 131 defer resp.Body.Close() 132 if resp.StatusCode != http.StatusOK { 133 return connect.NewError(connect.CodeNotFound, fmt.Errorf("failed to fetch go lib %s: %s", module.Path, resp.Status)) 134 } 135 136 // look for go-source meta tag first 137 doc, err := goquery.NewDocumentFromReader(resp.Body) 138 if err != nil { 139 return err 140 } 141 var found bool 142 // <meta name="go-source" content="google.golang.org/protobuf https://github.com/protocolbuffers/protobuf-go https://github.com/protocolbuffers/protobuf-go/tree/master{/dir} https://github.com/protocolbuffers/protobuf-go/tree/master{/dir}/{file}#L{line}"> 143 doc.Find("meta[name='go-source']").Each(func(i int, s *goquery.Selection) { 144 content, ok := s.Attr("content") 145 if !ok { 146 return 147 } 148 content = cleanWhiteSpace(content) 149 parts := strings.Split(content, " ") 150 if len(parts) < 2 { 151 return 152 } 153 154 // prefer github if available in go-source 155 if !found && strings.Contains(module.Path, parts[0]) && strings.Contains(parts[1], "github.com/") { 156 found = true 157 subPath := strings.Replace(module.Path, parts[0], "", 1) 158 module.Path = filepath.Join(strings.TrimRight( 159 strings.TrimPrefix( 160 strings.TrimPrefix(parts[1], "https://"), 161 "http://", 162 ), "/"), 163 subPath, 164 ) 165 166 } 167 }) 168 if found { 169 return nil 170 } 171 // <meta name="go-import" content="google.golang.org/protobuf git https://go.googlesource.com/protobuf"> 172 // <meta name="go-import" content="golang.org/x/oauth2 git https://go.googlesource.com/oauth2"> 173 // <meta name="go-import" content="go.uber.org/atomic git https://github.com/uber-go/atomic"> 174 doc.Find("meta[name='go-import']").Each(func(i int, s *goquery.Selection) { 175 content, ok := s.Attr("content") 176 if !ok { 177 return 178 } 179 parts := strings.Split(cleanWhiteSpace(content), " ") 180 if len(parts) < 3 { 181 return 182 } 183 184 if !found && strings.Contains(module.Path, parts[0]) && parts[1] == "git" { 185 found = true 186 subPath := strings.Replace(module.Path, parts[0], "", 1) 187 module.Path = filepath.Join(strings.TrimRight( 188 strings.TrimPrefix( 189 strings.TrimPrefix(parts[2], "https://"), 190 "http://", 191 ), "/"), 192 subPath, 193 ) 194 195 } 196 }) 197 return nil 198 } 199 200 // resolveGoPkgIn resolves the gopkg.in path to a github path. 201 // see https://labix.org/gopkg.in 202 // gopkg.in/pkg.v3 → github.com/go-pkg/pkg (branch/tag v3, v3.N, or v3.N.M) 203 // gopkg.in/user/pkg.v3 → github.com/user/pkg (branch/tag v3, v3.N, or v3.N.M) 204 func (module *Module) resolveGoPkgIn() error { 205 parts := strings.Split(module.Path, "/") 206 if len(parts) < 2 { 207 return fmt.Errorf("invalid gopkg.in path: %s", module.Path) 208 } 209 packageNameParts := strings.Split(parts[len(parts)-1], ".") 210 if len(packageNameParts) < 2 || packageNameParts[0] == "" { 211 return fmt.Errorf("invalid gopkg.in path: %s", module.Path) 212 } 213 switch len(parts) { 214 case 2: 215 module.Path = fmt.Sprintf("github.com/go-%s/%s", packageNameParts[0], packageNameParts[0]) 216 case 3: 217 module.Path = fmt.Sprintf("github.com/%s/%s", parts[1], packageNameParts[0]) 218 default: 219 return fmt.Errorf("invalid gopkg.in path: %s", module.Path) 220 } 221 return nil 222 } 223 224 // applyGoMod applies the go.mod file to the module. 225 func (module *Module) applyGoMod(mainModule module.Version, modf *modfile.File) { 226 for _, req := range modf.Require { 227 if req.Mod.Path == module.Path { 228 module.Version.Version = req.Mod.Version 229 } 230 } 231 for _, req := range modf.Replace { 232 if req.Old.Path == module.Path { 233 module.Path = req.New.Path 234 module.Version.Version = req.New.Version 235 } 236 } 237 if strings.HasPrefix(module.Path, "./") { 238 module.Version.Version = mainModule.Version 239 module.Path = filepath.Join(mainModule.Path, module.Path) 240 } 241 } 242 243 type GitHubFile struct { 244 Owner, Repo, Ref, Path string 245 } 246 247 // GithubFile returns the github file information. 248 func (m Module) GithubFile() (GitHubFile, error) { 249 if !m.IsGitHub() { 250 return GitHubFile{}, fmt.Errorf("invalid github URL: %s", m.Path) 251 } 252 version, err := refFromVersion(m.Version.Version) 253 if err != nil { 254 return GitHubFile{}, err 255 } 256 if version == "" { 257 version = "main" 258 } 259 parts := strings.Split(m.Path, "/") 260 if len(parts) < 3 { 261 return GitHubFile{}, fmt.Errorf("invalid github URL: %s", m.Path) 262 } 263 return GitHubFile{ 264 // ! character is used for capitalization 265 // example: github.com/!f!zambia/eagle@v0.0.2/eagle.go 266 Owner: strings.ReplaceAll(parts[1], "!", ""), 267 Repo: parts[2], 268 Ref: version, 269 Path: filepath.Join(strings.Join(parts[3:], "/"), m.FilePath), 270 }, nil 271 } 272 273 // GoogleSourceURL returns the URL of the file in the google source repository. 274 // Example https://go.googlesource.com/oauth2/+/4ce7bbb2ffdc6daed06e2ec28916fd08d96bc3ea/amazon/amazon.go 275 func (m Module) GoogleSourceURL() (string, error) { 276 if !m.IsGoogleSource() { 277 return "", fmt.Errorf("invalid google source path: %s", m.Path) 278 } 279 parts := strings.Split(strings.Trim(m.Path, "/"), "/") 280 if len(parts) < 2 { 281 return "", fmt.Errorf("invalid google source path: %s", m.Path) 282 } 283 projectName := parts[1] 284 filePath := m.FilePath 285 extraPath := strings.Join(parts[2:], "/") 286 if extraPath != "" { 287 filePath = filepath.Join(extraPath, filePath) 288 } 289 version, err := refFromVersion(m.Version.Version) 290 if err != nil { 291 return "", err 292 } 293 if version == "" { 294 version = "master" 295 } 296 return fmt.Sprintf("https://go.googlesource.com/%s/+/%s/%s?format=TEXT", projectName, version, filePath), nil 297 } 298 299 // refFromVersion returns the git ref from the given module version. 300 func refFromVersion(version string) (string, error) { 301 if module.IsPseudoVersion(version) { 302 rev, err := module.PseudoVersionRev(version) 303 if err != nil { 304 return "", err 305 } 306 return rev, nil 307 } 308 if sem := semver.Canonical(version); sem != "" { 309 return sem, nil 310 } 311 312 return version, nil 313 } 314 315 // cleanWhiteSpace removes all white space characters from the given string. 316 func cleanWhiteSpace(s string) string { 317 space := false 318 return strings.Map(func(r rune) rune { 319 if r == '\n' || r == '\t' { 320 return -1 321 } 322 if r == ' ' && space { 323 return -1 324 } 325 space = r == ' ' 326 return r 327 }, s) 328 }