github.com/grafana/pyroscope@v1.18.0/pkg/frontend/vcs/source/golang/modules.go (about)

     1  package golang
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net/http"
     7  	"path/filepath"
     8  	"regexp"
     9  	"strings"
    10  
    11  	"connectrpc.com/connect"
    12  	"github.com/PuerkitoBio/goquery"
    13  	"golang.org/x/mod/modfile"
    14  	"golang.org/x/mod/module"
    15  	"golang.org/x/mod/semver"
    16  )
    17  
const (
	// GoMod is the file name of a Go module definition file.
	GoMod = "go.mod"

	// Module path prefixes used to recognise where a module is hosted.
	GitHubPath  = "github.com/"
	GooglePath  = "go.googlesource.com/"
	GoPkgInPath = "gopkg.in/"
)
    25  
    26  var versionSuffixRE = regexp.MustCompile(`/v[0-9]+[/]*`)
    27  
// Module represents a go module with a file path in that module
type Module struct {
	// Version is the embedded module path and version pair. Because the
	// embedded field is itself called Version, m.Version is the whole pair and
	// the version string is m.Version.Version; m.Path is promoted.
	module.Version
	// FilePath is the path of the file inside the module, i.e. the part of the
	// parsed path that follows the "@v<version>/" element.
	FilePath string
}
    33  
    34  // ParseModuleFromPath parses the module from the given path.
    35  func ParseModuleFromPath(path string) (Module, bool) {
    36  	parts := strings.Split(path, "@v")
    37  	if len(parts) != 2 {
    38  		return Module{}, false
    39  	}
    40  	first := strings.Index(parts[1], "/")
    41  	if first < 0 {
    42  		return Module{}, false
    43  	}
    44  	filePath := parts[1][first+1:]
    45  	modulePath := parts[0]
    46  
    47  	// The go mod folder typically starts with "pkg/mod". If that segment can be found, shorten the module path, so no other folders with dots get accidentally picked up.
    48  	if pos := strings.Index(modulePath, "/pkg/mod/"); pos > 0 {
    49  		modulePath = modulePath[pos:]
    50  	}
    51  
    52  	// searching for the first domain name
    53  	domainParts := strings.Split(modulePath, "/")
    54  	for i, part := range domainParts {
    55  		if strings.Contains(part, ".") {
    56  			return Module{
    57  				Version: module.Version{
    58  					Path:    strings.Join(domainParts[i:], "/"),
    59  					Version: "v" + parts[1][:first],
    60  				},
    61  				FilePath: filePath,
    62  			}, true
    63  		}
    64  	}
    65  	return Module{}, false
    66  }
    67  
    68  func (m Module) IsGitHub() bool {
    69  	return strings.HasPrefix(m.Path, GitHubPath)
    70  }
    71  
    72  func (m Module) IsGoogleSource() bool {
    73  	return strings.HasPrefix(m.Path, GooglePath)
    74  }
    75  
    76  func (m Module) IsGoPkgIn() bool {
    77  	return strings.HasPrefix(m.Path, GoPkgInPath)
    78  }
    79  
    80  func (m Module) String() string {
    81  	return fmt.Sprintf("%s@%s", m.Path, m.Version)
    82  }
    83  
// HttpClient is the HTTP client abstraction used to fetch go-get meta pages.
// Its single method has the same signature as (*http.Client).Do.
type HttpClient interface {
	// Do sends the request and returns the response or an error.
	Do(req *http.Request) (*http.Response, error)
}
    87  
    88  // Resolve resolves the module path to a canonical path.
    89  func (module *Module) Resolve(ctx context.Context, mainModule module.Version, modfile *modfile.File, httpClient HttpClient) error {
    90  	if modfile != nil {
    91  		mainModule.Path = modfile.Module.Mod.Path
    92  		module.applyGoMod(mainModule, modfile)
    93  	}
    94  	if err := module.resolveVanityURL(ctx, httpClient); err != nil {
    95  		return err
    96  	}
    97  	// remove version suffix such as /v2 or /v11 ...
    98  	module.Path = versionSuffixRE.ReplaceAllString(module.Path, "")
    99  	return nil
   100  }
   101  
   102  func (module *Module) resolveVanityURL(ctx context.Context, httpClient HttpClient) error {
   103  	switch {
   104  	// no need to resolve vanity URL
   105  	case module.IsGitHub():
   106  		return nil
   107  	case module.IsGoPkgIn():
   108  		return module.resolveGoPkgIn()
   109  	default:
   110  		return module.resolveGoGet(ctx, httpClient)
   111  	}
   112  }
   113  
   114  // resolveGoGet resolves the module path using go-get meta tags.
   115  // normally go-import meta tag should be used to resolve vanity.
   116  //
   117  //	curl -v 'https://google.golang.org/protobuf?go-get=1'
   118  //
   119  // careful follow redirect see: curl -L -v 'connectrpc.com/connect?go-get=1'
   120  // if go-source meta tag is present prefer it over go-import.
   121  // see https://go.dev/ref/mod#vcs-find
   122  func (module *Module) resolveGoGet(ctx context.Context, httpClient HttpClient) error {
   123  	req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("https://%s?go-get=1", strings.TrimRight(module.Path, "/")), nil)
   124  	if err != nil {
   125  		return err
   126  	}
   127  	resp, err := httpClient.Do(req)
   128  	if err != nil {
   129  		return err
   130  	}
   131  	defer resp.Body.Close()
   132  	if resp.StatusCode != http.StatusOK {
   133  		return connect.NewError(connect.CodeNotFound, fmt.Errorf("failed to fetch go lib %s: %s", module.Path, resp.Status))
   134  	}
   135  
   136  	// look for go-source meta tag first
   137  	doc, err := goquery.NewDocumentFromReader(resp.Body)
   138  	if err != nil {
   139  		return err
   140  	}
   141  	var found bool
   142  	// <meta name="go-source" content="google.golang.org/protobuf https://github.com/protocolbuffers/protobuf-go https://github.com/protocolbuffers/protobuf-go/tree/master{/dir} https://github.com/protocolbuffers/protobuf-go/tree/master{/dir}/{file}#L{line}">
   143  	doc.Find("meta[name='go-source']").Each(func(i int, s *goquery.Selection) {
   144  		content, ok := s.Attr("content")
   145  		if !ok {
   146  			return
   147  		}
   148  		content = cleanWhiteSpace(content)
   149  		parts := strings.Split(content, " ")
   150  		if len(parts) < 2 {
   151  			return
   152  		}
   153  
   154  		// prefer github if available in go-source
   155  		if !found && strings.Contains(module.Path, parts[0]) && strings.Contains(parts[1], "github.com/") {
   156  			found = true
   157  			subPath := strings.Replace(module.Path, parts[0], "", 1)
   158  			module.Path = filepath.Join(strings.TrimRight(
   159  				strings.TrimPrefix(
   160  					strings.TrimPrefix(parts[1], "https://"),
   161  					"http://",
   162  				), "/"),
   163  				subPath,
   164  			)
   165  
   166  		}
   167  	})
   168  	if found {
   169  		return nil
   170  	}
   171  	// <meta name="go-import" content="google.golang.org/protobuf git https://go.googlesource.com/protobuf">
   172  	// <meta name="go-import" content="golang.org/x/oauth2 git https://go.googlesource.com/oauth2">
   173  	// <meta name="go-import" content="go.uber.org/atomic git https://github.com/uber-go/atomic">
   174  	doc.Find("meta[name='go-import']").Each(func(i int, s *goquery.Selection) {
   175  		content, ok := s.Attr("content")
   176  		if !ok {
   177  			return
   178  		}
   179  		parts := strings.Split(cleanWhiteSpace(content), " ")
   180  		if len(parts) < 3 {
   181  			return
   182  		}
   183  
   184  		if !found && strings.Contains(module.Path, parts[0]) && parts[1] == "git" {
   185  			found = true
   186  			subPath := strings.Replace(module.Path, parts[0], "", 1)
   187  			module.Path = filepath.Join(strings.TrimRight(
   188  				strings.TrimPrefix(
   189  					strings.TrimPrefix(parts[2], "https://"),
   190  					"http://",
   191  				), "/"),
   192  				subPath,
   193  			)
   194  
   195  		}
   196  	})
   197  	return nil
   198  }
   199  
   200  // resolveGoPkgIn resolves the gopkg.in path to a github path.
   201  // see https://labix.org/gopkg.in
   202  // gopkg.in/pkg.v3      → github.com/go-pkg/pkg (branch/tag v3, v3.N, or v3.N.M)
   203  // gopkg.in/user/pkg.v3 → github.com/user/pkg   (branch/tag v3, v3.N, or v3.N.M)
   204  func (module *Module) resolveGoPkgIn() error {
   205  	parts := strings.Split(module.Path, "/")
   206  	if len(parts) < 2 {
   207  		return fmt.Errorf("invalid gopkg.in path: %s", module.Path)
   208  	}
   209  	packageNameParts := strings.Split(parts[len(parts)-1], ".")
   210  	if len(packageNameParts) < 2 || packageNameParts[0] == "" {
   211  		return fmt.Errorf("invalid gopkg.in path: %s", module.Path)
   212  	}
   213  	switch len(parts) {
   214  	case 2:
   215  		module.Path = fmt.Sprintf("github.com/go-%s/%s", packageNameParts[0], packageNameParts[0])
   216  	case 3:
   217  		module.Path = fmt.Sprintf("github.com/%s/%s", parts[1], packageNameParts[0])
   218  	default:
   219  		return fmt.Errorf("invalid gopkg.in path: %s", module.Path)
   220  	}
   221  	return nil
   222  }
   223  
   224  // applyGoMod applies the go.mod file to the module.
   225  func (module *Module) applyGoMod(mainModule module.Version, modf *modfile.File) {
   226  	for _, req := range modf.Require {
   227  		if req.Mod.Path == module.Path {
   228  			module.Version.Version = req.Mod.Version
   229  		}
   230  	}
   231  	for _, req := range modf.Replace {
   232  		if req.Old.Path == module.Path {
   233  			module.Path = req.New.Path
   234  			module.Version.Version = req.New.Version
   235  		}
   236  	}
   237  	if strings.HasPrefix(module.Path, "./") {
   238  		module.Version.Version = mainModule.Version
   239  		module.Path = filepath.Join(mainModule.Path, module.Path)
   240  	}
   241  }
   242  
// GitHubFile identifies a file in a GitHub repository.
type GitHubFile struct {
	// Owner and Repo name the repository, Ref is the git reference and Path is
	// the path of the file inside the repository.
	Owner, Repo, Ref, Path string
}
   246  
   247  // GithubFile returns the github file information.
   248  func (m Module) GithubFile() (GitHubFile, error) {
   249  	if !m.IsGitHub() {
   250  		return GitHubFile{}, fmt.Errorf("invalid github URL: %s", m.Path)
   251  	}
   252  	version, err := refFromVersion(m.Version.Version)
   253  	if err != nil {
   254  		return GitHubFile{}, err
   255  	}
   256  	if version == "" {
   257  		version = "main"
   258  	}
   259  	parts := strings.Split(m.Path, "/")
   260  	if len(parts) < 3 {
   261  		return GitHubFile{}, fmt.Errorf("invalid github URL: %s", m.Path)
   262  	}
   263  	return GitHubFile{
   264  		// ! character is used for capitalization
   265  		// example: github.com/!f!zambia/eagle@v0.0.2/eagle.go
   266  		Owner: strings.ReplaceAll(parts[1], "!", ""),
   267  		Repo:  parts[2],
   268  		Ref:   version,
   269  		Path:  filepath.Join(strings.Join(parts[3:], "/"), m.FilePath),
   270  	}, nil
   271  }
   272  
   273  // GoogleSourceURL returns the URL of the file in the google source repository.
   274  // Example https://go.googlesource.com/oauth2/+/4ce7bbb2ffdc6daed06e2ec28916fd08d96bc3ea/amazon/amazon.go
   275  func (m Module) GoogleSourceURL() (string, error) {
   276  	if !m.IsGoogleSource() {
   277  		return "", fmt.Errorf("invalid google source path: %s", m.Path)
   278  	}
   279  	parts := strings.Split(strings.Trim(m.Path, "/"), "/")
   280  	if len(parts) < 2 {
   281  		return "", fmt.Errorf("invalid google source path: %s", m.Path)
   282  	}
   283  	projectName := parts[1]
   284  	filePath := m.FilePath
   285  	extraPath := strings.Join(parts[2:], "/")
   286  	if extraPath != "" {
   287  		filePath = filepath.Join(extraPath, filePath)
   288  	}
   289  	version, err := refFromVersion(m.Version.Version)
   290  	if err != nil {
   291  		return "", err
   292  	}
   293  	if version == "" {
   294  		version = "master"
   295  	}
   296  	return fmt.Sprintf("https://go.googlesource.com/%s/+/%s/%s?format=TEXT", projectName, version, filePath), nil
   297  }
   298  
   299  // refFromVersion returns the git ref from the given module version.
   300  func refFromVersion(version string) (string, error) {
   301  	if module.IsPseudoVersion(version) {
   302  		rev, err := module.PseudoVersionRev(version)
   303  		if err != nil {
   304  			return "", err
   305  		}
   306  		return rev, nil
   307  	}
   308  	if sem := semver.Canonical(version); sem != "" {
   309  		return sem, nil
   310  	}
   311  
   312  	return version, nil
   313  }
   314  
   315  // cleanWhiteSpace removes all white space characters from the given string.
   316  func cleanWhiteSpace(s string) string {
   317  	space := false
   318  	return strings.Map(func(r rune) rune {
   319  		if r == '\n' || r == '\t' {
   320  			return -1
   321  		}
   322  		if r == ' ' && space {
   323  			return -1
   324  		}
   325  		space = r == ' '
   326  		return r
   327  	}, s)
   328  }