github.com/glimps-jbo/go-licenses@v0.0.0-20230908151000-e06d3c113277/internal/third_party/pkgsite/source/source.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package source constructs public URLs that link to the source files in a module. It
     6  // can be used to build references to Go source code, or to any other files in a
     7  // module.
     8  //
     9  // Of course, the module zip file contains all the files in the module. This
    10  // package attempts to find the origin of the zip file, in a repository that is
    11  // publicly readable, and constructs links to that repo. While a module zip file
    12  // could in theory come from anywhere, including a non-public location, this
    13  // package recognizes standard module path patterns and constructs repository
    14  // URLs from them, like the go command does.
    15  package source
    16  
    17  //
    18  // Much of this code was adapted from
    19  // https://go.googlesource.com/gddo/+/refs/heads/master/gosrc
    20  // and
    21  // https://go.googlesource.com/go/+/refs/heads/master/src/cmd/go/internal/get
    22  
    23  import (
    24  	"context"
    25  	"fmt"
    26  	"log" // We cannot use glog instead, because its "v" flag conflicts with other libraries we use.
    27  	"net/http"
    28  	"path"
    29  	"regexp"
    30  	"strconv"
    31  	"strings"
    32  	"time"
    33  
    34  	"github.com/glimps-jbo/go-licenses/internal/third_party/pkgsite/derrors"
    35  	"github.com/glimps-jbo/go-licenses/internal/third_party/pkgsite/stdlib"
    36  	"github.com/glimps-jbo/go-licenses/internal/third_party/pkgsite/version"
    37  	"go.opencensus.io/plugin/ochttp"
    38  	"go.opencensus.io/trace"
    39  	"golang.org/x/net/context/ctxhttp"
    40  )
    41  
// Info holds source information about a module, used to generate URLs referring
// to directories, files and lines.
//
// All fields are unexported; values are constructed inside this package (see
// ModuleInfo and NewGitHubInfo) and consumed through methods such as FileURL.
type Info struct {
	repoURL   string       // URL of repo containing module; substituted for {repo} when expanding templates
	moduleDir string       // directory of module relative to repo root
	commit    string       // tag or ID of commit corresponding to version
	templates urlTemplates // for building URLs
}
    50  
    51  // FileURL returns a URL for a file whose pathname is relative to the module's home directory.
    52  func (i *Info) FileURL(pathname string) string {
    53  	if i == nil {
    54  		return ""
    55  	}
    56  	dir, base := path.Split(pathname)
    57  	return expand(i.templates.File, map[string]string{
    58  		"repo":       i.repoURL,
    59  		"importPath": path.Join(strings.TrimPrefix(i.repoURL, "https://"), dir),
    60  		"commit":     i.commit,
    61  		"dir":        dir,
    62  		"file":       path.Join(i.moduleDir, pathname),
    63  		"base":       base,
    64  	})
    65  }
    66  
// Client wraps the HTTP client that this package uses when it has to make
// network requests (see doURL).
type Client struct {
	// client used for HTTP requests. It is mutable for testing purposes.
	// If nil, then moduleInfoDynamic will return nil, nil; also for testing.
	httpClient *http.Client
}
    72  
// NewClient constructs a *Client using the provided timeout.
//
// The timeout is applied to the underlying http.Client, whose transport is an
// ochttp.Transport.
func NewClient(timeout time.Duration) *Client {
	return &Client{
		httpClient: &http.Client{
			Transport: &ochttp.Transport{},
			Timeout:   timeout,
		},
	}
}
    82  
    83  // NewClientForTesting returns a Client suitable for testing. It returns the
    84  // same results as an ordinary client for statically recognizable paths, but
    85  // always returns a nil *Info for dynamic paths (those requiring HTTP requests).
    86  func NewClientForTesting() *Client {
    87  	return &Client{}
    88  }
    89  
    90  // doURL makes an HTTP request using the given url and method. It returns an
    91  // error if the request returns an error. If only200 is true, it also returns an
    92  // error if any status code other than 200 is returned.
    93  func (c *Client) doURL(ctx context.Context, method, url string, only200 bool) (_ *http.Response, err error) {
    94  	defer derrors.Wrap(&err, "doURL(ctx, client, %q, %q)", method, url)
    95  
    96  	if c == nil || c.httpClient == nil {
    97  		return nil, fmt.Errorf("c.httpClient cannot be nil")
    98  	}
    99  	req, err := http.NewRequest(method, url, nil)
   100  	if err != nil {
   101  		return nil, err
   102  	}
   103  	resp, err := ctxhttp.Do(ctx, c.httpClient, req)
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  	if only200 && resp.StatusCode != 200 {
   108  		resp.Body.Close()
   109  		return nil, fmt.Errorf("status %s", resp.Status)
   110  	}
   111  	return resp, nil
   112  }
   113  
   114  // ModuleInfo determines the repository corresponding to the module path. It
   115  // returns a URL to that repo, as well as the directory of the module relative
   116  // to the repo root.
   117  //
   118  // ModuleInfo may fetch from arbitrary URLs, so it can be slow.
   119  func ModuleInfo(ctx context.Context, client *Client, modulePath, v string) (info *Info, err error) {
   120  	defer derrors.Wrap(&err, "source.ModuleInfo(ctx, %q, %q)", modulePath, v)
   121  	ctx, span := trace.StartSpan(ctx, "source.ModuleInfo")
   122  	defer span.End()
   123  
   124  	// The example.com domain can never be real; it is reserved for testing
   125  	// (https://en.wikipedia.org/wiki/Example.com). Treat it as if it used
   126  	// GitHub templates.
   127  	if strings.HasPrefix(modulePath, "example.com/") {
   128  		return NewGitHubInfo("https://"+modulePath, "", v), nil
   129  	}
   130  
   131  	if modulePath == stdlib.ModulePath {
   132  		return newStdlibInfo(v)
   133  	}
   134  
   135  	repo, relativeModulePath, templates, transformCommit, err := matchStatic(modulePath)
   136  	if err != nil {
   137  		info, err = moduleInfoDynamic(ctx, client, modulePath, v)
   138  		if err != nil {
   139  			return nil, err
   140  		}
   141  	} else {
   142  		commit, isHash := commitFromVersion(v, relativeModulePath)
   143  		if transformCommit != nil {
   144  			commit = transformCommit(commit, isHash)
   145  		}
   146  		info = &Info{
   147  			repoURL:   trimVCSSuffix("https://" + repo),
   148  			moduleDir: relativeModulePath,
   149  			commit:    commit,
   150  			templates: templates,
   151  		}
   152  	}
   153  	if info != nil {
   154  		adjustVersionedModuleDirectory(ctx, client, info)
   155  	}
   156  	if strings.HasPrefix(modulePath, "golang.org/") {
   157  		adjustGoRepoInfo(info, modulePath, version.IsPseudo(v))
   158  	}
   159  	return info, nil
   160  	// TODO(golang/go#39627): support launchpad.net, including the special case
   161  	// in cmd/go/internal/get/vcs.go.
   162  }
   163  
   164  func newStdlibInfo(version string) (_ *Info, err error) {
   165  	defer derrors.Wrap(&err, "newStdlibInfo(%q)", version)
   166  
   167  	commit, err := stdlib.TagForVersion(version)
   168  	if err != nil {
   169  		return nil, err
   170  	}
   171  
   172  	templates := csopensourceTemplates
   173  	templates.Raw = "https://github.com/golang/go/raw/{commit}/{file}"
   174  	return &Info{
   175  		repoURL:   stdlib.GoSourceRepoURL,
   176  		moduleDir: stdlib.Directory(version),
   177  		commit:    commit,
   178  		templates: templates,
   179  	}, nil
   180  }
   181  
// csNonXRepos is a set of repos hosted at https://cs.opensource.google/go,
// that are not an x/repo.
//
// Keys are the path after "golang.org/"; adjustGoRepoInfo consults this set
// for suffixes that do not start with "x/".
var csNonXRepos = map[string]bool{
	"dl":        true,
	"proposal":  true,
	"vscode-go": true,
}
   189  
// csXRepos is the set of repos hosted at https://cs.opensource.google/go,
// that have a x/ prefix.
//
// Keys have the form "x/<repo>"; adjustGoRepoInfo consults this set for
// golang.org module paths whose suffix starts with "x/".
//
// x/scratch is not included.
var csXRepos = map[string]bool{
	"x/arch":       true,
	"x/benchmarks": true,
	"x/blog":       true,
	"x/build":      true,
	"x/crypto":     true,
	"x/debug":      true,
	"x/example":    true,
	"x/exp":        true,
	"x/image":      true,
	"x/mobile":     true,
	"x/mod":        true,
	"x/net":        true,
	"x/oauth2":     true,
	"x/perf":       true,
	"x/pkgsite":    true,
	"x/playground": true,
	"x/review":     true,
	"x/sync":       true,
	"x/sys":        true,
	"x/talks":      true,
	"x/term":       true,
	"x/text":       true,
	"x/time":       true,
	"x/tools":      true,
	"x/tour":       true,
	"x/vgo":        true,
	"x/website":    true,
	"x/xerrors":    true,
}
   224  
   225  func adjustGoRepoInfo(info *Info, modulePath string, isHash bool) {
   226  	suffix := strings.TrimPrefix(modulePath, "golang.org/")
   227  
   228  	// Validate that this is a repo that exists on
   229  	// https://cs.opensource.google/go. Otherwise, default to the existing
   230  	// info.
   231  	parts := strings.Split(suffix, "/")
   232  	if len(parts) >= 2 {
   233  		suffix = parts[0] + "/" + parts[1]
   234  	}
   235  	if strings.HasPrefix(suffix, "x/") {
   236  		if !csXRepos[suffix] {
   237  			return
   238  		}
   239  	} else if !csNonXRepos[suffix] {
   240  		return
   241  	}
   242  
   243  	// rawURL needs to be set before info.templates is changed.
   244  	rawURL := fmt.Sprintf(
   245  		"https://github.com/golang/%s/raw/{commit}/{file}", strings.TrimPrefix(suffix, "x/"))
   246  
   247  	info.repoURL = fmt.Sprintf("https://cs.opensource.google/go/%s", suffix)
   248  	info.templates = csopensourceTemplates
   249  	info.templates.Raw = rawURL
   250  
   251  	if isHash {
   252  		// When we have a pseudoversion, info.commit will be an actual commit
   253  		// instead of a tag.
   254  		//
   255  		// https://cs.opensource.google/go/* has short commits hardcoded to 8
   256  		// chars. Commits shorter or longer will not work, unless it is the full
   257  		// commit hash.
   258  		info.commit = info.commit[0:8]
   259  	}
   260  }
   261  
   262  // matchStatic matches the given module or repo path against a list of known
   263  // patterns. It returns the repo name, the module path relative to the repo
   264  // root, and URL templates if there is a match.
   265  //
   266  // The relative module path may not be correct in all cases: it is wrong if it
   267  // ends in a version that is not part of the repo directory structure, because
   268  // the repo follows the "major branch" convention for versions 2 and above.
   269  // E.g. this function could return "foo/v2", but the module files live under "foo"; the
   270  // "/v2" is part of the module path (and the import paths of its packages) but
   271  // is not a subdirectory. This mistake is corrected in adjustVersionedModuleDirectory,
   272  // once we have all the information we need to fix it.
   273  //
   274  // repo + "/" + relativeModulePath is often, but not always, equal to
   275  // moduleOrRepoPath. It is not when the argument is a module path that uses the
   276  // go command's general syntax, which ends in a ".vcs" (e.g. ".git", ".hg") that
   277  // is neither part of the repo nor the suffix. For example, if the argument is
   278  //
   279  //	github.com/a/b/c
   280  //
   281  // then repo="github.com/a/b" and relativeModulePath="c"; together they make up the module path.
   282  // But if the argument is
   283  //
   284  //	example.com/a/b.git/c
   285  //
   286  // then repo="example.com/a/b" and relativeModulePath="c"; the ".git" is omitted, since it is neither
   287  // part of the repo nor part of the relative path to the module within the repo.
   288  func matchStatic(moduleOrRepoPath string) (repo, relativeModulePath string, _ urlTemplates, transformCommit transformCommitFunc, _ error) {
   289  	for _, pat := range patterns {
   290  		matches := pat.re.FindStringSubmatch(moduleOrRepoPath)
   291  		if matches == nil {
   292  			continue
   293  		}
   294  		var repo string
   295  		for i, n := range pat.re.SubexpNames() {
   296  			if n == "repo" {
   297  				repo = matches[i]
   298  				break
   299  			}
   300  		}
   301  		// Special case: git.apache.org has a go-import tag that points to
   302  		// github.com/apache, but it's not quite right (the repo prefix is
   303  		// missing a ".git"), so handle it here.
   304  		const apacheDomain = "git.apache.org/"
   305  		if strings.HasPrefix(repo, apacheDomain) {
   306  			repo = strings.Replace(repo, apacheDomain, "github.com/apache/", 1)
   307  		}
   308  		// Special case: module paths are blitiri.com.ar/go/..., but repos are blitiri.com.ar/git/r/...
   309  		if strings.HasPrefix(repo, "blitiri.com.ar/") {
   310  			repo = strings.Replace(repo, "/go/", "/git/r/", 1)
   311  		}
   312  		relativeModulePath = strings.TrimPrefix(moduleOrRepoPath, matches[0])
   313  		relativeModulePath = strings.TrimPrefix(relativeModulePath, "/")
   314  		return repo, relativeModulePath, pat.templates, pat.transformCommit, nil
   315  	}
   316  	return "", "", urlTemplates{}, nil, derrors.NotFound
   317  }
   318  
   319  // moduleInfoDynamic uses the go-import and go-source meta tags to construct an Info.
   320  func moduleInfoDynamic(ctx context.Context, client *Client, modulePath, version string) (_ *Info, err error) {
   321  	defer derrors.Wrap(&err, "moduleInfoDynamic(ctx, client, %q, %q)", modulePath, version)
   322  
   323  	if client.httpClient == nil {
   324  		return nil, nil // for testing
   325  	}
   326  
   327  	sourceMeta, err := fetchMeta(ctx, client, modulePath)
   328  	if err != nil {
   329  		return nil, err
   330  	}
   331  	// Don't check that the tag information at the repo root prefix is the same
   332  	// as in the module path. It was done for us by the proxy and/or go command.
   333  	// (This lets us merge information from the go-import and go-source tags.)
   334  
   335  	// sourceMeta contains some information about where the module's source lives. But there
   336  	// are some problems:
   337  	// - We may only have a go-import tag, not a go-source tag, so we don't have URL templates for
   338  	//   building URLs to files and directories.
   339  	// - Even if we do have a go-source tag, its URL template format predates
   340  	//   versioning, so the URL templates won't provide a way to specify a
   341  	//   version or commit.
   342  	//
   343  	// We resolve these problems as follows:
   344  	// 1. First look at the repo URL from the tag. If that matches a known hosting site, use the
   345  	//    URL templates corresponding to that site and ignore whatever's in the tag.
   346  	// 2. Then look at the URL templates to see if they match a known pattern, and use the templates
   347  	//    from that pattern. For example, the meta tags for gopkg.in/yaml.v2 only mention github
   348  	//    in the URL templates, like "https://github.com/go-yaml/yaml/tree/v2.2.3{/dir}". We can observe
   349  	//    that that template begins with a known pattern--a GitHub repo, ignore the rest of it, and use the
   350  	//    GitHub URL templates that we know.
   351  	repoURL := sourceMeta.repoURL
   352  	_, _, templates, transformCommit, _ := matchStatic(removeHTTPScheme(repoURL))
   353  	// If err != nil, templates will be the zero value, so we can ignore it (same just below).
   354  	if templates == (urlTemplates{}) {
   355  		var repo string
   356  		repo, _, templates, transformCommit, _ = matchStatic(removeHTTPScheme(sourceMeta.dirTemplate))
   357  		if templates == (urlTemplates{}) {
   358  			if err == nil {
   359  				templates, transformCommit = matchLegacyTemplates(ctx, sourceMeta)
   360  				repoURL = strings.TrimSuffix(repoURL, ".git")
   361  			} else {
   362  				log.Printf("no templates for repo URL %q from meta tag: err=%v", sourceMeta.repoURL, err)
   363  			}
   364  		} else {
   365  			// Use the repo from the template, not the original one.
   366  			repoURL = "https://" + repo
   367  		}
   368  	}
   369  	dir := strings.TrimPrefix(strings.TrimPrefix(modulePath, sourceMeta.repoRootPrefix), "/")
   370  	commit, isHash := commitFromVersion(version, dir)
   371  	if transformCommit != nil {
   372  		commit = transformCommit(commit, isHash)
   373  	}
   374  	return &Info{
   375  		repoURL:   strings.TrimSuffix(repoURL, "/"),
   376  		moduleDir: dir,
   377  		commit:    commit,
   378  		templates: templates,
   379  	}, nil
   380  }
   381  
// List of template regexps and their corresponding likely templates,
// used by matchLegacyTemplates below.
//
// Each fileRegexp is matched against the file template of a go-source meta
// tag. Entries are tried in order and the first match wins, so the more
// specific "/src/branch/" form is listed before the general "/src/" form.
var legacyTemplateMatches = []struct {
	fileRegexp      *regexp.Regexp
	templates       urlTemplates
	transformCommit transformCommitFunc
}{
	{
		regexp.MustCompile(`/src/branch/\w+\{/dir\}/\{file\}#L\{line\}$`),
		giteaURLTemplates, giteaTransformCommit,
	},
	{
		regexp.MustCompile(`/src/\w+\{/dir\}/\{file\}#L\{line\}$`),
		giteaURLTemplates, nil,
	},
	{
		regexp.MustCompile(`/-/blob/\w+\{/dir\}/\{file\}#L\{line\}$`),
		gitlabURLTemplates, nil,
	},
	{
		regexp.MustCompile(`/tree\{/dir\}/\{file\}#n\{line\}$`),
		fdioURLTemplates, fdioTransformCommit,
	},
}
   406  
   407  // matchLegacyTemplates matches the templates from the go-source meta tag
   408  // against some known patterns to guess the version-aware URL templates. If it
   409  // can't find a match, it falls back using the go-source templates with some
   410  // small replacements. These will not be version-aware but will still serve
   411  // source at a fixed commit, which is better than nothing.
   412  func matchLegacyTemplates(ctx context.Context, sm *sourceMeta) (_ urlTemplates, transformCommit transformCommitFunc) {
   413  	if sm.fileTemplate == "" {
   414  		return urlTemplates{}, nil
   415  	}
   416  	for _, ltm := range legacyTemplateMatches {
   417  		if ltm.fileRegexp.MatchString(sm.fileTemplate) {
   418  			return ltm.templates, ltm.transformCommit
   419  		}
   420  	}
   421  	log.Printf("matchLegacyTemplates: no matches for repo URL %q; replacing", sm.repoURL)
   422  	rep := strings.NewReplacer(
   423  		"{/dir}/{file}", "/{file}",
   424  		"{dir}/{file}", "{file}",
   425  		"{/dir}", "/{dir}")
   426  	line := rep.Replace(sm.fileTemplate)
   427  	file := line
   428  	if i := strings.LastIndexByte(line, '#'); i > 0 {
   429  		file = line[:i]
   430  	}
   431  	return urlTemplates{
   432  		Repo:      sm.repoURL,
   433  		Directory: rep.Replace(sm.dirTemplate),
   434  		File:      file,
   435  		Line:      line,
   436  	}, nil
   437  }
   438  
   439  // adjustVersionedModuleDirectory changes info.moduleDir if necessary to
   440  // correctly reflect the repo structure. info.moduleDir will be wrong if it has
   441  // a suffix "/vN" for N > 1, and the repo uses the "major branch" convention,
   442  // where modules at version 2 and higher live on branches rather than
   443  // subdirectories. See https://research.swtch.com/vgo-module for a discussion of
   444  // the "major branch" vs. "major subdirectory" conventions for organizing a
   445  // repo.
   446  func adjustVersionedModuleDirectory(ctx context.Context, client *Client, info *Info) {
   447  	dirWithoutVersion := removeVersionSuffix(info.moduleDir)
   448  	if info.moduleDir == dirWithoutVersion {
   449  		return
   450  	}
   451  	// moduleDir does have a "/vN" for N > 1. To see if that is the actual directory,
   452  	// fetch the go.mod file from it.
   453  	res, err := client.doURL(ctx, "HEAD", info.FileURL("go.mod"), true)
   454  	// On any failure, assume that the right directory is the one without the version.
   455  	if err != nil {
   456  		info.moduleDir = dirWithoutVersion
   457  	} else {
   458  		res.Body.Close()
   459  	}
   460  }
   461  
   462  // removeHTTPScheme removes an initial "http://" or "https://" from url.
   463  // The result can be used to match against our static patterns.
   464  // If the URL uses a different scheme, it won't be removed and it won't
   465  // match any patterns, as intended.
   466  func removeHTTPScheme(url string) string {
   467  	for _, prefix := range []string{"https://", "http://"} {
   468  		if strings.HasPrefix(url, prefix) {
   469  			return url[len(prefix):]
   470  		}
   471  	}
   472  	return url
   473  }
   474  
   475  // removeVersionSuffix returns s with "/vN" removed if N is an integer > 1.
   476  // Otherwise it returns s.
   477  func removeVersionSuffix(s string) string {
   478  	dir, base := path.Split(s)
   479  	if !strings.HasPrefix(base, "v") {
   480  		return s
   481  	}
   482  	if n, err := strconv.Atoi(base[1:]); err != nil || n < 2 {
   483  		return s
   484  	}
   485  	return strings.TrimSuffix(dir, "/")
   486  }
   487  
// transformCommitFunc rewrites a commit string before it is substituted into a
// URL template. isHash reports whether commit is a commit hash rather than a
// tag.
type transformCommitFunc func(commit string, isHash bool) string
   489  
// Patterns for determining repo and URL templates from module paths or repo
// URLs. Each regexp must match a prefix of the target string, and must have a
// group named "repo".
//
// The re field is not set here; init compiles pattern into it and panics if
// the "repo" group is missing. matchStatic tries the entries in order and uses
// the first one that matches, so more specific patterns must come first.
var patterns = []struct {
	pattern   string // uncompiled regexp
	templates urlTemplates
	re        *regexp.Regexp
	// transformCommit may alter the commit before substitution
	transformCommit transformCommitFunc
}{
	{
		pattern:   `^(?P<repo>github\.com/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)`,
		templates: githubURLTemplates,
	},
	{
		// Assume that any site beginning with "github." works like github.com.
		pattern:   `^(?P<repo>github\.[a-z0-9A-Z.-]+/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)(\.git|$)`,
		templates: githubURLTemplates,
	},
	{
		pattern:   `^(?P<repo>bitbucket\.org/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)`,
		templates: bitbucketURLTemplates,
	},
	{
		// Gitlab repos can have multiple path components.
		pattern:   `^(?P<repo>gitlab\.com/[^.]+)(\.git|$)`,
		templates: gitlabURLTemplates,
	},
	{
		// Assume that any site beginning with "gitlab." works like gitlab.com.
		pattern:   `^(?P<repo>gitlab\.[a-z0-9A-Z.-]+/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)(\.git|$)`,
		templates: gitlabURLTemplates,
	},
	{
		pattern:   `^(?P<repo>gitee\.com/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)(\.git|$)`,
		templates: githubURLTemplates,
	},
	{
		pattern: `^(?P<repo>git\.sr\.ht/~[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)`,
		templates: urlTemplates{
			Directory: "{repo}/tree/{commit}/{dir}",
			File:      "{repo}/tree/{commit}/{file}",
			Line:      "{repo}/tree/{commit}/{file}#L{line}",
			Raw:       "{repo}/blob/{commit}/{file}",
		},
	},
	{
		pattern:         `^(?P<repo>git\.fd\.io/[a-z0-9A-Z_.\-]+)`,
		templates:       fdioURLTemplates,
		transformCommit: fdioTransformCommit,
	},
	{
		pattern:   `^(?P<repo>git\.pirl\.io/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)`,
		templates: gitlabURLTemplates,
	},
	{
		pattern:         `^(?P<repo>gitea\.com/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)(\.git|$)`,
		templates:       giteaURLTemplates,
		transformCommit: giteaTransformCommit,
	},
	{
		// Assume that any site beginning with "gitea." works like gitea.com.
		pattern:         `^(?P<repo>gitea\.[a-z0-9A-Z.-]+/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)(\.git|$)`,
		templates:       giteaURLTemplates,
		transformCommit: giteaTransformCommit,
	},
	{
		pattern:         `^(?P<repo>go\.isomorphicgo\.org/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)(\.git|$)`,
		templates:       giteaURLTemplates,
		transformCommit: giteaTransformCommit,
	},
	{
		pattern:         `^(?P<repo>git\.openprivacy\.ca/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)(\.git|$)`,
		templates:       giteaURLTemplates,
		transformCommit: giteaTransformCommit,
	},
	{
		pattern:         `^(?P<repo>codeberg\.org/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)(\.git|$)`,
		templates:       giteaURLTemplates,
		transformCommit: giteaTransformCommit,
	},
	{
		pattern: `^(?P<repo>gogs\.[a-z0-9A-Z.-]+/[a-z0-9A-Z_.\-]+/[a-z0-9A-Z_.\-]+)(\.git|$)`,
		// Gogs uses the same basic structure as Gitea, but omits the type of
		// commit ("tag" or "commit"), so we don't need a transformCommit
		// function. Gogs does not support short hashes, but we create those
		// URLs anyway. See gogs/gogs#6242.
		templates: giteaURLTemplates,
	},
	{
		pattern: `^(?P<repo>dmitri\.shuralyov\.com\/.+)$`,
		templates: urlTemplates{
			Repo:      "{repo}/...",
			Directory: "https://gotools.org/{importPath}?rev={commit}",
			File:      "https://gotools.org/{importPath}?rev={commit}#{base}",
			Line:      "https://gotools.org/{importPath}?rev={commit}#{base}-L{line}",
		},
	},
	{
		pattern: `^(?P<repo>blitiri\.com\.ar/go/.+)$`,
		templates: urlTemplates{
			Repo:      "{repo}",
			Directory: "{repo}/b/master/t/{dir}",
			File:      "{repo}/b/master/t/{dir}f={file}.html",
			Line:      "{repo}/b/master/t/{dir}f={file}.html#line-{line}",
		},
	},

	// Patterns that match the general go command pattern, where they must have
	// a ".git" repo suffix in an import path. If matching a repo URL from a meta tag,
	// there is no ".git".
	{
		pattern:   `^(?P<repo>[^.]+\.googlesource\.com/[^.]+)(\.git|$)`,
		templates: googlesourceURLTemplates,
	},
	{
		pattern:   `^(?P<repo>git\.apache\.org/[^.]+)(\.git|$)`,
		templates: githubURLTemplates,
	},
	// General syntax for the go command. We can extract the repo and directory, but
	// we don't know the URL templates.
	// Must be last in this list.
	//
	// NOTE(review): unlike every other entry, this pattern has no leading "^",
	// so it can match at a non-zero offset, while matchStatic strips the whole
	// match as a prefix of its argument. Confirm whether the missing anchor is
	// intentional.
	{
		pattern:   `(?P<repo>([a-z0-9.\-]+\.)+[a-z0-9.\-]+(:[0-9]+)?(/~?[A-Za-z0-9_.\-]+)+?)\.(bzr|fossil|git|hg|svn)`,
		templates: urlTemplates{},
	},
}
   617  
   618  func init() {
   619  	for i := range patterns {
   620  		re := regexp.MustCompile(patterns[i].pattern)
   621  		// The pattern regexp must contain a group named "repo".
   622  		found := false
   623  		for _, n := range re.SubexpNames() {
   624  			if n == "repo" {
   625  				found = true
   626  				break
   627  			}
   628  		}
   629  		if !found {
   630  			panic(fmt.Sprintf("pattern %s missing <repo> group", patterns[i].pattern))
   631  		}
   632  		patterns[i].re = re
   633  	}
   634  }
   635  
   636  // giteaTransformCommit transforms commits for the Gitea code hosting system.
   637  func giteaTransformCommit(commit string, isHash bool) string {
   638  	// Hashes use "commit", tags use "tag".
   639  	// Short hashes are supported as of v1.14.0.
   640  	if isHash {
   641  		return "commit/" + commit
   642  	}
   643  	return "tag/" + commit
   644  }
   645  
   646  func fdioTransformCommit(commit string, isHash bool) string {
   647  	// hashes use "?id=", tags use "?h="
   648  	p := "h"
   649  	if isHash {
   650  		p = "id"
   651  	}
   652  	return fmt.Sprintf("%s=%s", p, commit)
   653  }
   654  
// urlTemplates describes how to build URLs from bits of source information.
// The fields are exported for JSON encoding.
//
// Templates are plain strings in which "{name}" placeholders are replaced
// (see expand). The template variables are:
//
//   - {repo}       - Repository URL with "https://" prefix ("https://example.com/myrepo").
//   - {importPath} - Package import path ("example.com/myrepo/mypkg").
//   - {commit}     - Tag name or commit hash corresponding to version ("v0.1.0" or "1234567890ab").
//   - {dir}        - Path to directory of the package, relative to repo root ("mypkg").
//   - {file}       - Path to file containing the identifier, relative to repo root ("mypkg/file.go").
//   - {base}       - Base name of file containing the identifier, including file extension ("file.go").
//   - {line}       - Line number for the identifier ("41").
type urlTemplates struct {
	Repo      string `json:",omitempty"` // Optional URL template for the repository home page, with {repo}. If left empty, a default template "{repo}" is used.
	Directory string // URL template for a directory, with {repo}, {importPath}, {commit}, {dir}.
	File      string // URL template for a file, with {repo}, {importPath}, {commit}, {file}, {base}.
	Line      string // URL template for a line, with {repo}, {importPath}, {commit}, {file}, {base}, {line}.
	Raw       string // Optional URL template for the raw contents of a file, with {repo}, {commit}, {file}.
}
   674  
// URL templates for the code hosting systems this package knows about. They
// are referenced from the patterns and legacyTemplateMatches tables.
var (
	// githubURLTemplates is also used for hosts assumed to behave like GitHub.
	githubURLTemplates = urlTemplates{
		Directory: "{repo}/tree/{commit}/{dir}",
		File:      "{repo}/blob/{commit}/{file}",
		Line:      "{repo}/blob/{commit}/{file}#L{line}",
		Raw:       "{repo}/raw/{commit}/{file}",
	}

	bitbucketURLTemplates = urlTemplates{
		Directory: "{repo}/src/{commit}/{dir}",
		File:      "{repo}/src/{commit}/{file}",
		Line:      "{repo}/src/{commit}/{file}#lines-{line}",
		Raw:       "{repo}/raw/{commit}/{file}",
	}
	// giteaURLTemplates expects {commit} to already carry the "commit/" or
	// "tag/" prefix added by giteaTransformCommit, where one is used.
	giteaURLTemplates = urlTemplates{
		Directory: "{repo}/src/{commit}/{dir}",
		File:      "{repo}/src/{commit}/{file}",
		Line:      "{repo}/src/{commit}/{file}#L{line}",
		Raw:       "{repo}/raw/{commit}/{file}",
	}
	googlesourceURLTemplates = urlTemplates{
		Directory: "{repo}/+/{commit}/{dir}",
		File:      "{repo}/+/{commit}/{file}",
		Line:      "{repo}/+/{commit}/{file}#{line}",
		// Gitiles has no support for serving raw content at this time.
	}
	gitlabURLTemplates = urlTemplates{
		Directory: "{repo}/-/tree/{commit}/{dir}",
		File:      "{repo}/-/blob/{commit}/{file}",
		Line:      "{repo}/-/blob/{commit}/{file}#L{line}",
		Raw:       "{repo}/-/raw/{commit}/{file}",
	}
	// fdioURLTemplates places {commit} in the query string; fdioTransformCommit
	// supplies the "h=" or "id=" parameter name.
	fdioURLTemplates = urlTemplates{
		Directory: "{repo}/tree/{dir}?{commit}",
		File:      "{repo}/tree/{file}?{commit}",
		Line:      "{repo}/tree/{file}?{commit}#n{line}",
		Raw:       "{repo}/plain/{file}?{commit}",
	}
	// csopensourceTemplates has no Raw template; newStdlibInfo and
	// adjustGoRepoInfo set Raw to a GitHub URL on their own copies.
	csopensourceTemplates = urlTemplates{
		Directory: "{repo}/+/{commit}:{dir}",
		File:      "{repo}/+/{commit}:{file}",
		Line:      "{repo}/+/{commit}:{file};l={line}",
		// Gitiles has no support for serving raw content at this time.
	}
)
   720  
   721  // commitFromVersion returns a string that refers to a commit corresponding to version.
   722  // It also reports whether the returned value is a commit hash.
   723  // The string may be a tag, or it may be the hash or similar unique identifier of a commit.
   724  // The second argument is the module path relative to the repo root.
   725  func commitFromVersion(vers, relativeModulePath string) (commit string, isHash bool) {
   726  	// Commit for the module: either a sha for pseudoversions, or a tag.
   727  	v := strings.TrimSuffix(vers, "+incompatible")
   728  	if version.IsPseudo(v) {
   729  		// Use the commit hash at the end.
   730  		return v[strings.LastIndex(v, "-")+1:], true
   731  	} else {
   732  		// The tags for a nested module begin with the relative module path of the module,
   733  		// removing a "/vN" suffix if N > 1.
   734  		prefix := removeVersionSuffix(relativeModulePath)
   735  		if prefix != "" {
   736  			return prefix + "/" + v, false
   737  		}
   738  		return v, false
   739  	}
   740  }
   741  
   742  // trimVCSSuffix removes a VCS suffix from a repo URL in selected cases.
   743  //
   744  // The Go command allows a VCS suffix on a repo, like github.com/foo/bar.git. But
   745  // some code hosting sites don't support all paths constructed from such URLs.
   746  // For example, GitHub will redirect github.com/foo/bar.git to github.com/foo/bar,
   747  // but will 404 on github.com/goo/bar.git/tree/master and any other URL with a
   748  // non-empty path.
   749  //
   750  // To be conservative, we remove the suffix only in cases where we know it's
   751  // wrong.
   752  func trimVCSSuffix(repoURL string) string {
   753  	if !strings.HasSuffix(repoURL, ".git") {
   754  		return repoURL
   755  	}
   756  	if strings.HasPrefix(repoURL, "https://github.com/") || strings.HasPrefix(repoURL, "https://gitlab.com/") {
   757  		return strings.TrimSuffix(repoURL, ".git")
   758  	}
   759  	return repoURL
   760  }
   761  
   762  // The following code copied from cmd/go/internal/get:
   763  
   764  // expand rewrites s to replace {k} with match[k] for each key k in match.
   765  func expand(s string, match map[string]string) string {
   766  	// We want to replace each match exactly once, and the result of expansion
   767  	// must not depend on the iteration order through the map.
   768  	// A strings.Replacer has exactly the properties we're looking for.
   769  	oldNew := make([]string, 0, 2*len(match))
   770  	for k, v := range match {
   771  		oldNew = append(oldNew, "{"+k+"}", v)
   772  	}
   773  	return strings.NewReplacer(oldNew...).Replace(s)
   774  }
   775  
   776  // NewGitHubInfo creates a source.Info with GitHub URL templates.
   777  // It is for testing only.
   778  func NewGitHubInfo(repoURL, moduleDir, commit string) *Info {
   779  	return &Info{
   780  		repoURL:   trimVCSSuffix(repoURL),
   781  		moduleDir: moduleDir,
   782  		commit:    commit,
   783  		templates: githubURLTemplates,
   784  	}
   785  }