sigs.k8s.io/cluster-api@v1.7.1/cmd/clusterctl/client/repository/repository_github.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package repository
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"io"
    23  	"net/http"
    24  	"net/url"
    25  	"os"
    26  	"path"
    27  	"path/filepath"
    28  	"strings"
    29  	"time"
    30  
    31  	"github.com/blang/semver/v4"
    32  	"github.com/google/go-github/v53/github"
    33  	"github.com/pkg/errors"
    34  	"golang.org/x/oauth2"
    35  	"k8s.io/apimachinery/pkg/util/version"
    36  	"k8s.io/apimachinery/pkg/util/wait"
    37  
    38  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    39  	"sigs.k8s.io/cluster-api/cmd/clusterctl/client/config"
    40  	logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
    41  	"sigs.k8s.io/cluster-api/internal/goproxy"
    42  )
    43  
    44  const (
    45  	httpsScheme                    = "https"
    46  	githubDomain                   = "github.com"
    47  	githubReleaseRepository        = "releases"
    48  	githubLatestReleaseLabel       = "latest"
    49  	githubListReleasesPerPageLimit = 100
    50  )
    51  
    52  var (
    53  	errNotFound = errors.New("404 Not Found")
    54  
    55  	// Caches used to limit the number of GitHub API calls.
    56  
    57  	cacheVersions              = map[string][]string{}
    58  	cacheReleases              = map[string]*github.RepositoryRelease{}
    59  	cacheFiles                 = map[string][]byte{}
    60  	retryableOperationInterval = 10 * time.Second
    61  	retryableOperationTimeout  = 1 * time.Minute
    62  )
    63  
    64  // gitHubRepository provides support for providers hosted on GitHub.
    65  //
    66  // We support GitHub repositories that use the release feature to publish artifacts and versions.
    67  // Repositories must use versioned releases, including the "latest" meta version
    68  // (https://help.github.com/en/github/administering-a-repository/linking-to-releases#linking-to-the-latest-release).
    69  type gitHubRepository struct {
    70  	providerConfig           config.Provider
    71  	configVariablesClient    config.VariablesClient
    72  	authenticatingHTTPClient *http.Client
    73  	owner                    string
    74  	repository               string
    75  	defaultVersion           string
    76  	rootPath                 string
    77  	componentsPath           string
    78  	injectClient             *github.Client
    79  	injectGoproxyClient      *goproxy.Client
    80  }
    81  
    82  var _ Repository = &gitHubRepository{}
    83  
    84  type githubRepositoryOption func(*gitHubRepository)
    85  
    86  func injectGithubClient(c *github.Client) githubRepositoryOption {
    87  	return func(g *gitHubRepository) {
    88  		g.injectClient = c
    89  	}
    90  }
    91  
    92  func injectGoproxyClient(c *goproxy.Client) githubRepositoryOption {
    93  	return func(g *gitHubRepository) {
    94  		g.injectGoproxyClient = c
    95  	}
    96  }
    97  
    98  // DefaultVersion returns defaultVersion field of gitHubRepository struct.
    99  func (g *gitHubRepository) DefaultVersion() string {
   100  	return g.defaultVersion
   101  }
   102  
   103  // GetVersions returns the list of versions that are available in a provider repository.
   104  func (g *gitHubRepository) GetVersions(ctx context.Context) ([]string, error) {
   105  	log := logf.Log
   106  
   107  	cacheID := fmt.Sprintf("%s/%s", g.owner, g.repository)
   108  	if versions, ok := cacheVersions[cacheID]; ok {
   109  		return versions, nil
   110  	}
   111  
   112  	goProxyClient, err := g.getGoproxyClient(ctx)
   113  	if err != nil {
   114  		return nil, errors.Wrap(err, "get versions client")
   115  	}
   116  
   117  	var versions []string
   118  	if goProxyClient != nil {
   119  		// A goproxy is also able to handle the github repository path instead of the actual go module name.
   120  		gomodulePath := path.Join(githubDomain, g.owner, g.repository)
   121  
   122  		var parsedVersions semver.Versions
   123  		parsedVersions, err = goProxyClient.GetVersions(ctx, gomodulePath)
   124  
   125  		// Log the error before fallback to github repository client happens.
   126  		if err != nil {
   127  			log.V(5).Info("error using Goproxy client to list versions for repository, falling back to github client", "owner", g.owner, "repository", g.repository, "error", err)
   128  		}
   129  
   130  		for _, v := range parsedVersions {
   131  			versions = append(versions, "v"+v.String())
   132  		}
   133  	}
   134  
   135  	// Fallback to github repository client if goProxyClient is nil or an error occurred.
   136  	if goProxyClient == nil || err != nil {
   137  		versions, err = g.getVersions(ctx)
   138  		if err != nil {
   139  			return nil, errors.Wrapf(err, "failed to get repository versions")
   140  		}
   141  	}
   142  
   143  	cacheVersions[cacheID] = versions
   144  	return versions, nil
   145  }
   146  
   147  // RootPath returns rootPath field of gitHubRepository struct.
   148  func (g *gitHubRepository) RootPath() string {
   149  	return g.rootPath
   150  }
   151  
   152  // ComponentsPath returns componentsPath field of gitHubRepository struct.
   153  func (g *gitHubRepository) ComponentsPath() string {
   154  	return g.componentsPath
   155  }
   156  
   157  // GetFile returns a file for a given provider version.
   158  func (g *gitHubRepository) GetFile(ctx context.Context, version, path string) ([]byte, error) {
   159  	log := logf.Log
   160  
   161  	cacheID := fmt.Sprintf("%s/%s:%s:%s", g.owner, g.repository, version, path)
   162  	if content, ok := cacheFiles[cacheID]; ok {
   163  		return content, nil
   164  	}
   165  
   166  	// Try to get the file using http get.
   167  	// NOTE: this can be disabled by setting GORPOXY to `direct` or `off` (same knobs used for skipping goproxy requests).
   168  	if goProxyClient, _ := g.getGoproxyClient(ctx); goProxyClient != nil {
   169  		files, err := g.httpGetFilesFromRelease(ctx, version, path)
   170  		if err != nil {
   171  			log.V(5).Info("error using httpGet to get file from GitHub releases, falling back to github client", "owner", g.owner, "repository", g.repository, "version", version, "path", path, "error", err)
   172  		} else {
   173  			cacheFiles[cacheID] = files
   174  			return files, nil
   175  		}
   176  	}
   177  
   178  	// If the http get request failed (or it is disabled) falls back on using the GITHUB api to download the file
   179  
   180  	release, err := g.getReleaseByTag(ctx, version)
   181  	if err != nil {
   182  		if errors.Is(err, errNotFound) {
   183  			// If it was ErrNotFound, then there is no release yet for the resolved tag.
   184  			// Ref: https://github.com/kubernetes-sigs/cluster-api/issues/7889
   185  			return nil, errors.Wrapf(err, "release not found for version %s, please retry later or set \"GOPROXY=off\" to get the current stable release", version)
   186  		}
   187  		return nil, errors.Wrapf(err, "failed to get GitHub release %s", version)
   188  	}
   189  
   190  	// Download files from the release.
   191  	files, err := g.downloadFilesFromRelease(ctx, release, path)
   192  	if err != nil {
   193  		return nil, errors.Wrapf(err, "failed to download files from GitHub release %s", version)
   194  	}
   195  
   196  	cacheFiles[cacheID] = files
   197  	return files, nil
   198  }
   199  
   200  // NewGitHubRepository returns a gitHubRepository implementation.
   201  func NewGitHubRepository(ctx context.Context, providerConfig config.Provider, configVariablesClient config.VariablesClient, opts ...githubRepositoryOption) (Repository, error) {
   202  	if configVariablesClient == nil {
   203  		return nil, errors.New("invalid arguments: configVariablesClient can't be nil")
   204  	}
   205  
   206  	rURL, err := url.Parse(providerConfig.URL())
   207  	if err != nil {
   208  		return nil, errors.Wrap(err, "invalid url")
   209  	}
   210  
   211  	// Check if the url is a github repository
   212  	if rURL.Scheme != httpsScheme || rURL.Host != githubDomain {
   213  		return nil, errors.New("invalid url: a GitHub repository url should start with https://github.com")
   214  	}
   215  
   216  	// Check if the path is in the expected format,
   217  	// url's path has an extra leading slash at the end which we need to clean up before splitting.
   218  	urlSplit := strings.Split(strings.TrimPrefix(rURL.Path, "/"), "/")
   219  	if len(urlSplit) < 5 || urlSplit[2] != githubReleaseRepository {
   220  		return nil, errors.Errorf(
   221  			"invalid url: a GitHub repository url should be in the form https://github.com/{owner}/{Repository}/%s/{latest|version-tag}/{componentsClient.yaml}",
   222  			githubReleaseRepository,
   223  		)
   224  	}
   225  
   226  	// Extract all the info from url split.
   227  	owner := urlSplit[0]
   228  	repository := urlSplit[1]
   229  	defaultVersion := urlSplit[3]
   230  	path := strings.Join(urlSplit[4:], "/")
   231  
   232  	// Use path's directory as a rootPath.
   233  	rootPath := filepath.Dir(path)
   234  	// Use the file name (if any) as componentsPath.
   235  	componentsPath := getComponentsPath(path, rootPath)
   236  
   237  	repo := &gitHubRepository{
   238  		providerConfig:        providerConfig,
   239  		configVariablesClient: configVariablesClient,
   240  		owner:                 owner,
   241  		repository:            repository,
   242  		defaultVersion:        defaultVersion,
   243  		rootPath:              rootPath,
   244  		componentsPath:        componentsPath,
   245  	}
   246  
   247  	// Process githubRepositoryOptions.
   248  	for _, o := range opts {
   249  		o(repo)
   250  	}
   251  
   252  	if token, err := configVariablesClient.Get(config.GitHubTokenVariable); err == nil {
   253  		repo.setClientToken(ctx, token)
   254  	}
   255  
   256  	if defaultVersion == githubLatestReleaseLabel {
   257  		repo.defaultVersion, err = latestContractRelease(ctx, repo, clusterv1.GroupVersion.Version)
   258  		if err != nil {
   259  			return nil, errors.Wrap(err, "failed to get latest release")
   260  		}
   261  	}
   262  
   263  	return repo, nil
   264  }
   265  
   266  // getComponentsPath returns the file name.
   267  func getComponentsPath(path string, rootPath string) string {
   268  	filePath := strings.TrimPrefix(path, rootPath)
   269  	componentsPath := strings.TrimPrefix(filePath, "/")
   270  	return componentsPath
   271  }
   272  
   273  // getClient returns a github API client.
   274  func (g *gitHubRepository) getClient() *github.Client {
   275  	if g.injectClient != nil {
   276  		return g.injectClient
   277  	}
   278  	return github.NewClient(g.authenticatingHTTPClient)
   279  }
   280  
   281  // getGoproxyClient returns a go proxy client.
   282  // It returns nil, nil if the environment variable is set to `direct` or `off`
   283  // to skip goproxy requests.
   284  func (g *gitHubRepository) getGoproxyClient(_ context.Context) (*goproxy.Client, error) {
   285  	if g.injectGoproxyClient != nil {
   286  		return g.injectGoproxyClient, nil
   287  	}
   288  	scheme, host, err := goproxy.GetSchemeAndHost(os.Getenv("GOPROXY"))
   289  	if err != nil {
   290  		return nil, err
   291  	}
   292  	// Don't return a client if scheme and host is set to empty string.
   293  	if scheme == "" && host == "" {
   294  		return nil, nil
   295  	}
   296  	return goproxy.NewClient(scheme, host), nil
   297  }
   298  
   299  // setClientToken sets authenticatingHTTPClient field of gitHubRepository struct.
   300  func (g *gitHubRepository) setClientToken(ctx context.Context, token string) {
   301  	ts := oauth2.StaticTokenSource(
   302  		&oauth2.Token{AccessToken: token},
   303  	)
   304  	g.authenticatingHTTPClient = oauth2.NewClient(ctx, ts)
   305  }
   306  
   307  // getVersions returns all the release versions for a github repository.
   308  func (g *gitHubRepository) getVersions(ctx context.Context) ([]string, error) {
   309  	client := g.getClient()
   310  
   311  	// Get all the releases.
   312  	// NB. currently Github API does not support result ordering, so it not possible to limit results
   313  	var allReleases []*github.RepositoryRelease
   314  	var retryError error
   315  	_ = wait.PollUntilContextTimeout(ctx, retryableOperationInterval, retryableOperationTimeout, true, func(ctx context.Context) (bool, error) {
   316  		var listReleasesErr error
   317  		// Get the first page of GitHub releases.
   318  		releases, response, listReleasesErr := client.Repositories.ListReleases(ctx, g.owner, g.repository, &github.ListOptions{PerPage: githubListReleasesPerPageLimit})
   319  		if listReleasesErr != nil {
   320  			retryError = g.handleGithubErr(listReleasesErr, "failed to get the list of releases")
   321  			// Return immediately if we are rate limited.
   322  			if _, ok := listReleasesErr.(*github.RateLimitError); ok {
   323  				return false, retryError
   324  			}
   325  			return false, nil
   326  		}
   327  		allReleases = append(allReleases, releases...)
   328  
   329  		// Paginated GitHub APIs provide pointers to the first, next, previous and last
   330  		// pages in the response, which can be used to iterate through the pages.
   331  		// https://github.com/google/go-github/blob/14bb610698fc2f9013cad5db79b2d5fe4d53e13c/github/github.go#L541-L551
   332  		for response.NextPage != 0 {
   333  			releases, response, listReleasesErr = client.Repositories.ListReleases(ctx, g.owner, g.repository, &github.ListOptions{Page: response.NextPage, PerPage: githubListReleasesPerPageLimit})
   334  			if listReleasesErr != nil {
   335  				retryError = g.handleGithubErr(listReleasesErr, "failed to get the list of releases")
   336  				// Return immediately if we are rate limited.
   337  				if _, ok := listReleasesErr.(*github.RateLimitError); ok {
   338  					return false, retryError
   339  				}
   340  				return false, nil
   341  			}
   342  			allReleases = append(allReleases, releases...)
   343  		}
   344  		retryError = nil
   345  		return true, nil
   346  	})
   347  	if retryError != nil {
   348  		return nil, retryError
   349  	}
   350  	versions := []string{}
   351  	for _, r := range allReleases {
   352  		r := r // pin
   353  		if r.TagName == nil {
   354  			continue
   355  		}
   356  		tagName := *r.TagName
   357  		if _, err := version.ParseSemantic(tagName); err != nil {
   358  			// Discard releases with tags that are not a valid semantic versions (the user can point explicitly to such releases).
   359  			continue
   360  		}
   361  		versions = append(versions, tagName)
   362  	}
   363  
   364  	return versions, nil
   365  }
   366  
   367  // getReleaseByTag returns the github repository release with a specific tag name.
   368  func (g *gitHubRepository) getReleaseByTag(ctx context.Context, tag string) (*github.RepositoryRelease, error) {
   369  	cacheID := fmt.Sprintf("%s/%s:%s", g.owner, g.repository, tag)
   370  	if release, ok := cacheReleases[cacheID]; ok {
   371  		return release, nil
   372  	}
   373  
   374  	client := g.getClient()
   375  
   376  	var release *github.RepositoryRelease
   377  	var retryError error
   378  	_ = wait.PollUntilContextTimeout(ctx, retryableOperationInterval, retryableOperationTimeout, true, func(ctx context.Context) (bool, error) {
   379  		var getReleasesErr error
   380  		release, _, getReleasesErr = client.Repositories.GetReleaseByTag(ctx, g.owner, g.repository, tag)
   381  		if getReleasesErr != nil {
   382  			retryError = g.handleGithubErr(getReleasesErr, "failed to read release %q", tag)
   383  			// Return immediately if not found
   384  			if errors.Is(retryError, errNotFound) {
   385  				return false, retryError
   386  			}
   387  			// Return immediately if we are rate limited.
   388  			if _, ok := getReleasesErr.(*github.RateLimitError); ok {
   389  				return false, retryError
   390  			}
   391  			return false, nil
   392  		}
   393  		retryError = nil
   394  		return true, nil
   395  	})
   396  	if retryError != nil {
   397  		return nil, retryError
   398  	}
   399  
   400  	cacheReleases[cacheID] = release
   401  	return release, nil
   402  }
   403  
   404  // httpGetFilesFromRelease gets a file from github using http get.
   405  func (g *gitHubRepository) httpGetFilesFromRelease(ctx context.Context, version, fileName string) ([]byte, error) {
   406  	downloadURL := fmt.Sprintf("https://github.com/%s/%s/releases/download/%s/%s", g.owner, g.repository, version, fileName)
   407  	var retryError error
   408  	var content []byte
   409  	_ = wait.PollUntilContextTimeout(ctx, retryableOperationInterval, retryableOperationTimeout, true, func(context.Context) (bool, error) {
   410  		resp, err := http.Get(downloadURL) //nolint:gosec,noctx
   411  		if err != nil {
   412  			retryError = errors.Wrap(err, "error sending request")
   413  			return false, nil
   414  		}
   415  		defer resp.Body.Close()
   416  
   417  		if resp.StatusCode != http.StatusOK {
   418  			retryError = errors.Errorf("error getting file, status code: %d", resp.StatusCode)
   419  			return false, nil
   420  		}
   421  
   422  		content, err = io.ReadAll(resp.Body)
   423  		if err != nil {
   424  			retryError = errors.Wrap(err, "error reading response body")
   425  			return false, nil
   426  		}
   427  
   428  		retryError = nil
   429  		return true, nil
   430  	})
   431  	if retryError != nil {
   432  		return nil, retryError
   433  	}
   434  	return content, nil
   435  }
   436  
   437  // downloadFilesFromRelease download a file from release.
   438  func (g *gitHubRepository) downloadFilesFromRelease(ctx context.Context, release *github.RepositoryRelease, fileName string) ([]byte, error) {
   439  	client := g.getClient()
   440  	absoluteFileName := filepath.Join(g.rootPath, fileName)
   441  
   442  	// Search for the file into the release assets, retrieving the asset id.
   443  	var assetID *int64
   444  	for _, a := range release.Assets {
   445  		if a.Name != nil && *a.Name == absoluteFileName {
   446  			assetID = a.ID
   447  			break
   448  		}
   449  	}
   450  	if assetID == nil {
   451  		return nil, errors.Errorf("failed to get file %q from %q release", fileName, *release.TagName)
   452  	}
   453  
   454  	var reader io.ReadCloser
   455  	var retryError error
   456  	var content []byte
   457  	_ = wait.PollUntilContextTimeout(ctx, retryableOperationInterval, retryableOperationTimeout, true, func(ctx context.Context) (bool, error) {
   458  		var redirect string
   459  		var downloadReleaseError error
   460  		reader, redirect, downloadReleaseError = client.Repositories.DownloadReleaseAsset(ctx, g.owner, g.repository, *assetID, http.DefaultClient)
   461  		if downloadReleaseError != nil {
   462  			retryError = g.handleGithubErr(downloadReleaseError, "failed to download file %q from %q release", *release.TagName, fileName)
   463  			// Return immediately if we are rate limited.
   464  			if _, ok := downloadReleaseError.(*github.RateLimitError); ok {
   465  				return false, retryError
   466  			}
   467  			return false, nil
   468  		}
   469  		defer reader.Close()
   470  
   471  		if redirect != "" {
   472  			// NOTE: DownloadReleaseAsset should not return a redirect address when used with the DefaultClient.
   473  			retryError = errors.New("unexpected redirect while downloading the release asset")
   474  			return true, retryError
   475  		}
   476  
   477  		// Read contents from the reader (redirect or not), and return.
   478  		var err error
   479  		content, err = io.ReadAll(reader)
   480  		if err != nil {
   481  			retryError = errors.Wrapf(err, "failed to read downloaded file %q from %q release", *release.TagName, fileName)
   482  			return false, nil
   483  		}
   484  
   485  		retryError = nil
   486  		return true, nil
   487  	})
   488  	if retryError != nil {
   489  		return nil, retryError
   490  	}
   491  
   492  	return content, nil
   493  }
   494  
   495  // handleGithubErr wraps error messages.
   496  func (g *gitHubRepository) handleGithubErr(err error, message string, args ...interface{}) error {
   497  	if _, ok := err.(*github.RateLimitError); ok {
   498  		return errors.New("rate limit for github api has been reached. Please wait one hour or get a personal API token and assign it to the GITHUB_TOKEN environment variable")
   499  	}
   500  	if ghErr, ok := err.(*github.ErrorResponse); ok {
   501  		if ghErr.Response.StatusCode == http.StatusNotFound {
   502  			return errNotFound
   503  		}
   504  	}
   505  	return errors.Wrapf(err, message, args...)
   506  }