github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/pkg/pod-utils/clone/clone.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package clone
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"net/url"
    23  	"os/exec"
    24  	"path"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/sirupsen/logrus"
    30  
    31  	"k8s.io/apimachinery/pkg/util/sets"
    32  	prowapi "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
    33  	"sigs.k8s.io/prow/pkg/config/secret"
    34  	"sigs.k8s.io/prow/pkg/github"
    35  	"sigs.k8s.io/prow/pkg/logrusutil"
    36  )
    37  
// runnable is a single step of the clone process. run executes the step and
// returns the formatted command line, the output the step produced, and any
// error encountered while running it.
type runnable interface {
	run() (string, string, error)
}
    41  
    42  // Run clones the refs under the prescribed directory and optionally
    43  // configures the git username and email in the repository as well.
    44  func Run(refs prowapi.Refs, dir, gitUserName, gitUserEmail, cookiePath string, env []string, userGenerator github.UserGenerator, tokenGenerator github.TokenGenerator) Record {
    45  	startTime := time.Now()
    46  	record := Record{Refs: refs}
    47  
    48  	var (
    49  		user  string
    50  		token string
    51  		err   error
    52  	)
    53  	if userGenerator != nil {
    54  		user, err = userGenerator()
    55  		if err != nil {
    56  			logrus.WithError(err).Warn("Cannot generate user")
    57  			return record
    58  		}
    59  	}
    60  	if tokenGenerator != nil {
    61  		token, err = tokenGenerator(refs.Org)
    62  		if err != nil {
    63  			logrus.WithError(err).Warnf("Cannot generate token for %s", refs.Org)
    64  			return record
    65  		}
    66  	}
    67  
    68  	if token != "" {
    69  		logrus.SetFormatter(logrusutil.NewCensoringFormatter(logrus.StandardLogger().Formatter, func() sets.Set[string] {
    70  			return sets.New[string](token)
    71  		}))
    72  	}
    73  	logrus.WithFields(logrus.Fields{"refs": refs}).Info("Cloning refs")
    74  
    75  	// This function runs the provided commands in order, logging them as they run,
    76  	// aborting early and returning if any command fails.
    77  	runCommands := func(commands []runnable) error {
    78  		for _, command := range commands {
    79  			startTime := time.Now()
    80  			formattedCommand, output, err := command.run()
    81  			log := logrus.WithFields(logrus.Fields{"command": formattedCommand, "output": output})
    82  			if err != nil {
    83  				log = log.WithField("error", err)
    84  			}
    85  			log.Info("Ran command")
    86  			message := ""
    87  			if err != nil {
    88  				message = err.Error()
    89  				record.Failed = true
    90  			}
    91  			record.Commands = append(record.Commands, Command{
    92  				Command:  censorToken(string(secret.Censor([]byte(formattedCommand))), token),
    93  				Output:   censorToken(string(secret.Censor([]byte(output))), token),
    94  				Error:    censorToken(string(secret.Censor([]byte(message))), token),
    95  				Duration: time.Since(startTime),
    96  			})
    97  			if err != nil {
    98  				return err
    99  			}
   100  		}
   101  		return nil
   102  	}
   103  
   104  	g := gitCtxForRefs(refs, dir, env, user, token)
   105  	if err := runCommands(g.commandsForBaseRef(refs, gitUserName, gitUserEmail, cookiePath)); err != nil {
   106  		return record
   107  	}
   108  
   109  	timestamp, err := g.gitHeadTimestamp()
   110  	if err != nil {
   111  		timestamp = int(time.Now().Unix())
   112  	}
   113  	if err := runCommands(g.commandsForPullRefs(refs, timestamp)); err != nil {
   114  		return record
   115  	}
   116  
   117  	finalSHA, err := g.gitRevParse()
   118  	if err != nil {
   119  		logrus.WithError(err).Warnf("Cannot resolve finalSHA for ref %#v", refs)
   120  	} else {
   121  		record.FinalSHA = finalSHA
   122  	}
   123  
   124  	record.Duration = time.Since(startTime)
   125  
   126  	return record
   127  }
   128  
   129  func censorToken(msg, token string) string {
   130  	if token == "" {
   131  		return msg
   132  	}
   133  	censored := bytes.ReplaceAll([]byte(msg), []byte(token), []byte("CENSORED"))
   134  	return string(censored)
   135  }
   136  
   137  // PathForRefs determines the full path to where
   138  // refs should be cloned
   139  func PathForRefs(baseDir string, refs prowapi.Refs) string {
   140  	var clonePath string
   141  	if refs.PathAlias != "" {
   142  		clonePath = refs.PathAlias
   143  	} else if refs.RepoLink != "" {
   144  		// Drop the protocol from the RepoLink
   145  		parts := strings.Split(refs.RepoLink, "://")
   146  		clonePath = parts[len(parts)-1]
   147  	} else {
   148  		clonePath = fmt.Sprintf("github.com/%s/%s", refs.Org, refs.Repo)
   149  	}
   150  	return path.Join(baseDir, "src", clonePath)
   151  }
   152  
// gitCtx collects a few common values needed for all git commands.
type gitCtx struct {
	// cloneDir is the working directory in which git commands are run.
	cloneDir string
	// env is the environment handed to every command built from this context.
	env []string
	// repositoryURI is the remote that is fetched from; it may carry embedded
	// credentials when a token was supplied to gitCtxForRefs.
	repositoryURI string
}
   159  
   160  // gitCtxForRefs creates a gitCtx based on the provide refs and baseDir.
   161  func gitCtxForRefs(refs prowapi.Refs, baseDir string, env []string, user, token string) gitCtx {
   162  	var repoURI string
   163  	if refs.RepoLink != "" {
   164  		repoURI = fmt.Sprintf("%s.git", refs.RepoLink)
   165  	} else {
   166  		repoURI = fmt.Sprintf("https://github.com/%s/%s.git", refs.Org, refs.Repo)
   167  	}
   168  
   169  	g := gitCtx{
   170  		cloneDir:      PathForRefs(baseDir, refs),
   171  		env:           env,
   172  		repositoryURI: repoURI,
   173  	}
   174  	if refs.CloneURI != "" {
   175  		g.repositoryURI = refs.CloneURI
   176  	}
   177  
   178  	if token != "" {
   179  		u, err := url.Parse(g.repositoryURI)
   180  		// Ignore invalid URL from a CloneURI override (e.g. git@github.com:owner/repo)
   181  		if err == nil {
   182  			if user != "" {
   183  				u.User = url.UserPassword(user, token)
   184  			} else {
   185  				// GitHub requires that the personal access token is set as a username.
   186  				// e.g., https://<token>:x-oauth-basic@github.com/owner/repo.git
   187  				u.User = url.UserPassword(token, "x-oauth-basic")
   188  			}
   189  			g.repositoryURI = u.String()
   190  		}
   191  	}
   192  
   193  	return g
   194  }
   195  
   196  func (g *gitCtx) gitCommand(args ...string) cloneCommand {
   197  	return cloneCommand{dir: g.cloneDir, env: g.env, command: "git", args: args}
   198  }
   199  
   200  var (
   201  	fetchRetries = []time.Duration{
   202  		100 * time.Millisecond,
   203  		200 * time.Millisecond,
   204  		400 * time.Millisecond,
   205  		800 * time.Millisecond,
   206  		2 * time.Second,
   207  		5 * time.Second,
   208  		10 * time.Second,
   209  		15 * time.Second,
   210  		30 * time.Second,
   211  	}
   212  )
   213  
   214  func (g *gitCtx) gitFetch(fetchArgs ...string) retryCommand {
   215  	args := []string{"fetch"}
   216  	args = append(args, fetchArgs...)
   217  
   218  	return retryCommand{
   219  		runnable: g.gitCommand(args...),
   220  		retries:  fetchRetries,
   221  	}
   222  }
   223  
   224  // commandsForBaseRef returns the list of commands needed to initialize and
   225  // configure a local git directory, as well as fetch and check out the provided
   226  // base ref.
   227  func (g *gitCtx) commandsForBaseRef(refs prowapi.Refs, gitUserName, gitUserEmail, cookiePath string) []runnable {
   228  	var commands []runnable
   229  	commands = append(commands, cloneCommand{dir: "/", env: g.env, command: "mkdir", args: []string{"-p", g.cloneDir}})
   230  
   231  	commands = append(commands, g.gitCommand("init"))
   232  	if gitUserName != "" {
   233  		commands = append(commands, g.gitCommand("config", "user.name", gitUserName))
   234  	}
   235  	if gitUserEmail != "" {
   236  		commands = append(commands, g.gitCommand("config", "user.email", gitUserEmail))
   237  	}
   238  	if cookiePath != "" && refs.SkipSubmodules {
   239  		commands = append(commands, g.gitCommand("config", "http.cookiefile", cookiePath))
   240  	}
   241  
   242  	var depthArgs []string
   243  	if d := refs.CloneDepth; d > 0 {
   244  		depthArgs = append(depthArgs, "--depth", strconv.Itoa(d))
   245  	}
   246  	var filterArgs []string
   247  	if refs.BloblessFetch != nil && *refs.BloblessFetch {
   248  		filterArgs = append(filterArgs, "--filter=blob:none")
   249  	}
   250  
   251  	if !refs.SkipFetchHead {
   252  		var fetchArgs []string
   253  		fetchArgs = append(fetchArgs, depthArgs...)
   254  		fetchArgs = append(fetchArgs, filterArgs...)
   255  		fetchArgs = append(fetchArgs, g.repositoryURI, "--tags", "--prune")
   256  		commands = append(commands, g.gitFetch(fetchArgs...))
   257  	}
   258  
   259  	var fetchRef string
   260  	var target string
   261  	if refs.BaseSHA != "" {
   262  		fetchRef = refs.BaseSHA
   263  		target = refs.BaseSHA
   264  	} else {
   265  		fetchRef = refs.BaseRef
   266  		target = "FETCH_HEAD"
   267  	}
   268  
   269  	{
   270  		var fetchArgs []string
   271  		fetchArgs = append(fetchArgs, depthArgs...)
   272  		fetchArgs = append(fetchArgs, filterArgs...)
   273  		fetchArgs = append(fetchArgs, g.repositoryURI, fetchRef)
   274  		commands = append(commands, g.gitFetch(fetchArgs...))
   275  	}
   276  
   277  	// we need to be "on" the target branch after the sync
   278  	// so we need to set the branch to point to the base ref,
   279  	// but we cannot update a branch we are on, so in case we
   280  	// are on the branch we are syncing, we check out the SHA
   281  	// first and reset the branch second, then check out the
   282  	// branch we just reset to be in the correct final state
   283  	commands = append(commands, g.gitCommand("checkout", target))
   284  	commands = append(commands, g.gitCommand("branch", "--force", refs.BaseRef, target))
   285  	commands = append(commands, g.gitCommand("checkout", refs.BaseRef))
   286  
   287  	return commands
   288  }
   289  
   290  // gitHeadTimestamp returns the timestamp of the HEAD commit as seconds from the
   291  // UNIX epoch. If unable to read the timestamp for any reason (such as missing
   292  // the git, or not using a git repo), it returns 0 and an error.
   293  func (g *gitCtx) gitHeadTimestamp() (int, error) {
   294  	gitShowCommand := g.gitCommand("show", "-s", "--format=format:%ct", "HEAD")
   295  	_, gitOutput, err := gitShowCommand.run()
   296  	if err != nil {
   297  		logrus.WithError(err).Debug("Could not obtain timestamp of git HEAD")
   298  		return 0, err
   299  	}
   300  	timestamp, convErr := strconv.Atoi(strings.TrimSpace(string(gitOutput)))
   301  	if convErr != nil {
   302  		logrus.WithError(convErr).Errorf("Failed to parse timestamp %q", gitOutput)
   303  		return 0, convErr
   304  	}
   305  	return timestamp, nil
   306  }
   307  
   308  // gitTimestampEnvs returns the list of environment variables needed to override
   309  // git's author and commit timestamps when creating new commits.
   310  func gitTimestampEnvs(timestamp int) []string {
   311  	return []string{
   312  		fmt.Sprintf("GIT_AUTHOR_DATE=%d", timestamp),
   313  		fmt.Sprintf("GIT_COMMITTER_DATE=%d", timestamp),
   314  	}
   315  }
   316  
   317  // gitRevParse returns current commit from HEAD in a git tree
   318  func (g *gitCtx) gitRevParse() (string, error) {
   319  	gitRevParseCommand := g.gitCommand("rev-parse", "HEAD")
   320  	_, commit, err := gitRevParseCommand.run()
   321  	if err != nil {
   322  		logrus.WithError(err).Error("git rev-parse HEAD failed!")
   323  		return "", err
   324  	}
   325  	return strings.TrimSpace(commit), nil
   326  }
   327  
   328  // commandsForPullRefs returns the list of commands needed to fetch and
   329  // merge any pull refs as well as submodules. These commands should be run only
   330  // after the commands provided by commandsForBaseRef have been run
   331  // successfully.
   332  // Each merge commit will be created at sequential seconds after fakeTimestamp.
   333  // It's recommended that fakeTimestamp be set to the timestamp of the base ref.
   334  // This enables reproducible timestamps and git tree digests every time the same
   335  // set of base and pull refs are used.
   336  func (g *gitCtx) commandsForPullRefs(refs prowapi.Refs, fakeTimestamp int) []runnable {
   337  	var commands []runnable
   338  	for _, prRef := range refs.Pulls {
   339  		var fetchArgs []string
   340  		if refs.BloblessFetch != nil && *refs.BloblessFetch {
   341  			fetchArgs = append(fetchArgs, "--filter=blob:none")
   342  		}
   343  		ref := fmt.Sprintf("pull/%d/head", prRef.Number)
   344  		if prRef.SHA != "" {
   345  			ref = prRef.SHA
   346  		}
   347  		if prRef.Ref != "" {
   348  			ref = prRef.Ref
   349  		}
   350  		fetchArgs = append(fetchArgs, g.repositoryURI, ref)
   351  		commands = append(commands, g.gitFetch(fetchArgs...))
   352  		var prCheckout string
   353  		if prRef.SHA != "" {
   354  			prCheckout = prRef.SHA
   355  		} else {
   356  			prCheckout = "FETCH_HEAD"
   357  		}
   358  		fakeTimestamp++
   359  		gitMergeCommand := g.gitCommand("merge", "--no-ff", prCheckout)
   360  		gitMergeCommand.env = append(gitMergeCommand.env, gitTimestampEnvs(fakeTimestamp)...)
   361  		commands = append(commands, gitMergeCommand)
   362  	}
   363  
   364  	// unless the user specifically asks us not to, init submodules
   365  	if !refs.SkipSubmodules {
   366  		commands = append(commands, g.gitCommand("submodule", "update", "--init", "--recursive"))
   367  	}
   368  
   369  	return commands
   370  }
   371  
   372  type retryCommand struct {
   373  	runnable
   374  	retries []time.Duration
   375  }
   376  
   377  func (rc retryCommand) run() (string, string, error) {
   378  	cmd, out, err := rc.runnable.run()
   379  	if err == nil {
   380  		return cmd, out, err
   381  	}
   382  	for _, dur := range rc.retries {
   383  		logrus.WithError(err).WithFields(logrus.Fields{
   384  			"sleep":   dur,
   385  			"command": cmd,
   386  		}).Info("Retrying after sleep")
   387  		time.Sleep(dur)
   388  		cmd, out, err = rc.runnable.run()
   389  		if err == nil {
   390  			break
   391  		}
   392  	}
   393  	return cmd, out, err
   394  }
   395  
   396  type cloneCommand struct {
   397  	dir     string
   398  	env     []string
   399  	command string
   400  	args    []string
   401  }
   402  
   403  func (c cloneCommand) run() (string, string, error) {
   404  	var output bytes.Buffer
   405  	cmd := exec.Command(c.command, c.args...)
   406  	cmd.Dir = c.dir
   407  	cmd.Env = append(cmd.Env, c.env...)
   408  	cmd.Stdout = &output
   409  	cmd.Stderr = &output
   410  	err := cmd.Run()
   411  	return c.String(), output.String(), err
   412  }
   413  
   414  func (c cloneCommand) String() string {
   415  	return fmt.Sprintf("PWD=%s %s %s %s", c.dir, strings.Join(c.env, " "), c.command, strings.Join(c.args, " "))
   416  }