github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/cmd/deck/pr_history.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package main
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net/url"
    23  	"path"
    24  	"regexp"
    25  	"sort"
    26  	"strconv"
    27  	"strings"
    28  	"time"
    29  
    30  	"github.com/sirupsen/logrus"
    31  	"k8s.io/apimachinery/pkg/util/sets"
    32  
    33  	v1 "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
    34  	"sigs.k8s.io/prow/pkg/config"
    35  	"sigs.k8s.io/prow/pkg/gcsupload"
    36  	"sigs.k8s.io/prow/pkg/git/v2"
    37  	pkgio "sigs.k8s.io/prow/pkg/io"
    38  	"sigs.k8s.io/prow/pkg/pod-utils/downwardapi"
    39  )
    40  
// pullCommitRe matches the serialized pull metadata of the form
// "branch:hash,pullNumber:hash" (see getPullCommitHash), where each hash is a
// 40-character hex SHA; the pull commit hash is captured in group 1.
var pullCommitRe = regexp.MustCompile(`^[-\.\w]+:\w{40},\d+:(\w{40})$`)
    42  
// prHistoryTemplate is the aggregated PR history produced by getPRHistory,
// rendered by deck's PR history page.
type prHistoryTemplate struct {
	Link    string       // link to the PR on GitHub
	Name    string       // display name, e.g. "org/repo #123"
	Jobs    []prJobData  // one entry per presubmit job found in storage
	Commits []commitData // commits built against the PR, most recently built first
}
    49  
// prJobData holds the builds of a single job on a PR, plus display links.
type prJobData struct {
	Name   string      // job name
	Link   string      // link to the job history page for this job
	Builds []buildData // builds ordered by commit, padded with empty entries for column alignment
}
    55  
// jobBuilds pairs a job name with the storage prefixes of its builds.
type jobBuilds struct {
	name          string   // base name of the job (last path segment of the job prefix)
	buildPrefixes []string // storage "directory" prefixes, one per build
}
    60  
    61  type commitData struct {
    62  	Hash       string
    63  	HashPrefix string // used only for display purposes, so don't worry about uniqueness
    64  	Link       string
    65  	MaxWidth   int
    66  	latest     time.Time // time stamp of the job most recently started
    67  }
    68  
    69  type latestCommit []commitData
    70  
    71  func (a latestCommit) Len() int      { return len(a) }
    72  func (a latestCommit) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
    73  func (a latestCommit) Less(i, j int) bool {
    74  	if len(a[i].Hash) != 40 {
    75  		return true
    76  	}
    77  	if len(a[j].Hash) != 40 {
    78  		return false
    79  	}
    80  	return a[i].latest.Before(a[j].latest)
    81  }
    82  
    83  type byStarted []buildData
    84  
    85  func (a byStarted) Len() int           { return len(a) }
    86  func (a byStarted) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
    87  func (a byStarted) Less(i, j int) bool { return a[i].Started.Before(a[j].Started) }
    88  
    89  func githubPRLink(githubHost, org, repo string, pr int) string {
    90  	return fmt.Sprintf("https://%s/%s/%s/pull/%d", githubHost, org, repo, pr)
    91  }
    92  
    93  func githubCommitLink(githubHost, org, repo, commitHash string) string {
    94  	return fmt.Sprintf("https://%s/%s/%s/commit/%s", githubHost, org, repo, commitHash)
    95  }
    96  
    97  func jobHistLink(storageProvider, bucketName, jobName string) string {
    98  	return fmt.Sprintf("/job-history/%s/%s/pr-logs/directory/%s", storageProvider, bucketName, jobName)
    99  }
   100  
   101  // gets the pull commit hash from metadata
   102  func getPullCommitHash(pull string) (string, error) {
   103  	match := pullCommitRe.FindStringSubmatch(pull)
   104  	if len(match) != 2 {
   105  		expected := "branch:hash,pullNumber:hash"
   106  		return "", fmt.Errorf("unable to parse pull %q (expected %q)", pull, expected)
   107  	}
   108  	return match[1], nil
   109  }
   110  
   111  // listJobBuilds concurrently lists builds for the given job prefixes that have been run on a PR
   112  func listJobBuilds(ctx context.Context, bucket storageBucket, jobPrefixes []string) []jobBuilds {
   113  	jobch := make(chan jobBuilds)
   114  	defer close(jobch)
   115  	for i, jobPrefix := range jobPrefixes {
   116  		go func(i int, jobPrefix string) {
   117  			buildPrefixes, err := bucket.listSubDirs(ctx, jobPrefix)
   118  			if err != nil {
   119  				logrus.WithError(err).Warningf("Error getting builds for job %s", jobPrefix)
   120  			}
   121  			jobch <- jobBuilds{
   122  				name:          path.Base(jobPrefix),
   123  				buildPrefixes: buildPrefixes,
   124  			}
   125  		}(i, jobPrefix)
   126  	}
   127  	jobs := []jobBuilds{}
   128  	for range jobPrefixes {
   129  		job := <-jobch
   130  		jobs = append(jobs, job)
   131  	}
   132  	return jobs
   133  }
   134  
   135  // getPRBuildData concurrently fetches metadata on each build of each job run on a PR
   136  func getPRBuildData(ctx context.Context, bucket storageBucket, jobs []jobBuilds) []buildData {
   137  	buildch := make(chan buildData)
   138  	defer close(buildch)
   139  	expected := 0
   140  	for _, job := range jobs {
   141  		for j, buildPrefix := range job.buildPrefixes {
   142  			go func(j int, jobName, buildPrefix string) {
   143  				build, err := getBuildData(ctx, bucket, buildPrefix)
   144  				if err != nil {
   145  					if pkgio.IsNotExist(err) {
   146  						logrus.WithError(err).WithField("prefix", buildPrefix).Debug("Build information incomplete.")
   147  					} else {
   148  						logrus.WithError(err).WithField("prefix", buildPrefix).Warning("Build information incomplete.")
   149  					}
   150  				}
   151  				split := strings.Split(strings.TrimSuffix(buildPrefix, "/"), "/")
   152  				build.SpyglassLink = path.Join(spyglassPrefix, bucket.getStorageProvider(), bucket.getName(), buildPrefix)
   153  				build.ID = split[len(split)-1]
   154  				build.jobName = jobName
   155  				build.prefix = buildPrefix
   156  				build.index = j
   157  				buildch <- build
   158  			}(j, job.name, buildPrefix)
   159  			expected++
   160  		}
   161  	}
   162  	builds := []buildData{}
   163  	for k := 0; k < expected; k++ {
   164  		build := <-buildch
   165  		builds = append(builds, build)
   166  	}
   167  	return builds
   168  }
   169  
   170  func updateCommitData(commits map[string]*commitData, githubHost, org, repo, hash string, buildTime time.Time, width int) {
   171  	commit, ok := commits[hash]
   172  	if !ok {
   173  		commits[hash] = &commitData{
   174  			Hash:       hash,
   175  			HashPrefix: hash,
   176  		}
   177  		commit = commits[hash]
   178  		if len(hash) == 40 {
   179  			commit.HashPrefix = hash[:7]
   180  			commit.Link = githubCommitLink(githubHost, org, repo, hash)
   181  		}
   182  	}
   183  	if buildTime.After(commit.latest) {
   184  		commit.latest = buildTime
   185  	}
   186  	if width > commit.MaxWidth {
   187  		commit.MaxWidth = width
   188  	}
   189  }
   190  
   191  // parsePullURL parses PR history URLs. Expects this format:
   192  // .../pr-history?org=<org>&repo=<repo>&pr=<pr number>
   193  func parsePullURL(u *url.URL) (org, repo string, pr int, err error) {
   194  	var prStr string
   195  	vals := u.Query()
   196  	if org = vals.Get("org"); org == "" {
   197  		return "", "", 0, fmt.Errorf("no value provided for org")
   198  	}
   199  	if repo = vals.Get("repo"); repo == "" {
   200  		return "", "", 0, fmt.Errorf("no value provided for repo")
   201  	}
   202  	prStr = vals.Get("pr")
   203  	pr, err = strconv.Atoi(prStr)
   204  	if err != nil {
   205  		return "", "", 0, fmt.Errorf("invalid PR number %q: %w", prStr, err)
   206  	}
   207  	return org, repo, pr, nil
   208  }
   209  
   210  // getStorageDirsForPR returns a map from bucket names -> set of "directories" containing presubmit data
   211  func getStorageDirsForPR(c *config.Config, gitHubClient deckGitHubClient, gitClient git.ClientFactory, org, repo, cloneURI string, prNumber int) (map[string]sets.Set[string], error) {
   212  	toSearch := make(map[string]sets.Set[string])
   213  	fullRepo := org + "/" + repo
   214  
   215  	if c.InRepoConfigEnabled(fullRepo) && gitHubClient == nil {
   216  		logrus.Info("Unable to get InRepoConfig PRs for PR History.")
   217  		return nil, nil
   218  	}
   219  	prRefGetter := config.NewRefGetterForGitHubPullRequest(gitHubClient, org, repo, prNumber)
   220  	presubmits, err := c.GetPresubmits(gitClient, org+"/"+repo, "", prRefGetter.BaseSHA, prRefGetter.HeadSHA)
   221  	if err != nil {
   222  		return nil, fmt.Errorf("failed to get Presubmits for pull request %s/%s#%d: %w", org, repo, prNumber, err)
   223  	}
   224  	if len(presubmits) == 0 {
   225  		return toSearch, fmt.Errorf("couldn't find presubmits for %q in config", fullRepo)
   226  	}
   227  
   228  	for _, presubmit := range presubmits {
   229  		var gcsConfig *v1.GCSConfiguration
   230  		if presubmit.DecorationConfig != nil && presubmit.DecorationConfig.GCSConfiguration != nil {
   231  			gcsConfig = presubmit.DecorationConfig.GCSConfiguration
   232  		} else {
   233  			// for undecorated jobs assume the default
   234  			def := c.Plank.GuessDefaultDecorationConfig(fullRepo, presubmit.Cluster)
   235  			if def == nil || def.GCSConfiguration == nil {
   236  				return toSearch, fmt.Errorf("failed to guess gcs config based on default decoration config: %w", err)
   237  			}
   238  			gcsConfig = def.GCSConfiguration
   239  		}
   240  
   241  		gcsPath, _, _ := gcsupload.PathsForJob(gcsConfig, &downwardapi.JobSpec{
   242  			Type: v1.PresubmitJob,
   243  			Job:  presubmit.Name,
   244  			Refs: &v1.Refs{
   245  				Repo: repo,
   246  				Org:  org,
   247  				Pulls: []v1.Pull{
   248  					{Number: prNumber},
   249  				},
   250  			},
   251  		}, "")
   252  		gcsPath, _ = path.Split(path.Clean(gcsPath))
   253  		bucketName := gcsConfig.Bucket
   254  		// bucket is the bucket field of the GCSConfiguration, which means it could be missing the
   255  		// storageProvider prefix (but it's deprecated to use a bucket name without <storage-type>:// prefix)
   256  		if !strings.Contains(bucketName, "://") {
   257  			bucketName = "gs://" + bucketName
   258  		}
   259  		if _, ok := toSearch[bucketName]; !ok {
   260  			toSearch[bucketName] = sets.Set[string]{}
   261  		}
   262  		toSearch[bucketName].Insert(gcsPath)
   263  	}
   264  	return toSearch, nil
   265  }
   266  
// getPRHistory loads every build of every presubmit job run against the PR
// identified by prHistoryURL's query parameters (?org=&repo=&pr=), groups the
// builds by commit, and returns a prHistoryTemplate ready for rendering.
func getPRHistory(ctx context.Context, prHistoryURL *url.URL, config *config.Config, opener pkgio.Opener, gitHubClient deckGitHubClient, gitClient git.ClientFactory, githubHost string) (prHistoryTemplate, error) {
	start := time.Now()
	template := prHistoryTemplate{}

	org, repo, pr, err := parsePullURL(prHistoryURL)
	if err != nil {
		return template, fmt.Errorf("failed to parse URL %s: %w", prHistoryURL.String(), err)
	}
	template.Name = fmt.Sprintf("%s/%s #%d", org, repo, pr)
	template.Link = githubPRLink(githubHost, org, repo, pr) // TODO(ibzib) support Gerrit :/

	// cloneURI is used for cloning Gerrit repos, set it to empty for now as PR
	// history doesn't work for Gerrit yet.
	// TODO(chaodaiG): update once
	// https://github.com/kubernetes/test-infra/issues/24130 is fixed.
	cloneURI := ""
	toSearch, err := getStorageDirsForPR(config, gitHubClient, gitClient, org, repo, cloneURI, pr)
	if err != nil {
		return template, fmt.Errorf("failed to list directories for PR %s: %w", template.Name, err)
	}

	builds := []buildData{}
	// job name -> commit hash -> list of builds
	jobCommitBuilds := make(map[string]map[string][]buildData)

	for bucket, storagePaths := range toSearch {
		// Bucket keys look like "<provider>://<bucket>"; url.Parse splits them
		// into scheme (storage provider) and host (bucket name).
		parsedBucket, err := url.Parse(bucket)
		if err != nil {
			return template, fmt.Errorf("parse bucket %s: %w", bucket, err)
		}
		bucketName := parsedBucket.Host
		storageProvider := parsedBucket.Scheme
		// Note: this shadows the string `bucket` with the storage client for it.
		bucket, err := newBlobStorageBucket(bucketName, storageProvider, config, opener)
		if err != nil {
			return template, err
		}
		for storagePath := range storagePaths {
			jobPrefixes, err := bucket.listSubDirs(ctx, storagePath)
			if err != nil {
				return template, fmt.Errorf("failed to get job names: %w", err)
			}
			// We assume job names to be unique, as enforced during config validation.
			// NOTE(review): if the same job name did surface under more than one
			// storage path, template.Jobs would gain a duplicate entry and its
			// commit map would be reset here — relies on the uniqueness guarantee.
			for _, jobPrefix := range jobPrefixes {
				jobName := path.Base(jobPrefix)
				jobData := prJobData{
					Name: jobName,
					Link: jobHistLink(storageProvider, bucketName, jobName),
				}
				template.Jobs = append(template.Jobs, jobData)
				jobCommitBuilds[jobName] = make(map[string][]buildData)
			}
			jobs := listJobBuilds(ctx, bucket, jobPrefixes)
			builds = append(builds, getPRBuildData(ctx, bucket, jobs)...)
		}
	}

	// Index builds by job and commit; updateCommitData tracks each commit's
	// latest build time and the widest per-job build count (for column padding).
	commits := make(map[string]*commitData)
	for _, build := range builds {
		jobName := build.jobName
		hash := build.commitHash
		jobCommitBuilds[jobName][hash] = append(jobCommitBuilds[jobName][hash], build)
		updateCommitData(commits, githubHost, org, repo, hash, build.Started, len(jobCommitBuilds[jobName][hash]))
	}
	for _, commit := range commits {
		template.Commits = append(template.Commits, *commit)
	}
	// builds are grouped by commit, then sorted by build start time (newest-first)
	sort.Sort(sort.Reverse(latestCommit(template.Commits)))
	for i, job := range template.Jobs {
		for _, commit := range template.Commits {
			builds := jobCommitBuilds[job.Name][commit.Hash]
			sort.Sort(sort.Reverse(byStarted(builds)))
			template.Jobs[i].Builds = append(template.Jobs[i].Builds, builds...)
			// pad empty spaces
			for k := len(builds); k < commit.MaxWidth; k++ {
				template.Jobs[i].Builds = append(template.Jobs[i].Builds, buildData{})
			}
		}
	}

	elapsed := time.Since(start)
	logrus.WithField("duration", elapsed.String()).Infof("loaded %s", prHistoryURL.Path)

	return template, nil
}