github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/cmd/deck/job_history.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/url"
	"path"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/GoogleCloudPlatform/testgrid/metadata"
	"github.com/sirupsen/logrus"

	prowv1 "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
	"sigs.k8s.io/prow/pkg/config"
	pkgio "sigs.k8s.io/prow/pkg/io"
	"sigs.k8s.io/prow/pkg/io/providers"
	"sigs.k8s.io/prow/pkg/pod-utils/gcs"
)

const (
	resultsPerPage  = 20
	idParam         = "buildId"
	latestBuildFile = "latest-build.txt"

	// Job history assumes the GCS layout specified here:
	// https://github.com/kubernetes/test-infra/tree/master/gubernator#gcs-bucket-layout
	logsPrefix     = gcs.NonPRLogs
	spyglassPrefix = "/view"
	emptyID        = uint64(0) // indicates no build id was specified
)

var (
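	// linkRe extracts the build ID from a PR-logs symlink object name,
	// e.g. ".../1248207834168954881.txt".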
	linkRe = regexp.MustCompile(`/([0-9]+)\.txt$`)
)

type buildData struct {
	index        int
	jobName      string
	prefix       string
	SpyglassLink string
	ID           string
	Started      time.Time
	Duration     time.Duration
	Result       string
	commitHash   string
	Refs         *prowv1.Refs
}

// storageBucket abstracts the underlying blob storage so that it can be faked in unit tests.
type storageBucket interface {
	getName() string
	getStorageProvider() string
	listSubDirs(ctx context.Context, prefix string) ([]string, error)
	listAll(ctx context.Context, prefix string) ([]string, error)
	readObject(ctx context.Context, key string) ([]byte, error)
}
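
// Illustrative only: a minimal in-memory fake of storageBucket, of the kind a
// unit test might use. The type and its contents are hypothetical, not part of Deck.
type fakeBucket struct {
	name    string
	objects map[string][]byte // object key -> contents
}

func (f fakeBucket) getName() string            { return f.name }
func (f fakeBucket) getStorageProvider() string { return providers.GS }

func (f fakeBucket) readObject(_ context.Context, key string) ([]byte, error) {
	if data, ok := f.objects[key]; ok {
		return data, nil
	}
	return nil, fmt.Errorf("object %s does not exist", key)
}

func (f fakeBucket) listAll(_ context.Context, prefix string) ([]string, error) {
	if !strings.HasSuffix(prefix, "/") {
		prefix += "/"
	}
	var keys []string
	for k := range f.objects {
		if strings.HasPrefix(k, prefix) {
			keys = append(keys, k)
		}
	}
	sort.Strings(keys)
	return keys, nil
}

func (f fakeBucket) listSubDirs(_ context.Context, prefix string) ([]string, error) {
	if !strings.HasSuffix(prefix, "/") {
		prefix += "/"
	}
	// Collect the distinct first-level "directories" under prefix, with a
	// trailing slash, mirroring what the real iterator reports for dirs.
	dirs := map[string]bool{}
	for k := range f.objects {
		if rest := strings.TrimPrefix(k, prefix); rest != k && strings.Contains(rest, "/") {
			dirs[prefix+strings.SplitN(rest, "/", 2)[0]+"/"] = true
		}
	}
	var out []string
	for d := range dirs {
		out = append(out, d)
	}
	sort.Strings(out)
	return out, nil
}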

// blobStorageBucket is our real implementation of storageBucket.
// Use `newBlobStorageBucket` to instantiate (includes bucket-level validation).
type blobStorageBucket struct {
	name            string
	storageProvider string
	pkgio.Opener
}

// newBlobStorageBucket validates the bucketName and returns a new instance of blobStorageBucket.
func newBlobStorageBucket(bucketName, storageProvider string, config *config.Config, opener pkgio.Opener) (blobStorageBucket, error) {
	if err := config.ValidateStorageBucket(bucketName); err != nil {
		return blobStorageBucket{}, fmt.Errorf("could not instantiate storage bucket: %w", err)
	}
	return blobStorageBucket{bucketName, storageProvider, opener}, nil
}
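
// A minimal usage sketch (bucket name hypothetical; cfg and opener as in getJobHistory below):
//
//	bucket, err := newBlobStorageBucket("kubernetes-jenkins", providers.GS, cfg(), opener)
//	if err != nil {
//		// bucketName failed Deck's storage-bucket validation
//	}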

type jobHistoryTemplate struct {
	OlderLink    string
	NewerLink    string
	LatestLink   string
	Name         string
	ResultsShown int
	ResultsTotal int
	Builds       []buildData
}

func (bucket blobStorageBucket) readObject(ctx context.Context, key string) ([]byte, error) {
	u := url.URL{
		Scheme: bucket.storageProvider,
		Host:   bucket.name,
		Path:   key,
	}
	rc, err := bucket.Opener.Reader(ctx, u.String())
	if err != nil {
		return nil, fmt.Errorf("creating reader for object %s: %w", key, err)
	}
	defer rc.Close()
	return io.ReadAll(rc)
}
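
// For example, reading key "logs/ci-foo/123/started.json" from bucket
// "kubernetes-jenkins" with provider "gs" opens
// "gs://kubernetes-jenkins/logs/ci-foo/123/started.json" (names hypothetical).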

func (bucket blobStorageBucket) getName() string {
	return bucket.name
}

func (bucket blobStorageBucket) getStorageProvider() string {
	return bucket.storageProvider
}

func readLatestBuild(ctx context.Context, bucket storageBucket, root string) (uint64, error) {
	key := path.Join(root, latestBuildFile)
	data, err := bucket.readObject(ctx, key)
	if err != nil {
		return emptyID, fmt.Errorf("failed to read %s: %w", key, err)
	}
	n, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
	if err != nil {
		return emptyID, fmt.Errorf("failed to parse %s: %w", key, err)
	}
	return n, nil
}
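
// For example, if <root>/latest-build.txt contains "1248207834168954881\n",
// readLatestBuild returns 1248207834168954881.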

// resolveSymLink resolves a symlink object into the actual log directory for a
// particular test run. The symlink object's content is a global address, e.g.:
// * content: gs://prow-artifacts/pr-logs/pull/cluster-api-provider-openstack/1687/bazel-build/1248207834168954881
// * output:  pr-logs/pull/cluster-api-provider-openstack/1687/bazel-build/1248207834168954881
func (bucket blobStorageBucket) resolveSymLink(ctx context.Context, symLink string) (string, error) {
	data, err := bucket.readObject(ctx, symLink)
	if err != nil {
		return "", fmt.Errorf("failed to read %s: %w", symLink, err)
	}
	// strip the scheme and bucket name (e.g. gs://<bucket-name>) from the global address `u`
	u := strings.TrimSpace(string(data))
	parsedURL, err := url.Parse(u)
	if err != nil {
		return "", err
	}
	return strings.TrimPrefix(parsedURL.Path, "/"), nil
}

func (bucket blobStorageBucket) spyglassLink(ctx context.Context, root, id string) (string, error) {
	p, err := bucket.getPath(ctx, root, id, "")
	if err != nil {
		return "", fmt.Errorf("failed to get path: %w", err)
	}
	return path.Join(spyglassPrefix, bucket.storageProvider, bucket.name, p), nil
}

// getPath returns the object path of fname for the given build. Non-PR jobs
// (roots under logsPrefix) lay builds out directly as <root>/<id>/..., while
// PR jobs store a <root>/<id>.txt symlink that must be resolved first.
func (bucket blobStorageBucket) getPath(ctx context.Context, root, id, fname string) (string, error) {
	if strings.HasPrefix(root, logsPrefix) {
		return path.Join(root, id, fname), nil
	}
	symLink := path.Join(root, id+".txt")
	dir, err := bucket.resolveSymLink(ctx, symLink)
	if err != nil {
		return "", fmt.Errorf("failed to resolve sym link: %w", err)
	}
	return path.Join(dir, fname), nil
}

// readJSON reads the specified JSON file into `data`.
func readJSON(ctx context.Context, bucket storageBucket, key string, data interface{}) error {
	rawData, err := bucket.readObject(ctx, key)
	if err != nil {
		return fmt.Errorf("failed to read %s: %w", key, err)
	}
	err = json.Unmarshal(rawData, &data)
	if err != nil {
		return fmt.Errorf("failed to parse %s: %w", key, err)
	}
	return nil
}

// listSubDirs lists the "directory paths" immediately under prefix.
func (bucket blobStorageBucket) listSubDirs(ctx context.Context, prefix string) ([]string, error) {
	if !strings.HasSuffix(prefix, "/") {
		prefix += "/"
	}
	it, err := bucket.Opener.Iterator(ctx, fmt.Sprintf("%s://%s/%s", bucket.storageProvider, bucket.name, prefix), "/")
	if err != nil {
		return nil, err
	}

	dirs := []string{}
	for {
		attrs, err := it.Next(ctx)
		if err != nil {
			if err == io.EOF {
				break
			}
			return dirs, err
		}
		if attrs.IsDir {
			dirs = append(dirs, attrs.Name)
		}
	}
	return dirs, nil
}

// listAll lists all keys with the given prefix.
func (bucket blobStorageBucket) listAll(ctx context.Context, prefix string) ([]string, error) {
	if !strings.HasSuffix(prefix, "/") {
		prefix += "/"
	}
	it, err := bucket.Opener.Iterator(ctx, fmt.Sprintf("%s://%s/%s", bucket.storageProvider, bucket.name, prefix), "")
	if err != nil {
		return nil, err
	}

	keys := []string{}
	for {
		attrs, err := it.Next(ctx)
		if err != nil {
			if err == io.EOF {
				break
			}
			return keys, err
		}
		keys = append(keys, attrs.Name)
	}
	return keys, nil
}

// listBuildIDs gets all build IDs for a job.
func (bucket blobStorageBucket) listBuildIDs(ctx context.Context, root string) ([]uint64, error) {
	var ids []uint64
	if strings.HasPrefix(root, logsPrefix) {
		dirs, listErr := bucket.listSubDirs(ctx, root)
		for _, dir := range dirs {
			leaf := path.Base(dir)
			i, err := strconv.ParseUint(leaf, 10, 64)
			if err == nil {
				ids = append(ids, i)
			} else {
				logrus.WithFields(logrus.Fields{"gcs-path": dir, "dir-name": leaf}).Debug("Unrecognized directory name (expected uint64)")
			}
		}
		if listErr != nil {
			return ids, fmt.Errorf("failed to list directories: %w", listErr)
		}
	} else {
		keys, listErr := bucket.listAll(ctx, root)
		for _, key := range keys {
			matches := linkRe.FindStringSubmatch(key)
			if len(matches) == 2 {
				i, err := strconv.ParseUint(matches[1], 10, 64)
				if err == nil {
					ids = append(ids, i)
				} else {
					logrus.Warningf("unrecognized file name (expected <uint64>.txt): %s", key)
				}
			}
		}
		if listErr != nil {
			return ids, fmt.Errorf("failed to list keys: %w", listErr)
		}
	}
	return ids, nil
}
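
// Illustrative layouts (paths hypothetical):
// * non-PR jobs: logs/ci-foo/1248207834168954881/... directories, so build IDs
//   come from listSubDirs.
// * PR jobs:     pr-logs/directory/pull-foo/1248207834168954881.txt symlink
//   objects, matched against linkRe.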

// parseJobHistURL parses the job history URL.
// Example URLs:
// * new format: https://prow.k8s.io/job-history/gs/kubernetes-jenkins/pr-logs/directory/pull-capi?buildId=1245584383100850177
// * old format: https://prow.k8s.io/job-history/kubernetes-jenkins/pr-logs/directory/pull-capi?buildId=1245584383100850177
// Newly generated URLs include the storageProvider. We still support old URLs so they don't break;
// for those we assume that the storageProvider is `gs`.
// Example return values:
// * storageProvider: gs, s3
// * bucketName: kubernetes-jenkins
// * root: pr-logs/directory/pull-capi
// * buildID: 1245584383100850177
func parseJobHistURL(url *url.URL) (storageProvider, bucketName, root string, buildID uint64, err error) {
	buildID = emptyID
	p := strings.TrimPrefix(url.Path, "/job-history/")
	// examples for p:
	// * new format: gs/kubernetes-jenkins/pr-logs/directory/pull-cluster-api-provider-openstack-test
	// * old format: kubernetes-jenkins/pr-logs/directory/pull-cluster-api-provider-openstack-test

	// inject gs/ if the old format is used
	if !providers.HasStorageProviderPrefix(p) {
		p = fmt.Sprintf("%s/%s", providers.GS, p)
	}

	// handle the new format
	s := strings.SplitN(p, "/", 3)
	if len(s) < 3 {
		err = fmt.Errorf("invalid path (expected either /job-history/<gcs-path> or /job-history/<storage-type>/<storage-path>): %v", url.Path)
		return
	}
	storageProvider = s[0]
	bucketName = s[1]
	root = s[2] // `root` is the root "directory" prefix for this job's results

	if bucketName == "" {
		err = fmt.Errorf("missing bucket name: %v", url.Path)
		return
	}
	if root == "" {
		err = fmt.Errorf("invalid path for job: %v", url.Path)
		return
	}

	if idVals := url.Query()[idParam]; len(idVals) >= 1 && idVals[0] != "" {
		buildID, err = strconv.ParseUint(idVals[0], 10, 64)
		if err != nil {
			err = fmt.Errorf("invalid value for %s: %w", idParam, err)
			return
		}
		if buildID < 1 {
			err = fmt.Errorf("invalid value %s = %d", idParam, buildID)
			return
		}
	}

	return
}
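
// A quick sketch of the mapping (values from the doc comment above):
//
//	u, _ := url.Parse("https://prow.k8s.io/job-history/gs/kubernetes-jenkins/pr-logs/directory/pull-capi?buildId=1245584383100850177")
//	sp, bucket, root, id, err := parseJobHistURL(u)
//	// sp == "gs", bucket == "kubernetes-jenkins",
//	// root == "pr-logs/directory/pull-capi", id == 1245584383100850177, err == nil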

// linkID returns a copy of url whose buildId query parameter is set to id,
// or cleared when id is emptyID (an emptyID link points at the most recent page).
func linkID(url *url.URL, id uint64) string {
	u := *url
	q := u.Query()
	var val string
	if id != emptyID {
		val = strconv.FormatUint(id, 10)
	}
	q.Set(idParam, val)
	u.RawQuery = q.Encode()
	return u.String()
}

// getBuildData summarizes a single run by reading started.json, finished.json,
// and prowjob.json from dir; a missing finished.json marks the build Pending
// instead of failing the whole page.
func getBuildData(ctx context.Context, bucket storageBucket, dir string) (buildData, error) {
	b := buildData{
		Result:     "Unknown",
		commitHash: "Unknown",
	}
	started := metadata.Started{}
	err := readJSON(ctx, bucket, path.Join(dir, prowv1.StartedStatusFile), &started)
	if err != nil {
		return b, fmt.Errorf("failed to read started.json: %w", err)
	}
	b.Started = time.Unix(started.Timestamp, 0)
	finished := metadata.Finished{}
	err = readJSON(ctx, bucket, path.Join(dir, prowv1.FinishedStatusFile), &finished)
	if err != nil {
		b.Result = "Pending"
		// If started.Pull is a bare PR number, swap in the full repo ref string
		// (which embeds the pull's commit hash) so getPullCommitHash can parse it.
		for _, ref := range started.Repos {
			if strings.Contains(ref, ","+started.Pull+":") {
				started.Pull = ref
				break
			}
		}
		logrus.WithError(err).Debugf("failed to read finished.json (job might be unfinished)")
	}

	pj := prowv1.ProwJob{}
	err = readJSON(ctx, bucket, path.Join(dir, prowv1.ProwJobFile), &pj)
	if err != nil {
		logrus.WithError(err).Debugf("failed to read %s", prowv1.ProwJobFile)
	} else if pj.Spec.Refs != nil {
		b.Refs = pj.Spec.Refs
	}

	if commitHash, err := getPullCommitHash(started.Pull); err == nil {
		b.commitHash = commitHash
	}

	// Testgrid metadata.Finished is deprecating the Revision field; however,
	// the actual finished.json still uses revision, which maps to DeprecatedRevision.
	// TODO(ttyang): update both to match when fejta completely removes DeprecatedRevision.
	if finished.DeprecatedRevision != "" {
		b.commitHash = finished.DeprecatedRevision
	}

	if finished.Timestamp != nil {
		b.Duration = time.Unix(*finished.Timestamp, 0).Sub(b.Started)
	} else {
		b.Duration = time.Since(b.Started).Round(time.Second)
	}
	if finished.Result != "" {
		b.Result = finished.Result
	}
	return b, nil
}
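
// For example, started.Timestamp == 1600000000 and *finished.Timestamp == 1600000300
// yield a Duration of 5m0s; a missing finished.json leaves Result "Pending" with a
// live-updating Duration instead.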

// cropResults assumes a is sorted in descending order. It returns up to
// resultsPerPage elements of a that are <= max, along with the first and last
// indices (inclusive) of that subslice within a.
func cropResults(a []uint64, max uint64) ([]uint64, int, int) {
	res := []uint64{}
	firstIndex := -1
	lastIndex := 0
	for i, v := range a {
		if v <= max {
			res = append(res, v)
			if firstIndex == -1 {
				firstIndex = i
			}
			lastIndex = i
			if len(res) >= resultsPerPage {
				break
			}
		}
	}
	return res, firstIndex, lastIndex
}
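
// A worked example (the resultsPerPage cap is not hit here):
//
//	cropResults([]uint64{9, 8, 7, 6, 5}, 7) // returns []uint64{7, 6, 5}, 2, 4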

// golang <3
type uint64slice []uint64

func (a uint64slice) Len() int           { return len(a) }
func (a uint64slice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a uint64slice) Less(i, j int) bool { return a[i] < a[j] }

// getJobHistory gets the job history from the bucket specified in config.
func getJobHistory(ctx context.Context, url *url.URL, cfg config.Getter, opener pkgio.Opener) (jobHistoryTemplate, error) {
	start := time.Now()
	tmpl := jobHistoryTemplate{}

	storageProvider, bucketName, root, top, err := parseJobHistURL(url)
	if err != nil {
		return tmpl, fmt.Errorf("invalid url %s: %w", url.String(), err)
	}

	if bucketAlias, exists := cfg().Deck.Spyglass.BucketAliases[bucketName]; exists {
		bucketName = bucketAlias
	}

	bucket, err := newBlobStorageBucket(bucketName, storageProvider, cfg(), opener)
	if err != nil {
		return tmpl, err
	}
	tmpl.Name = root
	latest, err := readLatestBuild(ctx, bucket, root)
	if err != nil {
		return tmpl, fmt.Errorf("failed to locate build data: %w", err)
	}
	if top == emptyID || top > latest {
		top = latest
	}
	if top != latest {
		tmpl.LatestLink = linkID(url, emptyID)
	}

	// Don't spend an unbounded amount of time finding a potentially huge history.
	buildIDListCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()
	buildIDs, err := bucket.listBuildIDs(buildIDListCtx, root)
	if err != nil && !errors.Is(err, context.DeadlineExceeded) {
		return tmpl, fmt.Errorf("failed to get build ids: %w", err)
	}

	sort.Sort(sort.Reverse(uint64slice(buildIDs)))

	// determine which results to display on this page
	shownIDs, firstIndex, lastIndex := cropResults(buildIDs, top)

	// get links to the neighboring pages
	if firstIndex > 0 {
		nextIndex := firstIndex - resultsPerPage
		// here emptyID indicates the most recent build, which will not necessarily be buildIDs[0]
		next := emptyID
		if nextIndex >= 0 {
			next = buildIDs[nextIndex]
		}
		tmpl.NewerLink = linkID(url, next)
	}
	if lastIndex < len(buildIDs)-1 {
		tmpl.OlderLink = linkID(url, buildIDs[lastIndex+1])
	}

	tmpl.Builds = make([]buildData, len(shownIDs))
	tmpl.ResultsShown = len(shownIDs)
	tmpl.ResultsTotal = len(buildIDs)

	// Concurrently fetch data for all of the builds to be shown: fan out one
	// goroutine per build, then fan the results back in below, using b.index
	// to keep the original (descending) order.
	bch := make(chan buildData)
	for i, buildID := range shownIDs {
		go func(i int, buildID uint64) {
			id := strconv.FormatUint(buildID, 10)
			dir, err := bucket.getPath(ctx, root, id, "")
			if err != nil {
				if !pkgio.IsNotExist(err) {
					logrus.WithError(err).Error("Failed to get path")
				}
				bch <- buildData{}
				return
			}
			b, err := getBuildData(ctx, bucket, dir)
			if err != nil {
				if pkgio.IsNotExist(err) {
					logrus.WithError(err).WithField("build-id", buildID).Debug("Build information incomplete.")
				} else {
					logrus.WithError(err).WithField("build-id", buildID).Warning("Build information incomplete.")
				}
			}
			b.index = i
			b.ID = id
			b.SpyglassLink, err = bucket.spyglassLink(ctx, root, id)
			if err != nil {
				logrus.WithError(err).Errorf("failed to get spyglass link")
			}
			bch <- b
		}(i, buildID)
	}
	for i := 0; i < len(shownIDs); i++ {
		b := <-bch
		tmpl.Builds[b.index] = b
	}

	elapsed := time.Since(start)
	logrus.Infof("loaded %s in %v", url.Path, elapsed)
	return tmpl, nil
}
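
// Illustrative only: how an HTTP handler might wire getJobHistory in (a
// hypothetical sketch; Deck's real handler and template rendering live elsewhere
// in cmd/deck):
//
//	func handleJobHistory(cfg config.Getter, opener pkgio.Opener) http.HandlerFunc {
//		return func(w http.ResponseWriter, r *http.Request) {
//			tmpl, err := getJobHistory(r.Context(), r.URL, cfg, opener)
//			if err != nil {
//				http.Error(w, err.Error(), http.StatusInternalServerError)
//				return
//			}
//			// render tmpl with the job-history HTML template here
//			_ = tmpl
//		}
//	}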