github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/pkg/spyglass/lenses/metadata/lens.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package metadata provides a metadata viewer for Spyglass
package metadata

import (
	"bytes"
	"encoding/json"
	"fmt"
	"html/template"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/GoogleCloudPlatform/testgrid/metadata"
	"github.com/sirupsen/logrus"
	v1 "k8s.io/api/core/v1"

	prowv1 "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
	"sigs.k8s.io/prow/pkg/config"
	k8sreporter "sigs.k8s.io/prow/pkg/crier/reporters/gcs/kubernetes"
	"sigs.k8s.io/prow/pkg/spyglass/api"
	"sigs.k8s.io/prow/pkg/spyglass/lenses"
)

const (
	name     = "metadata"
	title    = "Metadata"
	priority = 0
)

// Lens is the implementation of a metadata-rendering Spyglass lens.
type Lens struct{}

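// init registers this lens with the Spyglass lens registry so it can be
// looked up and served under its configured name.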
func init() {
	lenses.RegisterLens(Lens{})
}

// Config returns the lens's configuration.
func (lens Lens) Config() lenses.LensConfig {
	return lenses.LensConfig{
		Title:     title,
		Name:      name,
		Priority:  priority,
		HideTitle: true,
	}
}

// Header renders the <head> from template.html.
func (lens Lens) Header(artifacts []api.Artifact, resourceDir string, config json.RawMessage, spyglassConfig config.Spyglass) string {
	t, err := template.ParseFiles(filepath.Join(resourceDir, "template.html"))
	if err != nil {
		return fmt.Sprintf("<!-- FAILED LOADING HEADER: %v -->", err)
	}
	var buf bytes.Buffer
	if err := t.ExecuteTemplate(&buf, "header", nil); err != nil {
		return fmt.Sprintf("<!-- FAILED EXECUTING HEADER TEMPLATE: %v -->", err)
	}
	return buf.String()
}

// Callback does nothing.
func (lens Lens) Callback(artifacts []api.Artifact, resourceDir string, data string, config json.RawMessage, spyglassConfig config.Spyglass) string {
	return ""
}

// Body creates a view for prow job metadata.
func (lens Lens) Body(artifacts []api.Artifact, resourceDir string, data string, config json.RawMessage, spyglassConfig config.Spyglass) string {
	var buf bytes.Buffer
	type MetadataViewData struct {
		StartTime    time.Time
		FinishedTime time.Time
		Finished     bool
		Passed       bool
		Errored      bool
		Elapsed      time.Duration
		Hint         string
		Metadata     map[string]interface{}
	}
	metadataViewData := MetadataViewData{}
	started := metadata.Started{}
	finished := metadata.Finished{}
	for _, a := range artifacts {
		read, err := a.ReadAll()
		if err != nil {
			logrus.WithError(err).Error("Failed reading from artifact.")
		}
		switch a.JobPath() {
		case prowv1.StartedStatusFile:
			if len(read) > 0 {
				if err = json.Unmarshal(read, &started); err != nil {
					logrus.WithError(err).Error("Error unmarshaling started.json")
				}
				metadataViewData.StartTime = time.Unix(started.Timestamp, 0)
			} else {
				logrus.Debug("Empty started.json")
			}
		case prowv1.FinishedStatusFile:
			if len(read) > 0 {
				if err = json.Unmarshal(read, &finished); err != nil {
					logrus.WithError(err).Error("Error unmarshaling finished.json")
				}
				metadataViewData.Finished = true
				if finished.Timestamp != nil {
					metadataViewData.FinishedTime = time.Unix(*finished.Timestamp, 0)
				}
				if finished.Passed != nil {
					metadataViewData.Passed = *finished.Passed
				} else {
					metadataViewData.Passed = finished.Result == "SUCCESS"
				}
			} else {
				logrus.Debug("Empty finished.json")
			}
		case "podinfo.json":
			metadataViewData.Hint = hintFromPodInfo(read)
		case prowv1.ProwJobFile:
			// Only show the prowjob-based hint if we don't have a pod-based one
			// (the pod-based ones are probably more useful when they exist)
			if metadataViewData.Hint == "" {
				hint, errored := hintFromProwJob(read)
				metadataViewData.Hint = hint
				metadataViewData.Errored = errored
			}
		}
	}

	if !metadataViewData.StartTime.IsZero() {
		if metadataViewData.FinishedTime.IsZero() {
			metadataViewData.Elapsed = time.Since(metadataViewData.StartTime)
		} else {
			metadataViewData.Elapsed =
				metadataViewData.FinishedTime.Sub(metadataViewData.StartTime)
		}
		metadataViewData.Elapsed = metadataViewData.Elapsed.Round(time.Second)
	}

	metadataViewData.Metadata = map[string]interface{}{"node": started.Node}

	metadatas := []metadata.Metadata{started.Metadata, finished.Metadata}
	for _, m := range metadatas {
		for k, v := range lens.flattenMetadata(m) {
			metadataViewData.Metadata[k] = v
		}
	}

	metadataTemplate, err := template.ParseFiles(filepath.Join(resourceDir, "template.html"))
	if err != nil {
		return fmt.Sprintf("Failed to load template: %v", err)
	}

	if err := metadataTemplate.ExecuteTemplate(&buf, "body", metadataViewData); err != nil {
		logrus.WithError(err).Error("Error executing template.")
	}
	return buf.String()
}

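// failedMountRegex extracts the volume name and error detail from a
// FailedMount event's "MountVolume.SetUp failed for volume ..." message;
// see hintFromPodInfo below.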
var failedMountRegex = regexp.MustCompile(`MountVolume.SetUp failed for volume "(.+?)" : (.+)`)

func hintFromPodInfo(buf []byte) string {
	var report k8sreporter.PodReport
	if err := json.Unmarshal(buf, &report); err != nil {
		logrus.WithError(err).Info("Failed to decode podinfo.json")
		// This error isn't worth highlighting here, and will be reported in the
		// podinfo lens if that is enabled.
		return ""
	}

	// We're more likely to pick a relevant event if we use the last ones first.
	sort.Slice(report.Events, func(i, j int) bool {
		a := &report.Events[i]
		b := &report.Events[j]
		return b.LastTimestamp.Before(&a.LastTimestamp)
	})

	// If the pod completed successfully there's probably not much to say.
	if report.Pod.Status.Phase == v1.PodSucceeded {
		return ""
	}
	// Check if we have any images that didn't pull
	for _, s := range append(report.Pod.Status.InitContainerStatuses, report.Pod.Status.ContainerStatuses...) {
		if s.State.Waiting != nil && (s.State.Waiting.Reason == "ImagePullBackOff" || s.State.Waiting.Reason == "ErrImagePull") {
			return fmt.Sprintf("The %s container could not start because it could not pull %q. Check your images. Full message: %q", s.Name, s.Image, s.State.Waiting.Message)
		}
	}
	// Check if we're trying to mount a volume
	if report.Pod.Status.Phase == v1.PodPending {
		failedMount := false
		for _, e := range report.Events {
			if e.Reason == "FailedMount" {
				failedMount = true
				if strings.HasPrefix(e.Message, "MountVolume.SetUp") {
					// Annoyingly, parsing this message is the only way to get this information.
					// If we can't parse it, we'll fall through to a generic bad volume message below.
					results := failedMountRegex.FindStringSubmatch(e.Message)
					if results == nil {
						continue
					}
					return fmt.Sprintf("The pod could not start because it could not mount the volume %q: %s", results[1], results[2])
				}
			}
		}
		if failedMount {
			return "The job could not start because one or more of the volumes could not be mounted."
		}
	}
	// Check if we cannot be scheduled
	// This is unlikely - we only outright fail if a pod is actually scheduled to a node that can't support it.
	if report.Pod.Status.Phase == v1.PodFailed && report.Pod.Status.Reason == "MatchNodeSelector" {
		return "The job could not start because it was scheduled to a node that does not satisfy its NodeSelector"
	}
	// Usually we would fail to schedule it at all, so it will be pending forever.
	if report.Pod.Status.Phase == v1.PodPending {
		for _, e := range report.Events {
			if e.Reason == "FailedScheduling" {
				return fmt.Sprintf("There are no nodes that your pod can schedule to - check your requests, tolerations, and node selectors (%s)", e.Message)
			}
		}
	}

	// There are a bunch of fun ways for the node to fail that we've seen before
	for _, e := range report.Events {
		if e.Reason == "FailedCreatePodSandbox" || e.Reason == "FailedSync" {
			return "The job may have executed on an unhealthy node. Contact your prow maintainers with a link to this page or check the detailed pod information."
		}
	}

	// There are cases where init containers fail to start
	var msgs []string
	for _, ic := range report.Pod.Status.InitContainerStatuses {
		if ic.Ready {
			continue
		}
		var msg string
		// Init container not ready by the time this job failed.
		// The three states should be mutually exclusive; if more than one is
		// somehow set, use the most severe one.
		if state := ic.State.Terminated; state != nil {
			msg = fmt.Sprintf("state: terminated, reason: %q, message: %q", state.Reason, state.Message)
		} else if state := ic.State.Waiting; state != nil {
			msg = fmt.Sprintf("state: waiting, reason: %q, message: %q", state.Reason, state.Message)
		} else if state := ic.State.Running; state != nil {
			// Yes, this is weird, but it did happen: https://github.com/kubernetes/test-infra/issues/21985
			msg = "state: running"
		}
		msgs = append(msgs, fmt.Sprintf("Init container %s not ready: (%s)", ic.Name, msg))
	}
	return strings.Join(msgs, "\n")
}

func hintFromProwJob(buf []byte) (string, bool) {
	var pj prowv1.ProwJob
	if err := json.Unmarshal(buf, &pj); err != nil {
		logrus.WithError(err).Infof("Failed to decode %s", prowv1.ProwJobFile)
		return "", false
	}

	if pj.Status.State == prowv1.ErrorState {
		return fmt.Sprintf("Job execution failed: %s", pj.Status.Description), true
	}

	return "", false
}

// flattenMetadata flattens the metadata for use by Body.
func (lens Lens) flattenMetadata(metadata map[string]interface{}) map[string]string {
	results := map[string]string{}

	for k1, v1 := range metadata {
		if s, ok := v1.(map[string]interface{}); ok && len(s) > 0 {
			subObjectResults := lens.flattenMetadata(s)
			for k2, v2 := range subObjectResults {
				results[fmt.Sprintf("%s.%s", k1, k2)] = v2
			}
		} else if s, ok := v1.(string); ok && s != "" { // We ought to consider relaxing this so that non-strings will be considered
			results[k1] = s
		}
	}

	return results
}
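
// For illustration, suppose started.json carried nested metadata such as
//
//	{"repo": "org/repo", "links": {"resultstore": {"url": "https://example/..."}}}
//
// flattenMetadata would then produce dotted, flat keys for the template:
//
//	"repo":                  "org/repo"
//	"links.resultstore.url": "https://example/..."
//
// Non-string leaves (numbers, booleans) are currently dropped by the string
// type assertion above.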