sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/sidecar/run.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package sidecar
    18  
    19  import (
    20  	"bufio"
    21  	"bytes"
    22  	"context"
    23  	"encoding/json"
    24  	"fmt"
    25  	"io"
    26  	"os"
    27  	"os/signal"
    28  	"strings"
    29  	"sync"
    30  	"syscall"
    31  	"time"
    32  
    33  	"github.com/sirupsen/logrus"
    34  	"sigs.k8s.io/prow/pkg/flagutil"
    35  	"sigs.k8s.io/prow/pkg/logrusutil"
    36  	"sigs.k8s.io/prow/pkg/pjutil/pprof"
    37  
    38  	prowv1 "sigs.k8s.io/prow/pkg/apis/prowjobs/v1"
    39  	"sigs.k8s.io/prow/pkg/entrypoint"
    40  	"sigs.k8s.io/prow/pkg/pod-utils/downwardapi"
    41  	"sigs.k8s.io/prow/pkg/pod-utils/gcs"
    42  	"sigs.k8s.io/prow/pkg/pod-utils/wrapper"
    43  
    44  	testgridmetadata "github.com/GoogleCloudPlatform/testgrid/metadata"
    45  )
    46  
// LogFileName is the destination name under which the sidecar's own
// log file is uploaded (see doUpload).
const LogFileName = "sidecar-logs.json"
    48  
    49  func LogSetup() (*os.File, error) {
    50  	logrusutil.ComponentInit()
    51  	logrus.SetLevel(logrus.DebugLevel)
    52  	logFile, err := os.CreateTemp("", "sidecar-logs*.txt")
    53  	if err == nil {
    54  		logrus.SetOutput(io.MultiWriter(os.Stderr, logFile))
    55  	}
    56  	return logFile, err
    57  }
    58  
    59  func nameEntry(idx int, opt wrapper.Options) string {
    60  	return fmt.Sprintf("entry %d: %s", idx, strings.Join(opt.Args, " "))
    61  }
    62  
    63  func wait(ctx context.Context, entries []wrapper.Options) (bool, bool, int) {
    64  
    65  	var paths []string
    66  
    67  	for _, opt := range entries {
    68  		paths = append(paths, opt.MarkerFile)
    69  	}
    70  
    71  	results := wrapper.WaitForMarkers(ctx, paths...)
    72  
    73  	passed := true
    74  	var aborted bool
    75  	var failures int
    76  
    77  	for _, res := range results {
    78  		passed = passed && res.Err == nil && res.ReturnCode == 0
    79  		aborted = aborted || res.ReturnCode == entrypoint.AbortedErrorCode
    80  		if res.ReturnCode != 0 && res.ReturnCode != entrypoint.PreviousErrorCode {
    81  			failures++
    82  		}
    83  	}
    84  
    85  	return passed, aborted, failures
    86  
    87  }
    88  
// Run will watch for the process being wrapped to exit
// and then post the status of that process and any artifacts
// to cloud storage.
//
// It returns the number of failed entries along with any upload error.
// While waiting it installs a SIGINT/SIGTERM handler: unless
// o.IgnoreInterrupts is set, an interrupt triggers an immediate
// best-effort upload so SOME artifacts reach storage even if the
// kubelet kills the pod before the wrapped processes finish.
func (o Options) Run(ctx context.Context, logFile *os.File) (int, error) {
	// Optionally emit periodic memory profiles for debugging.
	if o.WriteMemoryProfile {
		pprof.WriteMemoryProfiles(flagutil.DefaultMemoryProfileInterval)
	}
	spec, err := downwardapi.ResolveSpecFromEnv()
	if err != nil {
		return 0, fmt.Errorf("could not resolve job spec: %w", err)
	}

	entries := o.entries()
	// once is shared between the interrupt-path upload and the final
	// upload so doUpload's one-time log-file teardown runs exactly once.
	var once sync.Once

	ctx, cancel := context.WithCancel(ctx)

	interrupt := make(chan os.Signal, 1)
	signal.Notify(interrupt, os.Interrupt, syscall.SIGTERM)
	go func() {
		select {
		case s := <-interrupt:
			if o.IgnoreInterrupts {
				logrus.Warnf("Received an interrupt: %s, ignoring...", s)
			} else {
				// If we are being asked to terminate by the kubelet but we have
				// NOT seen the test process exit cleanly, we need to start
				// uploading artifacts to GCS immediately. If we notice the process
				// exit while doing this best-effort upload, we can race with the
				// second upload but we can tolerate this as we'd rather get SOME
				// data into GCS than attempt to cancel these uploads and get none.
				logrus.Errorf("Received an interrupt: %s, cancelling...", s)

				// perform pre upload tasks
				o.preUpload()

				buildLogs := logReadersFuncs(entries)
				metadata := combineMetadata(entries)

				// Perform best-effort upload (passed=false, aborted=true).
				err := o.doUpload(ctx, spec, false, true, metadata, buildLogs, logFile, &once)
				if err != nil {
					logrus.WithError(err).Error("Failed to perform best-effort upload")
				} else {
					// NOTE(review): success is logged at Error level here —
					// possibly intended to be Info; confirm upstream intent.
					logrus.Error("Best-effort upload was successful")
				}
			}
		case <-ctx.Done():
			// Normal completion: wait() returned and cancel() fired, so
			// the handler goroutine exits without acting on a signal.
		}
	}()

	// Block until every wrapped process has written its marker file.
	passed, aborted, failures := wait(ctx, entries)

	cancel()
	// If we are being asked to terminate by the kubelet but we have
	// seen the test process exit cleanly, we need a chance to upload
	// artifacts to GCS. The only valid way for this program to exit
	// after a SIGINT or SIGTERM in this situation is to finish
	// uploading, so we ignore the signals.
	signal.Ignore(os.Interrupt, syscall.SIGTERM)

	o.preUpload()

	buildLogs := logReadersFuncs(entries)
	metadata := combineMetadata(entries)
	// Use a fresh background context so the final upload cannot be
	// cancelled by the (already cancelled) watch context above.
	return failures, o.doUpload(context.Background(), spec, passed, aborted, metadata, buildLogs, logFile, &once)
}
   156  
// errorKey is the metadata key under which per-entry metadata
// collection errors are reported by combineMetadata.
const errorKey = "sidecar-errors"
   158  
   159  func logReadersFuncs(entries []wrapper.Options) map[string]gcs.ReaderFunc {
   160  	readerFuncs := make(map[string]gcs.ReaderFunc)
   161  	for _, opt := range entries {
   162  		opt := opt
   163  		f := func() (io.ReadCloser, error) {
   164  			log, err := os.Open(opt.ProcessLog)
   165  			if err != nil {
   166  				logrus.WithError(err).Errorf("Failed to open %s", opt.ProcessLog)
   167  				r := strings.NewReader(fmt.Sprintf("Failed to open %s: %v\n", opt.ProcessLog, err))
   168  				return io.NopCloser(r), nil
   169  			} else {
   170  				return log, nil
   171  			}
   172  		}
   173  		buildLog := "build-log.txt"
   174  		if len(entries) > 1 {
   175  			buildLog = fmt.Sprintf("%s-build-log.txt", opt.ContainerName)
   176  		}
   177  		readerFuncs[buildLog] = f
   178  	}
   179  	return readerFuncs
   180  }
   181  
   182  func combineMetadata(entries []wrapper.Options) map[string]interface{} {
   183  	errors := map[string]error{}
   184  	metadata := map[string]interface{}{}
   185  	for i, opt := range entries {
   186  		ent := nameEntry(i, opt)
   187  		metadataFile := opt.MetadataFile
   188  		if _, err := os.Stat(metadataFile); err != nil {
   189  			if !os.IsNotExist(err) {
   190  				logrus.WithError(err).Errorf("Failed to stat %s", metadataFile)
   191  				errors[ent] = err
   192  			}
   193  			continue
   194  		}
   195  		metadataRaw, err := os.ReadFile(metadataFile)
   196  		if err != nil {
   197  			logrus.WithError(err).Errorf("cannot read %s", metadataFile)
   198  			errors[ent] = err
   199  			continue
   200  		}
   201  
   202  		piece := map[string]interface{}{}
   203  		if err := json.Unmarshal(metadataRaw, &piece); err != nil {
   204  			logrus.WithError(err).Errorf("Failed to unmarshal %s", metadataFile)
   205  			errors[ent] = err
   206  			continue
   207  		}
   208  
   209  		for k, v := range piece {
   210  			metadata[k] = v // TODO(fejta): consider deeper merge
   211  		}
   212  	}
   213  	if len(errors) > 0 {
   214  		metadata[errorKey] = errors
   215  	}
   216  	return metadata
   217  }
   218  
   219  // preUpload peforms steps required before actual upload
   220  func (o Options) preUpload() {
   221  	if o.DeprecatedWrapperOptions != nil {
   222  		// This only fires if the prowjob controller and sidecar are at different commits
   223  		logrus.Warn("Using deprecated wrapper_options instead of entries. Please update prow/pod-utils/decorate before June 2019")
   224  	}
   225  
   226  	if o.CensoringOptions != nil {
   227  		if err := o.censor(); err != nil {
   228  			logrus.WithError(err).Warn("Failed to censor data")
   229  		}
   230  	}
   231  }
   232  
// doUpload uploads the wrapped processes' build logs, the sidecar's own
// log, and finished.json (result, timestamp, metadata) via o.GcsOptions.
// It is invoked both from Run's interrupt handler (best-effort) and
// after the wrapped processes finish; the shared once ensures the
// sidecar log file is flushed and closed exactly one time across those
// two invocations.
func (o Options) doUpload(ctx context.Context, spec *downwardapi.JobSpec, passed, aborted bool, metadata map[string]interface{}, logReadersFuncs map[string]gcs.ReaderFunc, logFile *os.File, once *sync.Once) error {
	startTime := time.Now()
	logrus.Info("Starting to upload")
	uploadTargets := make(map[string]gcs.UploadFunc)

	defer func() {
		logrus.WithField("duration", time.Since(startTime).String()).Info("Finished uploading")
	}()

	// One upload target per wrapped process's build log.
	for logName, readerFunc := range logReadersFuncs {
		uploadTargets[logName] = gcs.DataUpload(readerFunc)
	}

	// Capture the path before closing; the lazy reader below re-opens it.
	logFileName := logFile.Name()

	// Stop teeing logrus output into the file, then flush and close it
	// so the upload sees complete contents. Guarded by once because this
	// function can run twice (interrupt path + final path).
	once.Do(func() {
		logrus.SetOutput(os.Stderr)
		logFile.Sync()
		logFile.Close()
	})

	newLogReader := func() (io.ReadCloser, error) {
		f, err := os.Open(logFileName)
		if err != nil {
			logrus.WithError(err).Error("Could not open log file")
			return nil, err
		}
		r := bufio.NewReader(f)
		// Pair the buffered reader with the file's Closer so the
		// descriptor is released when the upload finishes reading.
		return struct {
			io.Reader
			io.Closer
		}{r, f}, nil
	}
	uploadTargets[LogFileName] = gcs.DataUpload(newLogReader)

	// Map the outcome to the result string recorded in finished.json;
	// passed takes precedence over aborted.
	var result string
	switch {
	case passed:
		result = "SUCCESS"
	case aborted:
		result = "ABORTED"
	default:
		result = "FAILURE"
	}

	now := time.Now().Unix()
	finished := testgridmetadata.Finished{
		Timestamp: &now,
		Passed:    &passed,
		Result:    result,
		Metadata:  metadata,
		// TODO(fejta): JobVersion,
	}

	// TODO(fejta): move to initupload and Started.Repos, RepoVersion
	finished.DeprecatedRevision = downwardapi.GetRevisionFromSpec(spec)

	// A marshal failure only skips finished.json; the remaining
	// artifacts are still uploaded.
	finishedData, err := json.Marshal(&finished)
	if err != nil {
		logrus.WithError(err).Warn("Could not marshal finishing data")
	} else {
		newReader := func() (io.ReadCloser, error) {
			return io.NopCloser(bytes.NewReader(finishedData)), nil
		}
		uploadTargets[prowv1.FinishedStatusFile] = gcs.DataUpload(newReader)
	}

	if err := o.GcsOptions.Run(ctx, spec, uploadTargets); err != nil {
		return fmt.Errorf("failed to upload to GCS: %w", err)
	}

	return nil
}