github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/prow/sidecar/run.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package sidecar
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/json"
    22  	"fmt"
    23  	"io/ioutil"
    24  	"os"
    25  	"os/signal"
    26  	"path/filepath"
    27  	"strconv"
    28  	"strings"
    29  	"sync"
    30  	"syscall"
    31  	"time"
    32  
    33  	"github.com/fsnotify/fsnotify"
    34  	"github.com/sirupsen/logrus"
    35  
    36  	"k8s.io/test-infra/prow/kube"
    37  	"k8s.io/test-infra/prow/pod-utils/downwardapi"
    38  	"k8s.io/test-infra/prow/pod-utils/gcs"
    39  )
    40  
    41  // Run will watch for the process being wrapped to exit
    42  // and then post the status of that process and any artifacts
    43  // to cloud storage.
    44  func (o Options) Run() error {
    45  	spec, err := downwardapi.ResolveSpecFromEnv()
    46  	if err != nil {
    47  		return fmt.Errorf("could not resolve job spec: %v", err)
    48  	}
    49  
    50  	// If we are being asked to terminate by the kubelet but we have
    51  	// NOT seen the test process exit cleanly, we need a to start
    52  	// uploading artifacts to GCS immediately. If we notice the process
    53  	// exit while doing this best-effort upload, we can race with the
    54  	// second upload but we can tolerate this as we'd rather get SOME
    55  	// data into GCS than attempt to cancel these uploads and get none.
    56  	interrupt := make(chan os.Signal)
    57  	signal.Notify(interrupt, os.Interrupt, syscall.SIGTERM)
    58  	go func() {
    59  		select {
    60  		case s := <-interrupt:
    61  			logrus.Errorf("Received an interrupt: %s", s)
    62  			o.doUpload(spec, false, true, nil)
    63  		}
    64  	}()
    65  
    66  	// Only start watching file events if the file doesn't exist
    67  	// If the file exists, it means the main process already completed.
    68  	if _, err := os.Stat(o.WrapperOptions.MarkerFile); os.IsNotExist(err) {
    69  		watcher, err := fsnotify.NewWatcher()
    70  		if err != nil {
    71  			return fmt.Errorf("could not begin fsnotify watch: %v", err)
    72  		}
    73  		defer watcher.Close()
    74  
    75  		ticker := time.NewTicker(30 * time.Second)
    76  		group := sync.WaitGroup{}
    77  		group.Add(1)
    78  		go func() {
    79  			defer group.Done()
    80  			for {
    81  				select {
    82  				case event := <-watcher.Events:
    83  					if event.Name == o.WrapperOptions.MarkerFile && event.Op&fsnotify.Create == fsnotify.Create {
    84  						return
    85  					}
    86  				case err := <-watcher.Errors:
    87  					logrus.WithError(err).Info("Encountered an error during fsnotify watch")
    88  				case <-ticker.C:
    89  					if _, err := os.Stat(o.WrapperOptions.MarkerFile); err == nil {
    90  						return
    91  					}
    92  				}
    93  			}
    94  		}()
    95  
    96  		dir := filepath.Dir(o.WrapperOptions.MarkerFile)
    97  		if err := watcher.Add(dir); err != nil {
    98  			return fmt.Errorf("could not add to fsnotify watch: %v", err)
    99  		}
   100  		group.Wait()
   101  		ticker.Stop()
   102  	}
   103  
   104  	// If we are being asked to terminate by the kubelet but we have
   105  	// seen the test process exit cleanly, we need a chance to upload
   106  	// artifacts to GCS. The only valid way for this program to exit
   107  	// after a SIGINT or SIGTERM in this situation is to finish]
   108  	// uploading, so we ignore the signals.
   109  	signal.Ignore(os.Interrupt, syscall.SIGTERM)
   110  
   111  	passed := false
   112  	aborted := false
   113  	returnCodeData, err := ioutil.ReadFile(o.WrapperOptions.MarkerFile)
   114  	if err != nil {
   115  		logrus.WithError(err).Warn("Could not read return code from marker file")
   116  	} else {
   117  		returnCode, err := strconv.Atoi(strings.TrimSpace(string(returnCodeData)))
   118  		if err != nil {
   119  			logrus.WithError(err).Warn("Failed to parse process return code")
   120  		}
   121  		passed = returnCode == 0 && err == nil
   122  		aborted = returnCode == 130
   123  	}
   124  
   125  	metadataFile := o.WrapperOptions.MetadataFile
   126  	if _, err := os.Stat(metadataFile); err != nil {
   127  		if !os.IsNotExist(err) {
   128  			logrus.WithError(err).Errorf("Failed to stat %s", metadataFile)
   129  		}
   130  		return o.doUpload(spec, passed, aborted, nil)
   131  	}
   132  
   133  	metadataRaw, err := ioutil.ReadFile(metadataFile)
   134  	if err != nil {
   135  		logrus.WithError(err).Errorf("cannot read %s", metadataFile)
   136  		return o.doUpload(spec, passed, aborted, nil)
   137  	}
   138  
   139  	metadata := map[string]interface{}{}
   140  	if err := json.Unmarshal(metadataRaw, &metadata); err != nil {
   141  		logrus.WithError(err).Errorf("Failed to unmarshal %s", metadataFile)
   142  		return o.doUpload(spec, passed, aborted, nil)
   143  	}
   144  
   145  	return o.doUpload(spec, passed, aborted, metadata)
   146  }
   147  
   148  func getRevisionFromRef(refs *kube.Refs) string {
   149  	if len(refs.Pulls) > 0 {
   150  		return refs.Pulls[0].SHA
   151  	}
   152  
   153  	if refs.BaseSHA != "" {
   154  		return refs.BaseSHA
   155  	}
   156  
   157  	return refs.BaseRef
   158  }
   159  
   160  func (o Options) doUpload(spec *downwardapi.JobSpec, passed, aborted bool, metadata map[string]interface{}) error {
   161  	uploadTargets := map[string]gcs.UploadFunc{
   162  		"build-log.txt": gcs.FileUpload(o.WrapperOptions.ProcessLog),
   163  	}
   164  
   165  	var result string
   166  	switch {
   167  	case passed:
   168  		result = "SUCCESS"
   169  	case aborted:
   170  		result = "ABORTED"
   171  	default:
   172  		result = "FAILURE"
   173  	}
   174  
   175  	// TODO(krzyzacy): Unify with downstream spyglass definition
   176  	finished := struct {
   177  		Timestamp int64                  `json:"timestamp"`
   178  		Passed    bool                   `json:"passed"`
   179  		Result    string                 `json:"result"`
   180  		Metadata  map[string]interface{} `json:"metadata,omitempty"`
   181  		Revision  string                 `json:"revision,omitempty"`
   182  	}{
   183  		Timestamp: time.Now().Unix(),
   184  		Passed:    passed,
   185  		Result:    result,
   186  		Metadata:  metadata,
   187  	}
   188  
   189  	if spec.Refs != nil {
   190  		finished.Revision = getRevisionFromRef(spec.Refs)
   191  	} else if len(spec.ExtraRefs) > 0 {
   192  		finished.Revision = getRevisionFromRef(&spec.ExtraRefs[0])
   193  	}
   194  
   195  	finishedData, err := json.Marshal(&finished)
   196  	if err != nil {
   197  		logrus.WithError(err).Warn("Could not marshal finishing data")
   198  	} else {
   199  		uploadTargets["finished.json"] = gcs.DataUpload(bytes.NewBuffer(finishedData))
   200  	}
   201  
   202  	if err := o.GcsOptions.Run(spec, uploadTargets); err != nil {
   203  		return fmt.Errorf("failed to upload to GCS: %v", err)
   204  	}
   205  
   206  	return nil
   207  }