github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/robots/issue-creator/sources/flakyjob-reporter.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package sources
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/json"
    22  	"flag"
    23  	"fmt"
    24  	"io/ioutil"
    25  	"net/http"
    26  	"sort"
    27  	"time"
    28  
    29  	"github.com/golang/glog"
    30  
    31  	githubapi "github.com/google/go-github/github"
    32  	"k8s.io/test-infra/robots/issue-creator/creator"
    33  )
    34  
// FlakyJob is a struct that represents a single job and the flake data associated with it.
// FlakyJob implements the Issue interface so that it can be synced with github issues via the IssueCreator.
//
// Consistency and FlakeCount are pointers so that parseFlakyJobs can tell a field that is
// missing from the JSON (nil) apart from a field that is present with a zero value.
type FlakyJob struct {
	// Name is the job's name. It has no JSON tag of its own; parseFlakyJobs sets it from
	// the key under which the job appears in the top-level JSON object.
	Name string
	// Consistency is the fraction of builds that passed. Body multiplies it by 100 to
	// display it as a percentage.
	Consistency *float64 `json:"consistency"`
	// FlakeCount is the number of flakes.
	FlakeCount *int `json:"flakes"`
	// FlakyTests is a map of test names to the number of times that test failed.
	// Any test that failed at least once a day for the past week on this job is included.
	FlakyTests map[string]int `json:"flakiest"`
	// testsSorted is a list of the FlakyTests test names sorted by desc. number of flakes.
	// This field is lazily populated and should be accessed via TestsSorted().
	testsSorted []string

	// reporter is a pointer to the FlakyJobReporter that created this FlakyJob.
	// Body and Labels reach the IssueCreator and the data URL through it.
	reporter *FlakyJobReporter
}
    54  
// FlakyJobReporter is a munger that creates github issues for the flakiest kubernetes jobs.
// The flakiest jobs are parsed from JSON generated by /test-infra/experiment/bigquery/flakes.sh
type FlakyJobReporter struct {
	// flakyJobDataURL is the URL the flaky job JSON is fetched from (flag "flakyjob-url").
	flakyJobDataURL string
	// syncCount is the maximum number of flaky jobs returned by Issues (flag "flakyjob-count").
	syncCount int

	// creator is the IssueCreator passed to the most recent call to Issues.
	creator *creator.IssueCreator
}
    63  
    64  func init() {
    65  	creator.RegisterSourceOrDie("flakyjob-reporter", &FlakyJobReporter{})
    66  }
    67  
    68  // RegisterFlags registers options for this munger; returns any that require a restart when changed.
    69  func (fjr *FlakyJobReporter) RegisterFlags() {
    70  	flag.StringVar(&fjr.flakyJobDataURL, "flakyjob-url", "https://storage.googleapis.com/k8s-metrics/flakes-latest.json", "The url where flaky job JSON data can be found.")
    71  	flag.IntVar(&fjr.syncCount, "flakyjob-count", 3, "The number of flaky jobs to try to sync to github.")
    72  }
    73  
    74  // Issues is the main work method of FlakyJobReporter. It fetches and parses flaky job data,
    75  // then syncs the top issues to github with the IssueCreator.
    76  func (fjr *FlakyJobReporter) Issues(c *creator.IssueCreator) ([]creator.Issue, error) {
    77  	fjr.creator = c
    78  	json, err := ReadHTTP(fjr.flakyJobDataURL)
    79  	if err != nil {
    80  		return nil, err
    81  	}
    82  
    83  	flakyJobs, err := fjr.parseFlakyJobs(json)
    84  	if err != nil {
    85  		return nil, err
    86  	}
    87  
    88  	count := fjr.syncCount
    89  	if len(flakyJobs) < count {
    90  		count = len(flakyJobs)
    91  	}
    92  	issues := make([]creator.Issue, 0, count)
    93  	for _, fj := range flakyJobs[0:count] {
    94  		issues = append(issues, fj)
    95  	}
    96  
    97  	return issues, nil
    98  }
    99  
   100  // parseFlakyJobs parses JSON generated by the 'flakes' bigquery metric into a sorted slice of
   101  // *FlakyJob.
   102  func (fjr *FlakyJobReporter) parseFlakyJobs(jsonIn []byte) ([]*FlakyJob, error) {
   103  	var flakeMap map[string]*FlakyJob
   104  	err := json.Unmarshal(jsonIn, &flakeMap)
   105  	if err != nil || flakeMap == nil {
   106  		return nil, fmt.Errorf("error unmarshaling flaky jobs json: %v", err)
   107  	}
   108  	flakyJobs := make([]*FlakyJob, 0, len(flakeMap))
   109  
   110  	for job, fj := range flakeMap {
   111  		if job == "" {
   112  			glog.Errorf("Flaky jobs json contained a job with an empty jobname.\n")
   113  			continue
   114  		}
   115  		if fj == nil {
   116  			glog.Errorf("Flaky jobs json has invalid data for job '%s'.\n", job)
   117  			continue
   118  		}
   119  		if fj.Consistency == nil {
   120  			glog.Errorf("Flaky jobs json has no 'consistency' field for job '%s'.\n", job)
   121  			continue
   122  		}
   123  		if fj.FlakeCount == nil {
   124  			glog.Errorf("Flaky jobs json has no 'flakes' field for job '%s'.\n", job)
   125  			continue
   126  		}
   127  		if fj.FlakyTests == nil {
   128  			glog.Errorf("Flaky jobs json has no 'flakiest' field for job '%s'.\n", job)
   129  			continue
   130  		}
   131  		fj.Name = job
   132  		fj.reporter = fjr
   133  		flakyJobs = append(flakyJobs, fj)
   134  	}
   135  
   136  	sort.SliceStable(flakyJobs, func(i, j int) bool {
   137  		if *flakyJobs[i].FlakeCount == *flakyJobs[j].FlakeCount {
   138  			return *flakyJobs[i].Consistency < *flakyJobs[j].Consistency
   139  		}
   140  		return *flakyJobs[i].FlakeCount > *flakyJobs[j].FlakeCount
   141  	})
   142  
   143  	return flakyJobs, nil
   144  }
   145  
   146  // TestsSorted returns a slice of the testnames from a FlakyJob's FlakyTests map. The slice is
   147  // sorted by descending number of failures for the tests.
   148  func (fj *FlakyJob) TestsSorted() []string {
   149  	if fj.testsSorted != nil {
   150  		return fj.testsSorted
   151  	}
   152  	fj.testsSorted = make([]string, len(fj.FlakyTests))
   153  	i := 0
   154  	for test := range fj.FlakyTests {
   155  		fj.testsSorted[i] = test
   156  		i++
   157  	}
   158  	sort.SliceStable(fj.testsSorted, func(i, j int) bool {
   159  		return fj.FlakyTests[fj.testsSorted[i]] > fj.FlakyTests[fj.testsSorted[j]]
   160  	})
   161  	return fj.testsSorted
   162  }
   163  
   164  // Title yields the initial title text of the github issue.
   165  func (fj *FlakyJob) Title() string {
   166  	return fmt.Sprintf("%s flaked %d times in the past week", fj.Name, *fj.FlakeCount)
   167  }
   168  
   169  // ID yields the string identifier that uniquely identifies this issue.
   170  // This ID must appear in the body of the issue.
   171  // DO NOT CHANGE how this ID is formatted or duplicate issues may be created on github.
   172  func (fj *FlakyJob) ID() string {
   173  	return fmt.Sprintf("Flaky Job: %s", fj.Name)
   174  }
   175  
   176  // Body returns the body text of the github issue and *must* contain the output of ID().
   177  // closedIssues is a (potentially empty) slice containing all closed issues authored by this bot
   178  // that contain ID() in their body.
   179  // If Body returns an empty string no issue is created.
   180  func (fj *FlakyJob) Body(closedIssues []*githubapi.Issue) string {
   181  	// First check that the most recently closed issue (if any exist) was closed
   182  	// at least a week ago (since that is the sliding window size used by the flake metric).
   183  	cutoffTime := time.Now().AddDate(0, 0, -7)
   184  	for _, closed := range closedIssues {
   185  		if closed.ClosedAt.After(cutoffTime) {
   186  			return ""
   187  		}
   188  	}
   189  
   190  	// Print stats about the flaky job.
   191  	var buf bytes.Buffer
   192  	fmt.Fprintf(&buf, "### %s\n Flakes in the past week: **%d**\n Consistency: **%.2f%%**\n",
   193  		fj.ID(), *fj.FlakeCount, *fj.Consistency*100)
   194  	if len(fj.FlakyTests) > 0 {
   195  		fmt.Fprint(&buf, "\n#### Flakiest tests by flake count:\n| Test | Flake Count |\n| --- | --- |\n")
   196  		for _, testName := range fj.TestsSorted() {
   197  			fmt.Fprintf(&buf, "| %s | %d |\n", testName, fj.FlakyTests[testName])
   198  		}
   199  	}
   200  	// List previously closed issues if there are any.
   201  	if len(closedIssues) > 0 {
   202  		fmt.Fprint(&buf, "\n#### Previously closed issues for this job flaking:\n")
   203  		for _, closed := range closedIssues {
   204  			fmt.Fprintf(&buf, "#%d ", *closed.Number)
   205  		}
   206  		fmt.Fprint(&buf, "\n")
   207  	}
   208  
   209  	// Create /assign command.
   210  	testsSorted := fj.TestsSorted()
   211  	ownersMap := fj.reporter.creator.TestsOwners(testsSorted)
   212  	if len(ownersMap) > 0 {
   213  		fmt.Fprint(&buf, "\n/assign")
   214  		for user := range ownersMap {
   215  			fmt.Fprintf(&buf, " @%s", user)
   216  		}
   217  		fmt.Fprint(&buf, "\n")
   218  	}
   219  
   220  	// Explain why assignees were assigned and why sig labels were applied.
   221  	fmt.Fprintf(&buf, "\n%s", fj.reporter.creator.ExplainTestAssignments(testsSorted))
   222  
   223  	fmt.Fprintf(&buf, "\n[Flakiest Jobs](%s)\n", fj.reporter.flakyJobDataURL)
   224  	return buf.String()
   225  }
   226  
   227  // Labels returns the labels to apply to the issue created for this flaky job on github.
   228  func (fj *FlakyJob) Labels() []string {
   229  	labels := []string{"kind/flake"}
   230  	// get sig labels
   231  	for sig := range fj.reporter.creator.TestsSIGs(fj.TestsSorted()) {
   232  		labels = append(labels, "sig/"+sig)
   233  	}
   234  	return labels
   235  }
   236  
   237  // Owners returns the list of usernames to assign to this issue on github.
   238  func (fj *FlakyJob) Owners() []string {
   239  	// Assign owners by including a /assign command in the body instead of using Owners to set
   240  	// assignees on the issue request. This lets prow do the assignee validation and will mention
   241  	// the user we want to assign even if they can't be assigned.
   242  	return nil
   243  }
   244  
   245  // Priority calculates and returns the priority of this issue
   246  // The returned bool indicates if the returned priority is valid and can be used
   247  func (fj *FlakyJob) Priority() (string, bool) {
   248  	// TODO: implement priority calculations later
   249  	return "", false
   250  }
   251  
   252  // ReadHTTP fetches file contents from a URL with retries.
   253  func ReadHTTP(url string) ([]byte, error) {
   254  	var err error
   255  	retryDelay := time.Duration(2) * time.Second
   256  	for retryCount := 0; retryCount < 5; retryCount++ {
   257  		if retryCount > 0 {
   258  			time.Sleep(retryDelay)
   259  			retryDelay *= time.Duration(2)
   260  		}
   261  
   262  		resp, err := http.Get(url)
   263  		if resp != nil && resp.StatusCode >= 500 {
   264  			// Retry on this type of error.
   265  			continue
   266  		}
   267  		if err != nil {
   268  			return nil, err
   269  		}
   270  		defer resp.Body.Close()
   271  
   272  		body, err := ioutil.ReadAll(resp.Body)
   273  		if err != nil {
   274  			continue
   275  		}
   276  		return body, nil
   277  	}
   278  	return nil, fmt.Errorf("ran out of retries reading from '%s'. Last error was %v", url, err)
   279  }