github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/canary.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	"encoding/json"
    16  	"fmt"
    17  	"net/http"
    18  	"regexp"
    19  	"sort"
    20  	"strconv"
    21  	"strings"
    22  	"time"
    23  
    24  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    25  )
    26  
    27  // This file contains common elements for all 3rd party test suite roachtests.
    28  // TODO(bram): There are more common elements between all the canary tests,
    29  // factor more of them into here.
    30  
// blacklist is a list of known test errors and failures, stored as a
// string-to-string map.
// NOTE(review): presumably keyed by test name with the tracking issue (or a
// free-form reason) as the value - confirm against the callers.
type blacklist map[string]string
    33  
// blacklistForVersion contains both a blacklist of known test errors and
// failures but also an optional ignorelist for flaky tests.
// When the test suite is run, the results are compared to this list.
// Any passed test that is not on this blacklist is reported as PASS - expected
// Any passed test that is on this blacklist is reported as PASS - unexpected
// Any failed test that is on this blacklist is reported as FAIL - expected
// Any failed test that is not on this blacklist is reported as FAIL - unexpected
// Any test on this blacklist that is not run is reported as FAIL - not run
// Any test in the ignorelist is reported as SKIP if it is run
//
// Fields:
//   - versionPrefix is the prefix that getLists matches (via
//     strings.HasPrefix) against the cockroach version string.
//   - blacklistname and blacklist are the name and contents of the blacklist
//     returned by getLists for a matching version.
//   - ignorelistname and ignorelist are the name and contents of the optional
//     ignorelist returned by getLists for a matching version.
type blacklistForVersion struct {
	versionPrefix  string
	blacklistname  string
	blacklist      blacklist
	ignorelistname string
	ignorelist     blacklist
}
    50  
// blacklistsForVersion is an ordered collection of per-version lists. Order
// matters: getLists returns the first entry whose versionPrefix matches.
type blacklistsForVersion []blacklistForVersion
    52  
    53  // getLists returns the appropriate blacklist and ignorelist based on the
    54  // cockroach version. This check only looks to ensure that the prefix that
    55  // matches.
    56  func (b blacklistsForVersion) getLists(version string) (string, blacklist, string, blacklist) {
    57  	for _, info := range b {
    58  		if strings.HasPrefix(version, info.versionPrefix) {
    59  			return info.blacklistname, info.blacklist, info.ignorelistname, info.ignorelist
    60  		}
    61  	}
    62  	return "", nil, "", nil
    63  }
    64  
    65  func fetchCockroachVersion(ctx context.Context, c *cluster, nodeIndex int) (string, error) {
    66  	db, err := c.ConnE(ctx, nodeIndex)
    67  	if err != nil {
    68  		return "", err
    69  	}
    70  	defer db.Close()
    71  	var version string
    72  	if err := db.QueryRowContext(ctx,
    73  		`SELECT value FROM crdb_internal.node_build_info where field = 'Version'`,
    74  	).Scan(&version); err != nil {
    75  		return "", err
    76  	}
    77  	return version, nil
    78  }
    79  
    80  // maybeAddGithubLink will take the issue and if it is just a number, then it
    81  // will return a full github link.
    82  func maybeAddGithubLink(issue string) string {
    83  	if len(issue) == 0 {
    84  		return ""
    85  	}
    86  	issueNum, err := strconv.Atoi(issue)
    87  	if err != nil {
    88  		return issue
    89  	}
    90  	return fmt.Sprintf("https://github.com/cockroachdb/cockroach/issues/%d", issueNum)
    91  }
    92  
// The following functions wrap basic cluster functions in a retry loop.
// Transient networking issues are a common cause of spurious test failures,
// so these operations are retried instead of failing on the first error.
    96  
// canaryRetryOptions is the retry policy shared by all of the repeat* helpers
// in this file: backoff starts at 10 seconds, uses a multiplier of 2, is
// capped at 5 minutes, and the retry limit is 10. See retry.Options for the
// exact semantics of each field.
var canaryRetryOptions = retry.Options{
	InitialBackoff: 10 * time.Second,
	Multiplier:     2,
	MaxBackoff:     5 * time.Minute,
	MaxRetries:     10,
}
   103  
   104  // repeatRunE is the same function as c.RunE but with an automatic retry loop.
   105  func repeatRunE(
   106  	ctx context.Context, c *cluster, node nodeListOption, operation string, args ...string,
   107  ) error {
   108  	var lastError error
   109  	for attempt, r := 0, retry.StartWithCtx(ctx, canaryRetryOptions); r.Next(); {
   110  		if ctx.Err() != nil {
   111  			return ctx.Err()
   112  		}
   113  		if c.t.Failed() {
   114  			return fmt.Errorf("test has failed")
   115  		}
   116  		attempt++
   117  		c.l.Printf("attempt %d - %s", attempt, operation)
   118  		lastError = c.RunE(ctx, node, args...)
   119  		if lastError != nil {
   120  			c.l.Printf("error - retrying: %s", lastError)
   121  			continue
   122  		}
   123  		return nil
   124  	}
   125  	return fmt.Errorf("all attempts failed for %s due to error: %s", operation, lastError)
   126  }
   127  
   128  // repeatRunWithBuffer is the same function as c.RunWithBuffer but with an
   129  // automatic retry loop.
   130  func repeatRunWithBuffer(
   131  	ctx context.Context, c *cluster, l *logger, node nodeListOption, operation string, args ...string,
   132  ) ([]byte, error) {
   133  	var (
   134  		lastResult []byte
   135  		lastError  error
   136  	)
   137  	for attempt, r := 0, retry.StartWithCtx(ctx, canaryRetryOptions); r.Next(); {
   138  		if ctx.Err() != nil {
   139  			return nil, ctx.Err()
   140  		}
   141  		if c.t.Failed() {
   142  			return nil, fmt.Errorf("test has failed")
   143  		}
   144  		attempt++
   145  		c.l.Printf("attempt %d - %s", attempt, operation)
   146  		lastResult, lastError = c.RunWithBuffer(ctx, l, node, args...)
   147  		if lastError != nil {
   148  			c.l.Printf("error - retrying: %s\n%s", lastError, string(lastResult))
   149  			continue
   150  		}
   151  		return lastResult, nil
   152  	}
   153  	return nil, fmt.Errorf("all attempts failed for %s, with error: %s\n%s", operation, lastError, lastResult)
   154  }
   155  
   156  // repeatGitCloneE is the same function as c.GitCloneE but with an automatic
   157  // retry loop.
   158  func repeatGitCloneE(
   159  	ctx context.Context, l *logger, c *cluster, src, dest, branch string, node nodeListOption,
   160  ) error {
   161  	var lastError error
   162  	for attempt, r := 0, retry.StartWithCtx(ctx, canaryRetryOptions); r.Next(); {
   163  		if ctx.Err() != nil {
   164  			return ctx.Err()
   165  		}
   166  		if c.t.Failed() {
   167  			return fmt.Errorf("test has failed")
   168  		}
   169  		attempt++
   170  		l.Printf("attempt %d - clone %s", attempt, src)
   171  		lastError = c.GitClone(ctx, l, src, dest, branch, node)
   172  		if lastError != nil {
   173  			c.l.Printf("error - retrying: %s", lastError)
   174  			continue
   175  		}
   176  		return nil
   177  	}
   178  	return fmt.Errorf("could not clone %s due to error: %s", src, lastError)
   179  }
   180  
   181  // repeatGetLatestTag fetches the latest (sorted) tag from a github repo.
   182  // There is no equivalent function on the cluster as this is really only needed
   183  // for the canary tests.
   184  // The regex passed in must contain at least a single group named "major" and
   185  // may contain "minor", "point" and "subpoint" in order of decreasing importance
   186  // for sorting purposes.
   187  func repeatGetLatestTag(
   188  	ctx context.Context, c *cluster, user string, repo string, releaseRegex *regexp.Regexp,
   189  ) (string, error) {
   190  	url := fmt.Sprintf("https://api.github.com/repos/%s/%s/tags", user, repo)
   191  	httpClient := &http.Client{Timeout: 10 * time.Second}
   192  	type Tag struct {
   193  		Name string
   194  	}
   195  	type releaseTag struct {
   196  		tag      string
   197  		major    int
   198  		minor    int
   199  		point    int
   200  		subpoint int
   201  	}
   202  	type Tags []Tag
   203  	atoiOrZero := func(groups map[string]string, name string) int {
   204  		value, ok := groups[name]
   205  		if !ok {
   206  			return 0
   207  		}
   208  		i, err := strconv.Atoi(value)
   209  		if err != nil {
   210  			return 0
   211  		}
   212  		return i
   213  	}
   214  	var lastError error
   215  	for attempt, r := 0, retry.StartWithCtx(ctx, canaryRetryOptions); r.Next(); {
   216  		if ctx.Err() != nil {
   217  			return "", ctx.Err()
   218  		}
   219  		if c.t.Failed() {
   220  			return "", fmt.Errorf("test has failed")
   221  		}
   222  		attempt++
   223  
   224  		c.l.Printf("attempt %d - fetching %s", attempt, url)
   225  		var resp *http.Response
   226  		resp, lastError = httpClient.Get(url)
   227  		if lastError != nil {
   228  			c.l.Printf("error fetching - retrying: %s", lastError)
   229  			continue
   230  		}
   231  		defer resp.Body.Close()
   232  
   233  		var tags Tags
   234  		lastError = json.NewDecoder(resp.Body).Decode(&tags)
   235  		if lastError != nil {
   236  			c.l.Printf("error decoding - retrying: %s", lastError)
   237  			continue
   238  		}
   239  		if len(tags) == 0 {
   240  			return "", fmt.Errorf("no tags found at %s", url)
   241  		}
   242  		var releaseTags []releaseTag
   243  		for _, t := range tags {
   244  			match := releaseRegex.FindStringSubmatch(t.Name)
   245  			if match == nil {
   246  				continue
   247  			}
   248  			groups := map[string]string{}
   249  			for i, name := range match {
   250  				groups[releaseRegex.SubexpNames()[i]] = name
   251  			}
   252  			if _, ok := groups["major"]; !ok {
   253  				continue
   254  			}
   255  			releaseTags = append(releaseTags, releaseTag{
   256  				tag:      t.Name,
   257  				major:    atoiOrZero(groups, "major"),
   258  				minor:    atoiOrZero(groups, "minor"),
   259  				point:    atoiOrZero(groups, "point"),
   260  				subpoint: atoiOrZero(groups, "subpoint"),
   261  			})
   262  		}
   263  		if len(releaseTags) == 0 {
   264  			return "", fmt.Errorf("no tags match the given regex")
   265  		}
   266  		sort.SliceStable(releaseTags, func(i, j int) bool {
   267  			return releaseTags[i].major < releaseTags[j].major ||
   268  				releaseTags[i].minor < releaseTags[j].minor ||
   269  				releaseTags[i].point < releaseTags[j].point ||
   270  				releaseTags[i].subpoint < releaseTags[j].subpoint
   271  		})
   272  
   273  		return releaseTags[len(releaseTags)-1].tag, nil
   274  	}
   275  	return "", fmt.Errorf("could not get tags from %s, due to error: %s", url, lastError)
   276  }