github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/canary.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package main 12 13 import ( 14 "context" 15 "encoding/json" 16 "fmt" 17 "net/http" 18 "regexp" 19 "sort" 20 "strconv" 21 "strings" 22 "time" 23 24 "github.com/cockroachdb/cockroach/pkg/util/retry" 25 ) 26 27 // This file contains common elements for all 3rd party test suite roachtests. 28 // TODO(bram): There are more common elements between all the canary tests, 29 // factor more of them into here. 30 31 // blacklist is a lists of known test errors and failures. 32 type blacklist map[string]string 33 34 // blacklistForVersion contains both a blacklist of known test errors and 35 // failures but also an optional ignorelist for flaky tests. 36 // When the test suite is run, the results are compared to this list. 37 // Any passed test that is not on this blacklist is reported as PASS - expected 38 // Any passed test that is on this blacklist is reported as PASS - unexpected 39 // Any failed test that is on this blacklist is reported as FAIL - expected 40 // Any failed test that is not on blackthis list is reported as FAIL - unexpected 41 // Any test on this blacklist that is not run is reported as FAIL - not run 42 // Ant test in the ignorelist is reported as SKIP if it is run 43 type blacklistForVersion struct { 44 versionPrefix string 45 blacklistname string 46 blacklist blacklist 47 ignorelistname string 48 ignorelist blacklist 49 } 50 51 type blacklistsForVersion []blacklistForVersion 52 53 // getLists returns the appropriate blacklist and ignorelist based on the 54 // cockroach version. This check only looks to ensure that the prefix that 55 // matches. 56 func (b blacklistsForVersion) getLists(version string) (string, blacklist, string, blacklist) { 57 for _, info := range b { 58 if strings.HasPrefix(version, info.versionPrefix) { 59 return info.blacklistname, info.blacklist, info.ignorelistname, info.ignorelist 60 } 61 } 62 return "", nil, "", nil 63 } 64 65 func fetchCockroachVersion(ctx context.Context, c *cluster, nodeIndex int) (string, error) { 66 db, err := c.ConnE(ctx, nodeIndex) 67 if err != nil { 68 return "", err 69 } 70 defer db.Close() 71 var version string 72 if err := db.QueryRowContext(ctx, 73 `SELECT value FROM crdb_internal.node_build_info where field = 'Version'`, 74 ).Scan(&version); err != nil { 75 return "", err 76 } 77 return version, nil 78 } 79 80 // maybeAddGithubLink will take the issue and if it is just a number, then it 81 // will return a full github link. 82 func maybeAddGithubLink(issue string) string { 83 if len(issue) == 0 { 84 return "" 85 } 86 issueNum, err := strconv.Atoi(issue) 87 if err != nil { 88 return issue 89 } 90 return fmt.Sprintf("https://github.com/cockroachdb/cockroach/issues/%d", issueNum) 91 } 92 93 // The following functions are augmented basic cluster functions but there tends 94 // to be common networking issues that cause test failures and require putting 95 // a retry block around them. 96 97 var canaryRetryOptions = retry.Options{ 98 InitialBackoff: 10 * time.Second, 99 Multiplier: 2, 100 MaxBackoff: 5 * time.Minute, 101 MaxRetries: 10, 102 } 103 104 // repeatRunE is the same function as c.RunE but with an automatic retry loop. 105 func repeatRunE( 106 ctx context.Context, c *cluster, node nodeListOption, operation string, args ...string, 107 ) error { 108 var lastError error 109 for attempt, r := 0, retry.StartWithCtx(ctx, canaryRetryOptions); r.Next(); { 110 if ctx.Err() != nil { 111 return ctx.Err() 112 } 113 if c.t.Failed() { 114 return fmt.Errorf("test has failed") 115 } 116 attempt++ 117 c.l.Printf("attempt %d - %s", attempt, operation) 118 lastError = c.RunE(ctx, node, args...) 119 if lastError != nil { 120 c.l.Printf("error - retrying: %s", lastError) 121 continue 122 } 123 return nil 124 } 125 return fmt.Errorf("all attempts failed for %s due to error: %s", operation, lastError) 126 } 127 128 // repeatRunWithBuffer is the same function as c.RunWithBuffer but with an 129 // automatic retry loop. 130 func repeatRunWithBuffer( 131 ctx context.Context, c *cluster, l *logger, node nodeListOption, operation string, args ...string, 132 ) ([]byte, error) { 133 var ( 134 lastResult []byte 135 lastError error 136 ) 137 for attempt, r := 0, retry.StartWithCtx(ctx, canaryRetryOptions); r.Next(); { 138 if ctx.Err() != nil { 139 return nil, ctx.Err() 140 } 141 if c.t.Failed() { 142 return nil, fmt.Errorf("test has failed") 143 } 144 attempt++ 145 c.l.Printf("attempt %d - %s", attempt, operation) 146 lastResult, lastError = c.RunWithBuffer(ctx, l, node, args...) 147 if lastError != nil { 148 c.l.Printf("error - retrying: %s\n%s", lastError, string(lastResult)) 149 continue 150 } 151 return lastResult, nil 152 } 153 return nil, fmt.Errorf("all attempts failed for %s, with error: %s\n%s", operation, lastError, lastResult) 154 } 155 156 // repeatGitCloneE is the same function as c.GitCloneE but with an automatic 157 // retry loop. 158 func repeatGitCloneE( 159 ctx context.Context, l *logger, c *cluster, src, dest, branch string, node nodeListOption, 160 ) error { 161 var lastError error 162 for attempt, r := 0, retry.StartWithCtx(ctx, canaryRetryOptions); r.Next(); { 163 if ctx.Err() != nil { 164 return ctx.Err() 165 } 166 if c.t.Failed() { 167 return fmt.Errorf("test has failed") 168 } 169 attempt++ 170 l.Printf("attempt %d - clone %s", attempt, src) 171 lastError = c.GitClone(ctx, l, src, dest, branch, node) 172 if lastError != nil { 173 c.l.Printf("error - retrying: %s", lastError) 174 continue 175 } 176 return nil 177 } 178 return fmt.Errorf("could not clone %s due to error: %s", src, lastError) 179 } 180 181 // repeatGetLatestTag fetches the latest (sorted) tag from a github repo. 182 // There is no equivalent function on the cluster as this is really only needed 183 // for the canary tests. 184 // The regex passed in must contain at least a single group named "major" and 185 // may contain "minor", "point" and "subpoint" in order of decreasing importance 186 // for sorting purposes. 187 func repeatGetLatestTag( 188 ctx context.Context, c *cluster, user string, repo string, releaseRegex *regexp.Regexp, 189 ) (string, error) { 190 url := fmt.Sprintf("https://api.github.com/repos/%s/%s/tags", user, repo) 191 httpClient := &http.Client{Timeout: 10 * time.Second} 192 type Tag struct { 193 Name string 194 } 195 type releaseTag struct { 196 tag string 197 major int 198 minor int 199 point int 200 subpoint int 201 } 202 type Tags []Tag 203 atoiOrZero := func(groups map[string]string, name string) int { 204 value, ok := groups[name] 205 if !ok { 206 return 0 207 } 208 i, err := strconv.Atoi(value) 209 if err != nil { 210 return 0 211 } 212 return i 213 } 214 var lastError error 215 for attempt, r := 0, retry.StartWithCtx(ctx, canaryRetryOptions); r.Next(); { 216 if ctx.Err() != nil { 217 return "", ctx.Err() 218 } 219 if c.t.Failed() { 220 return "", fmt.Errorf("test has failed") 221 } 222 attempt++ 223 224 c.l.Printf("attempt %d - fetching %s", attempt, url) 225 var resp *http.Response 226 resp, lastError = httpClient.Get(url) 227 if lastError != nil { 228 c.l.Printf("error fetching - retrying: %s", lastError) 229 continue 230 } 231 defer resp.Body.Close() 232 233 var tags Tags 234 lastError = json.NewDecoder(resp.Body).Decode(&tags) 235 if lastError != nil { 236 c.l.Printf("error decoding - retrying: %s", lastError) 237 continue 238 } 239 if len(tags) == 0 { 240 return "", fmt.Errorf("no tags found at %s", url) 241 } 242 var releaseTags []releaseTag 243 for _, t := range tags { 244 match := releaseRegex.FindStringSubmatch(t.Name) 245 if match == nil { 246 continue 247 } 248 groups := map[string]string{} 249 for i, name := range match { 250 groups[releaseRegex.SubexpNames()[i]] = name 251 } 252 if _, ok := groups["major"]; !ok { 253 continue 254 } 255 releaseTags = append(releaseTags, releaseTag{ 256 tag: t.Name, 257 major: atoiOrZero(groups, "major"), 258 minor: atoiOrZero(groups, "minor"), 259 point: atoiOrZero(groups, "point"), 260 subpoint: atoiOrZero(groups, "subpoint"), 261 }) 262 } 263 if len(releaseTags) == 0 { 264 return "", fmt.Errorf("no tags match the given regex") 265 } 266 sort.SliceStable(releaseTags, func(i, j int) bool { 267 return releaseTags[i].major < releaseTags[j].major || 268 releaseTags[i].minor < releaseTags[j].minor || 269 releaseTags[i].point < releaseTags[j].point || 270 releaseTags[i].subpoint < releaseTags[j].subpoint 271 }) 272 273 return releaseTags[len(releaseTags)-1].tag, nil 274 } 275 return "", fmt.Errorf("could not get tags from %s, due to error: %s", url, lastError) 276 }