github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/triage/summarize.py

#!/usr/bin/env python2

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
Groups failed tests together by finding edit distances between their failure strings,
and emits JSON for rendering in a browser.
'''

# pylint: disable=invalid-name,missing-docstring


import argparse
import functools
import hashlib
import json
import logging
import os
import re
import sys
import time
import zlib

import berghelroach

editdist = berghelroach.dist

flakeReasonDateRE = re.compile(
    r'[A-Z][a-z]{2}, \d+ \w+ 2\d{3} [\d.-: ]*([-+]\d+)?|'
    r'\w{3}\s+\d{1,2} \d+:\d+:\d+(\.\d+)?|(\d{4}-\d\d-\d\d.|.\d{4} )\d\d:\d\d:\d\d(.\d+)?')
# Find random noisy strings that should be replaced with renumbered strings, for more similarity.
flakeReasonOrdinalRE = re.compile(
    r'0x[0-9a-fA-F]+'  # hex constants
    r'|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d+)?'  # IPs + optional port
    r'|[0-9a-fA-F]{8}-\S{4}-\S{4}-\S{4}-\S{12}(-\d+)?'  # UUIDs + trailing digits
    r'|[0-9a-f]{12,32}'  # hex garbage
    r'|(?<=minion-group-|default-pool-)[-0-9a-z]{4,}'  # node names
)


def normalize(s):
    """
    Given a traceback or error message from a test, reduce excess entropy to make
    clustering easier.

    This includes:
    - blanking dates and timestamps
    - renumbering unique information like
        - pointer addresses
        - UUIDs
        - IP addresses
    - sorting randomly ordered map[] strings.
    """

    # blank out dates
    s = flakeReasonDateRE.sub('TIME', s)

    # do alpha conversion-- rename random garbage strings (hex pointer values, node names, etc)
    # into 'UNIQ1', 'UNIQ2', etc.
    matches = {}
    def repl(m):
        s = m.group(0)
        if s not in matches:
            matches[s] = 'UNIQ%d' % (len(matches) + 1)
        return matches[s]

    if 'map[' in s:
        # Go's maps are in a random order. Try to sort them to reduce diffs.
        s = re.sub(r'map\[([^][]*)\]',
                   lambda m: 'map[%s]' % ' '.join(sorted(m.group(1).split())),
                   s)

    s = flakeReasonOrdinalRE.sub(repl, s)

    if len(s) > 10000:
        # for long strings, remove repeated lines!
        s = re.sub(r'(?m)^(.*\n)\1+', r'\1', s)

    if len(s) > 10000:  # ridiculously long test output
        s = s[:5000] + '\n...[truncated]...\n' + s[-5000:]

    return s

def normalize_name(name):
    """
    Given a test name, remove [...]/{...}.

    Matches code in testgrid and kubernetes/hack/update_owners.py.
    """
    name = re.sub(r'\[.*?\]|\{.*?\}', '', name)
    name = re.sub(r'\s+', ' ', name)
    return name.strip()

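# --- Illustrative sketch (not part of the original module) ---
# A rough demonstration of the normalization described above. The inputs and
# the _example_normalization() helper are hypothetical; they only show the
# timestamp-blanking and alpha-conversion behaviour of normalize() and the
# tag-stripping behaviour of normalize_name().
def _example_normalization():
    # IPs (with ports) and pointer-like hex constants are renamed UNIQ1,
    # UNIQ2, ... in order of first appearance, so repeated values stay equal.
    got = normalize('dial 10.240.0.5:443 failed at 0xc420017f00, retrying 10.240.0.5:443')
    assert got == 'dial UNIQ1 failed at UNIQ2, retrying UNIQ1', got
    # syslog-style timestamps are blanked out entirely.
    assert normalize('Jan  3 12:34:56 apiserver panic') == 'TIME apiserver panic'
    # bracketed tags are stripped from test names before ownership matching.
    assert normalize_name('[sig-node] Kubelet [Serial] manages pods') == 'Kubelet manages pods'
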

def make_ngram_counts(s, ngram_counts={}):
    """
    Convert a string into a histogram of frequencies for different byte combinations.
    This can be used as a heuristic to estimate edit distance between two strings in
    constant time.

    Instead of counting each ngram individually, they are hashed into buckets.
    This makes the output count size constant.
    """

    # Yes, I'm intentionally memoizing here.
    # pylint: disable=dangerous-default-value

    size = 64
    if s not in ngram_counts:
        counts = [0] * size
        for x in xrange(len(s)-3):
            counts[zlib.crc32(s[x:x+4].encode('utf8')) & (size - 1)] += 1
        ngram_counts[s] = counts  # memoize
    return ngram_counts[s]


def ngram_editdist(a, b):
    """
    Compute a heuristic lower-bound edit distance using ngram counts.

    An insert/deletion/substitution can cause up to 4 ngrams to differ:

    abcdefg => abcefg
    (abcd, bcde, cdef, defg) => (abce, bcef, cefg)

    This will underestimate the edit distance in many cases:
    - ngrams hashing into the same bucket will get confused
    - a large-scale transposition will barely disturb ngram frequencies,
      but will have a very large effect on edit distance.

    It is useful to avoid more expensive precise computations when they are
    guaranteed to exceed some limit (being a lower bound), or as a proxy when
    the exact edit distance computation is too expensive (for long inputs).
    """
    counts_a = make_ngram_counts(a)
    counts_b = make_ngram_counts(b)
    return sum(abs(x-y) for x, y in zip(counts_a, counts_b))/4


def make_ngram_counts_digest(s):
    """
    Returns a hashed version of the ngram counts.
    """
    return hashlib.sha1(str(make_ngram_counts(s))).hexdigest()[:20]

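# --- Illustrative sketch (not part of the original module) ---
# A hypothetical check of the lower-bound property described above: a single
# character edit disturbs at most 4 overlapping 4-grams, so the bucketed
# histogram difference divided by 4 cannot exceed the true edit distance.
# The strings below are fabricated.
def _example_ngram_bound():
    a = 'connection refused by 10.0.0.1'
    b = 'connection refused by 10.0.0.250'
    # The true edit distance here is 3 (one substitution, two insertions);
    # the heuristic may undercount, but never overcounts.
    assert ngram_editdist(a, b) <= 3
    # The digest is a stable 20-character key derived from the same counts.
    assert len(make_ngram_counts_digest(a)) == 20
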

def file_memoize(description, name):
    """
    Decorator to save a function's results to a file.
    """
    def inner(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if os.path.exists(name):
                data = json.load(open(name))
                logging.info('done (cached) %s', description)
                return data
            data = func(*args, **kwargs)
            json.dump(data, open(name, 'w'))
            logging.info('done %s', description)
            return data
        wrapper.__wrapped__ = func
        return wrapper
    return inner


@file_memoize('loading failed tests', 'memo_load_failures.json')
def load_failures(builds_file, tests_files):
    """
    Load builds and failed tests files.

    Group builds by path, group test failures by test name.

    Args:
        builds_file: path to a builds.json file from BigQuery.
        tests_files: paths to tests.json files from BigQuery.
    Returns:
        {build_path: {path: build_path, started: 12345, ...}, ...},
        {test_name: [{build: gs://foo/bar, name: test_name, failure_text: xxx}, ...], ...}
    """
    builds = {}
    for build in json.load(open(builds_file)):
        if not build['started'] or not build['number']:
            continue
        for attr in ('started', 'tests_failed', 'number', 'tests_run'):
            build[attr] = int(build[attr])
        build['elapsed'] = int(float(build['elapsed']))
        if 'pr-logs' in build['path']:
            build['pr'] = build['path'].split('/')[-3]
        builds[build['path']] = build

    failed_tests = {}
    for tests_file in tests_files:
        for line in open(tests_file, 'r'):
            test = json.loads(line)
            failed_tests.setdefault(test['name'], []).append(test)
    for tests in failed_tests.itervalues():
        tests.sort(key=lambda t: t['build'])

    return builds, failed_tests


def find_match(fnorm, clusters):
    for ngram_dist, other in sorted((ngram_editdist(fnorm, x), x) for x in clusters):
        # allow up to 10% differences
        limit = int((len(fnorm)+len(other))/2.0 * 0.10)

        if ngram_dist > limit:
            continue

        if limit <= 1 and other != fnorm:  # no chance
            continue

        dist = editdist(fnorm, other, limit)

        if dist < limit:
            return other
    return None


def cluster_test(tests):
    """
    Compute failure clusters given a list of failures for one test.

    Normalize the failure text prior to clustering to avoid needless entropy.

    Args:
        [{name: test_name, build: gs://foo/bar, failure_text: xxx}, ...]
    Returns:
        {cluster_text_1: [test1, test2, ...]}
    """
    clusters = {}
    start = time.time()

    for test in tests:
        ftext = test['failure_text']
        fnorm = normalize(ftext)
        if fnorm in clusters:
            clusters[fnorm].append(test)
        else:
            other = find_match(fnorm, clusters)
            if other:
                clusters[other].append(test)
            else:
                clusters[fnorm] = [test]
        if time.time() > start + 60:
            logging.info('bailing early, taking too long!')
            break
    return clusters


@file_memoize('clustering inside each test', 'memo_cluster_local.json')
def cluster_local(failed_tests):
    """
    Cluster together the failures for each test.

    Args:
        {test_1: [{name: test_1, build: gs://foo/bar, failure_text: xxx}, ...], ...}
    Returns:
        {test_1: {cluster_text_1: [test1, test2], ...}, test_2: ...}
    """
    clustered = {}
    num_failures = 0
    start = time.time()
    logging.info("Clustering failures for %d unique tests...", len(failed_tests))
    # Look at tests with the most failures first
    for n, (test_name, tests) in enumerate(
            sorted(failed_tests.iteritems(),
                   key=lambda x: len(x[1]),
                   reverse=True),
            1):
        num_failures += len(tests)
        logging.info('%4d/%4d, %d failures, %s', n, len(failed_tests), len(tests), test_name)
        sys.stdout.flush()
        clustered[test_name] = cluster_test(tests)
    elapsed = time.time() - start
    logging.info('Finished locally clustering %d unique tests (%d failures) in %dm%ds',
                 len(clustered), num_failures, elapsed / 60, elapsed % 60)
    return clustered

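# --- Illustrative sketch (not part of the original module) ---
# A hypothetical walk-through of cluster_test(): two failures whose texts
# differ only in an IP address normalize to the same key and land in one
# cluster, while an unrelated failure starts its own cluster. The records
# below are fabricated stand-ins for rows from tests.json.
def _example_cluster_test():
    failures = [
        {'name': 'TestFoo', 'build': 'gs://bucket/logs/job/1',
         'failure_text': 'dial tcp 10.0.0.1:443: connection refused'},
        {'name': 'TestFoo', 'build': 'gs://bucket/logs/job/2',
         'failure_text': 'dial tcp 10.0.0.9:443: connection refused'},
        {'name': 'TestFoo', 'build': 'gs://bucket/logs/job/3',
         'failure_text': 'wanted 3 pods, got 0'},
    ]
    clusters = cluster_test(failures)
    # Both "dial tcp" failures share a normalized key; the mismatch failure
    # is too far away (ngram lower bound exceeds the 10% limit) and stays alone.
    assert len(clusters) == 2
    assert sorted(len(v) for v in clusters.itervalues()) == [1, 2]
    return clusters
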

@file_memoize('clustering across tests', 'memo_cluster_global.json')
def cluster_global(clustered, previous_clustered):
    """Combine together clustered failures for each test.

    This is done hierarchically for efficiency-- each test's failures are likely to be similar,
    reducing the number of clusters that need to be paired up at this stage.

    Args:
        {test_name: {cluster_text_1: [test1, test2, ...], ...}, ...}
    Returns:
        {cluster_text_1: {test_name: [test1, test2, ...], ...}, ...}
    """
    clusters = {}
    num_failures = 0
    logging.info("Combining clustered failures for %d unique tests...", len(clustered))
    start = time.time()
    if previous_clustered:
        # seed clusters using output from the previous run
        n = 0
        for cluster in previous_clustered:
            key = cluster['key']
            if key != normalize(key):
                logging.info(key)
                logging.info(normalize(key))
                n += 1
                continue
            clusters[cluster['key']] = {}
        logging.info('Seeding with %d previous clusters', len(clusters))
        if n:
            logging.warn('!!! %d clusters lost from different normalization! !!!', n)

    # Look at tests with the most failures over all clusters first
    for n, (test_name, test_clusters) in enumerate(
            sorted(clustered.iteritems(),
                   key=lambda (k, v): sum(len(x) for x in v.itervalues()),
                   reverse=True),
            1):
        logging.info('%4d/%4d, %d clusters, %s', n, len(clustered), len(test_clusters), test_name)
        # Look at clusters with the most failures first
        for key, tests in sorted(test_clusters.iteritems(), key=lambda x: len(x[1]), reverse=True):
            num_failures += len(tests)
            if key in clusters:
                clusters[key].setdefault(test_name, []).extend(tests)
            else:
                other = find_match(key, clusters)
                if other:
                    clusters[other].setdefault(test_name, []).extend(tests)
                else:
                    clusters[key] = {test_name: list(tests)}

    # If we seeded clusters using the previous run's keys, some of those
    # clusters may have disappeared. Remove the resulting empty entries.
    for k in {k for k, v in clusters.iteritems() if not v}:
        clusters.pop(k)

    elapsed = time.time() - start
    logging.info('Finished clustering %d unique tests (%d failures) into %d clusters in %dm%ds',
                 len(clustered), num_failures, len(clusters), elapsed / 60, elapsed % 60)

    return clusters


def tests_group_by_job(tests, builds):
    """Turn a list of test failures into {job: [buildnumber, ...], ...}"""
    groups = {}
    for test in tests:
        try:
            build = builds[test['build']]
        except KeyError:
            continue
        if 'number' in build:
            groups.setdefault(build['job'], set()).add(build['number'])
    return sorted(((key, sorted(value, reverse=True)) for key, value in groups.iteritems()),
                  key=lambda (k, v): (-len(v), k))

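# --- Illustrative sketch (not part of the original module) ---
# A fabricated example of tests_group_by_job(): failures are folded into
# (job, [build numbers, newest first]) pairs, sorted so the job with the most
# failing builds comes first. Builds missing from the builds dict are skipped.
def _example_tests_group_by_job():
    builds = {
        'gs://bucket/logs/ci-foo/10': {'job': 'ci-foo', 'number': 10},
        'gs://bucket/logs/ci-foo/11': {'job': 'ci-foo', 'number': 11},
        'gs://bucket/logs/ci-bar/7': {'job': 'ci-bar', 'number': 7},
    }
    tests = [
        {'name': 'TestFoo', 'build': 'gs://bucket/logs/ci-foo/10'},
        {'name': 'TestFoo', 'build': 'gs://bucket/logs/ci-foo/11'},
        {'name': 'TestFoo', 'build': 'gs://bucket/logs/ci-bar/7'},
        {'name': 'TestFoo', 'build': 'gs://bucket/logs/ci-gone/1'},  # unknown build, ignored
    ]
    assert tests_group_by_job(tests, builds) == [('ci-foo', [11, 10]), ('ci-bar', [7])]
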

SPAN_RE = re.compile(r'\w+|\W+')

def common_spans(xs):
    """
    Finds something similar to the longest common subsequence of xs, but much faster.

    Returns a list of [matchlen_1, mismatchlen_1, matchlen_2, mismatchlen_2, ...], giving the
    alternating lengths of spans of the first element that are present in all members and
    spans that are not (starting with a matching span, which may have length 0).
    """
    common = None
    for x in xs:
        x_split = SPAN_RE.findall(x)
        if common is None:  # first iteration
            common = set(x_split)
        else:
            common.intersection_update(x_split)

    spans = []
    match = True
    span_len = 0
    for x in SPAN_RE.findall(xs[0]):
        if x in common:
            if not match:
                match = True
                spans.append(span_len)
                span_len = 0
            span_len += len(x)
        else:
            if match:
                match = False
                spans.append(span_len)
                span_len = 0
            span_len += len(x)

    if span_len:
        spans.append(span_len)

    return spans


def clusters_to_display(clustered, builds):
    """Transpose and sort the output of cluster_global."""

    return [{
        "key": key,
        "id": key_id,
        "spans": common_spans([f['failure_text'] for _, fs in clusters for f in fs]),
        "text": clusters[0][1][0]['failure_text'],
        "tests": [{
            "name": test_name,
            "jobs": [{"name": n, "builds": b}
                     for n, b in tests_group_by_job(tests, builds)]
        }
                  for test_name, tests in sorted(clusters, key=lambda (n, t): (-len(t), n))
        ]
    }
            for key, key_id, clusters in clustered if sum(len(x[1]) for x in clusters) > 1
    ]


def builds_to_columns(builds):
    """Convert a list of build dictionaries into a columnar form.

    This compresses much better with gzip."""

    jobs = {}

    cols = {v: [] for v in 'started tests_failed elapsed tests_run result executor pr'.split()}
    out = {'jobs': jobs, 'cols': cols, 'job_paths': {}}
    for build in sorted(builds.itervalues(), key=lambda b: (b['job'], b['number'])):
        if 'number' not in build:
            continue
        index = len(cols['started'])
        for key, entries in cols.iteritems():
            entries.append(build.get(key))
        job = jobs.setdefault(build['job'], {})
        if not job:
            out['job_paths'][build['job']] = build['path'][:build['path'].rindex('/')]
        job[build['number']] = index

    for k, indexes in jobs.items():
        numbers = sorted(indexes)
        base = indexes[numbers[0]]
        count = len(numbers)

        # optimization: if we have a dense sequential mapping of builds=>indexes,
        # store only the first build number, the run length, and the first index number.
        if numbers[-1] == numbers[0] + count - 1 and \
           all(indexes[k] == n + base for n, k in enumerate(numbers)):
            jobs[k] = [numbers[0], count, base]
            for n in numbers:
                assert n <= numbers[0] + len(numbers), (k, n, jobs[k], len(numbers), numbers)

    return out


def render(builds, clustered):
    clustered_sorted = sorted(
        clustered.iteritems(),
        key=lambda (k, v): (-sum(len(ts) for ts in v.itervalues()), k))
    clustered_tuples = [(k,
                         make_ngram_counts_digest(k),
                         sorted(clusters.items(), key=lambda (n, t): (-len(t), n)))
                        for k, clusters in clustered_sorted]

    return {'clustered': clusters_to_display(clustered_tuples, builds),
            'builds': builds_to_columns(builds)}

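# --- Illustrative sketches (not part of the original module) ---
# A small, hypothetical demonstration of common_spans(): the result alternates
# lengths of text in the first string that every string shares with lengths
# that differ, which the frontend can use to highlight the varying parts of a
# cluster's failure text.
def _example_common_spans():
    spans = common_spans(['error in foo.go', 'error in bar.go'])
    # 'error in ' (9 chars) is common, 'foo'/'bar' (3) differs, '.go' (3) is common again.
    assert spans == [9, 3, 3]


# builds_to_columns() additionally run-length encodes dense build->index
# mappings; a hypothetical job with consecutive build numbers collapses to
# [first_build, count, first_index]. The build records below are fabricated.
def _example_builds_to_columns():
    builds = {
        'gs://b/logs/ci-foo/%d' % n: {
            'job': 'ci-foo', 'number': n, 'path': 'gs://b/logs/ci-foo/%d' % n,
            'started': 1500000000 + n, 'elapsed': 60,
            'tests_failed': 1, 'tests_run': 10,
        }
        for n in (5, 6, 7)
    }
    out = builds_to_columns(builds)
    assert out['jobs']['ci-foo'] == [5, 3, 0]
    assert out['job_paths']['ci-foo'] == 'gs://b/logs/ci-foo'
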

SIG_LABEL_RE = re.compile(r'\[sig-([^]]*)\]')

def annotate_owners(data, builds, owners):
    """
    Assign ownership to a cluster based on the share of hits in the last day.
    """
    owner_re = re.compile(r'(?:%s)' % '|'.join(
        '(?P<%s>%s)' % (
            sig.replace('-', '_'),  # regex group names can't have -
            '|'.join(re.escape(p) for p in prefixes)
        )
        for sig, prefixes in owners.iteritems()
    ))
    job_paths = data['builds']['job_paths']
    yesterday = max(data['builds']['cols']['started']) - (60 * 60 * 24)

    for cluster in data['clustered']:
        owner_counts = {}
        for test in cluster['tests']:
            m = SIG_LABEL_RE.search(test['name'])
            if m:
                owner = m.group(1)
            else:
                m = owner_re.match(normalize_name(test['name']))
                if not m or not m.groupdict():
                    continue
                owner = next(k for k, v in m.groupdict().iteritems() if v)
            owner = owner.replace('_', '-')
            counts = owner_counts.setdefault(owner, [0, 0])
            for job in test['jobs']:
                if ':' in job['name']:  # non-standard CI
                    continue
                job_path = job_paths[job['name']]
                for build in job['builds']:
                    if builds['%s/%d' % (job_path, build)]['started'] > yesterday:
                        counts[0] += 1
                    else:
                        counts[1] += 1
        if owner_counts:
            owner = max(owner_counts.items(), key=lambda (o, c): (c, o))[0]
            cluster['owner'] = owner
        else:
            cluster['owner'] = 'testing'


def render_slice(data, builds, prefix='', owner=''):
    clustered = []
    builds_out = {}
    jobs = set()
    for cluster in data['clustered']:
        # print [cluster['id'], prefix]
        if owner and cluster.get('owner') == owner:
            clustered.append(cluster)
        elif prefix and cluster['id'].startswith(prefix):
            clustered.append(cluster)
        else:
            continue
        for test in cluster['tests']:
            for job in test['jobs']:
                jobs.add(job['name'])
    for path, build in builds.iteritems():
        if build['job'] in jobs:
            builds_out[path] = build
    return {'clustered': clustered, 'builds': builds_to_columns(builds_out)}

def setup_logging():
    """Initialize logging to screen"""
    # See https://docs.python.org/2/library/logging.html#logrecord-attributes
    # [IWEF]mmdd HH:MM:SS.mmm] msg
    fmt = '%(levelname).1s%(asctime)s.%(msecs)03d] %(message)s'  # pylint: disable=line-too-long
    datefmt = '%m%d %H:%M:%S'
    logging.basicConfig(
        level=logging.INFO,
        format=fmt,
        datefmt=datefmt,
    )

def parse_args(args):
    parser = argparse.ArgumentParser()
    parser.add_argument('builds', help='builds.json file from BigQuery')
    parser.add_argument('tests', help='tests.json file from BigQuery', nargs='+')
    parser.add_argument('--previous', help='previous output', type=argparse.FileType('r'))
    parser.add_argument('--owners', help='test owner SIGs', type=argparse.FileType('r'))
    parser.add_argument('--output', default='failure_data.json')
    parser.add_argument('--output_slices',
                        help='Output slices to this path (must include PREFIX in template)')
    return parser.parse_args(args)

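# --- Illustrative sketch (not part of the original module) ---
# How ownership resolution above picks a SIG: an explicit [sig-...] label in
# the test name wins; otherwise the normalized name is matched against prefixes
# from a hypothetical owners mapping, compiled into one regex with a named
# group per SIG (dashes become underscores, since group names cannot contain '-').
def _example_owner_matching():
    assert SIG_LABEL_RE.search('[sig-network] Services work').group(1) == 'network'

    owners = {'cluster-lifecycle': ['kubeadm', 'Upgrade'], 'node': ['Kubelet']}
    owner_re = re.compile(r'(?:%s)' % '|'.join(
        '(?P<%s>%s)' % (sig.replace('-', '_'), '|'.join(re.escape(p) for p in prefixes))
        for sig, prefixes in owners.iteritems()))
    m = owner_re.match(normalize_name('Kubelet [Serial] restarts pods'))
    owner = next(k for k, v in m.groupdict().iteritems() if v)
    assert owner.replace('_', '-') == 'node'
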

def main(args):
    setup_logging()
    builds, failed_tests = load_failures(args.builds, args.tests)

    previous_clustered = None
    if args.previous:
        logging.info('loading previous')
        previous_clustered = json.load(args.previous)['clustered']

    clustered_local = cluster_local(failed_tests)

    clustered = cluster_global(clustered_local, previous_clustered)

    logging.info("Rendering results...")
    start = time.time()
    data = render(builds, clustered)

    if args.owners:
        owners = json.load(args.owners)
        annotate_owners(data, builds, owners)

    json.dump(data, open(args.output, 'w'),
              sort_keys=True)

    if args.output_slices:
        assert 'PREFIX' in args.output_slices
        for subset in range(256):
            id_prefix = '%02x' % subset
            json.dump(render_slice(data, builds, id_prefix),
                      open(args.output_slices.replace('PREFIX', id_prefix), 'w'),
                      sort_keys=True)
        if args.owners:
            owners.setdefault('testing', [])  # for output
            for owner in owners:
                json.dump(render_slice(data, builds, prefix='', owner=owner),
                          open(args.output_slices.replace('PREFIX', 'sig-' + owner), 'w'),
                          sort_keys=True)
    elapsed = time.time() - start
    logging.info('Finished rendering results in %dm%ds', elapsed / 60, elapsed % 60)

if __name__ == '__main__':
    main(parse_args(sys.argv[1:]))

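# --- Illustrative usage sketch (not part of the original module) ---
# A typical invocation, assuming builds.json and one or more tests.json files
# have already been exported from BigQuery (the file names here are hypothetical):
#
#   python2 summarize.py builds.json tests.json \
#       --previous failure_data_previous.json \
#       --owners owners.json \
#       --output failure_data.json \
#       --output_slices slices/failure_data_PREFIX.json
#
# Intermediate results are memoized in memo_load_failures.json,
# memo_cluster_local.json, and memo_cluster_global.json; delete those files
# to force a full re-run.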