github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/triage/summarize.py

#!/usr/bin/env python2

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
Groups failed tests together by finding edit distances between their failure strings,
and emits JSON for rendering in a browser.
'''

# pylint: disable=invalid-name,missing-docstring


import argparse
import functools
import hashlib
import json
import os
import re
import sys
import time
import zlib

import berghelroach

editdist = berghelroach.dist

flakeReasonDateRE = re.compile(
    r'[A-Z][a-z]{2}, \d+ \w+ 2\d{3} [\d.-: ]*([-+]\d+)?|'
    r'\w{3}\s+\d{1,2} \d+:\d+:\d+(\.\d+)?|(\d{4}-\d\d-\d\d.|.\d{4} )\d\d:\d\d:\d\d(.\d+)?')
# Find random noisy strings that should be replaced with renumbered strings, for more similarity.
flakeReasonOrdinalRE = re.compile(
    r'0x[0-9a-fA-F]+'                                    # hex constants
    r'|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d+)?'        # IPs + optional port
    r'|[0-9a-fA-F]{8}-\S{4}-\S{4}-\S{4}-\S{12}(-\d+)?'   # UUIDs + trailing digits
    r'|[0-9a-f]{12,32}'                                  # hex garbage
    r'|(?<=minion-group-|default-pool-)[-0-9a-z]{4,}'    # node names
)


def normalize(s):
    """
    Given a traceback or error message from a test, reduce excess entropy to make
    clustering easier.

    This includes:
    - blanking dates and timestamps
    - renumbering unique information like
        - pointer addresses
        - UUIDs
        - IP addresses
    - sorting randomly ordered map[] strings.
    """

    # blank out dates
    s = flakeReasonDateRE.sub('TIME', s)

    # do alpha conversion-- rename random garbage strings (hex pointer values, node names, etc.)
    # into 'UNIQ1', 'UNIQ2', etc.
    matches = {}
    def repl(m):
        s = m.group(0)
        if s not in matches:
            matches[s] = 'UNIQ%d' % (len(matches) + 1)
        return matches[s]

    if 'map[' in s:
        # Go's maps are in a random order. Try to sort them to reduce diffs.
        s = re.sub(r'map\[([^][]*)\]',
                   lambda m: 'map[%s]' % ' '.join(sorted(m.group(1).split())),
                   s)

    s = flakeReasonOrdinalRE.sub(repl, s)

    if len(s) > 10000:
        # for long strings, remove repeated lines!
        s = re.sub(r'(?m)^(.*\n)\1+', r'\1', s)

    if len(s) > 200000:  # ridiculously long test output
        s = s[:100000] + '\n...[truncated]...\n' + s[-100000:]

    return s


def normalize_name(name):
    """
    Given a test name, remove [...]/{...}.

    Matches code in testgrid and kubernetes/hack/update_owners.py.
    """
    name = re.sub(r'\[.*?\]|\{.*?\}', '', name)
    name = re.sub(r'\s+', ' ', name)
    return name.strip()
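
# Illustrative sketch (not part of the original script; the failure strings are
# hypothetical): two failure texts that differ only in "noisy" tokens such as pointer
# values and IP addresses should normalize to the same string, which is what makes
# exact-match clustering possible before any edit-distance work is done.
def _normalize_example():  # hypothetical helper, never called by the pipeline
    a = normalize('error at 0xdeadbeef on 10.0.0.1:8080')
    b = normalize('error at 0xcafebabe on 10.0.0.2:8080')
    # Both should reduce to something like 'error at UNIQ1 on UNIQ2'.
    assert a == b
    return a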

def make_ngram_counts(s, ngram_counts={}):
    """
    Convert a string into a histogram of frequencies for different byte combinations.
    This can be used as a heuristic to estimate edit distance between two strings in
    constant time.

    Instead of counting each ngram individually, they are hashed into buckets.
    This makes the output count size constant.
    """

    # Yes, I'm intentionally memoizing here.
    # pylint: disable=dangerous-default-value

    size = 64
    if s not in ngram_counts:
        counts = [0] * size
        for x in xrange(len(s)-3):
            counts[zlib.crc32(s[x:x+4].encode('utf8')) & (size - 1)] += 1
        ngram_counts[s] = counts  # memoize
    return ngram_counts[s]


def ngram_editdist(a, b):
    """
    Compute a heuristic lower-bound edit distance using ngram counts.

    An insert/deletion/substitution can cause up to 4 ngrams to differ:

        abcdefg => abcefg
        (abcd, bcde, cdef, defg) => (abce, bcef, cefg)

    This will underestimate the edit distance in many cases:
    - ngrams hashing into the same bucket will get confused
    - a large-scale transposition will barely disturb ngram frequencies,
      but will have a very large effect on edit distance.

    It is useful to avoid more expensive precise computations when they are
    guaranteed to exceed some limit (being a lower bound), or as a proxy when
    the exact edit distance computation is too expensive (for long inputs).
    """
    counts_a = make_ngram_counts(a)
    counts_b = make_ngram_counts(b)
    return sum(abs(x-y) for x, y in zip(counts_a, counts_b))/4


def make_ngram_counts_digest(s):
    """
    Returns a hashed version of the ngram counts.
    """
    return hashlib.sha1(str(make_ngram_counts(s))).hexdigest()[:20]


def file_memoize(description, name):
    """
    Decorator to save a function's results to a file.
    """
    def inner(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if os.path.exists(name):
                data = json.load(open(name))
                print 'done (cached)', description
                return data
            data = func(*args, **kwargs)
            json.dump(data, open(name, 'w'))
            print 'done', description
            return data
        wrapper.__wrapped__ = func
        return wrapper
    return inner


@file_memoize('loading failed tests', 'failed.json')
def load_failures(builds_file, tests_file):
    builds = {}
    for build in json.load(open(builds_file)):
        if not build['started'] or not build['number']:
            continue
        for attr in ('started', 'tests_failed', 'number', 'tests_run'):
            build[attr] = int(build[attr])
        build['elapsed'] = int(float(build['elapsed']))
        if 'pr-logs' in build['path']:
            build['pr'] = build['path'].split('/')[-3]
        builds[build['path']] = build

    failed_tests = {}
    for test in json.load(open(tests_file)):
        failed_tests.setdefault(test['name'], []).append(test)
    for tests in failed_tests.itervalues():
        tests.sort(key=lambda t: t['build'])

    return builds, failed_tests


def find_match(fnorm, clusters):
    for ngram_dist, other in sorted((ngram_editdist(fnorm, x), x) for x in clusters):
        # allow up to 10% differences
        limit = int((len(fnorm)+len(other))/2.0 * 0.10)

        if ngram_dist > limit:
            continue

        if limit <= 1 and other != fnorm:  # no chance
            continue

        dist = editdist(fnorm, other, limit)

        if dist < limit:
            return other
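
# Illustrative sketch (hypothetical failure strings, not part of the pipeline):
# find_match only accepts an existing cluster key whose edit distance from the new
# failure is below roughly 10% of their average length, and it uses the cheap ngram
# estimate above to skip candidates that are clearly too far away before calling the
# more expensive berghelroach edit distance.
def _find_match_example():  # hypothetical helper, never called by the pipeline
    clusters = {'timeout waiting for pod readiness': []}
    # A near-identical failure (one character off) should match the existing key.
    near = find_match('timeout waiting for pod readines', clusters)
    # An unrelated failure should not match, so find_match returns None.
    far = find_match('connection refused by the apiserver', clusters)
    return near, far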

def cluster_test(tests):
    """
    Compute failure clusters given a list of failures for one test.

    Args:
        tests: list of failed test dictionaries, with 'failure_text' keys
    Returns:
        {failure_text: [failure_in_cluster_1, failure_in_cluster_2, ...]}
    """
    clusters = {}
    start = time.time()

    for test in tests:
        ftext = test['failure_text']
        fnorm = normalize(ftext)
        if fnorm in clusters:
            clusters[fnorm].append(test)
        else:
            other = find_match(fnorm, clusters)
            if other:
                clusters[other].append(test)
            else:
                clusters[fnorm] = [test]
        if time.time() > start + 60:
            print 'bailing early, taking too long!'
            break
    return clusters


@file_memoize('clustering inside each test', 'failed_clusters_local.json')
def cluster_local(failed_tests):
    """Cluster together the failures for each test."""
    clustered = {}
    for test_name, tests in sorted(failed_tests.iteritems(), key=lambda x: len(x[1]), reverse=True):
        print len(tests), test_name,
        sys.stdout.flush()
        clustered[test_name] = cluster_test(tests)
        print len(clustered[test_name])
    return clustered


@file_memoize('clustering across tests', 'failed_clusters_global.json')
def cluster_global(clustered, previous_clustered):
    """Combine the clustered failures of each test into cross-test clusters.

    This is done hierarchically for efficiency-- each test's failures are likely to be similar,
    reducing the number of clusters that need to be paired up at this stage.

    Args:
        clustered: {test_name: {failure_text: [failure_1, failure_2, ...], ...}, ...}
        previous_clustered: a previous run's cluster list (or None), used to seed cluster keys
    Returns:
        {failure_text: {test_name: [failure_1, failure_2, ...], ...}, ...}
    """
    clusters = {}

    if previous_clustered:
        # seed clusters using output from the previous run
        n = 0
        for cluster in previous_clustered:
            key = cluster['key']
            if key != normalize(key):
                print key
                print normalize(key)
                n += 1
                continue
            clusters[cluster['key']] = {}
        print 'Seeding with %d previous clusters' % len(clusters)
        if n:
            print '!!! %d clusters lost from different normalization! !!!' % n

    for n, (test_name, cluster) in enumerate(
            sorted(clustered.iteritems(),
                   key=lambda (k, v): sum(len(x) for x in v.itervalues()),
                   reverse=True),
            1):
        print '%d/%d %d %s' % (n, len(clustered), len(cluster), test_name)
        for key, tests in sorted(cluster.iteritems(), key=lambda x: len(x[1]), reverse=True):
            if key in clusters:
                clusters[key].setdefault(test_name, []).extend(tests)
            else:
                other = find_match(key, clusters)
                if other:
                    clusters[other].setdefault(test_name, []).extend(tests)
                else:
                    clusters[key] = {test_name: list(tests)}

    # If we seeded clusters using the previous run's keys, some of those
    # clusters may have disappeared. Remove the resulting empty entries.
    for k in {k for k, v in clusters.iteritems() if not v}:
        clusters.pop(k)

    return clusters
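
# Illustrative sketch (hypothetical test names and failure texts): the two-stage
# clustering first groups failures within each test, then merges the per-test
# clusters across tests, so the pairwise matching only ever sees one representative
# key per (test, failure shape) pair.
def _hierarchical_clustering_example():  # hypothetical helper, never called by the pipeline
    failed_tests = {
        'TestFoo': [{'build': 1, 'failure_text': 'error dialing 10.0.0.1:8080'}],
        'TestBar': [{'build': 2, 'failure_text': 'error dialing 10.0.0.2:8080'}],
    }
    # Call through __wrapped__ to bypass the file_memoize caches for this sketch.
    local = cluster_local.__wrapped__(failed_tests)
    # local has shape   {test_name: {normalized_failure_text: [failure, ...]}}
    # merged has shape  {normalized_failure_text: {test_name: [failure, ...]}}
    # Both failures normalize to the same text, so they should end up in a single
    # cluster spanning both tests.
    return cluster_global.__wrapped__(local, None)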

def tests_group_by_job(tests, builds):
    """Turn a list of test failures into {job: [buildnumber, ...], ...}"""
    groups = {}
    for test in tests:
        try:
            build = builds[test['build']]
        except KeyError:
            continue
        if 'number' in build:
            groups.setdefault(build['job'], set()).add(build['number'])
    return sorted(((key, sorted(value, reverse=True)) for key, value in groups.iteritems()),
                  key=lambda (k, v): (-len(v), k))


SPAN_RE = re.compile(r'\w+|\W+')

def common_spans(xs):
    """
    Finds something similar to the longest common subsequence of xs, but much faster.

    Returns a list [matchlen_1, mismatchlen_1, matchlen_2, mismatchlen_2, ...], representing
    the lengths of spans of the first element of xs that are (alternately) present in and
    absent from all members.
    """
    common = None
    for x in xs:
        x_split = SPAN_RE.findall(x)
        if common is None:  # first iteration
            common = set(x_split)
        else:
            common.intersection_update(x_split)

    spans = []
    match = True
    span_len = 0
    for x in SPAN_RE.findall(xs[0]):
        if x in common:
            if not match:
                match = True
                spans.append(span_len)
                span_len = 0
            span_len += len(x)
        else:
            if match:
                match = False
                spans.append(span_len)
                span_len = 0
            span_len += len(x)

    if span_len:
        spans.append(span_len)

    return spans
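
# Illustrative sketch (hypothetical inputs): common_spans alternates lengths of text
# shared by every member with lengths that differ, always starting with a "shared"
# length -- presumably so the browser renderer can highlight the differing parts.
def _common_spans_example():  # hypothetical helper, never called by the pipeline
    # 'abc ' (4 chars) is shared, 'def' (3 chars) differs, ' 123' (4 chars) is shared.
    spans = common_spans(['abc def 123', 'abc ghi 123'])
    assert spans == [4, 3, 4]
    return spans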

def clusters_to_display(clustered, builds):
    """Transpose and sort the output of cluster_global."""

    return [{
        "key": key,
        "id": key_id,
        "spans": common_spans([f['failure_text'] for _, fs in clusters for f in fs]),
        "text": clusters[0][1][0]['failure_text'],
        "tests": [{
            "name": test_name,
            "jobs": [{"name": n, "builds": b}
                     for n, b in tests_group_by_job(tests, builds)]
            }
            for test_name, tests in sorted(clusters, key=lambda (n, t): (-len(t), n))
        ]
        }
        for key, key_id, clusters in clustered if sum(len(x[1]) for x in clusters) > 1
    ]


def builds_to_columns(builds):
    """Convert a list of build dictionaries into a columnar form.

    This compresses much better with gzip."""

    jobs = {}

    cols = {v: [] for v in 'started tests_failed elapsed tests_run result executor pr'.split()}
    out = {'jobs': jobs, 'cols': cols, 'job_paths': {}}
    for build in sorted(builds.itervalues(), key=lambda b: (b['job'], b['number'])):
        if 'number' not in build:
            continue
        index = len(cols['started'])
        for key, entries in cols.iteritems():
            entries.append(build.get(key))
        job = jobs.setdefault(build['job'], {})
        if not job:
            out['job_paths'][build['job']] = build['path'][:build['path'].rindex('/')]
        job[build['number']] = index

    for k, indexes in jobs.items():
        numbers = sorted(indexes)
        base = indexes[numbers[0]]
        count = len(numbers)

        # optimization: if we have a dense sequential mapping of builds=>indexes,
        # store only the first build number, the run length, and the first index number.
        if numbers[-1] == numbers[0] + count - 1 and \
                all(indexes[k] == n + base for n, k in enumerate(numbers)):
            jobs[k] = [numbers[0], count, base]
            for n in numbers:
                assert n <= numbers[0] + len(numbers), (k, n, jobs[k], len(numbers), numbers)

    return out


def render(builds, clustered):
    clustered_sorted = sorted(
        clustered.iteritems(),
        key=lambda (k, v): (-sum(len(ts) for ts in v.itervalues()), k))
    clustered_tuples = [(k,
                         make_ngram_counts_digest(k),
                         sorted(clusters.items(), key=lambda (n, t): (-len(t), n)))
                        for k, clusters in clustered_sorted]

    return {'clustered': clusters_to_display(clustered_tuples, builds),
            'builds': builds_to_columns(builds)}
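
# Illustrative sketch (hypothetical builds and bucket paths): when a job's build
# numbers are consecutive and land in consecutive column indexes, builds_to_columns
# collapses the per-build mapping into a [first_build_number, run_length, first_index]
# triple, which keeps the emitted JSON small.
def _builds_to_columns_example():  # hypothetical helper, never called by the pipeline
    builds = {
        'gs://bucket/logs/ci-foo/7': {'job': 'ci-foo', 'number': 7, 'started': 1,
                                      'path': 'gs://bucket/logs/ci-foo/7'},
        'gs://bucket/logs/ci-foo/8': {'job': 'ci-foo', 'number': 8, 'started': 2,
                                      'path': 'gs://bucket/logs/ci-foo/8'},
    }
    out = builds_to_columns(builds)
    # Expect the run-length form: two consecutive builds starting at 7, first index 0.
    assert out['jobs']['ci-foo'] == [7, 2, 0]
    return out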

SIG_LABEL_RE = re.compile(r'\[sig-([^]]*)\]')

def annotate_owners(data, builds, owners):
    """
    Assign ownership to a cluster based on the share of hits in the last day.
    """
    owner_re = re.compile(r'(?:%s)' % '|'.join(
        '(?P<%s>%s)' % (
            sig.replace('-', '_'),  # regex group names can't have -
            '|'.join(re.escape(p) for p in prefixes)
        )
        for sig, prefixes in owners.iteritems()
    ))
    job_paths = data['builds']['job_paths']
    yesterday = max(data['builds']['cols']['started']) - (60 * 60 * 24)

    for cluster in data['clustered']:
        owner_counts = {}
        for test in cluster['tests']:
            m = SIG_LABEL_RE.search(test['name'])
            if m:
                owner = m.group(1)
            else:
                m = owner_re.match(normalize_name(test['name']))
                if not m or not m.groupdict():
                    continue
                owner = next(k for k, v in m.groupdict().iteritems() if v)
            owner = owner.replace('_', '-')
            counts = owner_counts.setdefault(owner, [0, 0])
            for job in test['jobs']:
                if ':' in job['name']:  # non-standard CI
                    continue
                job_path = job_paths[job['name']]
                for build in job['builds']:
                    if builds['%s/%d' % (job_path, build)]['started'] > yesterday:
                        counts[0] += 1
                    else:
                        counts[1] += 1
        if owner_counts:
            owner = max(owner_counts.items(), key=lambda (o, c): (c, o))[0]
            cluster['owner'] = owner
        else:
            cluster['owner'] = 'testing'


def render_slice(data, builds, prefix='', owner=''):
    clustered = []
    builds_out = {}
    jobs = set()
    for cluster in data['clustered']:
        # print [cluster['id'], prefix]
        if owner and cluster.get('owner') == owner:
            clustered.append(cluster)
        elif prefix and cluster['id'].startswith(prefix):
            clustered.append(cluster)
        else:
            continue
        for test in cluster['tests']:
            for job in test['jobs']:
                jobs.add(job['name'])
    for path, build in builds.iteritems():
        if build['job'] in jobs:
            builds_out[path] = build
    return {'clustered': clustered, 'builds': builds_to_columns(builds_out)}


def parse_args(args):
    parser = argparse.ArgumentParser()
    parser.add_argument('builds', help='builds.json file from BigQuery')
    parser.add_argument('tests', help='tests.json file from BigQuery')
    parser.add_argument('--previous', help='previous output', type=argparse.FileType('r'))
    parser.add_argument('--owners', help='test owner SIGs', type=argparse.FileType('r'))
    parser.add_argument('--output', default='failure_data.json')
    parser.add_argument('--output_slices',
                        help='Output slices to this path (must include PREFIX in template)')
    return parser.parse_args(args)


def main(args):
    builds, failed_tests = load_failures(args.builds, args.tests)

    previous_clustered = None
    if args.previous:
        print 'loading previous'
        previous_clustered = json.load(args.previous)['clustered']

    clustered_local = cluster_local(failed_tests)
    clustered = cluster_global(clustered_local, previous_clustered)

    print '%d clusters' % len(clustered)

    data = render(builds, clustered)

    if args.owners:
        owners = json.load(args.owners)
        annotate_owners(data, builds, owners)

    json.dump(data, open(args.output, 'w'),
              sort_keys=True)

    if args.output_slices:
        assert 'PREFIX' in args.output_slices
        for subset in range(256):
            id_prefix = '%02x' % subset
            json.dump(render_slice(data, builds, id_prefix),
                      open(args.output_slices.replace('PREFIX', id_prefix), 'w'),
                      sort_keys=True)
        if args.owners:
            owners.setdefault('testing', [])  # for output
            for owner in owners:
                json.dump(render_slice(data, builds, prefix='', owner=owner),
                          open(args.output_slices.replace('PREFIX', 'sig-' + owner), 'w'),
                          sort_keys=True)


if __name__ == '__main__':
    main(parse_args(sys.argv[1:]))
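
# Illustrative invocation (hypothetical file names; the real pipeline supplies its own
# paths). The positional arguments are the BigQuery dumps; --output_slices must contain
# the literal token PREFIX, which is replaced with a two-hex-digit cluster-id prefix or
# a sig-<owner> name for each emitted slice:
#
#   python2 summarize.py builds.json tests.json \
#       --previous failure_data_previous.json \
#       --owners owners.json \
#       --output failure_data.json \
#       --output_slices slices/failure_data_PREFIX.json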