github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/triage/summarize_test.py

#!/usr/bin/env python2

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=invalid-name,missing-docstring

import json
import os
import unittest
import shutil
import tempfile

import summarize


make_test = lambda t: {'failure_text': t}


class StringsTest(unittest.TestCase):
    def test_normalize(self):
        for src, dst in [
                ('0x1234 a 123.13.45.43 b 2e24e003-9ffd-4e78-852c-9dcb6cbef493-123',
                 'UNIQ1 a UNIQ2 b UNIQ3'),
                ('Mon, 12 January 2017 11:34:35 blah blah', 'TIMEblah blah'),
                ('123.45.68.12:345 abcd1234eeee', 'UNIQ1 UNIQ2'),
                ('foobarbaz ' * 500000,
                 'foobarbaz ' * 10000 + '\n...[truncated]...\n' + 'foobarbaz ' * 10000),
        ]:
            self.assertEqual(summarize.normalize(src), dst)

    def test_editdist(self):
        for a, b, expected in [
                ('foob', 'food', 1),
                ('doot', 'dot', 1),
                ('foob', 'f', 3),
                ('foob', 'g', 4),
        ]:
            self.assertEqual(summarize.editdist(a, b), expected, (a, b, expected))

    def test_make_ngram_counts(self):
        self.assertEqual(sum(summarize.make_ngram_counts('abcdefg')), 4)
        self.assertEqual(sum(summarize.make_ngram_counts(u'abcdefg')), 4)
        self.assertEqual(sum(summarize.make_ngram_counts(u'abcdefg\u2006')), 5)

    def test_make_ngram_counts_digest(self):
        # ensure stability of the ngram count digest
        self.assertEqual(summarize.make_ngram_counts_digest('some string'), 'eddb950347d1eb05b5d7')

    def test_ngram_editdist(self):
        self.assertEqual(summarize.ngram_editdist('example text', 'exampl text'), 1)

    def test_common_spans(self):
        for a, b, expected in [
                ('an exact match', 'an exact match', [14]),
                ('some example string', 'some other string', [5, 7, 7]),
                ('a problem with a common set', 'a common set', [2, 7, 1, 4, 13]),
        ]:
            self.assertEqual(summarize.common_spans([a, b]), expected)
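

# Illustration only, not used by the tests: a minimal Levenshtein edit
# distance with the behavior test_editdist above expects of
# summarize.editdist (the real implementation lives in summarize.py).
# It makes the expected values concrete: ('foob', 'food') -> 1 is a single
# substitution, and ('foob', 'f') -> 3 is three deletions.
def _editdist_sketch(a, b):
    prev = range(len(b) + 1)  # distances from the empty prefix of a
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                 # delete ca
                           cur[j - 1] + 1,              # insert cb
                           prev[j - 1] + (ca != cb)))   # substitute ca -> cb
        prev = cur
    return prev[-1]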


class ClusterTest(unittest.TestCase):
    def test_cluster_test(self):
        # small strings don't cluster together, even with tiny differences
        t1 = make_test('exit 1')
        t2 = make_test('exit 2')
        self.assertEqual(summarize.cluster_test([t1, t2]), {'exit 1': [t1], 'exit 2': [t2]})

        t3 = make_test('long message immediately preceding exit code 1')
        t4 = make_test('long message immediately preceding exit code 2')
        self.assertEqual(summarize.cluster_test([t3, t4]), {t3['failure_text']: [t3, t4]})

        t5 = make_test('1 2 ' * 40000)
        t6 = make_test('1 2 ' * 39999 + '3 4 ')

        self.assertEqual(summarize.cluster_test([t1, t5, t6]),
                         {t1['failure_text']: [t1], t5['failure_text']: [t5, t6]})

    @staticmethod
    def cluster_global(clustered, previous_clustered=None):
        # call the undecorated function through __wrapped__ so the tests
        # bypass the decorator summarize applies to cluster_global
        return summarize.cluster_global.__wrapped__(clustered, previous_clustered)

    def test_cluster_global(self):
        t1 = make_test('exit 1')
        t2 = make_test('exit 1')
        t3 = make_test('exit 1')

        self.assertEqual(
            self.cluster_global({'test a': {'exit 1': [t1, t2]}, 'test b': {'exit 1': [t3]}}),
            {'exit 1': {'test a': [t1, t2], 'test b': [t3]}})

    def test_cluster_global_previous(self):
        # clusters are stable when provided with previous seeds
        textOld = 'some long failure message that changes occasionally foo'
        textNew = textOld.replace('foo', 'bar')
        t1 = make_test(textNew)

        self.assertEqual(
            self.cluster_global({'test a': {textNew: [t1]}}, [{'key': textOld}]),
            {textOld: {'test a': [t1]}})

    def test_annotate_owners(self):
        def expect(test, owner, owners=None):
            now = 1.5e9
            data = {
                'builds': {
                    'job_paths': {'somejob': '/logs/somejob'},
                    'cols': {'started': [now]}
                },
                'clustered': [
                    {'tests': [{'name': test, 'jobs': [{'name': 'somejob', 'builds': [123]}]}]}
                ],
            }
            summarize.annotate_owners(
                data, {'/logs/somejob/123': {'started': now}}, owners or {})

            self.assertEqual(owner, data['clustered'][0]['owner'])

        expect('[sig-node] Node reboots', 'node')
        expect('unknown test name', 'testing')
        expect('Suffixes too [sig-storage]', 'storage')
        expect('Variable test with old-style prefixes', 'node', {'node': ['Variable']})


############ decode JSON without a bunch of unicode garbage
### http://stackoverflow.com/a/33571117
def json_load_byteified(json_text):
    return _byteify(
        json.load(json_text, object_hook=_byteify),
        ignore_dicts=True
    )

def _byteify(data, ignore_dicts=False):
    # if this is a unicode string, return its string representation
    if isinstance(data, unicode):
        return data.encode('utf-8')
    # if this is a list of values, return list of byteified values
    if isinstance(data, list):
        return [_byteify(item, ignore_dicts=True) for item in data]
    # if this is a dictionary, return dictionary of byteified keys and values
    # but only if we haven't already byteified it
    if isinstance(data, dict) and not ignore_dicts:
        return {
            _byteify(key, ignore_dicts=True): _byteify(value, ignore_dicts=True)
            for key, value in data.iteritems()
        }
    # if it's anything else, return it in its original form
    return data
################################
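

# Illustration of the helpers above, not used by the tests: under Python 2,
# json.load returns unicode strings everywhere, so a parsed {u'a': [u'b']}
# would compare unequal to the plain-str literals asserted below.
# json_load_byteified recursively encodes every unicode key and value to
# UTF-8 bytes, yielding {'a': ['b']} instead.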


class IntegrationTest(unittest.TestCase):
    def setUp(self):
        self.tmpdir = tempfile.mkdtemp(prefix='summarize_test_')
        os.chdir(self.tmpdir)

    def tearDown(self):
        shutil.rmtree(self.tmpdir)

    def test_main(self):
        def smear(l):
            "given a list of dictionary deltas, return a list of dictionaries"
            cur = {}
            out = []
            for delta in l:
                cur.update(delta)
                out.append(dict(cur))
            return out
        json.dump(smear([
            {'started': 1234, 'number': 1, 'tests_failed': 1, 'tests_run': 2,
             'elapsed': 4, 'path': 'gs://logs/some-job/1', 'job': 'some-job', 'result': 'SUCCESS'},
            {'number': 2, 'path': 'gs://logs/some-job/2'},
            {'number': 3, 'path': 'gs://logs/some-job/3'},
            {'number': 4, 'path': 'gs://logs/some-job/4'},
            {'number': 5, 'path': 'gs://logs/other-job/5', 'job': 'other-job', 'elapsed': 8},
            {'number': 7, 'path': 'gs://logs/other-job/7', 'result': 'FAILURE'},
        ]), open('builds.json', 'w'))
        json.dump(smear([
            {'name': 'example test', 'build': 'gs://logs/some-job/1',
             'failure_text': 'some awful stack trace exit 1'},
            {'build': 'gs://logs/some-job/2'},
            {'build': 'gs://logs/some-job/3'},
            {'build': 'gs://logs/some-job/4'},
            {'name': 'another test', 'failure_text': 'some other error message'},
            {'name': 'unrelated test', 'build': 'gs://logs/other-job/5'},
            {},  # intentional dupe
            {'build': 'gs://logs/other-job/7'},
        ]), open('tests.json', 'w'))
        json.dump({
            'node': ['example']
        }, open('owners.json', 'w'))
        summarize.main(summarize.parse_args(
            ['builds.json', 'tests.json',
             '--output_slices=failure_data_PREFIX.json',
             '--owners=owners.json']))
        output = json_load_byteified(open('failure_data.json'))

        # uncomment when output changes
        # import pprint; pprint.pprint(output)

        self.assertEqual(
            output['builds'],
            {'cols': {'elapsed': [8, 8, 4, 4, 4, 4],
                      'executor': [None, None, None, None, None, None],
                      'pr': [None, None, None, None, None, None],
                      'result': ['SUCCESS',
                                 'FAILURE',
                                 'SUCCESS',
                                 'SUCCESS',
                                 'SUCCESS',
                                 'SUCCESS'],
                      'started': [1234, 1234, 1234, 1234, 1234, 1234],
                      'tests_failed': [1, 1, 1, 1, 1, 1],
                      'tests_run': [2, 2, 2, 2, 2, 2]},
             'job_paths': {'other-job': 'gs://logs/other-job',
                           'some-job': 'gs://logs/some-job'},
             'jobs': {'other-job': {'5': 0, '7': 1}, 'some-job': [1, 4, 2]}})

        random_hash_1 = output['clustered'][0]['id']
        random_hash_2 = output['clustered'][1]['id']

        self.assertEqual(
            output['clustered'],
            [{'id': random_hash_1,
              'key': 'some awful stack trace exit 1',
              'tests': [{'jobs': [{'builds': [4, 3, 2, 1],
                                   'name': 'some-job'}],
                         'name': 'example test'}],
              'spans': [29],
              'owner': 'node',
              'text': 'some awful stack trace exit 1'},
             {'id': random_hash_2,
              'key': 'some other error message',
              'tests': [{'jobs': [{'builds': [7, 5],
                                   'name': 'other-job'}],
                         'name': 'unrelated test'},
                        {'jobs': [{'builds': [4], 'name': 'some-job'}],
                         'name': 'another test'}],
              'spans': [24],
              'owner': 'testing',
              'text': 'some other error message'}]
        )

        slice_output = json_load_byteified(open('failure_data_%s.json' % random_hash_1[:2]))

        self.assertEqual(slice_output['clustered'], [output['clustered'][0]])
        self.assertEqual(slice_output['builds']['cols']['started'], [1234, 1234, 1234, 1234])


if __name__ == '__main__':
    unittest.main()
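
# Running this file directly assumes summarize.py is importable, e.g. from
# the repository's triage/ directory:
#
#     python2 summarize_test.py
#     python2 -m unittest summarize_test.IntegrationTest  # a single case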