github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/triage/summarize_test.py

#!/usr/bin/env python2

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=invalid-name,missing-docstring

import json
import os
import unittest
import shutil
import tempfile

import summarize


make_test = lambda t: {'failure_text': t}


class StringsTest(unittest.TestCase):
    def test_normalize(self):
        for src, dst in [
                ('0x1234 a 123.13.45.43 b 2e24e003-9ffd-4e78-852c-9dcb6cbef493-123',
                 'UNIQ1 a UNIQ2 b UNIQ3'),
                ('Mon, 12 January 2017 11:34:35 blah blah', 'TIMEblah blah'),
                ('123.45.68.12:345 abcd1234eeee', 'UNIQ1 UNIQ2'),
                ('foobarbaz ' * 500000,
                 'foobarbaz ' * 10000 + '\n...[truncated]...\n' + 'foobarbaz ' * 10000),
        ]:
            self.assertEqual(summarize.normalize(src), dst)

    def test_editdist(self):
        for a, b, expected in [
                ('foob', 'food', 1),
                ('doot', 'dot', 1),
                ('foob', 'f', 3),
                ('foob', 'g', 4),
        ]:
            self.assertEqual(summarize.editdist(a, b), expected, (a, b, expected))

    def test_make_ngram_counts(self):
        self.assertEqual(sum(summarize.make_ngram_counts('abcdefg')), 4)
        self.assertEqual(sum(summarize.make_ngram_counts(u'abcdefg')), 4)
        self.assertEqual(sum(summarize.make_ngram_counts(u'abcdefg\u2006')), 5)

    def test_make_ngram_counts_digest(self):
        # ensure stability of the ngram count digest
        self.assertEqual(summarize.make_ngram_counts_digest('some string'), 'eddb950347d1eb05b5d7')

    def test_ngram_editdist(self):
        self.assertEqual(summarize.ngram_editdist('example text', 'exampl text'), 1)

    def test_common_spans(self):
        for a, b, expected in [
                ('an exact match', 'an exact match', [14]),
                ('some example string', 'some other string', [5, 7, 7]),
                ('a problem with a common set', 'a common set', [2, 7, 1, 4, 13]),
        ]:
            self.assertEqual(summarize.common_spans([a, b]), expected)
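

# Illustration only, not used by the tests: a minimal Levenshtein edit
# distance with the behavior test_editdist above expects of
# summarize.editdist (the real implementation lives in summarize.py).
# It makes the expected values concrete: ('foob', 'food') -> 1 is a single
# substitution, and ('foob', 'f') -> 3 is three deletions.
def _editdist_sketch(a, b):
    prev = range(len(b) + 1)  # distances from the empty prefix of a
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                 # delete ca
                           cur[j - 1] + 1,              # insert cb
                           prev[j - 1] + (ca != cb)))   # substitute ca -> cb
        prev = cur
    return prev[-1]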


class ClusterTest(unittest.TestCase):
    def test_cluster_test(self):
        # small strings don't cluster together, even with tiny differences
        t1 = make_test('exit 1')
        t2 = make_test('exit 2')
        self.assertEqual(summarize.cluster_test([t1, t2]), {'exit 1': [t1], 'exit 2': [t2]})

        t3 = make_test('long message immediately preceding exit code 1')
        t4 = make_test('long message immediately preceding exit code 2')
        self.assertEqual(summarize.cluster_test([t3, t4]), {t3['failure_text']: [t3, t4]})

        t5 = make_test('1 2 ' * 40000)
        t6 = make_test('1 2 ' * 39999 + '3 4 ')

        self.assertEqual(summarize.cluster_test([t1, t5, t6]),
                         {t1['failure_text']: [t1], t5['failure_text']: [t5, t6]})

    @staticmethod
    def cluster_global(clustered, previous_clustered=None):
        # call the undecorated function through __wrapped__ so the tests
        # bypass the decorator summarize applies to cluster_global
        return summarize.cluster_global.__wrapped__(clustered, previous_clustered)

    def test_cluster_global(self):
        t1 = make_test('exit 1')
        t2 = make_test('exit 1')
        t3 = make_test('exit 1')

        self.assertEqual(
            self.cluster_global({'test a': {'exit 1': [t1, t2]}, 'test b': {'exit 1': [t3]}}),
            {'exit 1': {'test a': [t1, t2], 'test b': [t3]}})

    def test_cluster_global_previous(self):
        # clusters are stable when provided with previous seeds
        textOld = 'some long failure message that changes occasionally foo'
        textNew = textOld.replace('foo', 'bar')
        t1 = make_test(textNew)

        self.assertEqual(
            self.cluster_global({'test a': {textNew: [t1]}}, [{'key': textOld}]),
            {textOld: {'test a': [t1]}})

    def test_annotate_owners(self):
        def expect(test, owner, owners=None):
            now = 1.5e9
            data = {
                'builds': {
                    'job_paths': {'somejob': '/logs/somejob'},
                    'cols': {'started': [now]}
                },
                'clustered': [
                    {'tests': [{'name': test, 'jobs': [{'name': 'somejob', 'builds': [123]}]}]}
                ],
            }
            summarize.annotate_owners(
                data, {'/logs/somejob/123': {'started': now}}, owners or {})

            self.assertEqual(owner, data['clustered'][0]['owner'])

        expect('[sig-node] Node reboots', 'node')
        expect('unknown test name', 'testing')
        expect('Suffixes too [sig-storage]', 'storage')
        expect('Variable test with old-style prefixes', 'node', {'node': ['Variable']})


############ decode JSON without a bunch of unicode garbage
### http://stackoverflow.com/a/33571117
def json_load_byteified(json_text):
    return _byteify(
        json.load(json_text, object_hook=_byteify),
        ignore_dicts=True
    )

def _byteify(data, ignore_dicts=False):
    # if this is a unicode string, return its string representation
    if isinstance(data, unicode):
        return data.encode('utf-8')
    # if this is a list of values, return list of byteified values
    if isinstance(data, list):
        return [_byteify(item, ignore_dicts=True) for item in data]
    # if this is a dictionary, return dictionary of byteified keys and values
    # but only if we haven't already byteified it
    if isinstance(data, dict) and not ignore_dicts:
        return {
            _byteify(key, ignore_dicts=True): _byteify(value, ignore_dicts=True)
            for key, value in data.iteritems()
        }
    # if it's anything else, return it in its original form
    return data
################################
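

# Illustration of the helpers above, not used by the tests: under Python 2,
# json.load returns unicode strings everywhere, so a parsed {u'a': [u'b']}
# would compare unequal to the plain-str literals asserted below.
# json_load_byteified recursively encodes every unicode key and value to
# UTF-8 bytes, yielding {'a': ['b']} instead.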


class IntegrationTest(unittest.TestCase):
    def setUp(self):
        self.tmpdir = tempfile.mkdtemp(prefix='summarize_test_')
        os.chdir(self.tmpdir)

    def tearDown(self):
        shutil.rmtree(self.tmpdir)

    def test_main(self):
        def smear(l):
            "given a list of dictionary deltas, return a list of dictionaries"
            cur = {}
            out = []
            for delta in l:
                cur.update(delta)
                out.append(dict(cur))
            return out
        json.dump(smear([
            {'started': 1234, 'number': 1, 'tests_failed': 1, 'tests_run': 2,
             'elapsed': 4, 'path': 'gs://logs/some-job/1', 'job': 'some-job', 'result': 'SUCCESS'},
            {'number': 2, 'path': 'gs://logs/some-job/2'},
            {'number': 3, 'path': 'gs://logs/some-job/3'},
            {'number': 4, 'path': 'gs://logs/some-job/4'},
            {'number': 5, 'path': 'gs://logs/other-job/5', 'job': 'other-job', 'elapsed': 8},
            {'number': 7, 'path': 'gs://logs/other-job/7', 'result': 'FAILURE'},
        ]), open('builds.json', 'w'))
        json.dump(smear([
            {'name': 'example test', 'build': 'gs://logs/some-job/1',
             'failure_text': 'some awful stack trace exit 1'},
            {'build': 'gs://logs/some-job/2'},
            {'build': 'gs://logs/some-job/3'},
            {'build': 'gs://logs/some-job/4'},
            {'name': 'another test', 'failure_text': 'some other error message'},
            {'name': 'unrelated test', 'build': 'gs://logs/other-job/5'},
            {},  # intentional dupe
            {'build': 'gs://logs/other-job/7'},
        ]), open('tests.json', 'w'))
        json.dump({
            'node': ['example']
        }, open('owners.json', 'w'))
        summarize.main(summarize.parse_args(
            ['builds.json', 'tests.json',
             '--output_slices=failure_data_PREFIX.json',
             '--owners=owners.json']))
        output = json_load_byteified(open('failure_data.json'))

        # uncomment when output changes
        # import pprint; pprint.pprint(output)

        self.assertEqual(
            output['builds'],
            {'cols': {'elapsed': [8, 8, 4, 4, 4, 4],
                      'executor': [None, None, None, None, None, None],
                      'pr': [None, None, None, None, None, None],
                      'result': ['SUCCESS',
                                 'FAILURE',
                                 'SUCCESS',
                                 'SUCCESS',
                                 'SUCCESS',
                                 'SUCCESS'],
                      'started': [1234, 1234, 1234, 1234, 1234, 1234],
                      'tests_failed': [1, 1, 1, 1, 1, 1],
                      'tests_run': [2, 2, 2, 2, 2, 2]},
             'job_paths': {'other-job': 'gs://logs/other-job',
                           'some-job': 'gs://logs/some-job'},
             'jobs': {'other-job': {'5': 0, '7': 1}, 'some-job': [1, 4, 2]}})

        random_hash_1 = output['clustered'][0]['id']
        random_hash_2 = output['clustered'][1]['id']

        self.assertEqual(
            output['clustered'],
            [{'id': random_hash_1,
              'key': 'some awful stack trace exit 1',
              'tests': [{'jobs': [{'builds': [4, 3, 2, 1],
                                   'name': 'some-job'}],
                         'name': 'example test'}],
              'spans': [29],
              'owner': 'node',
              'text': 'some awful stack trace exit 1'},
             {'id': random_hash_2,
              'key': 'some other error message',
              'tests': [{'jobs': [{'builds': [7, 5],
                                   'name': 'other-job'}],
                         'name': 'unrelated test'},
                        {'jobs': [{'builds': [4], 'name': 'some-job'}],
                         'name': 'another test'}],
              'spans': [24],
              'owner': 'testing',
              'text': 'some other error message'}]
        )

        slice_output = json_load_byteified(open('failure_data_%s.json' % random_hash_1[:2]))

        self.assertEqual(slice_output['clustered'], [output['clustered'][0]])
        self.assertEqual(slice_output['builds']['cols']['started'], [1234, 1234, 1234, 1234])


if __name__ == '__main__':
    unittest.main()
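
# Running this file directly assumes summarize.py is importable, e.g. from
# the repository's triage/ directory:
#
#     python2 summarize_test.py
#     python2 -m unittest summarize_test.IntegrationTest  # a single case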