github.com/yrj2011/jx-test-infra@v0.0.0-20190529031832-7a2065ee98eb/kettle/make_json.py

#!/usr/bin/env python

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Generate JSON for BigQuery importing."""

import argparse
import logging
import json
import os
import subprocess
import sys
import time

try:
    import defusedxml.ElementTree as ET
except ImportError:
    import xml.etree.cElementTree as ET

import model


def parse_junit(xml):
    """Generate failed tests as a series of dicts. Ignore skipped tests."""
    # NOTE: this is modified from gubernator/view_build.py
    tree = ET.fromstring(xml)

    # pylint: disable=redefined-outer-name

    def make_result(name, time, failure_text):
        if failure_text:
            if time is None:
                return {'name': name, 'failed': True, 'failure_text': failure_text}
            return {'name': name, 'time': time, 'failed': True, 'failure_text': failure_text}
        if time is None:
            return {'name': name}
        return {'name': name, 'time': time}

    # Note: skipped tests are ignored because they make rows too large for BigQuery.
    # Knowing that a given build could have run a test but didn't for some reason
    # isn't very interesting.
    if tree.tag == 'testsuite':
        for child in tree.findall('testcase'):
            name = child.attrib['name']
            time = float(child.attrib['time'] or 0)
            failure_text = None
            for param in child.findall('failure'):
                failure_text = param.text
            skipped = child.findall('skipped')
            if skipped:
                continue
            yield make_result(name, time, failure_text)
    elif tree.tag == 'testsuites':
        for testsuite in tree:
            suite_name = testsuite.attrib['name']
            for child in testsuite.findall('testcase'):
                name = '%s %s' % (suite_name, child.attrib['name'])
                time = float(child.attrib['time'] or 0)
                failure_text = None
                for param in child.findall('failure'):
                    failure_text = param.text
                skipped = child.findall('skipped')
                if skipped:
                    continue
                yield make_result(name, time, failure_text)
    else:
        logging.error('unable to find failures, unexpected tag %s', tree.tag)
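

# A minimal worked example (hypothetical input): given a JUnit fragment like
#
#   <testsuite>
#     <testcase name="TestA" time="2.5"/>
#     <testcase name="TestB" time="0.1"><failure>boom</failure></testcase>
#     <testcase name="TestC" time="0"><skipped/></testcase>
#   </testsuite>
#
# parse_junit yields {'name': 'TestA', 'time': 2.5} and
# {'name': 'TestB', 'time': 0.1, 'failed': True, 'failure_text': 'boom'},
# and drops TestC entirely because it has a <skipped/> child.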


# pypy compatibility hack
BUCKETS = json.loads(subprocess.check_output(
    ['python2', '-c', 'import json,yaml; print json.dumps(yaml.load(open("buckets.yaml")))'],
    cwd=os.path.dirname(os.path.abspath(__file__))))


def path_to_job_and_number(path):
    assert not path.endswith('/')
    for bucket, meta in BUCKETS.iteritems():
        if path.startswith(bucket):
            prefix = meta['prefix']
            break
    else:
        if path.startswith('gs://kubernetes-jenkins/pr-logs'):
            prefix = 'pr:'
        else:
            raise ValueError('unknown build path')
    build = os.path.basename(path)
    job = prefix + os.path.basename(os.path.dirname(path))
    try:
        return job, int(build)
    except ValueError:
        return job, None
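

# A minimal worked example (hypothetical paths, assuming buckets.yaml has no
# entry matching them, so the pr-logs fallback supplies the 'pr:' prefix):
#
#   path_to_job_and_number('gs://kubernetes-jenkins/pr-logs/pull/123/pull-foo/456')
#       -> ('pr:pull-foo', 456)
#   path_to_job_and_number('gs://kubernetes-jenkins/pr-logs/pull/123/pull-foo/latest')
#       -> ('pr:pull-foo', None)   # non-numeric leaf => no build number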


def row_for_build(path, started, finished, results):
    tests = []
    for result in results:
        for test in parse_junit(result):
            if '#' in test['name'] and not test.get('failed'):
                continue  # skip successful repeated tests
            tests.append(test)
    build = {
        'path': path,
        'test': tests,
        'tests_run': len(tests),
        'tests_failed': sum(t.get('failed', 0) for t in tests)
    }
    job, number = path_to_job_and_number(path)
    build['job'] = job
    if number:
        build['number'] = number

    if started:
        build['started'] = int(started['timestamp'])
        if 'node' in started:
            build['executor'] = started['node']
    if finished:
        build['finished'] = int(finished['timestamp'])
        if 'result' in finished:
            build['result'] = finished['result']
            build['passed'] = build['result'] == 'SUCCESS'
        elif isinstance(finished.get('passed'), bool):
            build['passed'] = finished['passed']
            build['result'] = 'SUCCESS' if build['passed'] else 'FAILURE'
        if 'version' in finished:
            build['version'] = finished['version']

    def get_metadata():
        metadata = None
        if finished and 'metadata' in finished:
            metadata = finished['metadata']
        elif started:
            metadata = started.get('metadata')
        if metadata:
            # clean useless/duplicated metadata fields
            if 'repo' in metadata and not metadata['repo']:
                metadata.pop('repo')
            build_version = build.get('version', 'N/A')
            if metadata.get('job-version') == build_version:
                metadata.pop('job-version')
            if metadata.get('version') == build_version:
                metadata.pop('version')
            for key, value in metadata.items():
                if not isinstance(value, basestring):
                    # the schema specifies a string value. force it!
                    metadata[key] = json.dumps(value)
        if not metadata:
            return None
        return [{'key': k, 'value': v} for k, v in sorted(metadata.items())]

    metadata = get_metadata()
    if metadata:
        build['metadata'] = metadata
    if started and finished:
        build['elapsed'] = build['finished'] - build['started']
    return build


def get_table(days):
    if days:
        return ('build_emitted_%g' % days).replace('.', '_')
    return 'build_emitted'


def parse_args(args):
    parser = argparse.ArgumentParser()
    parser.add_argument('--days', type=float, default=0,
                        help='Grab data for builds within N days')
    parser.add_argument('--assert-oldest', type=float,
                        help='Exit nonzero if a build older than X days was emitted previously.')
    parser.add_argument('--reset-emitted', action='store_true',
                        help='Clear list of already-emitted builds.')
    parser.add_argument('paths', nargs='*',
                        help='Optional list of gs:// paths to dump rows for.')
    return parser.parse_args(args)


def make_rows(db, builds):
    for rowid, path, started, finished in builds:
        try:
            results = db.test_results_for_build(path)
            yield rowid, row_for_build(path, started, finished, results)
        except IOError:
            return
        except:  # pylint: disable=bare-except
            logging.exception('error on %s', path)


def main(db, opts, outfile):
    min_started = None
    if opts.days:
        min_started = time.time() - (opts.days or 1) * 24 * 60 * 60
    incremental_table = get_table(opts.days)

    if opts.assert_oldest:
        oldest = db.get_oldest_emitted(incremental_table)
        if oldest < time.time() - opts.assert_oldest * 24 * 60 * 60:
            return 1
        return 0

    if opts.reset_emitted:
        db.reset_emitted(incremental_table)

    if opts.paths:
        # When asking for rows for specific builds, use a dummy table and clear it first.
        incremental_table = 'incremental_manual'
        db.reset_emitted(incremental_table)
        builds = list(db.get_builds_from_paths(opts.paths, incremental_table))
    else:
        builds = db.get_builds(min_started=min_started, incremental_table=incremental_table)

    rows_emitted = set()
    for rowid, row in make_rows(db, builds):
        json.dump(row, outfile, sort_keys=True)
        outfile.write('\n')
        rows_emitted.add(rowid)

    if rows_emitted:
        gen = db.insert_emitted(rows_emitted, incremental_table=incremental_table)
        print >>sys.stderr, 'incremental progress gen #%d' % gen
    else:
        print >>sys.stderr, 'no rows emitted'


if __name__ == '__main__':
    DB = model.Database()
    OPTIONS = parse_args(sys.argv[1:])
    sys.exit(main(DB, OPTIONS, sys.stdout))
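
# A minimal usage sketch (invocation, paths, and values are illustrative, not
# defined in this file): the script writes one JSON object per line to stdout,
# i.e. newline-delimited JSON of the kind BigQuery's load tooling accepts.
#
#   pypy make_json.py --days 1 > build_rows.json
#
# Assuming buckets.yaml maps gs://kubernetes-jenkins/logs to an empty prefix,
# an emitted row (keys sorted by json.dump) would look roughly like:
#
#   {"elapsed": 120, "finished": 1500000120, "job": "some-job", "number": 123,
#    "passed": true, "path": "gs://kubernetes-jenkins/logs/some-job/123",
#    "result": "SUCCESS", "started": 1500000000,
#    "test": [{"failed": true, "failure_text": "boom", "name": "TestB",
#              "time": 0.1}],
#    "tests_failed": 1, "tests_run": 1}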