k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/gubernator/view_build.py

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import json
import os
import re

import defusedxml.ElementTree as ET

from google.appengine.api import urlfetch

import gcs_async
from github import models
import log_parser
import testgrid
import view_base


class JUnitParser(object):
    def __init__(self):
        self.skipped = []
        self.passed = []
        self.failed = []

    def handle_suite(self, tree, filename):
        for subelement in tree:
            if subelement.tag == 'testsuite':
                self.handle_suite(subelement, filename)
            elif subelement.tag == 'testcase':
                if 'name' in tree.attrib:
                    name_prefix = tree.attrib['name'] + ' '
                else:
                    name_prefix = ''
                self.handle_test(subelement, filename, name_prefix)

    def handle_test(self, child, filename, name_prefix=''):
        name = name_prefix + child.attrib['name']
        if child.find('skipped') is not None:
            self.skipped.append(name)
        elif child.find('failure') is not None:
            time = 0.0
            if 'time' in child.attrib:
                time = float(child.attrib['time'])
            out = []
            for param in child.findall('system-out') + child.findall('system-err'):
                if param.text:
                    out.append(param.text)
            for param in child.findall('failure'):
                self.failed.append((name, time, param.text, filename, '\n'.join(out)))
        else:
            self.passed.append(name)

    def parse_xml(self, xml, filename):
        if not xml:
            return  # can't extract results from nothing!
        try:
            tree = ET.fromstring(xml)
        except ET.ParseError:
            logging.exception('parse_junit failed for %s', filename)
            # Replace NUL and non-ASCII bytes (which sometimes corrupt junit
            # output) with '?' and retry the parse once.
            try:
                tree = ET.fromstring(re.sub(r'[\x00\x80-\xFF]+', '?', xml))
            except ET.ParseError as e:
                if re.match(r'junit.*\.xml', os.path.basename(filename)):
                    self.failed.append(
                        ('Gubernator Internal Fatal XML Parse Error', 0.0, str(e), filename, ''))
                return
        if tree.tag == 'testsuite':
            self.handle_suite(tree, filename)
        elif tree.tag == 'testsuites':
            for testsuite in tree:
                self.handle_suite(testsuite, filename)
        else:
            logging.error('unable to find failures, unexpected tag %s', tree.tag)

    def get_results(self):
        self.failed.sort()
        self.skipped.sort()
        self.passed.sort()
        return {
            'failed': self.failed,
            'skipped': self.skipped,
            'passed': self.passed,
        }
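
# Illustrative use of JUnitParser, mirroring how build_details() below drives
# it (a sketch; the artifact name is hypothetical):
#
#   parser = JUnitParser()
#   parser.parse_xml(xml_blob, 'artifacts/junit_01.xml')
#   parser.get_results()
#   # -> {'failed': [...], 'skipped': [...], 'passed': [...]}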


@view_base.memcache_memoize('build-log-parsed://', expires=60*60*4)
def get_build_log(build_dir):
    build_log = gcs_async.read(build_dir + '/build-log.txt').get_result()
    if build_log:
        return log_parser.digest(build_log)


def get_running_build_log(job, build, prow_url):
    try:
        url = "https://%s/log?job=%s&id=%s" % (prow_url, job, build)
        result = urlfetch.fetch(url)
        if result.status_code == 200:
            return log_parser.digest(result.content), url
    except urlfetch.Error:
        logging.exception('Caught exception fetching url')
    return None, None


def normalize_metadata(started_future, finished_future):
    """
    Munge and normalize the output of loading started.json
    and finished.json files from a GCS bucket.

    :param started_future: future from gcs_async.read()
    :param finished_future: future from gcs_async.read()
    :return: started, finished dictionaries
    """
    started = started_future.get_result()
    finished = finished_future.get_result()
    if finished and not started:
        started = 'null'  # json.loads('null') -> None
    elif started and not finished:
        finished = 'null'
    elif not (started and finished):
        return None, None
    started = json.loads(started)
    finished = json.loads(finished)

    if finished is not None:
        # we want to allow users pushing to GCS to
        # provide us either passed or result, but not
        # require either (or both)
        if 'result' in finished and 'passed' not in finished:
            finished['passed'] = finished['result'] == 'SUCCESS'

        if 'passed' in finished and 'result' not in finished:
            finished['result'] = 'SUCCESS' if finished['passed'] else 'FAILURE'

    return started, finished
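
# Worked example for normalize_metadata (values hypothetical): if
# finished.json exists but only carries 'result', 'passed' is backfilled:
#
#   started  = {'timestamp': 1469739568}
#   finished = {'timestamp': 1469740368, 'result': 'SUCCESS'}
#   -> finished becomes {'timestamp': 1469740368, 'result': 'SUCCESS', 'passed': True}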


@view_base.memcache_memoize('build-details://', expires=60)
def build_details(build_dir, recursive=False):
    """
    Collect information from a build directory.

    Args:
        build_dir: GCS path containing a build's results.
        recursive: Whether to scan artifacts recursively for XML files.
    Returns:
        started: value from started.json {'version': ..., 'timestamp': ...}
        finished: value from finished.json {'timestamp': ..., 'result': ...}
        results: {failed: [(name, duration, text)...],
                  skipped: [name...],
                  passed: [name...]}
    """
    started, finished = normalize_metadata(
        gcs_async.read(build_dir + '/started.json'),
        gcs_async.read(build_dir + '/finished.json')
    )

    if started is None and finished is None:
        return started, finished, None

    if recursive:
        artifact_paths = view_base.gcs_ls_recursive('%s/artifacts' % build_dir)
    else:
        artifact_paths = view_base.gcs_ls('%s/artifacts' % build_dir)

    junit_paths = [f.filename for f in artifact_paths if f.filename.endswith('.xml')]

    junit_futures = {f: gcs_async.read(f) for f in junit_paths}

    parser = JUnitParser()
    for path, future in junit_futures.iteritems():
        parser.parse_xml(future.get_result(), path)
    return started, finished, parser.get_results()


def parse_pr_path(gcs_path, default_org, default_repo):
    """
    Parse a GCS bucket directory into metadata. We
    allow for two short-form names and one long one:

        gs://<pull_prefix>/<pull_number>
            -- this fills in the default repo and org

        gs://<pull_prefix>/repo/<pull_number>
            -- this fills in the default org

        gs://<pull_prefix>/org_repo/<pull_number>

    :param gcs_path: GCS bucket directory for a build
    :return: tuple of:
        - PR number
        - Gubernator PR link
        - PR repo
    """
    pull_number = os.path.basename(gcs_path)
    parsed_repo = os.path.basename(os.path.dirname(gcs_path))
    if parsed_repo == 'pull':
        pr_path = ''
        repo = '%s/%s' % (default_org, default_repo)
    elif '_' not in parsed_repo:
        pr_path = parsed_repo + '/'
        repo = '%s/%s' % (default_org, parsed_repo)
    else:
        pr_path = parsed_repo.replace('_', '/', 1) + '/'
        repo = parsed_repo.replace('_', '/', 1)
    return pull_number, pr_path, repo
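
# Worked examples for parse_pr_path (paths and defaults hypothetical), with
# default_org='kubernetes' and default_repo='kubernetes':
#
#   'bucket/pr-logs/pull/12345'            -> ('12345', '',            'kubernetes/kubernetes')
#   'bucket/pr-logs/pull/test-infra/12345' -> ('12345', 'test-infra/', 'kubernetes/test-infra')
#   'bucket/pr-logs/pull/org_repo/12345'   -> ('12345', 'org/repo/',   'org/repo')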


class BuildHandler(view_base.BaseHandler):
    """Show information about a Build and its failing tests."""
    def get(self, prefix, job, build):
        # pylint: disable=too-many-locals
        if prefix.endswith('/directory'):
            # redirect directory requests
            link = gcs_async.read('/%s/%s/%s.txt' % (prefix, job, build)).get_result()
            if link and link.startswith('gs://'):
                self.redirect('/build/' + link.replace('gs://', ''))
            return

        job_dir = '/%s/%s/' % (prefix, job)
        testgrid_query = testgrid.path_to_query(job_dir)
        build_dir = job_dir + build
        issues_fut = models.GHIssueDigest.find_xrefs_async(build_dir)
        started, finished, results = build_details(
            build_dir, self.app.config.get('recursive_artifacts', True))
        if started is None and finished is None:
            logging.warning('unable to load %s', build_dir)
            self.render(
                'build_404.html',
                dict(build_dir=build_dir, job_dir=job_dir, job=job, build=build))
            self.response.set_status(404)
            return

        want_build_log = False
        build_log = ''
        build_log_src = None
        if 'log' in self.request.params or (not finished) or \
            (finished and finished.get('result') != 'SUCCESS' and len(results['failed']) <= 1):
            want_build_log = True
            build_log = get_build_log(build_dir)

        pr, pr_path, pr_digest = None, None, None
        repo = '%s/%s' % (self.app.config['default_org'],
                          self.app.config['default_repo'])
        spyglass_link = ''
        external_config = get_build_config(prefix, self.app.config)
        if external_config is not None:
            if external_config.get('spyglass'):
                spyglass_link = 'https://' + external_config['prow_url'] + '/view/gs/' + build_dir
            if '/pull/' in prefix:
                pr, pr_path, pr_digest, repo = get_pr_info(prefix, self.app.config)
            if want_build_log and not build_log:
                build_log, build_log_src = get_running_build_log(job, build,
                                                                 external_config["prow_url"])

        # 'revision' might be in either started or finished.
        # prefer finished.
        version = finished and finished.get('revision') or started and started.get('revision')
        commit = version and version.split('+')[-1]

        refs = []
        if started and started.get('pull'):
            for ref in started['pull'].split(','):
                x = ref.split(':', 1)
                if len(x) == 2:
                    refs.append((x[0], x[1]))
                else:
                    refs.append((x[0], ''))

        self.render('build.html', dict(
            job_dir=job_dir, build_dir=build_dir, job=job, build=build,
            commit=commit, started=started, finished=finished,
            res=results, refs=refs,
            build_log=build_log, build_log_src=build_log_src,
            issues=issues_fut.get_result(), repo=repo,
            pr_path=pr_path, pr=pr, pr_digest=pr_digest,
            testgrid_query=testgrid_query, spyglass_link=spyglass_link))


def get_build_config(prefix, config):
    for item in config['external_services'].values() + [config['default_external_services']]:
        if prefix.startswith(item['gcs_pull_prefix']):
            return item
        if 'gcs_bucket' in item and prefix.startswith(item['gcs_bucket']):
            return item
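
# Each entry in config['external_services'] (and default_external_services) is
# expected to look roughly like the sketch below, judging from how this file
# reads it; 'gcs_bucket' and 'spyglass' are optional, all values hypothetical:
#
#   {'gcs_pull_prefix': 'some-bucket/pr-logs/pull/',
#    'gcs_bucket': 'some-bucket',
#    'prow_url': 'prow.example.com',
#    'spyglass': True}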


def get_pr_info(prefix, config):
    if config is not None:
        pr, pr_path, repo = parse_pr_path(
            gcs_path=prefix,
            default_org=config['default_org'],
            default_repo=config['default_repo'],
        )
        pr_digest = models.GHIssueDigest.get(repo, pr)
        return pr, pr_path, pr_digest, repo


def get_running_pr_log(job, build, config):
    if config is not None:
        return get_running_build_log(job, build, config["prow_url"])


def get_build_numbers(job_dir, before, indirect):
    fstats = view_base.gcs_ls(job_dir)
    fstats.sort(key=lambda f: view_base.pad_numbers(f.filename),
                reverse=True)
    if indirect:
        # find numbered builds
        builds = [re.search(r'/(\d*)\.txt$', f.filename)
                  for f in fstats if not f.is_dir]
        builds = [m.group(1) for m in builds if m]
    else:
        builds = [os.path.basename(os.path.dirname(f.filename))
                  for f in fstats if f.is_dir]
    if before and before in builds:
        builds = builds[builds.index(before) + 1:]
    return builds[:40]


@view_base.memcache_memoize('build-list://', expires=60)
def build_list(job_dir, before):
    """
    Given a job dir, give a (partial) list of recent builds'
    started.json & finished.json contents.

    Args:
        job_dir: the GCS path holding the jobs
        before: only list builds older than this build number
    Returns:
        a tuple of:
        - a list of [(build, loc, started, finished)], where
          build is a string like "123",
          loc is the job directory and build, and
          started/finished are either None or the parsed JSON dict
        - a dict of {build: [issues...]} of xrefs
    """
    # pylint: disable=too-many-locals

    # /directory/ folders have a series of .txt files pointing at the correct
    # location, as a sort of fake symlink.
    indirect = '/directory/' in job_dir

    builds = get_build_numbers(job_dir, before, indirect)

    if indirect:
        # follow the indirect links
        build_symlinks = [
            (build,
             gcs_async.read('%s%s.txt' % (job_dir, build)))
            for build in builds
        ]
        build_futures = []
        for build, sym_fut in build_symlinks:
            redir = sym_fut.get_result()
            if redir and redir.startswith('gs://'):
                redir = redir[4:].strip()  # 'gs://bucket/path' -> '/bucket/path'
                build_futures.append(
                    (build, redir,
                     gcs_async.read('%s/started.json' % redir),
                     gcs_async.read('%s/finished.json' % redir)))
    else:
        build_futures = [
            (build, '%s%s' % (job_dir, build),
             gcs_async.read('%s%s/started.json' % (job_dir, build)),
             gcs_async.read('%s%s/finished.json' % (job_dir, build)))
            for build in builds
        ]

    # This is done in parallel with waiting for GCS started/finished.
    build_refs = models.GHIssueDigest.find_xrefs_multi_async(
        [b[1] for b in build_futures])

    output = []
    for build, loc, started_future, finished_future in build_futures:
        started, finished = normalize_metadata(started_future, finished_future)
        output.append((str(build), loc, started, finished))

    return output, build_refs.get_result()


class BuildListHandler(view_base.BaseHandler):
    """Show a list of Builds for a Job."""
    def get(self, prefix, job):
        job_dir = '/%s/%s/' % (prefix, job)
        testgrid_query = testgrid.path_to_query(job_dir)
        before = self.request.get('before')
        builds, refs = build_list(job_dir, before)
        dir_link = re.sub(r'/pull/.*', '/directory/%s' % job, prefix)

        self.render('build_list.html',
                    dict(job=job, job_dir=job_dir, dir_link=dir_link,
                         testgrid_query=testgrid_query,
                         builds=builds, refs=refs,
                         before=before))


class JobListHandler(view_base.BaseHandler):
    """Show a list of Jobs in a directory."""
    def get(self, prefix):
        jobs_dir = '/%s' % prefix
        fstats = view_base.gcs_ls(jobs_dir)
        fstats.sort()
        self.render('job_list.html', dict(jobs_dir=jobs_dir, fstats=fstats))


class GcsProxyHandler(view_base.BaseHandler):
    """Proxy results from GCS.

    Useful for buckets that don't have public read permissions."""
    def get(self):
        # let's lock this down to build logs for now.
        path = self.request.get('path')
        if not re.match(r'^[-\w/.]+$', path):
            self.abort(403)
        if not path.endswith('/build-log.txt'):
            self.abort(403)
        content = gcs_async.read(path).get_result()
        # lazy XSS prevention.
        # doesn't work on terrible browsers that do content sniffing (ancient IE).
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.write(content)
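
# Example request against GcsProxyHandler (the route name is hypothetical and
# wired up elsewhere in the app; 'path' is the query parameter the handler
# actually reads):
#
#   GET /gcsproxy?path=/some-bucket/logs/some-job/1234/build-log.txt
#
# Paths containing characters outside [-\w/.], or not ending in
# /build-log.txt, are rejected with a 403.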