k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/gubernator/view_build.py

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import json
import os
import re

import defusedxml.ElementTree as ET

from google.appengine.api import urlfetch

import gcs_async
from github import models
import log_parser
import testgrid
import view_base


class JUnitParser(object):
    def __init__(self):
        self.skipped = []
        self.passed = []
        self.failed = []

    def handle_suite(self, tree, filename):
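        # Walk one <testsuite> element: recurse into nested suites, and hand
        # each <testcase> off to handle_test, prefixed with the suite's name
        # when it has one.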
        for subelement in tree:
            if subelement.tag == 'testsuite':
                self.handle_suite(subelement, filename)
            elif subelement.tag == 'testcase':
                if 'name' in tree.attrib:
                    name_prefix = tree.attrib['name'] + ' '
                else:
                    name_prefix = ''
                self.handle_test(subelement, filename, name_prefix)

    def handle_test(self, child, filename, name_prefix=''):
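        # JUnit conventions: a <skipped> child marks a skipped test, one or
        # more <failure> children mark a failed test; anything else passed.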
        name = name_prefix + child.attrib['name']
        if child.find('skipped') is not None:
            self.skipped.append(name)
        elif child.find('failure') is not None:
            time = 0.0
            if 'time' in child.attrib:
                time = float(child.attrib['time'])
            out = []
            for param in child.findall('system-out') + child.findall('system-err'):
                if param.text:
                    out.append(param.text)
            for param in child.findall('failure'):
                self.failed.append((name, time, param.text, filename, '\n'.join(out)))
        else:
            self.passed.append(name)

    def parse_xml(self, xml, filename):
        if not xml:
            return  # can't extract results from nothing!
        try:
            tree = ET.fromstring(xml)
        except ET.ParseError:
            logging.exception('parse_junit failed for %s', filename)
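            # Retry after squashing NUL and non-ASCII bytes, a common cause
            # of parse failures in tool-generated junit files.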
            try:
                tree = ET.fromstring(re.sub(r'[\x00\x80-\xFF]+', '?', xml))
            except ET.ParseError as e:
                if re.match(r'junit.*\.xml', os.path.basename(filename)):
                    self.failed.append(
                        ('Gubernator Internal Fatal XML Parse Error', 0.0, str(e), filename, ''))
                return
        if tree.tag == 'testsuite':
            self.handle_suite(tree, filename)
        elif tree.tag == 'testsuites':
            for testsuite in tree:
                self.handle_suite(testsuite, filename)
        else:
            logging.error('unable to find failures, unexpected tag %s', tree.tag)

    def get_results(self):
        self.failed.sort()
        self.skipped.sort()
        self.passed.sort()
        return {
            'failed': self.failed,
            'skipped': self.skipped,
            'passed': self.passed,
        }


@view_base.memcache_memoize('build-log-parsed://', expires=60*60*4)
def get_build_log(build_dir):
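    # Returns log_parser's digest of build-log.txt (an error-focused summary
    # for display), or None when the log is missing; memoized for four hours.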
    build_log = gcs_async.read(build_dir + '/build-log.txt').get_result()
    if build_log:
        return log_parser.digest(build_log)


def get_running_build_log(job, build, prow_url):
    try:
        url = "https://%s/log?job=%s&id=%s" % (prow_url, job, build)
        result = urlfetch.fetch(url)
        if result.status_code == 200:
            return log_parser.digest(result.content), url
    except urlfetch.Error:
        logging.exception('Caught exception fetching url')
    return None, None


def normalize_metadata(started_future, finished_future):
    """
    Munge and normalize the results of loading the started.json
    and finished.json files from a GCS bucket.

    :param started_future: future from gcs_async.read()
    :param finished_future: future from gcs_async.read()
    :return: started, finished dictionaries
    """
    started = started_future.get_result()
    finished = finished_future.get_result()
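    # If exactly one of the two files is missing, substitute the JSON
    # literal 'null' so the json.loads calls below yield None for that side.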
    if finished and not started:
        started = 'null'
    elif started and not finished:
        finished = 'null'
    elif not (started and finished):
        return None, None
    started = json.loads(started)
    finished = json.loads(finished)

    if finished is not None:
        # we want to allow users pushing to GCS to
        # provide us either passed or result, but not
        # require either (or both)
        if 'result' in finished and 'passed' not in finished:
            finished['passed'] = finished['result'] == 'SUCCESS'

        if 'passed' in finished and 'result' not in finished:
            finished['result'] = 'SUCCESS' if finished['passed'] else 'FAILURE'

    return started, finished


@view_base.memcache_memoize('build-details://', expires=60)
def build_details(build_dir, recursive=False):
    """
    Collect information from a build directory.

    Args:
        build_dir: GCS path containing a build's results.
        recursive: Whether to scan artifacts recursively for XML files.
    Returns:
        started: value from started.json {'version': ..., 'timestamp': ...}
        finished: value from finished.json {'timestamp': ..., 'result': ...}
        results: {failed: [(name, duration, failure text, filename, output)...],
                  skipped: [name...],
                  passed: [name...]}
    """
    started, finished = normalize_metadata(
        gcs_async.read(build_dir + '/started.json'),
        gcs_async.read(build_dir + '/finished.json')
    )

    if started is None and finished is None:
        return started, finished, None

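    # List the build's artifacts and parse every XML file found as JUnit
    # output; files whose root tag isn't a testsuite are logged and skipped.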
    if recursive:
        artifact_paths = view_base.gcs_ls_recursive('%s/artifacts' % build_dir)
    else:
        artifact_paths = view_base.gcs_ls('%s/artifacts' % build_dir)

    junit_paths = [f.filename for f in artifact_paths if f.filename.endswith('.xml')]

    junit_futures = {f: gcs_async.read(f) for f in junit_paths}

    parser = JUnitParser()
    for path, future in junit_futures.iteritems():
        parser.parse_xml(future.get_result(), path)
    return started, finished, parser.get_results()


def parse_pr_path(gcs_path, default_org, default_repo):
    """
    Parse a GCS bucket directory into metadata. We
    allow for two short-form names and one long one:

     gs://<pull_prefix>/<pull_number>
      -- this fills in the default repo and org

     gs://<pull_prefix>/repo/<pull_number>
      -- this fills in the default org

     gs://<pull_prefix>/org_repo/<pull_number>

    :param gcs_path: GCS bucket directory for a build
    :param default_org: org to use when the path does not name one
    :param default_repo: repo to use when the path does not name one
    :return: tuple of:
     - PR number
     - PR path prefix (used to build Gubernator links)
     - PR repo ('org/repo')
    """
    pull_number = os.path.basename(gcs_path)
    parsed_repo = os.path.basename(os.path.dirname(gcs_path))
    if parsed_repo == 'pull':
        pr_path = ''
        repo = '%s/%s' % (default_org, default_repo)
    elif '_' not in parsed_repo:
        pr_path = parsed_repo + '/'
        repo = '%s/%s' % (default_org, parsed_repo)
    else:
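        # long-form directories encode 'org/repo' with the first '_' as the
        # separator, e.g. kubernetes_test-infra -> kubernetes/test-infra.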
        repo = parsed_repo.replace('_', '/', 1)
        pr_path = repo + '/'
    return pull_number, pr_path, repo


class BuildHandler(view_base.BaseHandler):
    """Show information about a Build and its failing tests."""
    def get(self, prefix, job, build):
        # pylint: disable=too-many-locals
        if prefix.endswith('/directory'):
            # redirect directory requests
            link = gcs_async.read('/%s/%s/%s.txt' % (prefix, job, build)).get_result()
            if link and link.startswith('gs://'):
                self.redirect('/build/' + link.replace('gs://', ''))
                return

        job_dir = '/%s/%s/' % (prefix, job)
        testgrid_query = testgrid.path_to_query(job_dir)
        build_dir = job_dir + build
        issues_fut = models.GHIssueDigest.find_xrefs_async(build_dir)
        started, finished, results = build_details(
            build_dir, self.app.config.get('recursive_artifacts', True))
        if started is None and finished is None:
            logging.warning('unable to load %s', build_dir)
            self.render(
                'build_404.html',
                dict(build_dir=build_dir, job_dir=job_dir, job=job, build=build))
            self.response.set_status(404)
            return

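        # Fetch the build log when explicitly requested via ?log, when the
        # build is still running (no finished.json yet), or when it failed
        # with at most one failing test to explain the failure.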
        want_build_log = False
        build_log = ''
        build_log_src = None
        if 'log' in self.request.params or (not finished) or \
            (finished and finished.get('result') != 'SUCCESS' and len(results['failed']) <= 1):
            want_build_log = True
            build_log = get_build_log(build_dir)

        pr, pr_path, pr_digest = None, None, None
        repo = '%s/%s' % (self.app.config['default_org'],
                          self.app.config['default_repo'])
        spyglass_link = ''
        external_config = get_build_config(prefix, self.app.config)
        if external_config is not None:
            if external_config.get('spyglass'):
                spyglass_link = 'https://' + external_config['prow_url'] + '/view/gs/' + build_dir
            if '/pull/' in prefix:
                pr, pr_path, pr_digest, repo = get_pr_info(prefix, self.app.config)
            if want_build_log and not build_log:
                build_log, build_log_src = get_running_build_log(job, build,
                                                                 external_config["prow_url"])

        # 'revision' might be in either started or finished.
        # prefer finished.
        version = finished and finished.get('revision') or started and started.get('revision')
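        # versions typically look like 'v1.4.0-alpha.2+abc1234'; the part
        # after the '+' is the commit hash.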
        commit = version and version.split('+')[-1]

        refs = []
        if started and started.get('pull'):
            for ref in started['pull'].split(','):
                x = ref.split(':', 1)
                if len(x) == 2:
                    refs.append((x[0], x[1]))
                else:
                    refs.append((x[0], ''))

        self.render('build.html', dict(
            job_dir=job_dir, build_dir=build_dir, job=job, build=build,
            commit=commit, started=started, finished=finished,
            res=results, refs=refs,
            build_log=build_log, build_log_src=build_log_src,
            issues=issues_fut.get_result(), repo=repo,
            pr_path=pr_path, pr=pr, pr_digest=pr_digest,
            testgrid_query=testgrid_query, spyglass_link=spyglass_link))


def get_build_config(prefix, config):
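    # Match the request prefix against each configured external service's
    # gcs_pull_prefix (or gcs_bucket); returns None when nothing matches.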
    for item in config['external_services'].values() + [config['default_external_services']]:
        if prefix.startswith(item['gcs_pull_prefix']):
            return item
        if 'gcs_bucket' in item and prefix.startswith(item['gcs_bucket']):
            return item


def get_pr_info(prefix, config):
    if config is not None:
        pr, pr_path, repo = parse_pr_path(
            gcs_path=prefix,
            default_org=config['default_org'],
            default_repo=config['default_repo'],
        )
        pr_digest = models.GHIssueDigest.get(repo, pr)
        return pr, pr_path, pr_digest, repo


def get_running_pr_log(job, build, config):
    if config is not None:
        return get_running_build_log(job, build, config["prow_url"])


def get_build_numbers(job_dir, before, indirect):
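    # List up to 40 build numbers under job_dir, newest first; when 'before'
    # is given, list only builds older than that build.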
    fstats = view_base.gcs_ls(job_dir)
    fstats.sort(key=lambda f: view_base.pad_numbers(f.filename),
                reverse=True)
    if indirect:
        # find numbered builds
        builds = [re.search(r'/(\d*)\.txt$', f.filename)
                  for f in fstats if not f.is_dir]
        builds = [m.group(1) for m in builds if m]
    else:
        builds = [os.path.basename(os.path.dirname(f.filename))
                  for f in fstats if f.is_dir]
    if before and before in builds:
        builds = builds[builds.index(before) + 1:]
    return builds[:40]


@view_base.memcache_memoize('build-list://', expires=60)
def build_list(job_dir, before):
    """
    Given a job dir, return a (partial) list of recent builds, with the
    contents of their started.json and finished.json files.

    Args:
        job_dir: the GCS path holding the job's builds
        before: if set, only list builds older than this build number
    Returns:
        a list of [(build, loc, started, finished)].
            build is a string like "123",
            loc is the job directory and build,
            started/finished are either None or the decoded JSON dict,
        and a dict of {build: [issues...]} of xrefs.
    """
    # pylint: disable=too-many-locals

    # /directory/ folders have a series of .txt files pointing at the correct location,
    # as a sort of fake symlink.
    indirect = '/directory/' in job_dir

    builds = get_build_numbers(job_dir, before, indirect)

    if indirect:
        # follow the indirect links
        build_symlinks = [
            (build,
             gcs_async.read('%s%s.txt' % (job_dir, build)))
            for build in builds
        ]
        build_futures = []
        for build, sym_fut in build_symlinks:
            redir = sym_fut.get_result()
            if redir and redir.startswith('gs://'):
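                # strip 'gs:/' but keep the leading '/': the rest of the app
                # refers to GCS objects by '/bucket/path' paths.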
                redir = redir[4:].strip()
                build_futures.append(
                    (build, redir,
                     gcs_async.read('%s/started.json' % redir),
                     gcs_async.read('%s/finished.json' % redir)))
    else:
        build_futures = [
            (build, '%s%s' % (job_dir, build),
             gcs_async.read('%s%s/started.json' % (job_dir, build)),
             gcs_async.read('%s%s/finished.json' % (job_dir, build)))
            for build in builds
        ]

    # This is done in parallel with waiting for GCS started/finished.
    build_refs = models.GHIssueDigest.find_xrefs_multi_async(
            [b[1] for b in build_futures])

    output = []
    for build, loc, started_future, finished_future in build_futures:
        started, finished = normalize_metadata(started_future, finished_future)
        output.append((str(build), loc, started, finished))

    return output, build_refs.get_result()


class BuildListHandler(view_base.BaseHandler):
    """Show a list of Builds for a Job."""
    def get(self, prefix, job):
        job_dir = '/%s/%s/' % (prefix, job)
        testgrid_query = testgrid.path_to_query(job_dir)
        before = self.request.get('before')
        builds, refs = build_list(job_dir, before)
        dir_link = re.sub(r'/pull/.*', '/directory/%s' % job, prefix)

        self.render('build_list.html',
                    dict(job=job, job_dir=job_dir, dir_link=dir_link,
                         testgrid_query=testgrid_query,
                         builds=builds, refs=refs,
                         before=before))


class JobListHandler(view_base.BaseHandler):
    """Show a list of Jobs in a directory."""
    def get(self, prefix):
        jobs_dir = '/%s' % prefix
        fstats = view_base.gcs_ls(jobs_dir)
        fstats.sort()
        self.render('job_list.html', dict(jobs_dir=jobs_dir, fstats=fstats))


class GcsProxyHandler(view_base.BaseHandler):
    """Proxy results from GCS.

    Useful for buckets that don't have public read permissions."""
    def get(self):
        # let's lock this down to build logs for now.
        path = self.request.get('path')
        if not re.match(r'^[-\w/.]+$', path):
            self.abort(403)
        if not path.endswith('/build-log.txt'):
            self.abort(403)
        content = gcs_async.read(path).get_result()
        # lazy XSS prevention.
        # doesn't work on terrible browsers that do content sniffing (ancient IE).
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.write(content)