github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/gubernator/view_build.py

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import json
import os
import re

import defusedxml.ElementTree as ET

from google.appengine.api import urlfetch

import gcs_async
from github import models
import log_parser
import testgrid
import view_base


class JUnitParser(object):
    def __init__(self):
        self.skipped = []
        self.passed = []
        self.failed = []

    def handle_suite(self, tree, filename):
        for subelement in tree:
            if subelement.tag == 'testsuite':
                self.handle_suite(subelement, filename)
            elif subelement.tag == 'testcase':
                if 'name' in tree.attrib:
                    name_prefix = tree.attrib['name'] + ' '
                else:
                    name_prefix = ''
                self.handle_test(subelement, filename, name_prefix)

    def handle_test(self, child, filename, name_prefix=''):
        name = name_prefix + child.attrib['name']
        if child.find('skipped') is not None:
            self.skipped.append(name)
        elif child.find('failure') is not None:
            time = float(child.attrib['time'])
            out = []
            for param in child.findall('system-out'):
                out.append(param.text)
            for param in child.findall('system-err'):
                out.append(param.text)
            for param in child.findall('failure'):
                self.failed.append((name, time, param.text, filename, '\n'.join(out)))
        else:
            self.passed.append(name)

    def parse_xml(self, xml, filename):
        if not xml:
            return  # can't extract results from nothing!
        try:
            tree = ET.fromstring(xml)
        except ET.ParseError:
            logging.exception('parse_junit failed for %s', filename)
            try:
                tree = ET.fromstring(re.sub(r'[\x00\x80-\xFF]+', '?', xml))
            except ET.ParseError as e:
                self.failed.append(
                    ('Gubernator Internal Fatal XML Parse Error', 0.0, str(e), filename, ''))
                return
        if tree.tag == 'testsuite':
            self.handle_suite(tree, filename)
        elif tree.tag == 'testsuites':
            for testsuite in tree:
                self.handle_suite(testsuite, filename)
        else:
            logging.error('unable to find failures, unexpected tag %s', tree.tag)

    def get_results(self):
        self.failed.sort()
        self.skipped.sort()
        self.passed.sort()
        return {
            'failed': self.failed,
            'skipped': self.skipped,
            'passed': self.passed,
        }

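# Example (illustrative only): given a file 'junit_01.xml' containing
#   <testsuite>
#     <testcase name="TestA" time="1.0"/>
#     <testcase name="TestB" time="2.0"><failure>boom</failure></testcase>
#   </testsuite>
# calling parse_xml(xml, 'junit_01.xml') followed by get_results() yields roughly:
#   {'failed': [('TestB', 2.0, 'boom', 'junit_01.xml', '')],
#    'skipped': [],
#    'passed': ['TestA']}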

@view_base.memcache_memoize('build-log-parsed://', expires=60*60*4)
def get_build_log(build_dir):
    build_log = gcs_async.read(build_dir + '/build-log.txt').get_result()
    if build_log:
        return log_parser.digest(build_log)


def get_running_build_log(job, build, prow_url):
    try:
        url = "https://%s/log?job=%s&id=%s" % (prow_url, job, build)
        result = urlfetch.fetch(url)
        if result.status_code == 200:
            return log_parser.digest(result.content), url
    except urlfetch.Error:
        logging.exception('Caught exception fetching url')
    return None, None
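# Note: get_running_build_log is the fallback used by BuildHandler below when a
# build has not finished yet and GCS has no build-log.txt to read; it asks the
# configured prow instance for the live log instead.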


@view_base.memcache_memoize('build-details://', expires=60)
def build_details(build_dir):
    """
    Collect information from a build directory.

    Args:
        build_dir: GCS path containing a build's results.
    Returns:
        started: value from started.json {'version': ..., 'timestamp': ...}
        finished: value from finished.json {'timestamp': ..., 'result': ...}
        results: {failed: [(name, duration, failure text, filename, output)...],
                  skipped: [name...],
                  passed: [name...]}
    """
    started_fut = gcs_async.read(build_dir + '/started.json')
    finished = gcs_async.read(build_dir + '/finished.json').get_result()
    started = started_fut.get_result()
    # Tolerate builds that are still running (started but not finished) or that
    # are missing started.json, by substituting JSON null for the absent file.
    if finished and not started:
        started = 'null'
    if started and not finished:
        finished = 'null'
    elif not (started and finished):
        return
    started = json.loads(started)
    finished = json.loads(finished)

    junit_paths = [f.filename for f in view_base.gcs_ls('%s/artifacts' % build_dir)
                   if re.match(r'junit_.*\.xml', os.path.basename(f.filename))]

    junit_futures = {f: gcs_async.read(f) for f in junit_paths}

    parser = JUnitParser()
    for path, future in junit_futures.iteritems():
        parser.parse_xml(future.get_result(), path)
    return started, finished, parser.get_results()
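# Illustrative layout of a build directory as read above and by get_build_log
# (exact artifact names vary by job):
#   <build_dir>/started.json
#   <build_dir>/finished.json
#   <build_dir>/build-log.txt
#   <build_dir>/artifacts/junit_01.xml, junit_02.xml, ...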


def parse_pr_path(gcs_path, default_org, default_repo):
    """
    Parse a GCS bucket directory into metadata. We allow for two
    short-form names and one long one:

     gs://<pull_prefix>/<pull_number>
      -- this fills in the default repo and org

     gs://<pull_prefix>/repo/<pull_number>
      -- this fills in the default org

     gs://<pull_prefix>/org_repo/<pull_number>

    :param gcs_path: GCS bucket directory for a build
    :param default_org: org to assume when the path does not name one
    :param default_repo: repo to assume when the path does not name one
    :return: tuple of:
     - PR number
     - Gubernator PR path prefix (used to build PR links)
     - PR repo
    """
    pull_number = os.path.basename(gcs_path)
    parsed_repo = os.path.basename(os.path.dirname(gcs_path))
    if parsed_repo == 'pull':
        pr_path = ''
        repo = '%s/%s' % (default_org, default_repo)
    elif '_' not in parsed_repo:
        pr_path = parsed_repo + '/'
        repo = '%s/%s' % (default_org, parsed_repo)
    else:
        pr_path = parsed_repo.replace('_', '/') + '/'
        repo = parsed_repo.replace('_', '/')
    return pull_number, pr_path, repo
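# Example (illustrative values) with default_org='kubernetes', default_repo='kubernetes':
#   'bucket/pr-logs/pull/12345'                 -> ('12345', '', 'kubernetes/kubernetes')
#   'bucket/pr-logs/pull/test-infra/12345'      -> ('12345', 'test-infra/', 'kubernetes/test-infra')
#   'bucket/pr-logs/pull/custom-org_repo/12345' -> ('12345', 'custom-org/repo/', 'custom-org/repo')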


class BuildHandler(view_base.BaseHandler):
    """Show information about a Build and its failing tests."""
    def get(self, prefix, job, build):
        # pylint: disable=too-many-locals
        if prefix.endswith('/directory'):
            # redirect directory requests
            link = gcs_async.read('/%s/%s/%s.txt' % (prefix, job, build)).get_result()
            if link and link.startswith('gs://'):
                self.redirect('/build/' + link.replace('gs://', ''))
                return

        job_dir = '/%s/%s/' % (prefix, job)
        testgrid_query = testgrid.path_to_query(job_dir)
        build_dir = job_dir + build
        details = build_details(build_dir)
        if not details:
            logging.warning('unable to load %s', build_dir)
            self.render(
                'build_404.html',
                dict(build_dir=build_dir, job_dir=job_dir, job=job, build=build))
            self.response.set_status(404)
            return
        started, finished, results = details

        want_build_log = False
        build_log = ''
        build_log_src = None
        if 'log' in self.request.params or (not finished) or \
            (finished and finished.get('result') != 'SUCCESS' and len(results['failed']) <= 1):
            want_build_log = True
            build_log = get_build_log(build_dir)

        pr, pr_path, pr_digest, repo = None, None, None, None
        external_config = get_pr_config(prefix, self.app.config)
        if external_config is not None:
            pr, pr_path, pr_digest, repo = get_pr_info(prefix, self.app.config)
            if want_build_log and not build_log:
                build_log, build_log_src = get_running_build_log(job, build,
                                                                 external_config["prow_url"])

        # 'version' might be in either started or finished.
        # prefer finished.
        if finished and 'version' in finished:
            version = finished['version']
        else:
            version = started and started.get('version')
        commit = version and version.split('+')[-1]
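        # Kubernetes version strings typically look like
        # 'v1.9.0-alpha.1.123+0badc0ffee1234' (illustrative); the text after
        # the '+' is the git commit that produced the build.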

        issues = list(models.GHIssueDigest.find_xrefs(build_dir))

        refs = []
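        # started.json's 'pull' field is assumed to be a comma-separated list
        # of 'ref:sha' pairs, e.g. 'master:deadbeef,1234:c0ffee' (branch plus
        # PRs); entries without a ':sha' part are also tolerated.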
        if started and 'pull' in started:
            for ref in started['pull'].split(','):
                x = ref.split(':', 1)
                if len(x) == 2:
                    refs.append((x[0], x[1]))
                else:
                    refs.append((x[0], ''))

        self.render('build.html', dict(
            job_dir=job_dir, build_dir=build_dir, job=job, build=build,
            commit=commit, started=started, finished=finished,
            res=results, refs=refs,
            build_log=build_log, build_log_src=build_log_src,
            issues=issues, repo=repo,
            pr_path=pr_path, pr=pr, pr_digest=pr_digest,
            testgrid_query=testgrid_query))


def get_pr_config(prefix, config):
    for item in config["external_services"].values():
        if prefix.startswith(item["gcs_pull_prefix"]):
            return item
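# Illustrative shape of the config consumed above and by get_pr_info (keys
# inferred from usage in this file; the real config may carry more fields):
#   default_org: kubernetes
#   default_repo: kubernetes
#   external_services:
#     some-deployment:
#       gcs_pull_prefix: some-bucket/pr-logs/pull/
#       prow_url: prow.example.com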


def get_pr_info(prefix, config):
    if config is not None:
        pr, pr_path, repo = parse_pr_path(
            gcs_path=prefix,
            default_org=config['default_org'],
            default_repo=config['default_repo'],
        )
        pr_digest = models.GHIssueDigest.get(repo, pr)
        return pr, pr_path, pr_digest, repo


def get_running_pr_log(job, build, config):
    if config is not None:
        return get_running_build_log(job, build, config["prow_url"])


def get_build_numbers(job_dir, before, indirect):
    try:
        if '/pull/' in job_dir and not indirect:
            raise ValueError('bad code path for PR build list')
        # If we have latest-build.txt, we can skip an expensive GCS ls call!
        if before:
            latest_build = int(before) - 1
        else:
            latest_build = int(gcs_async.read(job_dir + 'latest-build.txt').get_result())
            # latest-build.txt has the most recent finished build. There might
            # be newer builds that have started but not finished. Probe for them.
            suffix = '/started.json' if not indirect else '.txt'
            while gcs_async.read('%s%s%s' % (job_dir, latest_build + 1, suffix)).get_result():
                latest_build += 1
        return range(latest_build, max(0, latest_build - 40), -1)
    except (ValueError, TypeError):
        fstats = view_base.gcs_ls(job_dir)
        fstats.sort(key=lambda f: view_base.pad_numbers(f.filename),
                    reverse=True)
        if indirect:
            # find numbered builds
            builds = [re.search(r'/(\d*)\.txt$', f.filename)
                      for f in fstats if not f.is_dir]
            builds = [m.group(1) for m in builds if m]
        else:
            builds = [os.path.basename(os.path.dirname(f.filename))
                      for f in fstats if f.is_dir]
        if before and before in builds:
            builds = builds[builds.index(before) + 1:]
        return builds[:40]


@view_base.memcache_memoize('build-list://', expires=60)
def build_list(job_dir, before):
    '''
    Given a job dir, give a (partial) list of recent builds.

    Args:
        job_dir: the GCS path holding the job's builds
        before: if provided, only list builds before this build number
    Returns:
        a list of [(build, loc, started, finished)] tuples. build is a string
        like "123", loc is the build's GCS path, and started/finished are
        either None or the parsed started.json / finished.json dict.
    '''

    # /directory/ folders have a series of .txt files pointing at the correct location,
    # as a sort of fake symlink.
    indirect = '/directory/' in job_dir

    builds = get_build_numbers(job_dir, before, indirect)

    if indirect:
        # follow the indirect links
        build_symlinks = [
            (build,
             gcs_async.read('%s%s.txt' % (job_dir, build)))
            for build in builds
        ]
        build_futures = []
        for build, sym_fut in build_symlinks:
            redir = sym_fut.get_result()
            if redir and redir.startswith('gs://'):
                # drop 'gs:/' but keep the leading '/', matching gcs_async path style
                redir = redir[4:].strip()
                build_futures.append(
                    (build, redir,
                     gcs_async.read('%s/started.json' % redir),
                     gcs_async.read('%s/finished.json' % redir)))
    else:
        build_futures = [
            (build, '%s%s' % (job_dir, build),
             gcs_async.read('%s%s/started.json' % (job_dir, build)),
             gcs_async.read('%s%s/finished.json' % (job_dir, build)))
            for build in builds
        ]

    def resolve(future):
        res = future.get_result()
        if res:
            return json.loads(res)

    return [(str(build), loc, resolve(started), resolve(finished))
            for build, loc, started, finished in build_futures]


class BuildListHandler(view_base.BaseHandler):
    """Show a list of Builds for a Job."""
    def get(self, prefix, job):
        job_dir = '/%s/%s/' % (prefix, job)
        testgrid_query = testgrid.path_to_query(job_dir)
        before = self.request.get('before')
        builds = build_list(job_dir, before)
        dir_link = re.sub(r'/pull/.*', '/directory/%s' % job, prefix)
        self.render('build_list.html',
                    dict(job=job, job_dir=job_dir, dir_link=dir_link,
                         testgrid_query=testgrid_query,
                         builds=builds, before=before))


class JobListHandler(view_base.BaseHandler):
    """Show a list of Jobs in a directory."""
    def get(self, prefix):
        jobs_dir = '/%s' % prefix
        fstats = view_base.gcs_ls(jobs_dir)
        fstats.sort()
        self.render('job_list.html', dict(jobs_dir=jobs_dir, fstats=fstats))