github.com/jenkins-x/test-infra@v0.0.7/testgrid/conformance/upload_e2e.py (about)

     1  #!/usr/bin/env python
     2  
     3  # Copyright 2018 The Kubernetes Authors.
     4  #
     5  # Licensed under the Apache License, Version 2.0 (the "License");
     6  # you may not use this file except in compliance with the License.
     7  # You may obtain a copy of the License at
     8  #
     9  #     http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  
    17  # This script parses conformance test output to produce testgrid entries
    18  #
    19  # Assumptions:
    20  # - there is one log file and one JUnit file (true for current conformance tests..)
    21  # - the log file contains ginkgo's output (true for kubetest and sonobuoy..)
    22  # - the ginkgo output will give us start / end time, and overall success
    23  #
    24  # - the start timestamp is suitable as a testgrid ID (unique, monotonic)
    25  #
    26  # - the test ran in the current year unless --year is provided
    27  # - the timestamps are parsed on a machine with the same local time (zone)
    28  #   settings as the machine that produced the logs
    29  #
    30  # The log file is the source of truth for metadata, the JUnit will be consumed
    31  # by testgrid / gubernator for individual test case results
    32  #
    33  # Usage: see README.md
    34  
    35  
    36  import re
    37  import sys
    38  import time
    39  import datetime
    40  import argparse
    41  import json
    42  import subprocess
    43  from os import path
    44  import glob
    45  import atexit
    46  
    47  
# e2e logs often contain ANSI terminal escape sequences (color codes);
# they must be stripped before the timestamp/success regexes can match
# https://stackoverflow.com/a/14693789
ANSI_ESCAPE_RE = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')


# NOTE e2e logs use go's time.StampMilli ("Jan _2 15:04:05.000")
# Example log line with a timestamp:
# Jan 26 06:38:46.284: INFO: Running AfterSuite actions on all node
# the third ':' separates the date from the rest; group(1) captures the date
E2E_LOG_TIMESTAMP_RE = re.compile(r'(... .\d \d\d:\d\d:\d\d\.\d\d\d):.*')

# Ginkgo gives a line like the following at the end of successful runs:
# SUCCESS! -- 123 Passed | 0 Failed | 0 Pending | 587 Skipped PASS
# we match this to detect overall success
E2E_LOG_SUCCESS_RE = re.compile(r'SUCCESS! -- .* PASS')
    63  
    64  
    65  def log_line_strip_escape_sequences(line):
    66      return ANSI_ESCAPE_RE.sub('', line)
    67  
    68  
    69  def parse_e2e_log_line_timestamp(line, year):
    70      """parses a ginkgo e2e log line for the leading timestamp
    71  
    72      Args:
    73          line (str) - the log line
    74          year (str) - 'YYYY'
    75  
    76      Returns:
    77          timestamp (datetime.datetime) or None
    78      """
    79      match = E2E_LOG_TIMESTAMP_RE.match(line)
    80      if match is None:
    81          return None
    82      # note we add year to the timestamp because the actual timestamp doesn't
    83      # contain one and we want a datetime object...
    84      timestamp = year+' '+match.group(1)
    85      return datetime.datetime.strptime(timestamp, '%Y %b %d %H:%M:%S.%f')
    86  
    87  
    88  def parse_e2e_logfile(file_handle, year):
    89      """parse e2e logfile at path, assuming the log is from year
    90  
    91      Args:
    92          file_handle (file): the log file, iterated for lines
    93          year (str): YYYY year logfile is from
    94  
    95      Returns:
    96          started (datetime.datetime), finished (datetime.datetime), passed (boolean)
    97      """
    98      started = finished = None
    99      passed = False
   100      for line in file_handle:
   101          line = log_line_strip_escape_sequences(line)
   102          # try to get a timestamp from each line, keep the first one as
   103          # start time, and the last one as finish time
   104          timestamp = parse_e2e_log_line_timestamp(line, year)
   105          if timestamp:
   106              if started:
   107                  finished = timestamp
   108              else:
   109                  started = timestamp
   110          # if we found the ginkgo success line then the run passed
   111          is_success = E2E_LOG_SUCCESS_RE.match(line)
   112          if is_success:
   113              passed = True
   114      return started, finished, passed
   115  
   116  
   117  def datetime_to_unix(datetime_obj):
   118      """convert datetime.datetime to unix timestamp"""
   119      return int(time.mktime(datetime_obj.timetuple()))
   120  
   121  
   122  def testgrid_started_json_contents(start_time):
   123      """returns the string contents of a testgrid started.json file
   124  
   125      Args:
   126          start_time (datetime.datetime)
   127  
   128      Returns:
   129          contents (str)
   130      """
   131      started = datetime_to_unix(start_time)
   132      return json.dumps({
   133          'timestamp': started
   134      })
   135  
   136  
   137  def testgrid_finished_json_contents(finish_time, passed, metadata):
   138      """returns the string contents of a testgrid finished.json file
   139  
   140      Args:
   141          finish_time (datetime.datetime)
   142          passed (bool)
   143          metadata (str)
   144  
   145      Returns:
   146          contents (str)
   147      """
   148      finished = datetime_to_unix(finish_time)
   149      result = 'SUCCESS' if passed else 'FAILURE'
   150      if metadata:
   151          testdata = json.loads(metadata)
   152          return json.dumps({
   153              'timestamp': finished,
   154              'result': result,
   155              'metadata': testdata
   156          })
   157      return json.dumps({
   158          'timestamp': finished,
   159          'result': result
   160      })
   161  
   162  
   163  def upload_string(gcs_path, text, dry):
   164      """Uploads text to gcs_path if dry is False, otherwise just prints"""
   165      cmd = ['gsutil', '-q', '-h', 'Content-Type:text/plain', 'cp', '-', gcs_path]
   166      print >>sys.stderr, 'Run:', cmd, 'stdin=%s' % text
   167      if dry:
   168          return
   169      proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
   170      proc.communicate(input=text)
   171      if proc.returncode != 0:
   172          raise RuntimeError(
   173              "Failed to upload with exit code: %d" % proc.returncode)
   174  
   175  
   176  def upload_file(gcs_path, file_path, dry):
   177      """Uploads file at file_path to gcs_path if dry is False, otherwise just prints"""
   178      cmd = ['gsutil', '-q', '-h', 'Content-Type:text/plain',
   179             'cp', file_path, gcs_path]
   180      print >>sys.stderr, 'Run:', cmd
   181      if dry:
   182          return
   183      proc = subprocess.Popen(cmd)
   184      proc.communicate()
   185      if proc.returncode != 0:
   186          raise RuntimeError(
   187              'Failed to upload with exit code: %d' % proc.returncode)
   188  
   189  
   190  def get_current_account(dry_run):
   191      """gets the currently active gcp account by shelling out to gcloud"""
   192      cmd = ['gcloud', 'auth', 'list',
   193             '--filter=status:ACTIVE', '--format=value(account)']
   194      print >>sys.stderr, 'Run:', cmd
   195      if dry_run:
   196          return ""
   197      return subprocess.check_output(cmd).strip('\n')
   198  
   199  
   200  def set_current_account(account, dry_run):
   201      """sets the currently active gcp account by shelling out to gcloud"""
   202      cmd = ['gcloud', 'config', 'set', 'core/account', account]
   203      print >>sys.stderr, 'Run:', cmd
   204      if dry_run:
   205          return
   206      return subprocess.check_call(cmd)
   207  
   208  
   209  def activate_service_account(key_file, dry_run):
   210      """activates a gcp service account by shelling out to gcloud"""
   211      cmd = ['gcloud', 'auth', 'activate-service-account', '--key-file='+key_file]
   212      print >>sys.stderr, 'Run:', cmd
   213      if dry_run:
   214          return
   215      subprocess.check_call(cmd)
   216  
   217  
   218  def revoke_current_account(dry_run):
   219      """logs out of the currently active gcp account by shelling out to gcloud"""
   220      cmd = ['gcloud', 'auth', 'revoke']
   221      print >>sys.stderr, 'Run:', cmd
   222      if dry_run:
   223          return
   224      return subprocess.check_call(cmd)
   225  
   226  
   227  def parse_args(cli_args=None):
   228      if cli_args is None:
   229          cli_args = sys.argv[1:]
   230      parser = argparse.ArgumentParser()
   231      parser.add_argument(
   232          '--bucket',
   233          help=('GCS bucket to upload the results to,'
   234                ' of the form \'gs://foo/bar\''),
   235          required=True,
   236      )
   237      parser.add_argument(
   238          '--year',
   239          help=('the year in which the log is from, defaults to the current year.'
   240                ' format: YYYY'),
   241          default=str(datetime.datetime.now().year),
   242      )
   243      parser.add_argument(
   244          '--junit',
   245          help='path or glob expression to the junit xml results file(s)',
   246          required=True,
   247      )
   248      parser.add_argument(
   249          '--log',
   250          help='path to the test log file, should contain the ginkgo output',
   251          required=True,
   252      )
   253      parser.add_argument(
   254          '--dry-run',
   255          help='if set, do not actually upload anything, only print actions',
   256          required=False,
   257          action='store_true',
   258      )
   259      parser.add_argument(
   260          '--metadata',
   261          help='dictionary of additional key-value pairs that can be displayed to the user.',
   262          required=False,
   263          default=str(),
   264      )
   265      parser.add_argument(
   266          '--key-file',
   267          help='path to GCP service account key file, which will be activated before '
   268          'uploading if provided, the account will be revoked and the active account reset '
   269          'on exit',
   270          required=False,
   271      )
   272      return parser.parse_args(args=cli_args)
   273  
   274  
   275  def main(cli_args):
   276      args = parse_args(cli_args)
   277  
   278      # optionally activate a service account with upload credentials
   279      if args.key_file:
   280          # grab the currently active account if any, and if there is one
   281          # register a handler to set it active again on exit
   282          current_account = get_current_account(args.dry_run)
   283          if current_account:
   284              atexit.register(
   285                  lambda: set_current_account(current_account, args.dry_run)
   286              )
   287          # login to the service account and register a handler to logout before exit
   288          # NOTE: atexit handlers are called in LIFO order
   289          activate_service_account(args.key_file, args.dry_run)
   290          atexit.register(lambda: revoke_current_account(args.dry_run))
   291  
   292      # find the matching junit files, there should be at least one for a useful
   293      # testgrid entry
   294      junits = glob.glob(args.junit)
   295      if not junits:
   296          print 'No matching JUnit files found!'
   297          sys.exit(-1)
   298  
   299      # parse the e2e.log for start time, finish time, and success
   300      with open(args.log) as file_handle:
   301          started, finished, passed = parse_e2e_logfile(file_handle, args.year)
   302  
   303      # convert parsed results to testgrid json metadata blobs
   304      started_json = testgrid_started_json_contents(started)
   305      finished_json = testgrid_finished_json_contents(
   306          finished, passed, args.metadata)
   307  
   308      # use timestamp as build ID
   309      gcs_dir = args.bucket + '/' + str(datetime_to_unix(started))
   310  
   311      # upload metadata, log, junit to testgrid
   312      print 'Uploading entry to: %s' % gcs_dir
   313      upload_string(gcs_dir+'/started.json', started_json, args.dry_run)
   314      upload_string(gcs_dir+'/finished.json', finished_json, args.dry_run)
   315      upload_file(gcs_dir+'/build-log.txt', args.log, args.dry_run)
   316      for junit_file in junits:
   317          upload_file(gcs_dir+'/artifacts/' +
   318                      path.basename(junit_file), junit_file, args.dry_run)
   319      print 'Done.'
   320  
   321  
# script entry point: forward CLI arguments (minus the program name) to main()
if __name__ == '__main__':
    main(sys.argv[1:])