k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/testgrid/conformance/upload_e2e.py

#!/usr/bin/env python3

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script parses conformance test output to produce testgrid entries
#
# Assumptions:
# - there is one log file and one JUnit file (true for current conformance tests)
# - the log file contains ginkgo's output (true for kubetest and sonobuoy)
# - the ginkgo output will give us start / end time, and overall success
#
# - the start timestamp is suitable as a testgrid ID (unique, monotonic)
#
# - the test ran in the current year unless --year is provided
# - the timestamps are parsed on a machine with the same local time (zone)
#   settings as the machine that produced the logs
#
# The log file is the source of truth for metadata; the JUnit will be consumed
# by testgrid / gubernator for individual test case results
#
# Usage: see README.md
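#
# Illustrative invocation (the bucket and file paths below are placeholders,
# not defaults from this repo):
#   ./upload_e2e.py --bucket=gs://example-bucket/conformance \
#     --junit='artifacts/junit_*.xml' --log=e2e.log --dry-run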


import re
import sys
import time
import datetime
import argparse
import json
import subprocess
from os import path
import glob
import atexit


# logs often contain ANSI escape sequences
# https://stackoverflow.com/a/14693789
ANSI_ESCAPE_RE = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')


# NOTE e2e logs use go's time.StampMilli ("Jan _2 15:04:05.000")
# Example log line with a timestamp:
# Jan 26 06:38:46.284: INFO: Running AfterSuite actions on all node
# the ':' after the milliseconds separates the timestamp from the rest of the line
E2E_LOG_TIMESTAMP_RE = re.compile(r'(... .\d \d\d:\d\d:\d\d\.\d\d\d):.*')

# At the end of a run ginkgo prints a summary like:
# SUCCESS! -- 123 Passed | 0 Failed | 0 Pending | 587 Skipped PASS
# followed by a final 'Test Suite Passed' / 'Test Suite Failed' verdict line,
# which is what we match to detect overall success or failure
E2E_LOG_SUCCESS_RE = re.compile(r'Test Suite Passed')
E2E_LOG_FAIL_RE = re.compile(r'Test Suite Failed')


def log_line_strip_escape_sequences(line):
    return ANSI_ESCAPE_RE.sub('', line)


def parse_e2e_log_line_timestamp(line, year):
    """parses a ginkgo e2e log line for the leading timestamp

    Args:
        line (str) - the log line
        year (str) - 'YYYY'

    Returns:
        timestamp (datetime.datetime) or None
    """
    match = E2E_LOG_TIMESTAMP_RE.match(line)
    if match is None:
        return None
    # note we add year to the timestamp because the actual timestamp doesn't
    # contain one and we want a datetime object...
    timestamp = year+' '+match.group(1)
    return datetime.datetime.strptime(timestamp, '%Y %b %d %H:%M:%S.%f')
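# Illustrative example (log line and year are made up):
#   parse_e2e_log_line_timestamp('Jan 26 06:38:46.284: INFO: ...', '2018')
#   returns datetime.datetime(2018, 1, 26, 6, 38, 46, 284000)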


def parse_e2e_logfile(file_handle, year):
    """parse the e2e log from file_handle, assuming the log is from year

    Args:
        file_handle (file): the log file, iterated for lines
        year (str): YYYY year logfile is from

    Returns:
        started (datetime.datetime), finished (datetime.datetime), passed (boolean)
    """
    passed = started = finished = None
    for line in file_handle:
        line = log_line_strip_escape_sequences(line)
        # try to get a timestamp from each line, keep the first one as
        # start time, and the last one as finish time
        timestamp = parse_e2e_log_line_timestamp(line, year)
        if timestamp:
            if started:
                finished = timestamp
            else:
                started = timestamp
        if passed is False:
            # if we already have found a failure, ignore subsequent pass/fails
            continue
        if E2E_LOG_SUCCESS_RE.match(line):
            passed = True
        elif E2E_LOG_FAIL_RE.match(line):
            passed = False
    return started, finished, passed


def datetime_to_unix(datetime_obj):
    """convert datetime.datetime to unix timestamp"""
    return int(time.mktime(datetime_obj.timetuple()))
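# NOTE time.mktime interprets the datetime in the machine's local timezone,
# hence the assumption above that this script runs with the same timezone
# settings as the machine that produced the logs.
# Illustrative example (assuming a UTC machine):
#   datetime_to_unix(datetime.datetime(2018, 1, 26, 6, 38, 46)) -> 1516948726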


def testgrid_started_json_contents(start_time):
    """returns the string contents of a testgrid started.json file

    Args:
        start_time (datetime.datetime)

    Returns:
        contents (str)
    """
    started = datetime_to_unix(start_time)
    return json.dumps({
        'timestamp': started
    })
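# Illustrative started.json contents (timestamp value is made up):
#   {"timestamp": 1516948726}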


def testgrid_finished_json_contents(finish_time, passed, metadata):
    """returns the string contents of a testgrid finished.json file

    Args:
        finish_time (datetime.datetime)
        passed (bool)
        metadata (str): optional JSON object of additional metadata

    Returns:
        contents (str)
    """
    finished = datetime_to_unix(finish_time)
    result = 'SUCCESS' if passed else 'FAILURE'
    if metadata:
        testdata = json.loads(metadata)
        return json.dumps({
            'timestamp': finished,
            'result': result,
            'metadata': testdata
        })
    return json.dumps({
        'timestamp': finished,
        'result': result
    })
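# Illustrative finished.json contents for a passing run given
# --metadata='{"version": "v1.14.0"}' (values are made up):
#   {"timestamp": 1516959747, "result": "SUCCESS", "metadata": {"version": "v1.14.0"}}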


def upload_string(gcs_path, text, dry):
    """Uploads text to gcs_path if dry is False, otherwise just prints"""
    cmd = ['gsutil', '-q', '-h', 'Content-Type:text/plain', 'cp', '-', gcs_path]
    print('Run:', cmd, 'stdin=%s' % text, file=sys.stderr)
    if dry:
        return
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, encoding='utf8')
    proc.communicate(input=text)
    if proc.returncode != 0:
        raise RuntimeError(
            "Failed to upload with exit code: %d" % proc.returncode)


def upload_file(gcs_path, file_path, dry):
    """Uploads file at file_path to gcs_path if dry is False, otherwise just prints"""
    cmd = ['gsutil', '-q', '-h', 'Content-Type:text/plain',
           'cp', file_path, gcs_path]
    print('Run:', cmd, file=sys.stderr)
    if dry:
        return
    proc = subprocess.Popen(cmd)
    proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError(
            'Failed to upload with exit code: %d' % proc.returncode)


def get_current_account(dry_run):
    """gets the currently active gcp account by shelling out to gcloud"""
    cmd = ['gcloud', 'auth', 'list',
           '--filter=status:ACTIVE', '--format=value(account)']
    print('Run:', cmd, file=sys.stderr)
    if dry_run:
        return ""
    return subprocess.check_output(cmd, encoding='utf-8').strip('\n')


def set_current_account(account, dry_run):
    """sets the currently active gcp account by shelling out to gcloud"""
    cmd = ['gcloud', 'config', 'set', 'core/account', account]
    print('Run:', cmd, file=sys.stderr)
    if dry_run:
        return None
    return subprocess.check_call(cmd)


def activate_service_account(key_file, dry_run):
    """activates a gcp service account by shelling out to gcloud"""
    cmd = ['gcloud', 'auth', 'activate-service-account', '--key-file='+key_file]
    print('Run:', cmd, file=sys.stderr)
    if dry_run:
        return
    subprocess.check_call(cmd)


def revoke_current_account(dry_run):
    """logs out of the currently active gcp account by shelling out to gcloud"""
    cmd = ['gcloud', 'auth', 'revoke']
    print('Run:', cmd, file=sys.stderr)
    if dry_run:
        return None
    return subprocess.check_call(cmd)


def parse_args(cli_args=None):
    if cli_args is None:
        cli_args = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--bucket',
        help=('GCS bucket to upload the results to,'
              ' of the form \'gs://foo/bar\''),
        required=True,
    )
    parser.add_argument(
        '--year',
        help=('the year the log is from, defaults to the current year.'
              ' format: YYYY'),
        default=str(datetime.datetime.now().year),
    )
    parser.add_argument(
        '--junit',
        help='path or glob expression to the junit xml results file(s)',
        required=True,
    )
    parser.add_argument(
        '--log',
        help='path to the test log file, should contain the ginkgo output',
        required=True,
    )
    parser.add_argument(
        '--dry-run',
        help='if set, do not actually upload anything, only print actions',
        required=False,
        action='store_true',
    )
    parser.add_argument(
        '--metadata',
        help='JSON dictionary of additional key-value pairs to display to the user.',
        required=False,
        default=str(),
    )
    parser.add_argument(
        '--key-file',
        help='path to a GCP service account key file; if provided it will be activated '
        'before uploading, then revoked and the previously active account restored '
        'on exit',
        required=False,
    )
    return parser.parse_args(args=cli_args)


def main(cli_args):
    args = parse_args(cli_args)

    # optionally activate a service account with upload credentials
    if args.key_file:
        # grab the currently active account if any, and if there is one
        # register a handler to set it active again on exit
        current_account = get_current_account(args.dry_run)
        if current_account:
            atexit.register(
                lambda: set_current_account(current_account, args.dry_run)
            )
        # login to the service account and register a handler to logout before exit
        # NOTE: atexit handlers are called in LIFO order
        activate_service_account(args.key_file, args.dry_run)
        atexit.register(lambda: revoke_current_account(args.dry_run))

    # find the matching junit files, there should be at least one for a useful
    # testgrid entry
    junits = glob.glob(args.junit)
    if not junits:
        print('No matching JUnit files found!')
        sys.exit(-1)

    # parse the e2e.log for start time, finish time, and success
    with open(args.log) as file_handle:
        started, finished, passed = parse_e2e_logfile(file_handle, args.year)

    # convert parsed results to testgrid json metadata blobs
    started_json = testgrid_started_json_contents(started)
    finished_json = testgrid_finished_json_contents(
        finished, passed, args.metadata)

    # use timestamp as build ID
    gcs_dir = args.bucket + '/' + str(datetime_to_unix(started))
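    # The resulting layout under the bucket is roughly as follows (the junit
    # file name is illustrative; actual names depend on the --junit glob):
    #   <bucket>/<started-unix-timestamp>/started.json
    #   <bucket>/<started-unix-timestamp>/finished.json
    #   <bucket>/<started-unix-timestamp>/build-log.txt
    #   <bucket>/<started-unix-timestamp>/artifacts/junit_01.xml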

    # upload metadata, log, junit to testgrid
    print('Uploading entry to: %s' % gcs_dir)
    upload_string(gcs_dir+'/started.json', started_json, args.dry_run)
    upload_string(gcs_dir+'/finished.json', finished_json, args.dry_run)
    upload_file(gcs_dir+'/build-log.txt', args.log, args.dry_run)
    for junit_file in junits:
        upload_file(gcs_dir+'/artifacts/' +
                    path.basename(junit_file), junit_file, args.dry_run)
    print('Done.')


if __name__ == '__main__':
    main(sys.argv[1:])