github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/analyzers/perf_analysis.py

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  # This script runs Change Point Analysis using a config file. The config
    19  # file holds the parameters required to fetch data and to run the change
    20  # point analysis. Change Point Analysis is used to find performance
    21  # regressions in benchmark/load/performance tests.
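        #
        # The exact schema of a config entry is defined by constants._PERF_TEST_KEYS
        # and documented in the analyzers README. The entry below is only an
        # illustrative sketch (the test id, names and values are hypothetical):
        #
        #   my_test_1:
        #     test_name: apache_beam.testing.benchmarks.my_benchmark
        #     metrics_table: my_metrics_table
        #     metric_name: runtime
        #     labels:
        #       - perf-alert
        #     min_runs_between_change_points: 3
        #     num_runs_in_change_point_window: 7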
    22  
    23  import argparse
    24  import logging
    25  import os
    26  import uuid
    27  from datetime import datetime
    28  from datetime import timezone
    29  from typing import Any
    30  from typing import Dict
    31  from typing import Optional
    32  
    33  import pandas as pd
    34  
    35  from apache_beam.testing.analyzers import constants
    36  from apache_beam.testing.analyzers.perf_analysis_utils import GitHubIssueMetaData
    37  from apache_beam.testing.analyzers.perf_analysis_utils import create_performance_alert
    38  from apache_beam.testing.analyzers.perf_analysis_utils import fetch_metric_data
    39  from apache_beam.testing.analyzers.perf_analysis_utils import find_latest_change_point_index
    40  from apache_beam.testing.analyzers.perf_analysis_utils import get_existing_issues_data
    41  from apache_beam.testing.analyzers.perf_analysis_utils import is_change_point_in_valid_window
    42  from apache_beam.testing.analyzers.perf_analysis_utils import is_perf_alert
    43  from apache_beam.testing.analyzers.perf_analysis_utils import publish_issue_metadata_to_big_query
    44  from apache_beam.testing.analyzers.perf_analysis_utils import read_test_config
    45  from apache_beam.testing.analyzers.perf_analysis_utils import validate_config
    46  from apache_beam.testing.load_tests.load_test_metrics_utils import BigQueryMetricsFetcher
    47  
    48  
    49  def run_change_point_analysis(params, test_id, big_query_metrics_fetcher):
    50    """Runs change point analysis for a single test.
    51    Args:
    52      params: Dict containing parameters to run change point analysis.
    53      test_id: Test id for the current test.
    54      big_query_metrics_fetcher: BigQuery metrics fetcher used to fetch data
    55        for change point analysis.
    56    Returns:
    57      bool indicating if a change point is observed and alerted on GitHub.
    58    """
    59    if not validate_config(params.keys()):
    60      raise ValueError(
    61          f"Please make sure all of these keys {constants._PERF_TEST_KEYS} "
    62          f"are specified for the test {test_id}.")
    63  
    64    metric_name = params['metric_name']
    65    test_name = params['test_name'].replace('.', '_') + f'_{metric_name}'
    66  
    67    min_runs_between_change_points = (
    68        constants._DEFAULT_MIN_RUNS_BETWEEN_CHANGE_POINTS)
    69    if 'min_runs_between_change_points' in params:
    70      min_runs_between_change_points = params['min_runs_between_change_points']
    71  
    72    num_runs_in_change_point_window = (
    73        constants._DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW)
    74    if 'num_runs_in_change_point_window' in params:
    75      num_runs_in_change_point_window = params['num_runs_in_change_point_window']
    76  
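          # Fetch the metric values and their run timestamps (in ascending order
          # of timestamp) for this test from BigQuery.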
    77    metric_values, timestamps = fetch_metric_data(
    78        params=params,
    79        big_query_metrics_fetcher=big_query_metrics_fetcher)
    81  
    82    change_point_index = find_latest_change_point_index(
    83        metric_values=metric_values)
    84    if change_point_index is None:
    85      logging.info("Change point is not detected for the test %s" % test_name)
    86      return False
    87    # Since timestamps are ordered in ascending order and
    88    # num_runs_in_change_point_window refers to the latest runs,
    89    # latest_change_point_run measures how many runs ago the change point
    90    # occurred, i.e. whether it is recent wrt num_runs_in_change_point_window.
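          # For example (illustrative numbers): with len(timestamps) == 10 and
          # change_point_index == 7, latest_change_point_run == 2, i.e. the change
          # point happened two runs before the most recent run.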
    91    latest_change_point_run = len(timestamps) - 1 - change_point_index
    92    if not is_change_point_in_valid_window(num_runs_in_change_point_window,
    93                                           latest_change_point_run):
    94      logging.info(
    95        'Performance regression/improvement found for the test %s '
    96        'on metric %s. Since the change point run %s '
    97        'lies outside the num_runs_in_change_point_window distance of %s, '
    98        'an alert is not raised.' % (
    99              params['test_name'],
   100              metric_name,
   101              latest_change_point_run + 1,
   102              num_runs_in_change_point_window))
   103      return False
   104  
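          # Before alerting, look up issues already filed for this test and metric.
          # If a change point was reported recently, is_perf_alert() below uses
          # min_runs_between_change_points to decide whether filing a new alert
          # is warranted.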
   105    is_alert = True
   106    last_reported_issue_number = None
   107    issue_metadata_table_name = f'{params.get("metrics_table")}_{metric_name}'
   108    existing_issue_data = get_existing_issues_data(
   109        table_name=issue_metadata_table_name,
   110        big_query_metrics_fetcher=big_query_metrics_fetcher)
   111  
   112    if existing_issue_data is not None:
   113      existing_issue_timestamps = existing_issue_data[
   114          constants._CHANGE_POINT_TIMESTAMP_LABEL].tolist()
   115      last_reported_issue_number = existing_issue_data[
   116          constants._ISSUE_NUMBER].tolist()[0]
   117  
   118      is_alert = is_perf_alert(
   119          previous_change_point_timestamps=existing_issue_timestamps,
   120          change_point_index=change_point_index,
   121          timestamps=timestamps,
   122          min_runs_between_change_points=min_runs_between_change_points)
   123    logging.debug(
   124        "Performance alert is %s for test %s" % (is_alert, params['test_name']))
   125    if is_alert:
   126      issue_number, issue_url = create_performance_alert(
   127          metric_name, params['test_name'], timestamps,
   128          metric_values, change_point_index,
   129          params.get('labels', None),
   130          last_reported_issue_number,
   131          test_target=params.get('test_target', None))
   133  
   134      issue_metadata = GitHubIssueMetaData(
   135          issue_timestamp=pd.Timestamp(
   136              datetime.now(tz=timezone.utc)),
   137          test_name=test_name,
   138          metric_name=metric_name,
   139          test_id=uuid.uuid4().hex,
   140          change_point=metric_values[change_point_index],
   141          issue_number=issue_number,
   142          issue_url=issue_url,
   143          change_point_timestamp=timestamps[change_point_index])
   144  
   145      publish_issue_metadata_to_big_query(
   146          issue_metadata=issue_metadata, table_name=issue_metadata_table_name)
   147  
   148    return is_alert
   149  
   150  
   151  def run(config_file_path: Optional[str] = None) -> None:
   152    """Entry point for running change point analysis on test metric data.
   153  
   154    The test metric data is read from the config file, and if a performance
   155    regression/improvement is observed for a test, an alert
   156    will be filed with GitHub Issues.
   157  
   158    If config_file_path is None, then the run method will use the default
   159    config file to read the required perf test parameters.
   160  
   161    Please take a look at the README for more information on the parameters
   162    defined in the config file.
   163  
   164    """
   165    if config_file_path is None:
   166      config_file_path = os.path.join(
   167          os.path.dirname(os.path.abspath(__file__)), 'tests_config.yaml')
   168  
   169    tests_config: Dict[str, Dict[str, Any]] = read_test_config(config_file_path)
   170  
   171    big_query_metrics_fetcher = BigQueryMetricsFetcher()
   172  
   173    for test_id, params in tests_config.items():
   174      run_change_point_analysis(params, test_id, big_query_metrics_fetcher)
   175  
   176  
   177  if __name__ == '__main__':
   178    logging.basicConfig(level=logging.INFO)
   179  
   180    parser = argparse.ArgumentParser()
   181    parser.add_argument(
   182        '--config_file_path',
   183        default=None,
   184        type=str,
   185        help='Path to the config file that contains data to run the Change Point '
   186        'Analysis. The default file used will be '
   187        'apache_beam/testing/analyzers/tests_config.yaml. '
   188        'If you would like to use Change Point Analysis for finding '
   189        'performance regressions in the tests, '
   190        'please provide a .yaml file with the same structure as the above '
   191        'mentioned file.')
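          # A typical invocation (the config path below is illustrative) would be:
          #   python -m apache_beam.testing.analyzers.perf_analysis \
          #       --config_file_path=/path/to/my_tests_config.yaml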
   192    known_args, unknown_args = parser.parse_known_args()
   193  
   194    if unknown_args:
   195      logging.warning('Discarding unknown arguments: %s' % unknown_args)
   196  
   197    run(known_args.config_file_path)