github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/analyzers/perf_analysis.py (about) 1 # 2 # Licensed to the Apache Software Foundation (ASF) under one or more 3 # contributor license agreements. See the NOTICE file distributed with 4 # this work for additional information regarding copyright ownership. 5 # The ASF licenses this file to You under the Apache License, Version 2.0 6 # (the "License"); you may not use this file except in compliance with 7 # the License. You may obtain a copy of the License at 8 # 9 # http://www.apache.org/licenses/LICENSE-2.0 10 # 11 # Unless required by applicable law or agreed to in writing, software 12 # distributed under the License is distributed on an "AS IS" BASIS, 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 # See the License for the specific language governing permissions and 15 # limitations under the License. 16 # 17 18 # This script is used to run Change Point Analysis using a config file. 19 # config file holds the parameters required to fetch data, and to run the 20 # change point analysis. Change Point Analysis is used to find Performance 21 # regressions for benchmark/load/performance test. 22 23 import argparse 24 import logging 25 import os 26 import uuid 27 from datetime import datetime 28 from datetime import timezone 29 from typing import Any 30 from typing import Dict 31 from typing import Optional 32 33 import pandas as pd 34 35 from apache_beam.testing.analyzers import constants 36 from apache_beam.testing.analyzers.perf_analysis_utils import GitHubIssueMetaData 37 from apache_beam.testing.analyzers.perf_analysis_utils import create_performance_alert 38 from apache_beam.testing.analyzers.perf_analysis_utils import fetch_metric_data 39 from apache_beam.testing.analyzers.perf_analysis_utils import find_latest_change_point_index 40 from apache_beam.testing.analyzers.perf_analysis_utils import get_existing_issues_data 41 from apache_beam.testing.analyzers.perf_analysis_utils import is_change_point_in_valid_window 42 from apache_beam.testing.analyzers.perf_analysis_utils import is_perf_alert 43 from apache_beam.testing.analyzers.perf_analysis_utils import publish_issue_metadata_to_big_query 44 from apache_beam.testing.analyzers.perf_analysis_utils import read_test_config 45 from apache_beam.testing.analyzers.perf_analysis_utils import validate_config 46 from apache_beam.testing.load_tests.load_test_metrics_utils import BigQueryMetricsFetcher 47 48 49 def run_change_point_analysis(params, test_id, big_query_metrics_fetcher): 50 """ 51 Args: 52 params: Dict containing parameters to run change point analysis. 53 test_id: Test id for the current test. 54 big_query_metrics_fetcher: BigQuery metrics fetcher used to fetch data for 55 change point analysis. 56 Returns: 57 bool indicating if a change point is observed and alerted on GitHub. 58 """ 59 if not validate_config(params.keys()): 60 raise ValueError( 61 f"Please make sure all these keys {constants._PERF_TEST_KEYS} " 62 f"are specified for the {test_id}") 63 64 metric_name = params['metric_name'] 65 test_name = params['test_name'].replace('.', '_') + f'_{metric_name}' 66 67 min_runs_between_change_points = ( 68 constants._DEFAULT_MIN_RUNS_BETWEEN_CHANGE_POINTS) 69 if 'min_runs_between_change_points' in params: 70 min_runs_between_change_points = params['min_runs_between_change_points'] 71 72 num_runs_in_change_point_window = ( 73 constants._DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW) 74 if 'num_runs_in_change_point_window' in params: 75 num_runs_in_change_point_window = params['num_runs_in_change_point_window'] 76 77 metric_values, timestamps = fetch_metric_data( 78 params=params, 79 big_query_metrics_fetcher=big_query_metrics_fetcher 80 ) 81 82 change_point_index = find_latest_change_point_index( 83 metric_values=metric_values) 84 if not change_point_index: 85 logging.info("Change point is not detected for the test %s" % test_name) 86 return False 87 # since timestamps are ordered in ascending order and 88 # num_runs_in_change_point_window refers to the latest runs, 89 # latest_change_point_run can help determine if the change point 90 # index is recent wrt num_runs_in_change_point_window 91 latest_change_point_run = len(timestamps) - 1 - change_point_index 92 if not is_change_point_in_valid_window(num_runs_in_change_point_window, 93 latest_change_point_run): 94 logging.info( 95 'Performance regression/improvement found for the test: %s. ' 96 'on metric %s. Since the change point run %s ' 97 'lies outside the num_runs_in_change_point_window distance: %s, ' 98 'alert is not raised.' % ( 99 params['test_name'], 100 metric_name, 101 latest_change_point_run + 1, 102 num_runs_in_change_point_window)) 103 return False 104 105 is_alert = True 106 last_reported_issue_number = None 107 issue_metadata_table_name = f'{params.get("metrics_table")}_{metric_name}' 108 existing_issue_data = get_existing_issues_data( 109 table_name=issue_metadata_table_name, 110 big_query_metrics_fetcher=big_query_metrics_fetcher) 111 112 if existing_issue_data is not None: 113 existing_issue_timestamps = existing_issue_data[ 114 constants._CHANGE_POINT_TIMESTAMP_LABEL].tolist() 115 last_reported_issue_number = existing_issue_data[ 116 constants._ISSUE_NUMBER].tolist()[0] 117 118 is_alert = is_perf_alert( 119 previous_change_point_timestamps=existing_issue_timestamps, 120 change_point_index=change_point_index, 121 timestamps=timestamps, 122 min_runs_between_change_points=min_runs_between_change_points) 123 logging.debug( 124 "Performance alert is %s for test %s" % (is_alert, params['test_name'])) 125 if is_alert: 126 issue_number, issue_url = create_performance_alert( 127 metric_name, params['test_name'], timestamps, 128 metric_values, change_point_index, 129 params.get('labels', None), 130 last_reported_issue_number, 131 test_target=params['test_target'] if 'test_target' in params else None 132 ) 133 134 issue_metadata = GitHubIssueMetaData( 135 issue_timestamp=pd.Timestamp( 136 datetime.now().replace(tzinfo=timezone.utc)), 137 test_name=test_name, 138 metric_name=metric_name, 139 test_id=uuid.uuid4().hex, 140 change_point=metric_values[change_point_index], 141 issue_number=issue_number, 142 issue_url=issue_url, 143 change_point_timestamp=timestamps[change_point_index]) 144 145 publish_issue_metadata_to_big_query( 146 issue_metadata=issue_metadata, table_name=issue_metadata_table_name) 147 148 return is_alert 149 150 151 def run(config_file_path: Optional[str] = None) -> None: 152 """ 153 run is the entry point to run change point analysis on test metric 154 data, which is read from config file, and if there is a performance 155 regression/improvement observed for a test, an alert 156 will filed with GitHub Issues. 157 158 If config_file_path is None, then the run method will use default 159 config file to read the required perf test parameters. 160 161 Please take a look at the README for more information on the parameters 162 defined in the config file. 163 164 """ 165 if config_file_path is None: 166 config_file_path = os.path.join( 167 os.path.dirname(os.path.abspath(__file__)), 'tests_config.yaml') 168 169 tests_config: Dict[str, Dict[str, Any]] = read_test_config(config_file_path) 170 171 big_query_metrics_fetcher = BigQueryMetricsFetcher() 172 173 for test_id, params in tests_config.items(): 174 run_change_point_analysis(params, test_id, big_query_metrics_fetcher) 175 176 177 if __name__ == '__main__': 178 logging.basicConfig(level=logging.INFO) 179 180 parser = argparse.ArgumentParser() 181 parser.add_argument( 182 '--config_file_path', 183 default=None, 184 type=str, 185 help='Path to the config file that contains data to run the Change Point ' 186 'Analysis.The default file will used will be ' 187 'apache_beam/testing/analyzers/tests.config.yml. ' 188 'If you would like to use the Change Point Analysis for finding ' 189 'performance regression in the tests, ' 190 'please provide an .yml file in the same structure as the above ' 191 'mentioned file. ') 192 known_args, unknown_args = parser.parse_known_args() 193 194 if unknown_args: 195 logging.warning('Discarding unknown arguments : %s ' % unknown_args) 196 197 run(known_args.config_file_path)