github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/analyzers/analyze_metric_data.ipynb (about) 1 { 2 "nbformat": 4, 3 "nbformat_minor": 0, 4 "metadata": { 5 "colab": { 6 "provenance": [] 7 }, 8 "kernelspec": { 9 "name": "python3", 10 "display_name": "Python 3" 11 }, 12 "language_info": { 13 "name": "python" 14 } 15 }, 16 "cells": [ 17 { 18 "cell_type": "code", 19 "source": [ 20 "# Licensed to the Apache Software Foundation (ASF) under one\n", 21 "# or more contributor license agreements. See the NOTICE file\n", 22 "# distributed with this work for additional information\n", 23 "# regarding copyright ownership. The ASF licenses this file\n", 24 "# to you under the Apache License, Version 2.0 (the\n", 25 "# \"License\"); you may not use this file except in compliance\n", 26 "# with the License. You may obtain a copy of the License at\n", 27 "#\n", 28 "# http://www.apache.org/licenses/LICENSE-2.0\n", 29 "#\n", 30 "# Unless required by applicable law or agreed to in writing,\n", 31 "# software distributed under the License is distributed on an\n", 32 "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n", 33 "# KIND, either express or implied. See the License for the\n", 34 "# specific language governing permissions and limitations\n", 35 "# under the License." 36 ], 37 "metadata": { 38 "id": "fCjymAKWJiTh" 39 }, 40 "execution_count": null, 41 "outputs": [] 42 }, 43 { 44 "cell_type": "code", 45 "source": [ 46 "# This notebook is intended for internal testing purposes." 
47 ], 48 "metadata": { 49 "id": "CCAvj4mQFR5x" 50 }, 51 "execution_count": null, 52 "outputs": [] 53 }, 54 { 55 "cell_type": "markdown", 56 "source": [ 57 "Apache Beam can be installed directly from the main branch of https://github.com/apache/beam or via `pip install \"apache_beam>=2.45.0\"` (quote the requirement so the shell does not treat `>` as an output redirect)." 58 ], 59 "metadata": { 60 "id": "IL7coD4DJqzG" 61 } 62 }, 63 { 64 "cell_type": "code", 65 "source": [ 66 "!git clone https://github.com/apache/beam.git\n", 67 "%pip install -r beam/sdks/python/build-requirements.txt\n", 68 "%pip install -e \"beam/sdks/python[gcp]\"\n", 69 "%pip install matplotlib\n", 70 "%pip install pandas" 71 ], 72 "metadata": { 73 "id": "yW4okqmpECqY" 74 }, 75 "execution_count": null, 76 "outputs": [] 77 }, 78 { 79 "cell_type": "markdown", 80 "source": [ 81 "Import necessary dependencies" 82 ], 83 "metadata": { 84 "id": "ZPt3DbZcL-Ki" 85 } 86 }, 87 { 88 "cell_type": "code", 89 "source": [ 90 "import pandas as pd\n", 91 "import matplotlib.pyplot as plt\n", 92 "from apache_beam.testing.load_tests import load_test_metrics_utils\n", 93 "from apache_beam.testing.load_tests.load_test_metrics_utils import BigQueryMetricsFetcher" 94 ], 95 "metadata": { 96 "id": "xYGgc-tpE9qY" 97 }, 98 "execution_count": null, 99 "outputs": [] 100 }, 101 { 102 "cell_type": "code", 103 "source": [ 104 "bq_project = 'apache-beam-testing'\n", 105 "bq_dataset = '<bq-dataset-name>' # sample value: beam_run_inference\n", 106 "bq_table = '<bq-table>' # sample value: torch_inference_imagenet_results_resnet152\n", 107 "metric_name = '<perf-alerted-metric-name>' # sample value: mean_load_model_latency_milli_secs\n", 108 "\n", 109 "query = f\"\"\"\n", 110 " SELECT *\n", 111 " FROM `{bq_project}.{bq_dataset}.{bq_table}`\n", 112 " WHERE CONTAINS_SUBSTR(({load_test_metrics_utils.METRICS_TYPE_LABEL}), '{metric_name}')\n", 113 " ORDER BY {load_test_metrics_utils.SUBMIT_TIMESTAMP_LABEL} DESC\n", 114 " LIMIT 30\n", 115 " \"\"\"\n" 116 ], 117 "metadata": { 118 "id": "nyMmUpRrD_zV" 119 }, 120 
"execution_count": null, 121 "outputs": [] 122 }, 123 { 124 "cell_type": "markdown", 125 "source": [ 126 "If the performance/load test stores its results in BigQuery using this [schema](https://github.com/apache/beam/blob/83679216cce2d52dbeb7e837f06ca1d57b31d509/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py#L66),\n", 127 "then fetch the metric values for a `metric_name` for the last `30` runs and display a plot using matplotlib.\n" 128 ], 129 "metadata": { 130 "id": "RwlsXCLbVs_2" 131 } 132 }, 133 { 134 "cell_type": "code", 135 "source": [ 136 "big_query_metrics_fetcher = BigQueryMetricsFetcher()\n", 137 "metric_data: pd.DataFrame = big_query_metrics_fetcher.fetch(query=query)" 138 ], 139 "metadata": { 140 "id": "rmOE_odNEBFK" 141 }, 142 "execution_count": null, 143 "outputs": [] 144 }, 145 { 146 "cell_type": "code", 147 "source": [ 148 "# Sort into a new frame (no in-place mutation) so the plot reads in chronological order.\n", 149 "metric_data_sorted = metric_data.sort_values(\n", 150 " by=[load_test_metrics_utils.SUBMIT_TIMESTAMP_LABEL])" 151 ], 152 "metadata": { 153 "id": "q-i3qLpGV5Ly" 154 }, 155 "execution_count": null, 156 "outputs": [] 157 }, 158 { 159 "cell_type": "code", 160 "source": [ 161 "metric_data_sorted.plot(x=load_test_metrics_utils.SUBMIT_TIMESTAMP_LABEL,\n", 162 " y=load_test_metrics_utils.VALUE_LABEL, title=metric_name)\n", 163 "plt.show()" 164 ], 165 "metadata": { 166 "id": "vbFoxdxHVvtQ" 167 }, 168 "execution_count": null, 169 "outputs": [] 170 } 171 ] 172 }