github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/testing/analyzers/analyze_metric_data.ipynb (about)

     1  {
     2    "nbformat": 4,
     3    "nbformat_minor": 0,
     4    "metadata": {
     5      "colab": {
     6        "provenance": []
     7      },
     8      "kernelspec": {
     9        "name": "python3",
    10        "display_name": "Python 3"
    11      },
    12      "language_info": {
    13        "name": "python"
    14      }
    15    },
    16    "cells": [
    17      {
    18        "cell_type": "code",
    19        "source": [
    20          "# Licensed to the Apache Software Foundation (ASF) under one\n",
    21          "# or more contributor license agreements.  See the NOTICE file\n",
    22          "# distributed with this work for additional information\n",
    23          "# regarding copyright ownership.  The ASF licenses this file\n",
    24          "# to you under the Apache License, Version 2.0 (the\n",
    25          "# \"License\"); you may not use this file except in compliance\n",
    26          "# with the License.  You may obtain a copy of the License at\n",
    27          "#\n",
    28          "#   http://www.apache.org/licenses/LICENSE-2.0\n",
    29          "#\n",
    30          "# Unless required by applicable law or agreed to in writing,\n",
    31          "# software distributed under the License is distributed on an\n",
    32          "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
    33          "# KIND, either express or implied.  See the License for the\n",
    34          "# specific language governing permissions and limitations\n",
    35          "# under the License."
    36        ],
    37        "metadata": {
    38          "id": "fCjymAKWJiTh"
    39        },
    40        "execution_count": null,
    41        "outputs": []
    42      },
    43      {
    44        "cell_type": "code",
    45        "source": [
    46          "# This notebook is intended for internal testing purposes."
    47        ],
    48        "metadata": {
    49          "id": "CCAvj4mQFR5x"
    50        },
    51        "execution_count": null,
    52        "outputs": []
    53      },
    54      {
    55        "cell_type": "markdown",
    56        "source": [
    57          "Apache Beam can be installed directly from the main branch of https://github.com/apache/beam or via `pip install 'apache-beam>=2.45.0'` (quote the requirement so that the shell does not interpret `>` as an output redirect)."
    58        ],
    59        "metadata": {
    60          "id": "IL7coD4DJqzG"
    61        }
    62      },
    63      {
    64        "cell_type": "code",
    65        "source": [
    66          "!git clone https://github.com/apache/beam.git\n",
    67          "!pip install -r beam/sdks/python/build-requirements.txt\n",
    68          "!pip install -e beam/sdks/python[gcp]\n",
    69          "!pip install matplotlib\n",
    70          "!pip install pandas"
    71        ],
    72        "metadata": {
    73          "id": "yW4okqmpECqY"
    74        },
    75        "execution_count": null,
    76        "outputs": []
    77      },
    78      {
    79        "cell_type": "markdown",
    80        "source": [
    81          "Import necessary dependencies"
    82        ],
    83        "metadata": {
    84          "id": "ZPt3DbZcL-Ki"
    85        }
    86      },
    87      {
    88        "cell_type": "code",
    89        "source": [
    90          "import pandas as pd\n",
    91          "import matplotlib.pyplot as plt\n",
    92          "from apache_beam.testing.load_tests import load_test_metrics_utils\n",
    93          "from apache_beam.testing.load_tests.load_test_metrics_utils import BigQueryMetricsFetcher"
    94        ],
    95        "metadata": {
    96          "id": "xYGgc-tpE9qY"
    97        },
    98        "execution_count": null,
    99        "outputs": []
   100      },
   101      {
   102        "cell_type": "code",
   103        "source": [
   104          "bq_project = 'apache-beam-testing'\n",
   105          "bq_dataset = '<bq-dataset-name>' # sample value: beam_run_inference\n",
   106          "bq_table = '<bq-table>' # sample value: torch_inference_imagenet_results_resnet152\n",
   107          "metric_name = '<perf-alerted-metric-name>' # sample value: mean_load_model_latency_milli_secs\n",
   108          "\n",
   109          "query = f\"\"\"\n",
   110          "      SELECT *\n",
   111          "      FROM {bq_project}.{bq_dataset}.{bq_table}\n",
   112          "      WHERE CONTAINS_SUBSTR(({load_test_metrics_utils.METRICS_TYPE_LABEL}), '{metric_name}')\n",
   113          "      ORDER BY {load_test_metrics_utils.SUBMIT_TIMESTAMP_LABEL} DESC\n",
   114          "      LIMIT 30\n",
   115          "    \"\"\"\n"
   116        ],
   117        "metadata": {
   118          "id": "nyMmUpRrD_zV"
   119        },
   120        "execution_count": null,
   121        "outputs": []
   122      },
   123      {
   124        "cell_type": "markdown",
   125        "source": [
   126          "If the performance/load test stores its results in BigQuery using this [schema](https://github.com/apache/beam/blob/83679216cce2d52dbeb7e837f06ca1d57b31d509/sdks/python/apache_beam/testing/load_tests/load_test_metrics_utils.py#L66),\n",
   127          "then fetch the metric_values for a `metric_name` for the last `30` runs and display a plot using matplotlib.\n"
   128        ],
   129        "metadata": {
   130          "id": "RwlsXCLbVs_2"
   131        }
   132      },
   133      {
   134        "cell_type": "code",
   135        "source": [
   136          "big_query_metrics_fetcher = BigQueryMetricsFetcher()\n",
   137          "metric_data: pd.DataFrame = big_query_metrics_fetcher.fetch(query=query)"
   138        ],
   139        "metadata": {
   140          "id": "rmOE_odNEBFK"
   141        },
   142        "execution_count": null,
   143        "outputs": []
   144      },
   145      {
   146        "cell_type": "code",
   147        "source": [
   148          "# sort the data to view it in chronological order.\n",
   149          "metric_data.sort_values(\n",
   150          "      by=[load_test_metrics_utils.SUBMIT_TIMESTAMP_LABEL], inplace=True)"
   151        ],
   152        "metadata": {
   153          "id": "q-i3qLpGV5Ly"
   154        },
   155        "execution_count": null,
   156        "outputs": []
   157      },
   158      {
   159        "cell_type": "code",
   160        "source": [
   161          "metric_data.plot(x=load_test_metrics_utils.SUBMIT_TIMESTAMP_LABEL,\n",
   162          "                 y=load_test_metrics_utils.VALUE_LABEL)\n",
   163          "plt.show()"
   164        ],
   165        "metadata": {
   166          "id": "vbFoxdxHVvtQ"
   167        },
   168        "execution_count": null,
   169        "outputs": []
   170      }
   171    ]
   172  }