github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/dataflow/dataflow_metrics_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """
    19  Tests corresponding to the DataflowRunner implementation of MetricsResult,
    20  the DataflowMetrics class.
    21  """
    22  
    23  # pytype: skip-file
    24  
    25  import types
    26  import unittest
    27  
    28  import mock
    29  
    30  from apache_beam import DoFn
    31  from apache_beam import ParDo
    32  from apache_beam.metrics.cells import DistributionData
    33  from apache_beam.metrics.cells import DistributionResult
    34  from apache_beam.metrics.execution import MetricKey
    35  from apache_beam.metrics.execution import MetricResult
    36  from apache_beam.metrics.metricbase import MetricName
    37  from apache_beam.options.pipeline_options import PipelineOptions
    38  from apache_beam.pipeline import Pipeline
    39  from apache_beam.runners.dataflow import dataflow_metrics
    40  from apache_beam.testing import metric_result_matchers
    41  from apache_beam.testing.metric_result_matchers import MetricResultMatcher
    42  from apache_beam.transforms import Create
    43  from apache_beam.transforms.environments import DockerEnvironment
    44  
    45  # Protect against environments where apitools library is not available.
    46  # pylint: disable=wrong-import-order, wrong-import-position
    47  try:
    48    from apache_beam.runners.dataflow.internal import apiclient
    49  except ImportError:
    50    apiclient = None  # type: ignore
    51  # pylint: enable=wrong-import-order, wrong-import-position
    52  
    53  
    54  class DictToObject(object):
    55    """Translate from a dict(list()) structure to an object structure"""
    56    def __init__(self, data):
    57      for name, value in data.items():
    58        setattr(self, name, self._wrap(value))
    59  
    60    def _wrap(self, value):
    61      if isinstance(value, (tuple, list, set, frozenset)):
    62        return type(value)([self._wrap(v) for v in value])
    63      return DictToObject(value) if isinstance(value, dict) else value
    64  
    65  
    66  class TestDataflowMetrics(unittest.TestCase):
    67  
  # TODO(https://github.com/apache/beam/issues/19258): Write a dump tool to
  # generate this fake data, or somehow make this easier to maintain.
  #
  # Fake job-metrics payload holding only user scalar metrics: "words" and
  # "empty_lines", each listed twice -- once with a "tentative" context entry
  # and once without. Fed to DictToObject by setup_mock_client_result.
  ONLY_COUNTERS_LIST = {
      "metrics": [
          # "words", tentative entry.
          {
              "name": {
                  "context": {
                      "additionalProperties": [{
                          "key": "namespace",
                          "value": "__main__.WordExtractingDoFn"
                      }, {
                          "key": "step", "value": "s2"
                      },
                                               {
                                                   "key": "tentative",
                                                   "value": "true"
                                               }]
                  },
                  "name": "words",
                  "origin": "user"
              },
              "scalar": {
                  "integer_value": 26185
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          # "words", non-tentative entry (value differs from the tentative one).
          {
              "name": {
                  "context": {
                      "additionalProperties": [{
                          "key": "namespace",
                          "value": "__main__.WordExtractingDoFn"
                      }, {
                          "key": "step", "value": "s2"
                      }]
                  },
                  "name": "words",
                  "origin": "user"
              },
              "scalar": {
                  "integer_value": 26181
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          # "empty_lines", tentative entry.
          {
              "name": {
                  "context": {
                      "additionalProperties": [{
                          "key": "namespace",
                          "value": "__main__.WordExtractingDoFn"
                      }, {
                          "key": "step", "value": "s2"
                      },
                                               {
                                                   "key": "tentative",
                                                   "value": "true"
                                               }]
                  },
                  "name": "empty_lines",
                  "origin": "user"
              },
              "scalar": {
                  "integer_value": 1080
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          # "empty_lines", non-tentative entry.
          {
              "name": {
                  "context": {
                      "additionalProperties": [{
                          "key": "namespace",
                          "value": "__main__.WordExtractingDoFn"
                      }, {
                          "key": "step", "value": "s2"
                      }]
                  },
                  "name": "empty_lines",
                  "origin": "user"
              },
              "scalar": {
                  "integer_value": 1080
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
      ]
  }
  # Fake job-metrics payload mixing a user scalar metric ("word_lengths") with
  # a user distribution metric ("word_length_dist"). Each metric appears twice:
  # once with a "tentative" context entry and once without.
  STRUCTURED_COUNTER_LIST = {
      "metrics": [
          # "word_lengths", tentative entry.
          {
              "name": {
                  "context": {
                      "additionalProperties": [{
                          "key": "namespace",
                          "value": "__main__.WordExtractingDoFn"
                      }, {
                          "key": "step", "value": "s2"
                      },
                                               {
                                                   "key": "tentative",
                                                   "value": "true"
                                               }]
                  },
                  "name": "word_lengths",
                  "origin": "user"
              },
              "scalar": {
                  "integer_value": 109475
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          # "word_lengths", non-tentative entry.
          {
              "name": {
                  "context": {
                      "additionalProperties": [{
                          "key": "namespace",
                          "value": "__main__.WordExtractingDoFn"
                      }, {
                          "key": "step", "value": "s2"
                      }]
                  },
                  "name": "word_lengths",
                  "origin": "user"
              },
              "scalar": {
                  "integer_value": 109475
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          # "word_length_dist", tentative entry; the value lives under
          # "distribution" and "scalar" is None.
          {
              "name": {
                  "context": {
                      "additionalProperties": [{
                          "key": "namespace",
                          "value": "__main__.WordExtractingDoFn"
                      }, {
                          "key": "step", "value": "s2"
                      },
                                               {
                                                   "key": "tentative",
                                                   "value": "true"
                                               }]
                  },
                  "name": "word_length_dist",
                  "origin": "user"
              },
              "scalar": None,
              "distribution": {
                  "object_value": {
                      "properties": [
                          {
                              "key": "min", "value": {
                                  "integer_value": 2
                              }
                          },
                          {
                              "key": "max", "value": {
                                  "integer_value": 16
                              }
                          },
                          {
                              "key": "count", "value": {
                                  "integer_value": 2
                              }
                          },
                          {
                              "key": "mean", "value": {
                                  "integer_value": 9
                              }
                          },
                          {
                              "key": "sum", "value": {
                                  "integer_value": 18
                              }
                          },
                      ]
                  }
              },
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          # "word_length_dist", non-tentative entry.
          {
              "name": {
                  "context": {
                      "additionalProperties": [{
                          "key": "namespace",
                          "value": "__main__.WordExtractingDoFn"
                      }, {
                          "key": "step", "value": "s2"
                      }]
                  },
                  "name": "word_length_dist",
                  "origin": "user"
              },
              "scalar": None,
              "distribution": {
                  "object_value": {
                      "properties": [
                          {
                              "key": "min", "value": {
                                  "integer_value": 2
                              }
                          },
                          {
                              "key": "max", "value": {
                                  "integer_value": 16
                              }
                          },
                          {
                              "key": "count", "value": {
                                  "integer_value": 2
                              }
                          },
                          {
                              "key": "mean", "value": {
                                  "integer_value": 9
                              }
                          },
                          {
                              "key": "sum", "value": {
                                  "integer_value": 18
                              }
                          },
                      ]
                  }
              },
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
      ]
  }
  # Fake job-metrics payload whose metrics all have origin "dataflow/v1b3"
  # (ElementCount, MeanByteCount, ExecutionTime_ProcessElement). Each metric
  # appears once without and once with a "tentative" context entry.
  SYSTEM_COUNTERS_LIST = {
      "metrics": [
          # ElementCount
          {
              "name": {
                  "context": {
                      "additionalProperties": [
                          {
                              "key": "original_name",
                              "value":
                                  "ToIsmRecordForMultimap-out0-ElementCount"
                          },  # yapf: disable
                          {
                              "key": "output_user_name",
                              "value": "ToIsmRecordForMultimap-out0"
                          }
                      ]
                  },
                  "name": "ElementCount",
                  "origin": "dataflow/v1b3"
              },
              "scalar": {
                  "integer_value": 42
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          {
              "name": {
                  "context": {
                      "additionalProperties": [
                          {
                              "key": "original_name",
                              "value":
                                  "ToIsmRecordForMultimap-out0-ElementCount"
                          },  # yapf: disable
                          {
                              "key": "output_user_name",
                              "value": "ToIsmRecordForMultimap-out0"
                          }, {
                              "key": "tentative", "value": "true"
                          }
                      ]
                  },
                  "name": "ElementCount",
                  "origin": "dataflow/v1b3"
              },
              "scalar": {
                  "integer_value": 42
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          # MeanByteCount
          {
              "name": {
                  "context": {
                      "additionalProperties": [
                          {
                              "key": "original_name",
                              "value": "Read-out0-MeanByteCount"
                          },
                          {
                              "key": "output_user_name",
                              "value": "GroupByKey/Read-out0"
                          }
                      ]
                  },
                  "name": "MeanByteCount",
                  "origin": "dataflow/v1b3"
              },
              "scalar": {
                  "integer_value": 31
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          {
              "name": {
                  "context": {
                      "additionalProperties": [
                          {
                              "key": "original_name",
                              "value": "Read-out0-MeanByteCount"
                          },
                          {
                              "key": "output_user_name",
                              "value": "GroupByKey/Read-out0"
                          }, {
                              "key": "tentative", "value": "true"
                          }
                      ]
                  },
                  "name": "MeanByteCount",
                  "origin": "dataflow/v1b3"
              },
              "scalar": {
                  "integer_value": 31
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          # ExecutionTime
          # These entries carry a "step" context entry instead of the
          # original_name/output_user_name pair used above.
          {
              "name": {
                  "context": {
                      "additionalProperties": [
                          {
                              "key": "step", "value": "write/Write/Write"
                          },
                      ]
                  },
                  "name": "ExecutionTime_ProcessElement",
                  "origin": "dataflow/v1b3"
              },
              "scalar": {
                  "integer_value": 1000
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
          {
              "name": {
                  "context": {
                      "additionalProperties": [{
                          "key": "step", "value": "write/Write/Write"
                      },
                                               {
                                                   "key": "tentative",
                                                   "value": "true"
                                               }]
                  },
                  "name": "ExecutionTime_ProcessElement",
                  "origin": "dataflow/v1b3"
              },
              "scalar": {
                  "integer_value": 1000
              },
              "distribution": None,
              "updateTime": "2017-03-22T18:47:06.402Z"
          },
      ]
  }
   445  
   446    def setup_mock_client_result(self, counter_list=None):
   447      mock_client = mock.Mock()
   448      mock_query_result = DictToObject(counter_list)
   449      mock_client.get_job_metrics.return_value = mock_query_result
   450      mock_job_result = mock.Mock()
   451      mock_job_result.job_id.return_value = 1
   452      mock_job_result.is_in_terminal_state.return_value = False
   453      return mock_client, mock_job_result
   454  
   455    def test_cache_functions(self):
   456      mock_client, mock_job_result = self.setup_mock_client_result(
   457          self.STRUCTURED_COUNTER_LIST)
   458      dm = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result)
   459  
   460      # At first creation, we should always query dataflow.
   461      self.assertTrue(dm._cached_metrics is None)
   462  
   463      # Right after querying, we still query again.
   464      dm.query()
   465      self.assertTrue(dm._cached_metrics is None)
   466  
   467      # The job has ended. The query should not run again after this.
   468      mock_job_result.is_in_terminal_state.return_value = True
   469      dm.query()
   470      self.assertTrue(dm._cached_metrics)
   471  
   472    def test_query_structured_metrics(self):
   473      mock_client, mock_job_result = self.setup_mock_client_result(
   474          self.STRUCTURED_COUNTER_LIST)
   475      dm = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result)
   476      dm._translate_step_name = types.MethodType(lambda self, x: 'split', dm)
   477      query_result = dm.query()
   478      expected_counters = [
   479          MetricResult(
   480              MetricKey(
   481                  'split',
   482                  MetricName('__main__.WordExtractingDoFn', 'word_lengths'),
   483              ),
   484              109475,
   485              109475),
   486      ]
   487      self.assertEqual(query_result['counters'], expected_counters)
   488  
   489      expected_distributions = [
   490          MetricResult(
   491              MetricKey(
   492                  'split',
   493                  MetricName('__main__.WordExtractingDoFn', 'word_length_dist'),
   494              ),
   495              DistributionResult(DistributionData(18, 2, 2, 16)),
   496              DistributionResult(DistributionData(18, 2, 2, 16))),
   497      ]
   498      self.assertEqual(query_result['distributions'], expected_distributions)
   499  
   500    @unittest.skipIf(apiclient is None, 'GCP dependencies are not installed')
   501    def test_translate_portable_job_step_name(self):
   502      mock_client, mock_job_result = self.setup_mock_client_result(
   503          self.ONLY_COUNTERS_LIST)
   504  
   505      pipeline_options = PipelineOptions([
   506          '--experiments=use_runner_v2',
   507          '--experiments=use_portable_job_submission',
   508          '--temp_location=gs://any-location/temp',
   509          '--project=dummy_project',
   510      ])
   511  
   512      pipeline = Pipeline(options=pipeline_options)
   513      pipeline | Create([1, 2, 3]) | 'MyTestParDo' >> ParDo(DoFn())  # pylint:disable=expression-not-assigned
   514  
   515      test_environment = DockerEnvironment(container_image='test_default_image')
   516      proto_pipeline, _ = pipeline.to_runner_api(
   517          return_context=True, default_environment=test_environment)
   518  
   519      job = apiclient.Job(pipeline_options, proto_pipeline)
   520      dm = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result, job)
   521      self.assertEqual(
   522          'MyTestParDo',
   523          dm._translate_step_name('ref_AppliedPTransform_MyTestParDo_14'))
   524  
   525    def test_query_counters(self):
   526      mock_client, mock_job_result = self.setup_mock_client_result(
   527          self.ONLY_COUNTERS_LIST)
   528      dm = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result)
   529      dm._translate_step_name = types.MethodType(lambda self, x: 'split', dm)
   530      query_result = dm.query()
   531      expected_counters = [
   532          MetricResult(
   533              MetricKey(
   534                  'split',
   535                  MetricName('__main__.WordExtractingDoFn', 'empty_lines')),
   536              1080,
   537              1080),
   538          MetricResult(
   539              MetricKey(
   540                  'split', MetricName('__main__.WordExtractingDoFn', 'words')),
   541              26181,
   542              26185),
   543      ]
   544      self.assertEqual(
   545          sorted(query_result['counters'], key=lambda x: x.key.metric.name),
   546          sorted(expected_counters, key=lambda x: x.key.metric.name))
   547  
   548    def test_system_counters_set_labels_and_step_name(self):
   549      mock_client, mock_job_result = self.setup_mock_client_result(
   550          self.SYSTEM_COUNTERS_LIST)
   551      test_object = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result)
   552      all_metrics = test_object.all_metrics()
   553  
   554      matchers = [
   555          MetricResultMatcher(
   556              name='ElementCount',
   557              labels={
   558                  'original_name': 'ToIsmRecordForMultimap-out0-ElementCount',
   559                  'output_user_name': 'ToIsmRecordForMultimap-out0'
   560              },
   561              attempted=42,
   562              committed=42),
   563          MetricResultMatcher(
   564              name='MeanByteCount',
   565              labels={
   566                  'original_name': 'Read-out0-MeanByteCount',
   567                  'output_user_name': 'GroupByKey/Read-out0'
   568              },
   569              attempted=31,
   570              committed=31),
   571          MetricResultMatcher(
   572              name='ExecutionTime_ProcessElement',
   573              step='write/Write/Write',
   574              attempted=1000,
   575              committed=1000)
   576      ]
   577      errors = metric_result_matchers.verify_all(all_metrics, matchers)
   578      self.assertFalse(errors, errors)
   579  
   580  
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()