#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# cython: language_level=3
# cython: profile=True

# pytype: skip-file

import collections
import time
from functools import reduce
from typing import FrozenSet
from typing import Hashable
from typing import List
from typing import Union

from apache_beam.coders import coder_impl
from apache_beam.coders import coders
from apache_beam.metrics.cells import DistributionData
from apache_beam.metrics.cells import DistributionResult
from apache_beam.metrics.cells import GaugeData
from apache_beam.metrics.cells import GaugeResult
from apache_beam.portability import common_urns
from apache_beam.portability.api import metrics_pb2

# URNs identifying individual metrics, read from the shared monitoring info
# specs so that they stay in sync with the portability definitions.
SAMPLED_BYTE_SIZE_URN = (
    common_urns.monitoring_info_specs.SAMPLED_BYTE_SIZE.spec.urn)
ELEMENT_COUNT_URN = common_urns.monitoring_info_specs.ELEMENT_COUNT.spec.urn
START_BUNDLE_MSECS_URN = (
    common_urns.monitoring_info_specs.START_BUNDLE_MSECS.spec.urn)
PROCESS_BUNDLE_MSECS_URN = (
    common_urns.monitoring_info_specs.PROCESS_BUNDLE_MSECS.spec.urn)
FINISH_BUNDLE_MSECS_URN = (
    common_urns.monitoring_info_specs.FINISH_BUNDLE_MSECS.spec.urn)
TOTAL_MSECS_URN = common_urns.monitoring_info_specs.TOTAL_MSECS.spec.urn
USER_COUNTER_URN = common_urns.monitoring_info_specs.USER_SUM_INT64.spec.urn
USER_DISTRIBUTION_URN = (
    common_urns.monitoring_info_specs.USER_DISTRIBUTION_INT64.spec.urn)
USER_GAUGE_URN = common_urns.monitoring_info_specs.USER_LATEST_INT64.spec.urn
# URNs that mark a MonitoringInfo as a user-defined metric.
USER_METRIC_URNS = {USER_COUNTER_URN, USER_DISTRIBUTION_URN, USER_GAUGE_URN}
WORK_REMAINING_URN = common_urns.monitoring_info_specs.WORK_REMAINING.spec.urn
WORK_COMPLETED_URN = common_urns.monitoring_info_specs.WORK_COMPLETED.spec.urn
DATA_CHANNEL_READ_INDEX = (
    common_urns.monitoring_info_specs.DATA_CHANNEL_READ_INDEX.spec.urn)
API_REQUEST_COUNT_URN = (
    common_urns.monitoring_info_specs.API_REQUEST_COUNT.spec.urn)

# TODO(ajamato): Implement the remaining types, i.e. Double types
# Extrema types, etc. See:
# https://s.apache.org/beam-fn-api-metrics
SUM_INT64_TYPE = common_urns.monitoring_info_types.SUM_INT64_TYPE.urn
DISTRIBUTION_INT64_TYPE = (
    common_urns.monitoring_info_types.DISTRIBUTION_INT64_TYPE.urn)
LATEST_INT64_TYPE = common_urns.monitoring_info_types.LATEST_INT64_TYPE.urn
PROGRESS_TYPE = common_urns.monitoring_info_types.PROGRESS_TYPE.urn

# Type URNs grouped by the kind of metric they encode.
COUNTER_TYPES = {SUM_INT64_TYPE}
DISTRIBUTION_TYPES = {DISTRIBUTION_INT64_TYPE}
GAUGE_TYPES = {LATEST_INT64_TYPE}

# TODO(migryz) extract values from beam_fn_api.proto::MonitoringInfoLabels
PCOLLECTION_LABEL = (
    common_urns.monitoring_info_labels.PCOLLECTION.label_props.name)
PTRANSFORM_LABEL = (
    common_urns.monitoring_info_labels.TRANSFORM.label_props.name)
NAMESPACE_LABEL = (
    common_urns.monitoring_info_labels.NAMESPACE.label_props.name)
NAME_LABEL = common_urns.monitoring_info_labels.NAME.label_props.name
SERVICE_LABEL = common_urns.monitoring_info_labels.SERVICE.label_props.name
METHOD_LABEL = common_urns.monitoring_info_labels.METHOD.label_props.name
RESOURCE_LABEL = common_urns.monitoring_info_labels.RESOURCE.label_props.name
STATUS_LABEL = common_urns.monitoring_info_labels.STATUS.label_props.name
BIGQUERY_PROJECT_ID_LABEL = (
    common_urns.monitoring_info_labels.BIGQUERY_PROJECT_ID.label_props.name)
BIGQUERY_DATASET_LABEL = (
    common_urns.monitoring_info_labels.BIGQUERY_DATASET.label_props.name)
BIGQUERY_TABLE_LABEL = (
    common_urns.monitoring_info_labels.BIGQUERY_TABLE.label_props.name)
BIGQUERY_VIEW_LABEL = (
    common_urns.monitoring_info_labels.BIGQUERY_VIEW.label_props.name)
BIGQUERY_QUERY_NAME_LABEL = (
    common_urns.monitoring_info_labels.BIGQUERY_QUERY_NAME.label_props.name)
GCS_PROJECT_ID_LABEL = (
    common_urns.monitoring_info_labels.GCS_PROJECT_ID.label_props.name)
GCS_BUCKET_LABEL = (
    common_urns.monitoring_info_labels.GCS_BUCKET.label_props.name)
DATASTORE_PROJECT_ID_LABEL = (
    common_urns.monitoring_info_labels.DATASTORE_PROJECT.label_props.name)
DATASTORE_NAMESPACE_LABEL = (
    common_urns.monitoring_info_labels.DATASTORE_NAMESPACE.label_props.name)
SPANNER_PROJECT_ID = (
    common_urns.monitoring_info_labels.SPANNER_PROJECT_ID.label_props.name)
SPANNER_DATABASE_ID = (
    common_urns.monitoring_info_labels.SPANNER_DATABASE_ID.label_props.name)
SPANNER_TABLE_ID = (
    common_urns.monitoring_info_labels.SPANNER_TABLE_ID.label_props.name)
SPANNER_QUERY_NAME = (
    common_urns.monitoring_info_labels.SPANNER_QUERY_NAME.label_props.name)
BIGTABLE_PROJECT_ID_LABEL = (
    common_urns.monitoring_info_labels.BIGTABLE_PROJECT_ID.label_props.name)
INSTANCE_ID_LABEL = (
    common_urns.monitoring_info_labels.INSTANCE_ID.label_props.name)
TABLE_ID_LABEL = common_urns.monitoring_info_labels.TABLE_ID.label_props.name


def extract_counter_value(monitoring_info_proto):
  """Returns the counter value of the monitoring info.

  Args:
    monitoring_info_proto: The monitoring info for the counter.

  Raises:
    ValueError: If the monitoring info's type is not a counter type.
  """
  if not is_counter(monitoring_info_proto):
    raise ValueError('Unsupported type %s' % monitoring_info_proto.type)

  # Only SUM_INT64_TYPE is currently supported.
  return coders.VarIntCoder().decode(monitoring_info_proto.payload)


def extract_gauge_value(monitoring_info_proto):
  """Returns a tuple containing (timestamp, value).

  The timestamp is the encoded millisecond value divided by 1000.0.

  Args:
    monitoring_info_proto: The monitoring info for the gauge.

  Raises:
    ValueError: If the monitoring info's type is not a gauge type.
  """
  if not is_gauge(monitoring_info_proto):
    raise ValueError('Unsupported type %s' % monitoring_info_proto.type)

  # Only LATEST_INT64_TYPE is currently supported.
  return _decode_gauge(coders.VarIntCoder(), monitoring_info_proto.payload)


def extract_distribution(monitoring_info_proto):
  """Returns a tuple of (count, sum, min, max).

  Args:
    monitoring_info_proto: The monitoring info for the distribution.

  Raises:
    ValueError: If the monitoring info's type is not a distribution type.
  """
  if not is_distribution(monitoring_info_proto):
    raise ValueError('Unsupported type %s' % monitoring_info_proto.type)

  # Only DISTRIBUTION_INT64_TYPE is currently supported.
  return _decode_distribution(
      coders.VarIntCoder(), monitoring_info_proto.payload)


def create_labels(ptransform=None, namespace=None, name=None, pcollection=None):
  """Create the label dictionary based on the provided values.

  Only arguments with a truthy value are added to the result.

  Args:
    ptransform: The ptransform id used as a label.
    namespace: The metric namespace used as a label.
    name: The metric name used as a label.
    pcollection: The pcollection id used as a label.

  Returns:
    A new dict mapping label names to the provided values.
  """
  labels = {}
  if ptransform:
    labels[PTRANSFORM_LABEL] = ptransform
  if namespace:
    labels[NAMESPACE_LABEL] = namespace
  if name:
    labels[NAME_LABEL] = name
  if pcollection:
    labels[PCOLLECTION_LABEL] = pcollection
  return labels


def int64_user_counter(namespace, name, metric, ptransform=None):
  # type: (...) -> metrics_pb2.MonitoringInfo

  """Return the user counter monitoring info for the metric and labels.

  Args:
    namespace: User-defined namespace of the counter.
    name: Name of the counter.
    metric: The payload field to use in the monitoring info or an int value.
    ptransform: The ptransform id used as a label.
  """
  labels = create_labels(ptransform=ptransform, namespace=namespace, name=name)
  if isinstance(metric, int):
    metric = coders.VarIntCoder().encode(metric)
  return create_monitoring_info(
      USER_COUNTER_URN, SUM_INT64_TYPE, metric, labels)


def int64_counter(urn, metric, ptransform=None, pcollection=None, labels=None):
  # type: (...) -> metrics_pb2.MonitoringInfo

  """Return the counter monitoring info for the specified URN, metric and labels.

  Args:
    urn: The URN of the monitoring info/metric.
    metric: The payload field to use in the monitoring info or an int value.
    ptransform: The ptransform id used as a label.
    pcollection: The pcollection id used as a label.
    labels: Optional extra labels. The mapping passed in is not modified;
      ptransform/pcollection labels take precedence over its entries.
  """
  # Work on a copy so the caller's dict is never mutated by the update below.
  merged_labels = dict(labels) if labels else {}
  merged_labels.update(
      create_labels(ptransform=ptransform, pcollection=pcollection))
  if isinstance(metric, int):
    metric = coders.VarIntCoder().encode(metric)
  return create_monitoring_info(urn, SUM_INT64_TYPE, metric, merged_labels)


def int64_user_distribution(namespace, name, metric, ptransform=None):
  # type: (...) -> metrics_pb2.MonitoringInfo

  """Return the user distribution monitoring info for the metric and labels.

  Args:
    namespace: User-defined namespace of the distribution.
    name: Name of the distribution.
    metric: The DistributionData for the metric.
    ptransform: The ptransform id used as a label.
  """
  labels = create_labels(ptransform=ptransform, namespace=namespace, name=name)
  payload = _encode_distribution(
      coders.VarIntCoder(), metric.count, metric.sum, metric.min, metric.max)
  return create_monitoring_info(
      USER_DISTRIBUTION_URN, DISTRIBUTION_INT64_TYPE, payload, labels)


def int64_distribution(urn, metric, ptransform=None, pcollection=None):
  # type: (...) -> metrics_pb2.MonitoringInfo

  """Return a distribution monitoring info for the URN, metric and labels.

  Args:
    urn: The URN of the monitoring info/metric.
    metric: The DistributionData for the metric.
    ptransform: The ptransform id used as a label.
    pcollection: The pcollection id used as a label.
  """
  labels = create_labels(ptransform=ptransform, pcollection=pcollection)
  payload = _encode_distribution(
      coders.VarIntCoder(), metric.count, metric.sum, metric.min, metric.max)
  return create_monitoring_info(urn, DISTRIBUTION_INT64_TYPE, payload, labels)


def int64_user_gauge(namespace, name, metric, ptransform=None):
  # type: (...) -> metrics_pb2.MonitoringInfo

  """Return the user gauge monitoring info for the metric and labels.

  Args:
    namespace: User-defined namespace of the gauge.
    name: Name of the gauge.
    metric: The GaugeData containing the metrics.
    ptransform: The ptransform id used as a label.

  Raises:
    TypeError: If metric is not a GaugeData instance.
  """
  if not isinstance(metric, GaugeData):
    raise TypeError(
        'Expected GaugeData metric type but received %s with value %s' %
        (type(metric), metric))
  labels = create_labels(ptransform=ptransform, namespace=namespace, name=name)
  payload = _encode_gauge(coders.VarIntCoder(), metric.timestamp, metric.value)
  return create_monitoring_info(
      USER_GAUGE_URN, LATEST_INT64_TYPE, payload, labels)


def int64_gauge(urn, metric, ptransform=None):
  # type: (...) -> metrics_pb2.MonitoringInfo

  """Return the gauge monitoring info for the URN, metric and labels.

  Args:
    urn: The URN of the monitoring info/metric.
    metric: An int representing the value. The current time will be used for
      the timestamp.
    ptransform: The ptransform id used as a label.

  Raises:
    TypeError: If metric is not an int.
  """
  if not isinstance(metric, int):
    raise TypeError(
        'Expected int metric type but received %s with value %s' %
        (type(metric), metric))
  labels = create_labels(ptransform=ptransform)
  # Scale to milliseconds before truncating; int(time.time()) * 1000 would
  # discard the sub-second part of the timestamp.
  time_ms = int(time.time() * 1000)
  coder = coders.VarIntCoder()
  payload = coder.encode(time_ms) + coder.encode(metric)
  return create_monitoring_info(urn, LATEST_INT64_TYPE, payload, labels)


def create_monitoring_info(urn, type_urn, payload, labels=None):
  # type: (...) -> metrics_pb2.MonitoringInfo

  """Return the monitoring info for the URN, type, payload and labels.

  Args:
    urn: The URN of the monitoring info/metric.
    type_urn: The URN of the type of the monitoring info/metric.
        i.e. beam:metrics:sum_int_64, beam:metrics:latest_int_64.
    payload: The payload field to use in the monitoring info.
    labels: The label dictionary to use in the MonitoringInfo.
  """
  return metrics_pb2.MonitoringInfo(
      urn=urn, type=type_urn, labels=labels or {}, payload=payload)


def is_counter(monitoring_info_proto):
  """Returns true if the monitoring info is a counter metric."""
  return monitoring_info_proto.type in COUNTER_TYPES


def is_gauge(monitoring_info_proto):
  """Returns true if the monitoring info is a gauge metric."""
  return monitoring_info_proto.type in GAUGE_TYPES


def is_distribution(monitoring_info_proto):
  """Returns true if the monitoring info is a distribution metric."""
  return monitoring_info_proto.type in DISTRIBUTION_TYPES


def is_user_monitoring_info(monitoring_info_proto):
  """Returns true if the monitoring info is a user metric."""
  return monitoring_info_proto.urn in USER_METRIC_URNS


def extract_metric_result_map_value(monitoring_info_proto):
  # type: (...) -> Union[None, int, DistributionResult, GaugeResult]

  """Returns the relevant GaugeResult, DistributionResult or int value.

  These are the proper format for use in the MetricResult.query() result.
  Returns None when the monitoring info is not a counter, distribution or
  gauge.
  """
  # Returns a metric result (AKA the legacy format).
  # from the MonitoringInfo
  if is_counter(monitoring_info_proto):
    return extract_counter_value(monitoring_info_proto)
  if is_distribution(monitoring_info_proto):
    count, total, minimum, maximum = extract_distribution(
        monitoring_info_proto)
    # Note the argument order here (sum first) differs from the decoded
    # (count, sum, min, max) tuple order.
    return DistributionResult(DistributionData(total, count, minimum, maximum))
  if is_gauge(monitoring_info_proto):
    timestamp, value = extract_gauge_value(monitoring_info_proto)
    return GaugeResult(GaugeData(value, timestamp))
  return None


def parse_namespace_and_name(monitoring_info_proto):
  """Returns the (namespace, name) tuple for the monitoring info.

  Raises:
    IndexError: If a non-user monitoring info has a URN without a ':'.
  """
  # User metrics carry their namespace and name as labels.
  if is_user_monitoring_info(monitoring_info_proto):
    labels = monitoring_info_proto.labels
    return labels[NAMESPACE_LABEL], labels[NAME_LABEL]

  # If it is not a user counter, just use the first part of the URN, i.e. 'beam'
  # as the namespace and the remainder of the URN as the name.
  split = monitoring_info_proto.urn.split(':', 1)
  return split[0], split[1]


def get_step_name(monitoring_info_proto):
  """Returns a step name for the given monitoring info or None if step name
  cannot be specified."""
  # Right now only metrics that have a PTRANSFORM are taken into account
  return monitoring_info_proto.labels.get(PTRANSFORM_LABEL)


def to_key(monitoring_info_proto):
  # type: (metrics_pb2.MonitoringInfo) -> FrozenSet[Hashable]

  """Returns a key based on the URN and labels.

  This is useful in maps to prevent reporting the same MonitoringInfo twice.
  """
  key_items = list(monitoring_info_proto.labels.items())  # type: List[Hashable]
  key_items.append(monitoring_info_proto.urn)
  return frozenset(key_items)


def sum_payload_combiner(payload_a, payload_b):
  """Combines two encoded int64 sum payloads into one encoded sum payload."""
  coder = coders.VarIntCoder()
  return coder.encode(coder.decode(payload_a) + coder.decode(payload_b))


def distribution_payload_combiner(payload_a, payload_b):
  """Combines two encoded int64 distribution payloads into one.

  Counts and sums are added; the smaller min and larger max are kept.
  """
  coder = coders.VarIntCoder()
  (count_a, sum_a, min_a, max_a) = _decode_distribution(coder, payload_a)
  (count_b, sum_b, min_b, max_b) = _decode_distribution(coder, payload_b)
  return _encode_distribution(
      coder,
      count_a + count_b,
      sum_a + sum_b,
      min(min_a, min_b),
      max(max_a, max_b))


# Payload combiners keyed by monitoring info type URN. Types without an entry
# here are never merged by consolidate().
_KNOWN_COMBINERS = {
    SUM_INT64_TYPE: sum_payload_combiner,
    DISTRIBUTION_INT64_TYPE: distribution_payload_combiner,
}


def _make_merger(combiner):
  """Returns a function that merges two MonitoringInfos using combiner.

  The merged info keeps the first info's urn and type, only those labels on
  which both infos agree, and the combined payload.
  """
  def merge(a, b):
    return metrics_pb2.MonitoringInfo(
        urn=a.urn,
        type=a.type,
        labels={
            label: value
            for label,
            value in a.labels.items() if b.labels.get(label) == value
        },
        payload=combiner(a.payload, b.payload))

  return merge


def consolidate(metrics, key=to_key):
  """Yields the given monitoring infos with same-key entries merged.

  Infos are grouped by key(metric). Groups with a single entry, and groups
  whose type has no known combiner, are yielded unchanged; other groups are
  reduced to a single merged info.

  Args:
    metrics: An iterable of MonitoringInfo protos.
    key: A function mapping a MonitoringInfo to a hashable grouping key.
  """
  grouped = collections.defaultdict(list)
  for metric in metrics:
    grouped[key(metric)].append(metric)
  for values in grouped.values():
    if len(values) == 1:
      yield values[0]
      continue
    combiner = _KNOWN_COMBINERS.get(values[0].type)
    if combiner:
      yield reduce(_make_merger(combiner), values)
    else:
      for value in values:
        yield value


def _decode_gauge(coder, payload):
  """Returns a tuple of (timestamp, value).

  The payload holds a varint millisecond timestamp followed by the value
  encoded with coder; the timestamp is returned divided by 1000.0.
  """
  timestamp_coder = coders.VarIntCoder().get_impl()
  stream = coder_impl.create_InputStream(payload)
  time_ms = timestamp_coder.decode_from_stream(stream, True)
  return (time_ms / 1000.0, coder.get_impl().decode_from_stream(stream, True))


def _encode_gauge(coder, timestamp, value):
  """Returns the encoded gauge payload.

  The timestamp is multiplied by 1000 and truncated to an int before being
  written as a varint, followed by the value encoded with coder.
  """
  timestamp_coder = coders.VarIntCoder().get_impl()
  stream = coder_impl.create_OutputStream()
  timestamp_coder.encode_to_stream(int(timestamp * 1000), stream, True)
  coder.get_impl().encode_to_stream(value, stream, True)
  return stream.get()


def _decode_distribution(value_coder, payload):
  """Returns a tuple of (count, sum, min, max).

  The count is read as a varint; sum, min and max are read with value_coder.
  """
  count_coder = coders.VarIntCoder().get_impl()
  value_coder = value_coder.get_impl()
  stream = coder_impl.create_InputStream(payload)
  return (
      count_coder.decode_from_stream(stream, True),
      value_coder.decode_from_stream(stream, True),
      value_coder.decode_from_stream(stream, True),
      value_coder.decode_from_stream(stream, True))


def _encode_distribution(value_coder, count, sum, min, max):
  """Returns the encoded distribution payload.

  The count is written as a varint, followed by sum, min and max written with
  value_coder, in that order.
  """
  count_coder = coders.VarIntCoder().get_impl()
  value_coder = value_coder.get_impl()
  stream = coder_impl.create_OutputStream()
  count_coder.encode_to_stream(count, stream, True)
  value_coder.encode_to_stream(sum, stream, True)
  value_coder.encode_to_stream(min, stream, True)
  value_coder.encode_to_stream(max, stream, True)
  return stream.get()