#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# cython: language_level=3

"""
This file contains metric cell classes. A metric cell is used to accumulate
in-memory changes to a metric. It represents a specific metric in a single
context.
"""

# pytype: skip-file

import threading
import time
from datetime import datetime
from typing import Any
from typing import Optional
from typing import SupportsInt

try:
  import cython
except ImportError:

  # Minimal stand-in so that `cython.compiled` can be consulted below even
  # when the cython package is not installed.
  class fake_cython:
    compiled = False

  globals()['cython'] = fake_cython

__all__ = [
    'MetricAggregator',
    'MetricCell',
    'MetricCellFactory',
    'DistributionResult',
    'GaugeResult'
]


class MetricCell(object):
  """For internal use only; no backwards-compatibility guarantees.

  Accumulates in-memory changes to a metric.

  A MetricCell represents a specific metric in a single context and bundle.
  All subclasses must be thread safe, as these are used in the pipeline runners,
  and may be subject to parallel/concurrent updates. Cells should only be used
  directly within a runner.
  """
  def __init__(self):
    # Lock used by subclasses to guard reads and writes of their state.
    self._lock = threading.Lock()
    # Set lazily on the first call to to_runner_api_monitoring_info().
    self._start_time = None

  def update(self, value):
    """Folds ``value`` into the cell. Must be implemented by subclasses."""
    raise NotImplementedError

  def get_cumulative(self):
    """Returns the accumulated state. Must be implemented by subclasses."""
    raise NotImplementedError

  def to_runner_api_monitoring_info(self, name, transform_id):
    """Returns the subclass-built monitoring info with its start time set.

    The start time is recorded the first time this method is called and is
    reused on every later call.
    """
    if self._start_time is None:
      # NOTE: utcnow() yields a naive datetime and is deprecated as of
      # Python 3.12; kept as-is so the value passed to FromDatetime below
      # does not change.
      self._start_time = datetime.utcnow()
    mi = self.to_runner_api_monitoring_info_impl(name, transform_id)
    mi.start_time.FromDatetime(self._start_time)
    return mi

  def to_runner_api_monitoring_info_impl(self, name, transform_id):
    """Builds the monitoring info. Must be implemented by subclasses."""
    raise NotImplementedError

  def reset(self):
    # type: () -> None

    """Resets the cell to its initial state. Must be implemented by
    subclasses."""
    raise NotImplementedError

  def __reduce__(self):
    raise NotImplementedError


class MetricCellFactory(object):
  """Callable interface that produces a new MetricCell on each call."""
  def __call__(self):
    # type: () -> MetricCell
    raise NotImplementedError


class CounterCell(MetricCell):
  """For internal use only; no backwards-compatibility guarantees.

  Tracks the current value and delta of a counter metric.

  Each cell tracks the state of a metric independently per context per bundle.
  Therefore, each metric has a different cell in each bundle, cells are
  aggregated by the runner.

  This class is thread safe.
  """
  def __init__(self, *args):
    super().__init__(*args)
    self.value = CounterAggregator.identity_element()

  def reset(self):
    # type: () -> None

    """Sets the counter back to the aggregator's identity element."""
    self.value = CounterAggregator.identity_element()

  def combine(self, other):
    # type: (CounterCell) -> CounterCell

    """Returns a new cell holding the sum of this cell's and other's values."""
    result = CounterCell()
    result.inc(self.value + other.value)
    return result

  def inc(self, n=1):
    """Increments the counter by ``n``."""
    self.update(n)

  def dec(self, n=1):
    """Decrements the counter by ``n``."""
    self.update(-n)

  def update(self, value):
    """Adds ``value`` to the counter."""
    if cython.compiled:
      ivalue = value
      # Since we hold the GIL, no need for another lock.
      # And because the C threads won't preempt and interleave
      # each other.
      # Assuming there is no code trying to access the counters
      # directly by circumventing the GIL.
      self.value += ivalue
    else:
      with self._lock:
        self.value += value

  def get_cumulative(self):
    # type: () -> int

    """Returns the current counter value, read under the cell's lock."""
    with self._lock:
      return self.value

  def to_runner_api_monitoring_info_impl(self, name, transform_id):
    """Builds an int64 counter monitoring info for the current value."""
    from apache_beam.metrics import monitoring_infos
    if not name.urn:
      # User counter case.
      return monitoring_infos.int64_user_counter(
          name.namespace,
          name.name,
          self.get_cumulative(),
          ptransform=transform_id)
    else:
      # Arbitrary URN case.
      return monitoring_infos.int64_counter(
          name.urn, self.get_cumulative(), labels=name.labels)


class DistributionCell(MetricCell):
  """For internal use only; no backwards-compatibility guarantees.

  Tracks the current value and delta for a distribution metric.

  Each cell tracks the state of a metric independently per context per bundle.
  Therefore, each metric has a different cell in each bundle, that is later
  aggregated.

  This class is thread safe.
  """
  def __init__(self, *args):
    super().__init__(*args)
    self.data = DistributionAggregator.identity_element()

  def reset(self):
    # type: () -> None

    """Replaces the data with the aggregator's identity element."""
    self.data = DistributionAggregator.identity_element()

  def combine(self, other):
    # type: (DistributionCell) -> DistributionCell

    """Returns a new cell holding the combination of both cells' data."""
    result = DistributionCell()
    result.data = self.data.combine(other.data)
    return result

  def update(self, value):
    """Records one observation of ``value`` in the distribution."""
    if cython.compiled:
      # We will hold the GIL throughout the entire _update.
      self._update(value)
    else:
      with self._lock:
        self._update(value)

  def _update(self, value):
    """Applies one observation to count, sum, min and max (no locking)."""
    if cython.compiled:
      ivalue = value
    else:
      ivalue = int(value)
    self.data.count = self.data.count + 1
    self.data.sum = self.data.sum + ivalue
    if ivalue < self.data.min:
      self.data.min = ivalue
    if ivalue > self.data.max:
      self.data.max = ivalue

  def get_cumulative(self):
    # type: () -> DistributionData

    """Returns a copy of the current data, taken under the cell's lock."""
    with self._lock:
      return self.data.get_cumulative()

  def to_runner_api_monitoring_info_impl(self, name, transform_id):
    """Builds an int64 user distribution monitoring info for this cell."""
    from apache_beam.metrics import monitoring_infos
    return monitoring_infos.int64_user_distribution(
        name.namespace,
        name.name,
        self.get_cumulative(),
        ptransform=transform_id)


class GaugeCell(MetricCell):
  """For internal use only; no backwards-compatibility guarantees.

  Tracks the current value and delta for a gauge metric.

  Each cell tracks the state of a metric independently per context per bundle.
  Therefore, each metric has a different cell in each bundle, that is later
  aggregated.

  This class is thread safe.
  """
  def __init__(self, *args):
    super().__init__(*args)
    self.data = GaugeAggregator.identity_element()

  def reset(self):
    # type: () -> None

    """Replaces the data with the aggregator's identity element."""
    self.data = GaugeAggregator.identity_element()

  def combine(self, other):
    # type: (GaugeCell) -> GaugeCell

    """Returns a new cell holding the combination of both cells' data."""
    result = GaugeCell()
    result.data = self.data.combine(other.data)
    return result

  def set(self, value):
    """Sets the gauge to ``value``; alias for update()."""
    self.update(value)

  def update(self, value):
    # type: (SupportsInt) -> None

    """Stores ``int(value)`` together with the current wall-clock time."""
    value = int(value)
    with self._lock:
      # Set the value directly without checking timestamp, because
      # this value is naturally the latest value.
      self.data.value = value
      self.data.timestamp = time.time()

  def get_cumulative(self):
    # type: () -> GaugeData

    """Returns a copy of the current data, taken under the cell's lock."""
    with self._lock:
      return self.data.get_cumulative()

  def to_runner_api_monitoring_info_impl(self, name, transform_id):
    """Builds an int64 user gauge monitoring info for this cell."""
    from apache_beam.metrics import monitoring_infos
    return monitoring_infos.int64_user_gauge(
        name.namespace,
        name.name,
        self.get_cumulative(),
        ptransform=transform_id)


class DistributionResult(object):
  """The result of a Distribution metric."""
  def __init__(self, data):
    # type: (DistributionData) -> None
    self.data = data

  def __eq__(self, other):
    # type: (object) -> bool
    if isinstance(other, DistributionResult):
      return self.data == other.data
    else:
      return False

  def __hash__(self):
    # type: () -> int
    return hash(self.data)

  def __repr__(self):
    # type: () -> str
    return (
        'DistributionResult(sum={}, count={}, min={}, max={}, '
        'mean={})'.format(self.sum, self.count, self.min, self.max, self.mean))

  @property
  def max(self):
    # type: () -> Optional[int]

    """Returns the maximum, or None if the distribution has no elements."""
    return self.data.max if self.data.count else None

  @property
  def min(self):
    # type: () -> Optional[int]

    """Returns the minimum, or None if the distribution has no elements."""
    return self.data.min if self.data.count else None

  @property
  def count(self):
    # type: () -> Optional[int]

    """Returns the count stored in the underlying data."""
    return self.data.count

  @property
  def sum(self):
    # type: () -> Optional[int]

    """Returns the sum stored in the underlying data."""
    return self.data.sum

  @property
  def mean(self):
    # type: () -> Optional[float]

    """Returns the float mean of the distribution.

    If the distribution contains no elements, it returns None.
    """
    if self.data.count == 0:
      return None
    return self.data.sum / self.data.count


class GaugeResult(object):
  """The result of a Gauge metric."""
  def __init__(self, data):
    # type: (GaugeData) -> None
    self.data = data

  def __eq__(self, other):
    # type: (object) -> bool
    if isinstance(other, GaugeResult):
      return self.data == other.data
    else:
      return False

  def __hash__(self):
    # type: () -> int
    return hash(self.data)

  def __repr__(self):
    # type: () -> str
    return '<GaugeResult(value={}, timestamp={})>'.format(
        self.value, self.timestamp)

  @property
  def value(self):
    # type: () -> Optional[int]

    """Returns the value stored in the underlying data."""
    return self.data.value

  @property
  def timestamp(self):
    # type: () -> Optional[int]

    """Returns the timestamp stored in the underlying data."""
    return self.data.timestamp


class GaugeData(object):
  """For internal use only; no backwards-compatibility guarantees.

  The data structure that holds data about a gauge metric.

  Gauge metrics are restricted to integers only.

  This object is not thread safe, so it's not supposed to be modified
  by other than the GaugeCell that contains it.
  """
  def __init__(self, value, timestamp=None):
    # type: (Optional[int], Optional[int]) -> None
    self.value = value
    # A missing timestamp is stored as 0 so that combine() can always
    # compare timestamps numerically.
    self.timestamp = timestamp if timestamp is not None else 0

  def __eq__(self, other):
    # type: (object) -> bool
    if isinstance(other, GaugeData):
      return self.value == other.value and self.timestamp == other.timestamp
    else:
      return False

  def __hash__(self):
    # type: () -> int
    return hash((self.value, self.timestamp))

  def __repr__(self):
    # type: () -> str
    return '<GaugeData(value={}, timestamp={})>'.format(
        self.value, self.timestamp)

  def get_cumulative(self):
    # type: () -> GaugeData

    """Returns a new GaugeData with the same value and timestamp."""
    return GaugeData(self.value, timestamp=self.timestamp)

  def combine(self, other):
    # type: (Optional[GaugeData]) -> GaugeData

    """Returns whichever of self and other has the later timestamp.

    Returns self when other is None or when the timestamps are equal.
    """
    if other is None:
      return self

    if other.timestamp > self.timestamp:
      return other
    else:
      return self

  @staticmethod
  def singleton(value, timestamp=None):
    # type: (Optional[int], Optional[int]) -> GaugeData

    """Returns a GaugeData holding the given value and timestamp."""
    return GaugeData(value, timestamp=timestamp)


class DistributionData(object):
  """For internal use only; no backwards-compatibility guarantees.

  The data structure that holds data about a distribution metric.

  Distribution metrics are restricted to distributions of integers only.

  This object is not thread safe, so it's not supposed to be modified
  by other than the DistributionCell that contains it.
  """
  def __init__(self, sum, count, min, max):
    # type: (int, int, int, int) -> None
    if count:
      self.sum = sum
      self.count = count
      self.min = min
      self.max = max
    else:
      # An empty distribution ignores the passed sum/min/max and is
      # normalized to sum=0, count=0, min=2**63 - 1, max=-2**63.
      self.sum = self.count = 0
      self.min = 2**63 - 1
      # Avoid Wimplicitly-unsigned-literal caused by -2**63.
      self.max = -self.min - 1

  def __eq__(self, other):
    # type: (object) -> bool
    if isinstance(other, DistributionData):
      return (
          self.sum == other.sum and self.count == other.count and
          self.min == other.min and self.max == other.max)
    else:
      return False

  def __hash__(self):
    # type: () -> int
    return hash((self.sum, self.count, self.min, self.max))

  def __repr__(self):
    # type: () -> str
    return 'DistributionData(sum={}, count={}, min={}, max={})'.format(
        self.sum, self.count, self.min, self.max)

  def get_cumulative(self):
    # type: () -> DistributionData

    """Returns a new DistributionData with the same field values."""
    return DistributionData(self.sum, self.count, self.min, self.max)

  def combine(self, other):
    # type: (Optional[DistributionData]) -> DistributionData

    """Returns the merge of self and other; returns self if other is None.

    Sums and counts are added; min and max are taken across both operands.
    """
    if other is None:
      return self

    return DistributionData(
        self.sum + other.sum,
        self.count + other.count,
        self.min if self.min < other.min else other.min,
        self.max if self.max > other.max else other.max)

  @staticmethod
  def singleton(value):
    # type: (int) -> DistributionData

    """Returns the data for a distribution with one observation, value."""
    return DistributionData(value, 1, value, value)


class MetricAggregator(object):
  """For internal use only; no backwards-compatibility guarantees.

  Base interface for aggregating metric data during pipeline execution."""
  def identity_element(self):
    # type: () -> Any

    """Returns the identity element of an Aggregation.

    For the identity element, it must hold that
    Aggregator.combine(any_element, identity_element) == any_element.
    """
    raise NotImplementedError

  def combine(self, x, y):
    # type: (Any, Any) -> Any

    """Combines two values. Must be implemented by subclasses."""
    raise NotImplementedError

  def result(self, x):
    # type: (Any) -> Any

    """Converts an aggregated value into a result. Must be implemented by
    subclasses."""
    raise NotImplementedError


class CounterAggregator(MetricAggregator):
  """For internal use only; no backwards-compatibility guarantees.

  Aggregator for Counter metric data during pipeline execution.

  Values aggregated should be ``int`` objects.
  """
  @staticmethod
  def identity_element():
    # type: () -> int
    return 0

  def combine(self, x, y):
    # type: (SupportsInt, SupportsInt) -> int
    return int(x) + int(y)

  def result(self, x):
    # type: (SupportsInt) -> int
    return int(x)


class DistributionAggregator(MetricAggregator):
  """For internal use only; no backwards-compatibility guarantees.

  Aggregator for Distribution metric data during pipeline execution.

  Values aggregated should be ``DistributionData`` objects.
  """
  @staticmethod
  def identity_element():
    # type: () -> DistributionData
    return DistributionData(0, 0, 2**63 - 1, -2**63)

  def combine(self, x, y):
    # type: (DistributionData, DistributionData) -> DistributionData
    return x.combine(y)

  def result(self, x):
    # type: (DistributionData) -> DistributionResult
    return DistributionResult(x.get_cumulative())


class GaugeAggregator(MetricAggregator):
  """For internal use only; no backwards-compatibility guarantees.

  Aggregator for Gauge metric data during pipeline execution.

  Values aggregated should be ``GaugeData`` objects.
  """
  @staticmethod
  def identity_element():
    # type: () -> GaugeData
    return GaugeData(0, timestamp=0)

  def combine(self, x, y):
    # type: (GaugeData, GaugeData) -> GaugeData
    result = x.combine(y)
    return result

  def result(self, x):
    # type: (GaugeData) -> GaugeResult
    return GaugeResult(x.get_cumulative())