github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/metrics/cells.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  # cython: language_level=3
    19  
    20  """
    21  This file contains metric cell classes. A metric cell is used to accumulate
    22  in-memory changes to a metric. It represents a specific metric in a single
    23  context.
    24  """
    25  
    26  # pytype: skip-file
    27  
    28  import threading
    29  import time
    30  from datetime import datetime
    31  from typing import Any
    32  from typing import Optional
    33  from typing import SupportsInt
    34  
# The cython package is optional at import time. When it is missing, a minimal
# stand-in exposing only ``compiled = False`` is bound to the name ``cython``
# so that the ``cython.compiled`` checks in the cell classes below fall through
# to their lock-protected, non-compiled branches.
try:
  import cython
except ImportError:

  class fake_cython:
    # Mirrors the one attribute of the real module that this file reads.
    compiled = False

  # NOTE(review): the name is bound through globals() instead of a plain
  # assignment; presumably this is deliberate for compiled builds -- confirm
  # before simplifying.
  globals()['cython'] = fake_cython

# Names exported by a star-import of this module.
__all__ = [
    'MetricAggregator',
    'MetricCell',
    'MetricCellFactory',
    'DistributionResult',
    'GaugeResult'
]
    51  
    52  
    53  class MetricCell(object):
    54    """For internal use only; no backwards-compatibility guarantees.
    55  
    56    Accumulates in-memory changes to a metric.
    57  
    58    A MetricCell represents a specific metric in a single context and bundle.
    59    All subclasses must be thread safe, as these are used in the pipeline runners,
    60    and may be subject to parallel/concurrent updates. Cells should only be used
    61    directly within a runner.
    62    """
    63    def __init__(self):
    64      self._lock = threading.Lock()
    65      self._start_time = None
    66  
    67    def update(self, value):
    68      raise NotImplementedError
    69  
    70    def get_cumulative(self):
    71      raise NotImplementedError
    72  
    73    def to_runner_api_monitoring_info(self, name, transform_id):
    74      if not self._start_time:
    75        self._start_time = datetime.utcnow()
    76      mi = self.to_runner_api_monitoring_info_impl(name, transform_id)
    77      mi.start_time.FromDatetime(self._start_time)
    78      return mi
    79  
    80    def to_runner_api_monitoring_info_impl(self, name, transform_id):
    81      raise NotImplementedError
    82  
    83    def reset(self):
    84      # type: () -> None
    85      raise NotImplementedError
    86  
    87    def __reduce__(self):
    88      raise NotImplementedError
    89  
    90  
    91  class MetricCellFactory(object):
    92    def __call__(self):
    93      # type: () -> MetricCell
    94      raise NotImplementedError
    95  
    96  
    97  class CounterCell(MetricCell):
    98    """For internal use only; no backwards-compatibility guarantees.
    99  
   100    Tracks the current value and delta of a counter metric.
   101  
   102    Each cell tracks the state of a metric independently per context per bundle.
   103    Therefore, each metric has a different cell in each bundle, cells are
   104    aggregated by the runner.
   105  
   106    This class is thread safe.
   107    """
   108    def __init__(self, *args):
   109      super().__init__(*args)
   110      self.value = CounterAggregator.identity_element()
   111  
   112    def reset(self):
   113      # type: () -> None
   114      self.value = CounterAggregator.identity_element()
   115  
   116    def combine(self, other):
   117      # type: (CounterCell) -> CounterCell
   118      result = CounterCell()
   119      result.inc(self.value + other.value)
   120      return result
   121  
   122    def inc(self, n=1):
   123      self.update(n)
   124  
   125    def dec(self, n=1):
   126      self.update(-n)
   127  
   128    def update(self, value):
   129      if cython.compiled:
   130        ivalue = value
   131        # Since We hold the GIL, no need for another lock.
   132        # And because the C threads won't preempt and interleave
   133        # each other.
   134        # Assuming there is no code trying to access the counters
   135        # directly by circumventing the GIL.
   136        self.value += ivalue
   137      else:
   138        with self._lock:
   139          self.value += value
   140  
   141    def get_cumulative(self):
   142      # type: () -> int
   143      with self._lock:
   144        return self.value
   145  
   146    def to_runner_api_monitoring_info_impl(self, name, transform_id):
   147      from apache_beam.metrics import monitoring_infos
   148      if not name.urn:
   149        # User counter case.
   150        return monitoring_infos.int64_user_counter(
   151            name.namespace,
   152            name.name,
   153            self.get_cumulative(),
   154            ptransform=transform_id)
   155      else:
   156        # Arbitrary URN case.
   157        return monitoring_infos.int64_counter(
   158            name.urn, self.get_cumulative(), labels=name.labels)
   159  
   160  
   161  class DistributionCell(MetricCell):
   162    """For internal use only; no backwards-compatibility guarantees.
   163  
   164    Tracks the current value and delta for a distribution metric.
   165  
   166    Each cell tracks the state of a metric independently per context per bundle.
   167    Therefore, each metric has a different cell in each bundle, that is later
   168    aggregated.
   169  
   170    This class is thread safe.
   171    """
   172    def __init__(self, *args):
   173      super().__init__(*args)
   174      self.data = DistributionAggregator.identity_element()
   175  
   176    def reset(self):
   177      # type: () -> None
   178      self.data = DistributionAggregator.identity_element()
   179  
   180    def combine(self, other):
   181      # type: (DistributionCell) -> DistributionCell
   182      result = DistributionCell()
   183      result.data = self.data.combine(other.data)
   184      return result
   185  
   186    def update(self, value):
   187      if cython.compiled:
   188        # We will hold the GIL throughout the entire _update.
   189        self._update(value)
   190      else:
   191        with self._lock:
   192          self._update(value)
   193  
   194    def _update(self, value):
   195      if cython.compiled:
   196        ivalue = value
   197      else:
   198        ivalue = int(value)
   199      self.data.count = self.data.count + 1
   200      self.data.sum = self.data.sum + ivalue
   201      if ivalue < self.data.min:
   202        self.data.min = ivalue
   203      if ivalue > self.data.max:
   204        self.data.max = ivalue
   205  
   206    def get_cumulative(self):
   207      # type: () -> DistributionData
   208      with self._lock:
   209        return self.data.get_cumulative()
   210  
   211    def to_runner_api_monitoring_info_impl(self, name, transform_id):
   212      from apache_beam.metrics import monitoring_infos
   213      return monitoring_infos.int64_user_distribution(
   214          name.namespace,
   215          name.name,
   216          self.get_cumulative(),
   217          ptransform=transform_id)
   218  
   219  
   220  class GaugeCell(MetricCell):
   221    """For internal use only; no backwards-compatibility guarantees.
   222  
   223    Tracks the current value and delta for a gauge metric.
   224  
   225    Each cell tracks the state of a metric independently per context per bundle.
   226    Therefore, each metric has a different cell in each bundle, that is later
   227    aggregated.
   228  
   229    This class is thread safe.
   230    """
   231    def __init__(self, *args):
   232      super().__init__(*args)
   233      self.data = GaugeAggregator.identity_element()
   234  
   235    def reset(self):
   236      self.data = GaugeAggregator.identity_element()
   237  
   238    def combine(self, other):
   239      # type: (GaugeCell) -> GaugeCell
   240      result = GaugeCell()
   241      result.data = self.data.combine(other.data)
   242      return result
   243  
   244    def set(self, value):
   245      self.update(value)
   246  
   247    def update(self, value):
   248      # type: (SupportsInt) -> None
   249      value = int(value)
   250      with self._lock:
   251        # Set the value directly without checking timestamp, because
   252        # this value is naturally the latest value.
   253        self.data.value = value
   254        self.data.timestamp = time.time()
   255  
   256    def get_cumulative(self):
   257      # type: () -> GaugeData
   258      with self._lock:
   259        return self.data.get_cumulative()
   260  
   261    def to_runner_api_monitoring_info_impl(self, name, transform_id):
   262      from apache_beam.metrics import monitoring_infos
   263      return monitoring_infos.int64_user_gauge(
   264          name.namespace,
   265          name.name,
   266          self.get_cumulative(),
   267          ptransform=transform_id)
   268  
   269  
   270  class DistributionResult(object):
   271    """The result of a Distribution metric."""
   272    def __init__(self, data):
   273      # type: (DistributionData) -> None
   274      self.data = data
   275  
   276    def __eq__(self, other):
   277      # type: (object) -> bool
   278      if isinstance(other, DistributionResult):
   279        return self.data == other.data
   280      else:
   281        return False
   282  
   283    def __hash__(self):
   284      # type: () -> int
   285      return hash(self.data)
   286  
   287    def __repr__(self):
   288      # type: () -> str
   289      return (
   290          'DistributionResult(sum={}, count={}, min={}, max={}, '
   291          'mean={})'.format(self.sum, self.count, self.min, self.max, self.mean))
   292  
   293    @property
   294    def max(self):
   295      # type: () -> Optional[int]
   296      return self.data.max if self.data.count else None
   297  
   298    @property
   299    def min(self):
   300      # type: () -> Optional[int]
   301      return self.data.min if self.data.count else None
   302  
   303    @property
   304    def count(self):
   305      # type: () -> Optional[int]
   306      return self.data.count
   307  
   308    @property
   309    def sum(self):
   310      # type: () -> Optional[int]
   311      return self.data.sum
   312  
   313    @property
   314    def mean(self):
   315      # type: () -> Optional[float]
   316  
   317      """Returns the float mean of the distribution.
   318  
   319      If the distribution contains no elements, it returns None.
   320      """
   321      if self.data.count == 0:
   322        return None
   323      return self.data.sum / self.data.count
   324  
   325  
   326  class GaugeResult(object):
   327    def __init__(self, data):
   328      # type: (GaugeData) -> None
   329      self.data = data
   330  
   331    def __eq__(self, other):
   332      # type: (object) -> bool
   333      if isinstance(other, GaugeResult):
   334        return self.data == other.data
   335      else:
   336        return False
   337  
   338    def __hash__(self):
   339      # type: () -> int
   340      return hash(self.data)
   341  
   342    def __repr__(self):
   343      return '<GaugeResult(value={}, timestamp={})>'.format(
   344          self.value, self.timestamp)
   345  
   346    @property
   347    def value(self):
   348      # type: () -> Optional[int]
   349      return self.data.value
   350  
   351    @property
   352    def timestamp(self):
   353      # type: () -> Optional[int]
   354      return self.data.timestamp
   355  
   356  
   357  class GaugeData(object):
   358    """For internal use only; no backwards-compatibility guarantees.
   359  
   360    The data structure that holds data about a gauge metric.
   361  
   362    Gauge metrics are restricted to integers only.
   363  
   364    This object is not thread safe, so it's not supposed to be modified
   365    by other than the GaugeCell that contains it.
   366    """
   367    def __init__(self, value, timestamp=None):
   368      # type: (Optional[int], Optional[int]) -> None
   369      self.value = value
   370      self.timestamp = timestamp if timestamp is not None else 0
   371  
   372    def __eq__(self, other):
   373      # type: (object) -> bool
   374      if isinstance(other, GaugeData):
   375        return self.value == other.value and self.timestamp == other.timestamp
   376      else:
   377        return False
   378  
   379    def __hash__(self):
   380      # type: () -> int
   381      return hash((self.value, self.timestamp))
   382  
   383    def __repr__(self):
   384      # type: () -> str
   385      return '<GaugeData(value={}, timestamp={})>'.format(
   386          self.value, self.timestamp)
   387  
   388    def get_cumulative(self):
   389      # type: () -> GaugeData
   390      return GaugeData(self.value, timestamp=self.timestamp)
   391  
   392    def combine(self, other):
   393      # type: (Optional[GaugeData]) -> GaugeData
   394      if other is None:
   395        return self
   396  
   397      if other.timestamp > self.timestamp:
   398        return other
   399      else:
   400        return self
   401  
   402    @staticmethod
   403    def singleton(value, timestamp=None):
   404      # type: (Optional[int], Optional[int]) -> GaugeData
   405      return GaugeData(value, timestamp=timestamp)
   406  
   407  
   408  class DistributionData(object):
   409    """For internal use only; no backwards-compatibility guarantees.
   410  
   411    The data structure that holds data about a distribution metric.
   412  
   413    Distribution metrics are restricted to distributions of integers only.
   414  
   415    This object is not thread safe, so it's not supposed to be modified
   416    by other than the DistributionCell that contains it.
   417    """
   418    def __init__(self, sum, count, min, max):
   419      # type: (int, int, int, int) -> None
   420      if count:
   421        self.sum = sum
   422        self.count = count
   423        self.min = min
   424        self.max = max
   425      else:
   426        self.sum = self.count = 0
   427        self.min = 2**63 - 1
   428        # Avoid Wimplicitly-unsigned-literal caused by -2**63.
   429        self.max = -self.min - 1
   430  
   431    def __eq__(self, other):
   432      # type: (object) -> bool
   433      if isinstance(other, DistributionData):
   434        return (
   435            self.sum == other.sum and self.count == other.count and
   436            self.min == other.min and self.max == other.max)
   437      else:
   438        return False
   439  
   440    def __hash__(self):
   441      # type: () -> int
   442      return hash((self.sum, self.count, self.min, self.max))
   443  
   444    def __repr__(self):
   445      # type: () -> str
   446      return 'DistributionData(sum={}, count={}, min={}, max={})'.format(
   447          self.sum, self.count, self.min, self.max)
   448  
   449    def get_cumulative(self):
   450      # type: () -> DistributionData
   451      return DistributionData(self.sum, self.count, self.min, self.max)
   452  
   453    def combine(self, other):
   454      # type: (Optional[DistributionData]) -> DistributionData
   455      if other is None:
   456        return self
   457  
   458      return DistributionData(
   459          self.sum + other.sum,
   460          self.count + other.count,
   461          self.min if self.min < other.min else other.min,
   462          self.max if self.max > other.max else other.max)
   463  
   464    @staticmethod
   465    def singleton(value):
   466      # type: (int) -> DistributionData
   467      return DistributionData(value, 1, value, value)
   468  
   469  
   470  class MetricAggregator(object):
   471    """For internal use only; no backwards-compatibility guarantees.
   472  
   473    Base interface for aggregating metric data during pipeline execution."""
   474    def identity_element(self):
   475      # type: () -> Any
   476  
   477      """Returns the identical element of an Aggregation.
   478  
   479      For the identity element, it must hold that
   480       Aggregator.combine(any_element, identity_element) == any_element.
   481      """
   482      raise NotImplementedError
   483  
   484    def combine(self, x, y):
   485      # type: (Any, Any) -> Any
   486      raise NotImplementedError
   487  
   488    def result(self, x):
   489      # type: (Any) -> Any
   490      raise NotImplementedError
   491  
   492  
   493  class CounterAggregator(MetricAggregator):
   494    """For internal use only; no backwards-compatibility guarantees.
   495  
   496    Aggregator for Counter metric data during pipeline execution.
   497  
   498    Values aggregated should be ``int`` objects.
   499    """
   500    @staticmethod
   501    def identity_element():
   502      # type: () -> int
   503      return 0
   504  
   505    def combine(self, x, y):
   506      # type: (SupportsInt, SupportsInt) -> int
   507      return int(x) + int(y)
   508  
   509    def result(self, x):
   510      # type: (SupportsInt) -> int
   511      return int(x)
   512  
   513  
   514  class DistributionAggregator(MetricAggregator):
   515    """For internal use only; no backwards-compatibility guarantees.
   516  
   517    Aggregator for Distribution metric data during pipeline execution.
   518  
   519    Values aggregated should be ``DistributionData`` objects.
   520    """
   521    @staticmethod
   522    def identity_element():
   523      # type: () -> DistributionData
   524      return DistributionData(0, 0, 2**63 - 1, -2**63)
   525  
   526    def combine(self, x, y):
   527      # type: (DistributionData, DistributionData) -> DistributionData
   528      return x.combine(y)
   529  
   530    def result(self, x):
   531      # type: (DistributionData) -> DistributionResult
   532      return DistributionResult(x.get_cumulative())
   533  
   534  
   535  class GaugeAggregator(MetricAggregator):
   536    """For internal use only; no backwards-compatibility guarantees.
   537  
   538    Aggregator for Gauge metric data during pipeline execution.
   539  
   540    Values aggregated should be ``GaugeData`` objects.
   541    """
   542    @staticmethod
   543    def identity_element():
   544      # type: () -> GaugeData
   545      return GaugeData(0, timestamp=0)
   546  
   547    def combine(self, x, y):
   548      # type: (GaugeData, GaugeData) -> GaugeData
   549      result = x.combine(y)
   550      return result
   551  
   552    def result(self, x):
   553      # type: (GaugeData) -> GaugeResult
   554      return GaugeResult(x.get_cumulative())