github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/worker/statesampler_fast.pyx (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  # cython: profile=True
    19  # cython: language_level=3
    20  
    21  """State sampler for tracking time spent in execution steps.
    22  
    23  The state sampler profiles the time spent in each step of a pipeline.
    24  Operations (defined in executor.py) which are executed as part of a MapTask are
    25  instrumented with context managers provided by StateSampler.scoped_state().
    26  These context managers change the internal state of the StateSampler during each
    27  relevant Operation's .start(), .process() and .finish() methods.  State is
    28  sampled by a raw C thread, not holding the Python Global Interpreter Lock, which
    29  queries the StateSampler's internal state at a defined sampling frequency.  In a
    30  common example, a ReadOperation during its .start() method reads an element and
    31  calls a DoOperation's .process() method, which can call a WriteOperation's
    32  .process() method.  Each element processed causes the current state to
    33  transition between these states of different Operations.  Each time the sampling
    34  thread queries the current state, the time spent since the previous sample is
    35  attributed to that state and accumulated.  Over time, this allows a granular
    36  runtime profile to be produced.
    37  """
    38  import threading
    39  
    40  from apache_beam.utils.counters import CounterName
    41  from apache_beam.metrics.execution cimport MetricsContainer
    42  
    43  cimport cython
    44  from cpython cimport pythread
    45  from libc cimport math
    46  from libc.stdint cimport int32_t, int64_t
    47  
    48  
cdef extern from "Python.h":
  # Re-declared here as nogil and returning an untyped pointer so that the
  # sampling loop in StateSampler.run() can index the state list after it has
  # released the GIL.
  # This typically requires the GIL, but we synchronize the list modifications
  # we use this on via our own lock.
  cdef void* PyList_GET_ITEM(list, Py_ssize_t index) nogil

cdef extern from "crossplatform_unistd.h" nogil:
  # StateSampler.run() calls this with its sampling period expressed in
  # microseconds.
  void usleep(int)

cdef extern from "crossplatform_time.h" nogil:
  struct timespec:
    long tv_sec  # seconds
    long tv_nsec  # nanoseconds
  # get_nsec_time() calls this with clock_id 0 and combines the two timespec
  # fields into a single nanosecond count.
  int clock_gettime(int clock_id, timespec *result)
    62  
cdef inline int64_t get_nsec_time() nogil:
  """Get current time as nanoseconds since Unix epoch."""
  cdef timespec current_time
  # First argument value of 0 corresponds to CLOCK_REALTIME.
  # Note: the status code returned by clock_gettime is not inspected.
  clock_gettime(0, &current_time)
  return (
      # tv_sec is cast to int64_t before the multiplication so the product is
      # computed in 64 bits regardless of the width of long.
      (<int64_t> current_time.tv_sec) * 1000000000 +  # second to nanoseconds
      current_time.tv_nsec)
    71  
    72  
    73  cdef class StateSampler(object):
    74    """Tracks time spent in states during pipeline execution."""
    75  
    76    def __init__(self,
    77                 sampling_period_ms,
    78                 sampling_period_ms_start=None,
    79                 sampling_period_ratio=1.2):
    80      self._sampling_period_ms = sampling_period_ms
    81      # Slowly ramp up to avoid excessive waiting for short stages, as well
    82      # as more precise information in that case.
    83      self._sampling_period_ms_start = (
    84            sampling_period_ms_start or max(1, sampling_period_ms // 100))
    85      self._sampling_period_ratio = sampling_period_ratio
    86      self.started = False
    87      self.finished = False
    88  
    89      self.lock = pythread.PyThread_allocate_lock()
    90  
    91      self.current_state_index = 0
    92      self.time_since_transition = 0
    93      self.state_transition_count = 0
    94      unknown_state = ScopedState(self,
    95                                  CounterName('unknown'),
    96                                  None,
    97                                  self.current_state_index,
    98                                  None,
    99                                  None)
   100      pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
   101      self.scoped_states_by_index = [unknown_state]
   102      pythread.PyThread_release_lock(self.lock)
   103  
   104      # Assert that the compiler correctly aligned the current_state field.  This
   105      # is necessary for reads and writes to this variable to be atomic across
   106      # threads without additional synchronization.
   107      # States are referenced via an index rather than, say, a pointer because
   108      # of better support for 32-bit atomic reads and writes.
   109      assert (<int64_t> &self.current_state_index) % sizeof(int32_t) == 0, (
   110          'Address of StateSampler.current_state_index is not word-aligned.')
   111  
   112    def __dealloc__(self):
   113      pythread.PyThread_free_lock(self.lock)
   114  
   115    def run(self):
   116      cdef int64_t last_nsecs = get_nsec_time()
   117      cdef int64_t elapsed_nsecs
   118      cdef int64_t latest_transition_count = self.state_transition_count
   119      cdef int64_t sampling_period_us = self._sampling_period_ms_start * 1000
   120      with nogil:
   121        while True:
   122          usleep(<int>sampling_period_us)
   123          sampling_period_us = <int64_t>math.fmin(
   124              sampling_period_us * self._sampling_period_ratio,
   125              self._sampling_period_ms * 1000)
   126          pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
   127          try:
   128            if self.finished:
   129              break
   130            elapsed_nsecs = get_nsec_time() - last_nsecs
   131            # Take an address as we can't create a reference to the scope
   132            # without the GIL.
   133            nsecs_ptr = &(<ScopedState>PyList_GET_ITEM(
   134                self.scoped_states_by_index, self.current_state_index))._nsecs
   135            nsecs_ptr[0] += elapsed_nsecs
   136            if latest_transition_count != self.state_transition_count:
   137              self.time_since_transition = 0
   138              latest_transition_count = self.state_transition_count
   139            self.time_since_transition += elapsed_nsecs
   140            last_nsecs += elapsed_nsecs
   141          finally:
   142            pythread.PyThread_release_lock(self.lock)
   143  
   144    def start(self):
   145      assert not self.started
   146      self.sampling_thread = threading.Thread(target=self.run)
   147      self.sampling_thread.start()
   148  
   149    def stop(self):
   150      assert not self.finished
   151      pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
   152      self.finished = True
   153      pythread.PyThread_release_lock(self.lock)
   154      # May have to wait up to sampling_period_ms, but the platform-independent
   155      # pythread doesn't support conditions.
   156      self.sampling_thread.join()
   157  
   158    def reset(self):
   159      for state in self.scoped_states_by_index:
   160        (<ScopedState>state)._nsecs = 0
   161      self.started = self.finished = False
   162  
   163    cpdef ScopedState current_state(self):
   164      return self.current_state_c()
   165  
   166    cdef inline ScopedState current_state_c(self):
   167      # Faster than cpdef due to self always being a Python subclass.
   168      return <ScopedState>self.scoped_states_by_index[self.current_state_index]
   169  
   170    cpdef _scoped_state(self, counter_name, name_context, output_counter,
   171                        metrics_container):
   172      """Returns a context manager managing transitions for a given state.
   173      Args:
   174       counter_name: A CounterName object with information about the execution
   175         state.
   176       output_counter: A Beam Counter to which msecs are committed for reporting.
   177       metrics_container: A MetricsContainer for the current step.
   178  
   179      Returns:
   180        A ScopedState for the set of step-state-io_target.
   181      """
   182      new_state_index = len(self.scoped_states_by_index)
   183      scoped_state = ScopedState(self,
   184                                 counter_name,
   185                                 name_context,
   186                                 new_state_index,
   187                                 output_counter,
   188                                 metrics_container)
   189      # Both scoped_states_by_index and scoped_state.nsecs are accessed
   190      # by the sampling thread; initialize them under the lock.
   191      pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
   192      self.scoped_states_by_index.append(scoped_state)
   193      scoped_state._nsecs = 0
   194      pythread.PyThread_release_lock(self.lock)
   195      return scoped_state
   196  
   197    def update_metric(self, typed_metric_name, value):
   198      # Each of these is a cdef lookup.
   199      metrics_container = self.current_state_c().metrics_container
   200      if metrics_container is not None:
   201        metrics_container.get_metric_cell(typed_metric_name).update(value)
   202  
   203  
   204  cdef class ScopedState(object):
   205    """Context manager class managing transitions for a given sampler state."""
   206  
   207    def __init__(self,
   208                 sampler,
   209                 name,
   210                 step_name_context,
   211                 state_index,
   212                 counter,
   213                 metrics_container):
   214      self.sampler = sampler
   215      self.name = name
   216      self.name_context = step_name_context
   217      self.state_index = state_index
   218      self.counter = counter
   219      self.metrics_container = metrics_container
   220  
   221    @property
   222    def nsecs(self):
   223      return self._nsecs
   224  
   225    def sampled_seconds(self):
   226      return 1e-9 * self.nsecs
   227  
   228    def sampled_msecs_int(self):
   229      return int(1e-6 * self.nsecs)
   230  
   231    def __repr__(self):
   232      return "ScopedState[%s, %s]" % (self.name, self.nsecs)
   233  
   234    cpdef __enter__(self):
   235      self.old_state_index = self.sampler.current_state_index
   236      pythread.PyThread_acquire_lock(self.sampler.lock, pythread.WAIT_LOCK)
   237      self.sampler.current_state_index = self.state_index
   238      self.sampler.state_transition_count += 1
   239      pythread.PyThread_release_lock(self.sampler.lock)
   240  
   241    cpdef __exit__(self, unused_exc_type, unused_exc_value, unused_traceback):
   242      pythread.PyThread_acquire_lock(self.sampler.lock, pythread.WAIT_LOCK)
   243      self.sampler.current_state_index = self.old_state_index
   244      self.sampler.state_transition_count += 1
   245      pythread.PyThread_release_lock(self.sampler.lock)