#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Origin: github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/worker/statesampler_fast.pyx

# cython: profile=True
# cython: language_level=3

"""State sampler for tracking time spent in execution steps.

The state sampler profiles the time spent in each step of a pipeline.
Operations (defined in executor.py) which are executed as part of a MapTask are
instrumented with context managers provided by StateSampler.scoped_state().
These context managers change the internal state of the StateSampler during each
relevant Operation's .start(), .process() and .finish() methods. State is
sampled by a raw C thread, not holding the Python Global Interpreter Lock, which
queries the StateSampler's internal state at a defined sampling frequency. In a
common example, a ReadOperation during its .start() method reads an element and
calls a DoOperation's .process() method, which can call a WriteOperation's
.process() method. Each element processed causes the current state to
transition between these states of different Operations. Each time the sampling
thread queries the current state, the time spent since the previous sample is
attributed to that state and accumulated. Over time, this allows a granular
runtime profile to be produced.
"""
import threading

from apache_beam.utils.counters import CounterName
from apache_beam.metrics.execution cimport MetricsContainer

cimport cython
from cpython cimport pythread
from libc cimport math
from libc.stdint cimport int32_t, int64_t


cdef extern from "Python.h":
  # This typically requires the GIL, but we synchronize the list modifications
  # we use this on via our own lock.
  cdef void* PyList_GET_ITEM(list, Py_ssize_t index) nogil

cdef extern from "crossplatform_unistd.h" nogil:
  void usleep(int)

cdef extern from "crossplatform_time.h" nogil:
  struct timespec:
    long tv_sec  # seconds
    long tv_nsec  # nanoseconds
  int clock_gettime(int clock_id, timespec *result)

cdef inline int64_t get_nsec_time() nogil:
  """Get current time as nanoseconds since Unix epoch."""
  cdef timespec current_time
  # First argument value of 0 corresponds to CLOCK_REALTIME.
  clock_gettime(0, &current_time)
  return (
      (<int64_t> current_time.tv_sec) * 1000000000 +  # second to nanoseconds
      current_time.tv_nsec)


cdef class StateSampler(object):
  """Tracks time spent in states during pipeline execution.

  Args:
    sampling_period_ms: Steady-state interval between samples, in
      milliseconds.
    sampling_period_ms_start: Interval used for the first sample, in
      milliseconds. When falsy, defaults to
      max(1, sampling_period_ms // 100).
    sampling_period_ratio: Factor by which the interval is multiplied after
      each sample until it reaches sampling_period_ms.
  """

  def __init__(self,
               sampling_period_ms,
               sampling_period_ms_start=None,
               sampling_period_ratio=1.2):
    self._sampling_period_ms = sampling_period_ms
    # Slowly ramp up to avoid excessive waiting for short stages, as well
    # as more precise information in that case.
    self._sampling_period_ms_start = (
        sampling_period_ms_start or max(1, sampling_period_ms // 100))
    self._sampling_period_ratio = sampling_period_ratio
    self.started = False
    self.finished = False

    self.lock = pythread.PyThread_allocate_lock()

    self.current_state_index = 0
    self.time_since_transition = 0
    self.state_transition_count = 0
    # Index 0 is the state that is current whenever no scoped state has been
    # entered.
    unknown_state = ScopedState(self,
                                CounterName('unknown'),
                                None,
                                self.current_state_index,
                                None,
                                None)
    pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
    self.scoped_states_by_index = [unknown_state]
    pythread.PyThread_release_lock(self.lock)

    # Assert that the compiler correctly aligned the current_state field.  This
    # is necessary for reads and writes to this variable to be atomic across
    # threads without additional synchronization.
    # States are referenced via an index rather than, say, a pointer because
    # of better support for 32-bit atomic reads and writes.
    assert (<int64_t> &self.current_state_index) % sizeof(int32_t) == 0, (
        'Address of StateSampler.current_state_index is not word-aligned.')

  def __dealloc__(self):
    """Frees the lock allocated in __init__."""
    pythread.PyThread_free_lock(self.lock)

  def run(self):
    """Sampling loop; the target of the thread created by start().

    Repeatedly sleeps for the current sampling period and then, while holding
    the sampler lock, adds the time elapsed since the previous sample to the
    state that is current at that moment. The loop exits once `finished` has
    been set (see stop()).
    """
    cdef int64_t last_nsecs = get_nsec_time()
    cdef int64_t elapsed_nsecs
    cdef int64_t latest_transition_count = self.state_transition_count
    cdef int64_t sampling_period_us = self._sampling_period_ms_start * 1000
    with nogil:
      while True:
        usleep(<int>sampling_period_us)
        # Grow the period geometrically, capped at the configured maximum.
        sampling_period_us = <int64_t>math.fmin(
            sampling_period_us * self._sampling_period_ratio,
            self._sampling_period_ms * 1000)
        pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
        try:
          if self.finished:
            break
          elapsed_nsecs = get_nsec_time() - last_nsecs
          # Take an address as we can't create a reference to the scope
          # without the GIL.
          nsecs_ptr = &(<ScopedState>PyList_GET_ITEM(
              self.scoped_states_by_index, self.current_state_index))._nsecs
          nsecs_ptr[0] += elapsed_nsecs
          # A changed transition count means the state was switched since the
          # previous sample, so restart the time-in-state accumulator.
          if latest_transition_count != self.state_transition_count:
            self.time_since_transition = 0
            latest_transition_count = self.state_transition_count
          self.time_since_transition += elapsed_nsecs
          last_nsecs += elapsed_nsecs
        finally:
          # Runs on the `break` path as well, so the lock is always released.
          pythread.PyThread_release_lock(self.lock)

  def start(self):
    """Starts the sampling thread.

    Must not be called again until reset() has been called.
    """
    assert not self.started
    # Record the start so that the assertion above actually guards against a
    # second sampling thread being spawned.
    self.started = True
    self.sampling_thread = threading.Thread(target=self.run)
    self.sampling_thread.start()

  def stop(self):
    """Signals the sampling thread to exit and waits for it to terminate."""
    assert not self.finished
    pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
    self.finished = True
    pythread.PyThread_release_lock(self.lock)
    # May have to wait up to sampling_period_ms, but the platform-independent
    # pythread doesn't support conditions.
    self.sampling_thread.join()

  def reset(self):
    """Zeroes every state's accumulated time and clears started/finished."""
    for state in self.scoped_states_by_index:
      (<ScopedState>state)._nsecs = 0
    self.started = self.finished = False

  cpdef ScopedState current_state(self):
    """Returns the ScopedState at the current state index."""
    return self.current_state_c()

  cdef inline ScopedState current_state_c(self):
    # Faster than cpdef due to self always being a Python subclass.
    return <ScopedState>self.scoped_states_by_index[self.current_state_index]

  cpdef _scoped_state(self, counter_name, name_context, output_counter,
                      metrics_container):
    """Returns a context manager managing transitions for a given state.

    Args:
      counter_name: A CounterName object with information about the execution
        state.
      name_context: The name context for the step; stored on the returned
        ScopedState as `name_context`.
      output_counter: A Beam Counter to which msecs are committed for reporting.
      metrics_container: A MetricsContainer for the current step.

    Returns:
      A ScopedState for the set of step-state-io_target.
    """
    new_state_index = len(self.scoped_states_by_index)
    scoped_state = ScopedState(self,
                               counter_name,
                               name_context,
                               new_state_index,
                               output_counter,
                               metrics_container)
    # Both scoped_states_by_index and scoped_state.nsecs are accessed
    # by the sampling thread; initialize them under the lock.
    pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
    self.scoped_states_by_index.append(scoped_state)
    scoped_state._nsecs = 0
    pythread.PyThread_release_lock(self.lock)
    return scoped_state

  def update_metric(self, typed_metric_name, value):
    """Updates a metric cell in the current state's metrics container.

    Does nothing when the current state has no metrics container.
    """
    # Each of these is a cdef lookup.
    metrics_container = self.current_state_c().metrics_container
    if metrics_container is not None:
      metrics_container.get_metric_cell(typed_metric_name).update(value)


cdef class ScopedState(object):
  """Context manager class managing transitions for a given sampler state."""

  def __init__(self,
               sampler,
               name,
               step_name_context,
               state_index,
               counter,
               metrics_container):
    self.sampler = sampler
    self.name = name
    self.name_context = step_name_context
    self.state_index = state_index
    self.counter = counter
    self.metrics_container = metrics_container

  @property
  def nsecs(self):
    """Time attributed to this state so far, in nanoseconds."""
    return self._nsecs

  def sampled_seconds(self):
    """Returns the accumulated time converted to (float) seconds."""
    return 1e-9 * self.nsecs

  def sampled_msecs_int(self):
    """Returns the accumulated time in milliseconds, truncated to an int."""
    return int(1e-6 * self.nsecs)

  def __repr__(self):
    return "ScopedState[%s, %s]" % (self.name, self.nsecs)

  cpdef __enter__(self):
    """Makes this state the sampler's current state.

    The previously current index is remembered so __exit__ can restore it.
    """
    self.old_state_index = self.sampler.current_state_index
    pythread.PyThread_acquire_lock(self.sampler.lock, pythread.WAIT_LOCK)
    self.sampler.current_state_index = self.state_index
    self.sampler.state_transition_count += 1
    pythread.PyThread_release_lock(self.sampler.lock)

  cpdef __exit__(self, unused_exc_type, unused_exc_value, unused_traceback):
    """Restores the state that was current when __enter__ was called."""
    pythread.PyThread_acquire_lock(self.sampler.lock, pythread.WAIT_LOCK)
    self.sampler.current_state_index = self.old_state_index
    self.sampler.state_transition_count += 1
    pythread.PyThread_release_lock(self.sampler.lock)