github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/direct/executor.py

github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/direct/executor.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """An executor that schedules and executes applied ptransforms."""
    19  
    20  # pytype: skip-file
    21  
    22  import collections
    23  import itertools
    24  import logging
    25  import queue
    26  import threading
    27  import traceback
    28  from typing import TYPE_CHECKING
    29  from typing import Any
    30  from typing import Dict
    31  from typing import FrozenSet
    32  from typing import Optional
    33  from typing import Set
    34  from weakref import WeakValueDictionary
    35  
    36  from apache_beam.metrics.execution import MetricsContainer
    37  from apache_beam.runners.worker import statesampler
    38  from apache_beam.transforms import sideinputs
    39  from apache_beam.utils import counters
    40  
    41  if TYPE_CHECKING:
    42    from apache_beam import pvalue
    43    from apache_beam.runners.direct.bundle_factory import _Bundle
    44    from apache_beam.runners.direct.evaluation_context import EvaluationContext
    45    from apache_beam.runners.direct.transform_evaluator import TransformEvaluatorRegistry
    46  
    47  _LOGGER = logging.getLogger(__name__)
    48  
    49  
    50  class _ExecutorService(object):
    51    """Thread pool for executing tasks in parallel."""
    52    class CallableTask(object):
    53      def call(self, state_sampler):
    54        pass
    55  
    56      @property
    57      def name(self):
    58        return None
    59  
    60    class _ExecutorServiceWorker(threading.Thread):
    61      """Worker thread for executing a single task at a time."""
    62  
    63      # Amount to block waiting for getting an item from the queue in seconds.
    64      TIMEOUT = 5
    65  
    66      def __init__(
    67          self,
    68          queue,  # type: queue.Queue[_ExecutorService.CallableTask]
    69          index):
    70        super().__init__()
    71        self.queue = queue
    72        self._index = index
    73        self._default_name = 'ExecutorServiceWorker-' + str(index)
    74        self._update_name()
    75        self.shutdown_requested = False
    76  
    77        # Stop worker thread when main thread exits.
    78        self.daemon = True
    79        self.start()
    80  
    81      def _update_name(self, task=None):
    82        if task and task.name:
    83          name = task.name
    84        else:
    85          name = self._default_name
    86        self.name = 'Thread: %d, %s (%s)' % (
    87            self._index, name, 'executing' if task else 'idle')
    88  
    89      def _get_task_or_none(self):
    90        # type: () -> Optional[_ExecutorService.CallableTask]
    91        try:
    92          # Do not block indefinitely, otherwise we may not act for a requested
    93          # shutdown.
    94          return self.queue.get(
    95              timeout=_ExecutorService._ExecutorServiceWorker.TIMEOUT)
    96        except queue.Empty:
    97          return None
    98  
    99      def run(self):
   100        state_sampler = statesampler.StateSampler('', counters.CounterFactory())
   101        statesampler.set_current_tracker(state_sampler)
   102        while not self.shutdown_requested:
   103          task = self._get_task_or_none()
   104          if task:
   105            try:
   106              if not self.shutdown_requested:
   107                self._update_name(task)
   108                task.call(state_sampler)
   109                self._update_name()
   110            finally:
   111              self.queue.task_done()
   112  
   113      def shutdown(self):
   114        self.shutdown_requested = True
   115  
   116    def __init__(self, num_workers):
   117      self.queue = queue.Queue(
   118      )  # type: queue.Queue[_ExecutorService.CallableTask]
   119      self.workers = [
   120          _ExecutorService._ExecutorServiceWorker(self.queue, i)
   121          for i in range(num_workers)
   122      ]
   123      self.shutdown_requested = False
   124  
   125    def submit(self, task):
   126      # type: (_ExecutorService.CallableTask) -> None
   127      assert isinstance(task, _ExecutorService.CallableTask)
   128      if not self.shutdown_requested:
   129        self.queue.put(task)
   130  
   131    def await_completion(self):
   132      for worker in self.workers:
   133        worker.join()
   134  
   135    def shutdown(self):
   136      self.shutdown_requested = True
   137  
   138      for worker in self.workers:
   139        worker.shutdown()
   140  
   141      # Consume all the remaining items in the queue
   142      while not self.queue.empty():
   143        try:
   144          self.queue.get_nowait()
   145          self.queue.task_done()
   146        except queue.Empty:
   147          continue
   148      # All existing threads will eventually terminate (after they complete their
   149      # last task).
   150  
   151  
   152  class _TransformEvaluationState(object):
   153    def __init__(
   154        self,
   155        executor_service,
   156        scheduled  # type: Set[TransformExecutor]
   157    ):
   158      self.executor_service = executor_service
   159      self.scheduled = scheduled
   160  
   161    def schedule(self, work):
   162      self.scheduled.add(work)
   163      self.executor_service.submit(work)
   164  
   165    def complete(self, completed_work):
   166      self.scheduled.remove(completed_work)
   167  
   168  
   169  class _ParallelEvaluationState(_TransformEvaluationState):
   170    """A TransformEvaluationState with unlimited parallelism.
   171  
   172    Any TransformExecutor scheduled will be immediately submitted to the
   173    ExecutorService.
   174  
   175    A principal use of this is for evaluators that can generate output bundles
   176    only using the input bundle (e.g. ParDo).
   177    """
   178    pass
   179  
   180  
   181  class _SerialEvaluationState(_TransformEvaluationState):
   182    """A TransformEvaluationState with a single work queue.
   183  
   184    Any TransformExecutor scheduled will be placed on the work queue. Only one
   185    item of work will be submitted to the ExecutorService at any time.
   186  
   187    A principal use of this is for evaluators that keeps a global state such as
   188    _GroupByKeyOnly.
   189    """
   190    def __init__(self, executor_service, scheduled):
   191      super().__init__(executor_service, scheduled)
   192      self.serial_queue = collections.deque()
   193      self.currently_evaluating = None
   194      self._lock = threading.Lock()
   195  
   196    def complete(self, completed_work):
   197      self._update_currently_evaluating(None, completed_work)
   198      super().complete(completed_work)
   199  
   200    def schedule(self, new_work):
   201      self._update_currently_evaluating(new_work, None)
   202  
   203    def _update_currently_evaluating(self, new_work, completed_work):
   204      with self._lock:
   205        if new_work:
   206          self.serial_queue.append(new_work)
   207        if completed_work:
   208          assert self.currently_evaluating == completed_work
   209          self.currently_evaluating = None
   210        if self.serial_queue and not self.currently_evaluating:
   211          next_work = self.serial_queue.pop()
   212          self.currently_evaluating = next_work
   213          super().schedule(next_work)
   214  
   215  
   216  class _TransformExecutorServices(object):
   217    """Schedules and completes TransformExecutors.
   218  
   219    Controls the concurrency as appropriate for the applied transform the executor
   220    exists for.
   221    """
   222    def __init__(self, executor_service):
   223      # type: (_ExecutorService) -> None
   224      self._executor_service = executor_service
   225      self._scheduled = set()  # type: Set[TransformExecutor]
   226      self._parallel = _ParallelEvaluationState(
   227          self._executor_service, self._scheduled)
   228      self._serial_cache = WeakValueDictionary(
   229      )  # type: WeakValueDictionary[Any, _SerialEvaluationState]
   230  
   231    def parallel(self):
   232      # type: () -> _ParallelEvaluationState
   233      return self._parallel
   234  
   235    def serial(self, step):
   236      # type: (Any) -> _SerialEvaluationState
   237      cached = self._serial_cache.get(step)
   238      if not cached:
   239        cached = _SerialEvaluationState(self._executor_service, self._scheduled)
   240        self._serial_cache[step] = cached
   241      return cached
   242  
   243    @property
   244    def executors(self):
   245      # type: () -> FrozenSet[TransformExecutor]
   246      return frozenset(self._scheduled)
   247  
   248  
   249  class _CompletionCallback(object):
   250    """The default completion callback.
   251  
   252    The default completion callback is used to complete transform evaluations
   253    that are triggered due to the arrival of elements from an upstream transform,
   254    or for a source transform.
   255    """
   256  
   257    def __init__(self,
   258                 evaluation_context,  # type: EvaluationContext
   259                 all_updates,
   260                 timer_firings=None
   261                ):
   262      self._evaluation_context = evaluation_context
   263      self._all_updates = all_updates
   264      self._timer_firings = timer_firings or []
   265  
   266    def handle_result(
   267        self, transform_executor, input_committed_bundle, transform_result):
   268      output_committed_bundles = self._evaluation_context.handle_result(
   269          input_committed_bundle, self._timer_firings, transform_result)
   270      for output_committed_bundle in output_committed_bundles:
   271        self._all_updates.offer(
   272            _ExecutorServiceParallelExecutor._ExecutorUpdate(
   273                transform_executor, committed_bundle=output_committed_bundle))
   274      for unprocessed_bundle in transform_result.unprocessed_bundles:
   275        self._all_updates.offer(
   276            _ExecutorServiceParallelExecutor._ExecutorUpdate(
   277                transform_executor, unprocessed_bundle=unprocessed_bundle))
   278      return output_committed_bundles
   279  
   280    def handle_exception(self, transform_executor, exception):
   281      self._all_updates.offer(
   282          _ExecutorServiceParallelExecutor._ExecutorUpdate(
   283              transform_executor, exception=exception))
   284  
   285  
   286  class TransformExecutor(_ExecutorService.CallableTask):
   287    """For internal use only; no backwards-compatibility guarantees.
   288  
   289    TransformExecutor will evaluate a bundle using an applied ptransform.
   290  
   291    A CallableTask responsible for constructing a TransformEvaluator and
   292    evaluating it on some bundle of input, and registering the result using the
   293    completion callback.
   294    """
   295  
   296    _MAX_RETRY_PER_BUNDLE = 4
   297  
   298    def __init__(self,
   299                 transform_evaluator_registry,  # type: TransformEvaluatorRegistry
   300                 evaluation_context,  # type: EvaluationContext
   301                 input_bundle,  # type: _Bundle
   302                 fired_timers,
   303                 applied_ptransform,
   304                 completion_callback,
   305                 transform_evaluation_state  # type: _TransformEvaluationState
   306                ):
   307      self._transform_evaluator_registry = transform_evaluator_registry
   308      self._evaluation_context = evaluation_context
   309      self._input_bundle = input_bundle
   310      # For non-empty bundles, store the window of the max EOW.
   311      # TODO(mariagh): Move to class _Bundle's inner _StackedWindowedValues
   312      self._latest_main_input_window = None
   313      if input_bundle.has_elements():
   314        self._latest_main_input_window = input_bundle._elements[0].windows[0]
   315        for elem in input_bundle.get_elements_iterable():
   316          if elem.windows[0].end > self._latest_main_input_window.end:
   317            self._latest_main_input_window = elem.windows[0]
   318      self._fired_timers = fired_timers
   319      self._applied_ptransform = applied_ptransform
   320      self._completion_callback = completion_callback
   321      self._transform_evaluation_state = transform_evaluation_state
   322      self._side_input_values = {}  # type: Dict[pvalue.AsSideInput, Any]
   323      self.blocked = False
   324      self._call_count = 0
   325      self._retry_count = 0
   326      self._max_retries_per_bundle = TransformExecutor._MAX_RETRY_PER_BUNDLE
   327  
   328    def call(self, state_sampler):
   329      self._call_count += 1
   330      assert self._call_count <= (1 + len(self._applied_ptransform.side_inputs))
   331      metrics_container = MetricsContainer(self._applied_ptransform.full_label)
   332      start_state = state_sampler.scoped_state(
   333          self._applied_ptransform.full_label,
   334          'start',
   335          metrics_container=metrics_container)
   336      process_state = state_sampler.scoped_state(
   337          self._applied_ptransform.full_label,
   338          'process',
   339          metrics_container=metrics_container)
   340      finish_state = state_sampler.scoped_state(
   341          self._applied_ptransform.full_label,
   342          'finish',
   343          metrics_container=metrics_container)
   344  
   345      with start_state:
   346        # Side input initialization should be accounted for in start_state.
   347        for side_input in self._applied_ptransform.side_inputs:
   348          # Find the projection of main's window onto the side input's window.
   349          window_mapping_fn = side_input._view_options().get(
   350              'window_mapping_fn', sideinputs._global_window_mapping_fn)
   351          main_onto_side_window = window_mapping_fn(
   352              self._latest_main_input_window)
   353          block_until = main_onto_side_window.end
   354  
   355          if side_input not in self._side_input_values:
   356            value = self._evaluation_context.get_value_or_block_until_ready(
   357                side_input, self, block_until)
   358            if not value:
   359              # Monitor task will reschedule this executor once the side input is
   360              # available.
   361              return
   362            self._side_input_values[side_input] = value
   363        side_input_values = [
   364            self._side_input_values[side_input]
   365            for side_input in self._applied_ptransform.side_inputs
   366        ]
   367  
   368      while self._retry_count < self._max_retries_per_bundle:
   369        try:
   370          self.attempt_call(
   371              metrics_container,
   372              side_input_values,
   373              start_state,
   374              process_state,
   375              finish_state)
   376          break
   377        except Exception as e:
   378          self._retry_count += 1
   379          _LOGGER.error(
   380              'Exception at bundle %r, due to an exception.\n %s',
   381              self._input_bundle,
   382              traceback.format_exc())
   383          if self._retry_count == self._max_retries_per_bundle:
   384            _LOGGER.error(
   385                'Giving up after %s attempts.', self._max_retries_per_bundle)
   386            self._completion_callback.handle_exception(self, e)
   387  
   388      self._evaluation_context.metrics().commit_physical(
   389          self._input_bundle, metrics_container.get_cumulative())
   390      self._transform_evaluation_state.complete(self)
   391  
   392    def attempt_call(
   393        self,
   394        metrics_container,
   395        side_input_values,
   396        start_state,
   397        process_state,
   398        finish_state):
   399      """Attempts to run a bundle."""
   400      evaluator = self._transform_evaluator_registry.get_evaluator(
   401          self._applied_ptransform, self._input_bundle, side_input_values)
   402  
   403      with start_state:
   404        evaluator.start_bundle()
   405  
   406      with process_state:
   407        if self._fired_timers:
   408          for timer_firing in self._fired_timers:
   409            evaluator.process_timer_wrapper(timer_firing)
   410  
   411        if self._input_bundle:
   412          for value in self._input_bundle.get_elements_iterable():
   413            evaluator.process_element(value)
   414  
   415      with finish_state:
   416        result = evaluator.finish_bundle()
   417        result.logical_metric_updates = metrics_container.get_cumulative()
   418  
   419      self._completion_callback.handle_result(self, self._input_bundle, result)
   420      return result
   421  
   422  
   423  class Executor(object):
   424    """For internal use only; no backwards-compatibility guarantees."""
   425    def __init__(self, *args, **kwargs):
   426      self._executor = _ExecutorServiceParallelExecutor(*args, **kwargs)
   427  
   428    def start(self, roots):
   429      self._executor.start(roots)
   430  
   431    def await_completion(self):
   432      self._executor.await_completion()
   433  
   434    def shutdown(self):
   435      self._executor.request_shutdown()
   436  
   437  
   438  class _ExecutorServiceParallelExecutor(object):
   439    """An internal implementation for Executor."""
   440  
   441    NUM_WORKERS = 1
   442  
   443    def __init__(
   444        self,
   445        value_to_consumers,
   446        transform_evaluator_registry,
   447        evaluation_context  # type: EvaluationContext
   448    ):
   449      self.executor_service = _ExecutorService(
   450          _ExecutorServiceParallelExecutor.NUM_WORKERS)
   451      self.transform_executor_services = _TransformExecutorServices(
   452          self.executor_service)
   453      self.value_to_consumers = value_to_consumers
   454      self.transform_evaluator_registry = transform_evaluator_registry
   455      self.evaluation_context = evaluation_context
   456      self.all_updates = _ExecutorServiceParallelExecutor._TypedUpdateQueue(
   457          _ExecutorServiceParallelExecutor._ExecutorUpdate)
   458      self.visible_updates = _ExecutorServiceParallelExecutor._TypedUpdateQueue(
   459          _ExecutorServiceParallelExecutor._VisibleExecutorUpdate)
   460      self.default_completion_callback = _CompletionCallback(
   461          evaluation_context, self.all_updates)
   462  
   463    def start(self, roots):
   464      self.root_nodes = frozenset(roots)
   465      self.all_nodes = frozenset(
   466          itertools.chain(
   467              roots, *itertools.chain(self.value_to_consumers.values())))
   468      self.node_to_pending_bundles = {}
   469      for root_node in self.root_nodes:
   470        provider = (
   471            self.transform_evaluator_registry.get_root_bundle_provider(root_node))
   472        self.node_to_pending_bundles[root_node] = provider.get_root_bundles()
   473      self.executor_service.submit(
   474          _ExecutorServiceParallelExecutor._MonitorTask(self))
   475  
   476    def await_completion(self):
   477      update = self.visible_updates.take()
   478      try:
   479        if update.exception:
   480          raise update.exception
   481      finally:
   482        self.executor_service.shutdown()
   483        self.executor_service.await_completion()
   484  
   485    def request_shutdown(self):
   486      self.executor_service.shutdown()
   487      self.executor_service.await_completion()
   488      self.evaluation_context.shutdown()
   489  
   490    def schedule_consumers(self, committed_bundle):
   491      # type: (_Bundle) -> None
   492      if committed_bundle.pcollection in self.value_to_consumers:
   493        consumers = self.value_to_consumers[committed_bundle.pcollection]
   494        for applied_ptransform in consumers:
   495          self.schedule_consumption(
   496              applied_ptransform,
   497              committed_bundle, [],
   498              self.default_completion_callback)
   499  
   500    def schedule_unprocessed_bundle(self, applied_ptransform, unprocessed_bundle):
   501      self.node_to_pending_bundles[applied_ptransform].append(unprocessed_bundle)
   502  
   503    def schedule_consumption(self,
   504                             consumer_applied_ptransform,
   505                             committed_bundle,  # type: _Bundle
   506                             fired_timers,
   507                             on_complete
   508                            ):
   509      """Schedules evaluation of the given bundle with the transform."""
   510      assert consumer_applied_ptransform
   511      assert committed_bundle
   512      assert on_complete
   513      if self.transform_evaluator_registry.should_execute_serially(
   514          consumer_applied_ptransform):
   515        transform_executor_service = self.transform_executor_services.serial(
   516            consumer_applied_ptransform)  # type: _TransformEvaluationState
   517      else:
   518        transform_executor_service = self.transform_executor_services.parallel()
   519  
   520      transform_executor = TransformExecutor(
   521          self.transform_evaluator_registry,
   522          self.evaluation_context,
   523          committed_bundle,
   524          fired_timers,
   525          consumer_applied_ptransform,
   526          on_complete,
   527          transform_executor_service)
   528      transform_executor_service.schedule(transform_executor)
   529  
   530    class _TypedUpdateQueue(object):
   531      """Type checking update queue with blocking and non-blocking operations."""
   532      def __init__(self, item_type):
   533        self._item_type = item_type
   534        self._queue = queue.Queue()
   535  
   536      def poll(self):
   537        try:
   538          item = self._queue.get_nowait()
   539          self._queue.task_done()
   540          return item
   541        except queue.Empty:
   542          return None
   543  
   544      def take(self):
   545        # The implementation of Queue.Queue.get() does not propagate
   546        # KeyboardInterrupts when a timeout is not used.  We therefore use a
   547        # one-second timeout in the following loop to allow KeyboardInterrupts
   548        # to be correctly propagated.
   549        while True:
   550          try:
   551            item = self._queue.get(timeout=1)
   552            self._queue.task_done()
   553            return item
   554          except queue.Empty:
   555            pass
   556  
   557      def offer(self, item):
   558        assert isinstance(item, self._item_type)
   559        self._queue.put_nowait(item)
   560  
   561    class _ExecutorUpdate(object):
   562      """An internal status update on the state of the executor."""
   563      def __init__(
   564          self,
   565          transform_executor,
   566          committed_bundle=None,
   567          unprocessed_bundle=None,
   568          exception=None):
   569        self.transform_executor = transform_executor
   570        # Exactly one of them should be not-None
   571        assert sum(
   572            [bool(committed_bundle), bool(unprocessed_bundle),
   573             bool(exception)]) == 1
   574        self.committed_bundle = committed_bundle
   575        self.unprocessed_bundle = unprocessed_bundle
   576        self.exception = exception
   577  
   578    class _VisibleExecutorUpdate(object):
   579      """An update of interest to the user.
   580  
   581      Used for awaiting the completion to decide whether to return normally or
   582      raise an exception.
   583      """
   584      def __init__(self, exception=None):
   585        self.finished = exception is not None
   586        self.exception = exception
   587  
   588    class _MonitorTask(_ExecutorService.CallableTask):
   589      """MonitorTask continuously runs to ensure that pipeline makes progress."""
   590      def __init__(self, executor):
   591        # type: (_ExecutorServiceParallelExecutor) -> None
   592        self._executor = executor
   593  
   594      @property
   595      def name(self):
   596        return 'monitor'
   597  
   598      def call(self, state_sampler):
   599        try:
   600          update = self._executor.all_updates.poll()
   601          while update:
   602            if update.committed_bundle:
   603              self._executor.schedule_consumers(update.committed_bundle)
   604            elif update.unprocessed_bundle:
   605              self._executor.schedule_unprocessed_bundle(
   606                  update.transform_executor._applied_ptransform,
   607                  update.unprocessed_bundle)
   608            else:
   609              assert update.exception
   610              _LOGGER.warning(
   611                  'A task failed with exception: %s', update.exception)
   612              self._executor.visible_updates.offer(
   613                  _ExecutorServiceParallelExecutor._VisibleExecutorUpdate(
   614                      update.exception))
   615            update = self._executor.all_updates.poll()
   616          self._executor.evaluation_context.schedule_pending_unblocked_tasks(
   617              self._executor.executor_service)
   618          self._add_work_if_necessary(self._fire_timers())
   619        except Exception as e:  # pylint: disable=broad-except
   620          _LOGGER.error('Monitor task died due to exception.\n %s', e)
   621          self._executor.visible_updates.offer(
   622              _ExecutorServiceParallelExecutor._VisibleExecutorUpdate(e))
   623        finally:
   624          if not self._should_shutdown():
   625            self._executor.executor_service.submit(self)
   626  
   627      def _should_shutdown(self):
   628        # type: () -> bool
   629  
   630        """Checks whether the pipeline is completed and should be shut down.
   631  
   632        If there is anything in the queue of tasks to do or
   633        if there are any realtime timers set, do not shut down.
   634  
   635        Otherwise, check if all the transforms' watermarks are complete.
   636        If they are not, the pipeline is not progressing (stall detected).
   637        Whether the pipeline has stalled or not, the executor should shut
   638        down the pipeline.
   639  
   640        Returns:
   641          True only if the pipeline has reached a terminal state and should
   642          be shut down.
   643  
   644        """
   645        if self._is_executing():
   646          # There are some bundles still in progress.
   647          return False
   648  
   649        watermark_manager = self._executor.evaluation_context._watermark_manager
   650        _, any_unfired_realtime_timers = watermark_manager.extract_all_timers()
   651        if any_unfired_realtime_timers:
   652          return False
   653  
   654        else:
   655          if self._executor.evaluation_context.is_done():
   656            self._executor.visible_updates.offer(
   657                _ExecutorServiceParallelExecutor._VisibleExecutorUpdate())
   658          else:
   659            # Nothing is scheduled for execution, but watermarks incomplete.
   660            self._executor.visible_updates.offer(
   661                _ExecutorServiceParallelExecutor._VisibleExecutorUpdate((
   662                    Exception('Monitor task detected a pipeline stall.'),
   663                    None,
   664                    None)))
   665          self._executor.executor_service.shutdown()
   666          return True
   667  
   668      def _fire_timers(self):
   669        """Schedules triggered consumers if any timers fired.
   670  
   671        Returns:
   672          True if timers fired.
   673        """
   674        transform_fired_timers, _ = (
   675            self._executor.evaluation_context.extract_all_timers())
   676        for applied_ptransform, fired_timers in transform_fired_timers:
   677          # Use an empty committed bundle. just to trigger.
   678          empty_bundle = (
   679              self._executor.evaluation_context.create_empty_committed_bundle(
   680                  applied_ptransform.inputs[0]))
   681          timer_completion_callback = _CompletionCallback(
   682              self._executor.evaluation_context,
   683              self._executor.all_updates,
   684              timer_firings=fired_timers)
   685  
   686          self._executor.schedule_consumption(
   687              applied_ptransform,
   688              empty_bundle,
   689              fired_timers,
   690              timer_completion_callback)
   691        return bool(transform_fired_timers)
   692  
   693      def _is_executing(self):
   694        # type: () -> bool
   695  
   696        """Checks whether the job is still executing.
   697  
   698        Returns:
   699          True if there is at least one non-blocked TransformExecutor active."""
   700  
   701        executors = self._executor.transform_executor_services.executors
   702        if not executors:
   703          # Nothing is executing.
   704          return False
   705  
   706        # Ensure that at least one of those executors is not blocked.
   707        for transform_executor in executors:
   708          if not transform_executor.blocked:
   709            return True
   710        return False
   711  
   712      def _add_work_if_necessary(self, timers_fired):
   713        """Adds more work from the roots if pipeline requires more input.
   714  
   715        If all active TransformExecutors are in a blocked state, add more work
   716        from root nodes that may have additional work. This ensures that if a
   717        pipeline has elements available from the root nodes it will add those
   718        elements when necessary.
   719  
   720        Args:
   721          timers_fired: True if any timers fired prior to this call.
   722        """
   723        # If any timers have fired, they will add more work; No need to add more.
   724        if timers_fired:
   725          return
   726  
   727        if self._is_executing():
   728          # We have at least one executor that can proceed without adding
   729          # additional work.
   730          return
   731  
   732        # All current TransformExecutors are blocked; add more work from any
   733        # pending bundles.
   734        for applied_ptransform in self._executor.all_nodes:
   735          if not self._executor.evaluation_context.is_done(applied_ptransform):
   736            pending_bundles = self._executor.node_to_pending_bundles.get(
   737                applied_ptransform, [])
   738            for bundle in pending_bundles:
   739              self._executor.schedule_consumption(
   740                  applied_ptransform,
   741                  bundle, [],
   742                  self._executor.default_completion_callback)
   743            self._executor.node_to_pending_bundles[applied_ptransform] = []