github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/runners/direct/direct_runner.py

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""DirectRunner, executing on the local machine.

The DirectRunner is a runner implementation that executes the entire
graph of transformations belonging to a pipeline on the local machine.
"""
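
# Illustrative sketch (not executed as part of this module): a minimal
# pipeline run locally with the DirectRunner. The element values here are
# made up for the example.
#
#   import apache_beam as beam
#
#   with beam.Pipeline(runner='DirectRunner') as p:
#     _ = (
#         p
#         | beam.Create([1, 2, 3])
#         | beam.Map(lambda x: x * 2)
#         | beam.Map(print))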

# pytype: skip-file

import itertools
import logging
import time
import typing

from google.protobuf import wrappers_pb2

import apache_beam as beam
from apache_beam import coders
from apache_beam import typehints
from apache_beam.internal.util import ArgumentPlaceholder
from apache_beam.options.pipeline_options import DirectOptions
from apache_beam.options.pipeline_options import StandardOptions
from apache_beam.options.value_provider import RuntimeValueProvider
from apache_beam.pvalue import PCollection
from apache_beam.runners.direct.bundle_factory import BundleFactory
from apache_beam.runners.direct.clock import RealClock
from apache_beam.runners.direct.clock import TestClock
from apache_beam.runners.runner import PipelineResult
from apache_beam.runners.runner import PipelineRunner
from apache_beam.runners.runner import PipelineState
from apache_beam.transforms import userstate
from apache_beam.transforms.core import CombinePerKey
from apache_beam.transforms.core import CombineValuesDoFn
from apache_beam.transforms.core import DoFn
from apache_beam.transforms.core import ParDo
from apache_beam.transforms.ptransform import PTransform
from apache_beam.transforms.timeutil import TimeDomain
from apache_beam.typehints import trivial_inference

__all__ = ['BundleBasedDirectRunner', 'DirectRunner', 'SwitchingDirectRunner']

_LOGGER = logging.getLogger(__name__)


class SwitchingDirectRunner(PipelineRunner):
  """Executes a single pipeline on the local machine.

  This implementation switches between using the FnApiRunner (which has
  high throughput for batch jobs) and using the BundleBasedDirectRunner,
  which supports streaming execution and certain primitives not yet
  implemented in the FnApiRunner.
  """
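  # Illustrative sketch (not executed as part of this module): selecting this
  # runner explicitly via PipelineOptions instead of passing a runner object.
  # The flag values are made up for the example.
  #
  #   from apache_beam.options.pipeline_options import PipelineOptions
  #
  #   options = PipelineOptions(['--runner=DirectRunner'])
  #   with beam.Pipeline(options=options) as p:
  #     ...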
  def is_fnapi_compatible(self):
    return BundleBasedDirectRunner.is_fnapi_compatible()

  def run_pipeline(self, pipeline, options):

    from apache_beam.pipeline import PipelineVisitor
    from apache_beam.runners.dataflow.native_io.iobase import NativeSource
    from apache_beam.runners.dataflow.native_io.iobase import _NativeWrite
    from apache_beam.testing.test_stream import TestStream

    class _FnApiRunnerSupportVisitor(PipelineVisitor):
      """Visitor determining if a Pipeline can be run on the FnApiRunner."""
      def accept(self, pipeline):
        self.supported_by_fnapi_runner = True
        pipeline.visit(self)
        return self.supported_by_fnapi_runner

      def visit_transform(self, applied_ptransform):
        transform = applied_ptransform.transform
        # The FnApiRunner does not support streaming execution.
        if isinstance(transform, TestStream):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support reads from NativeSources.
        if (isinstance(transform, beam.io.Read) and
            isinstance(transform.source, NativeSource)):
          self.supported_by_fnapi_runner = False
        # The FnApiRunner does not support the use of _NativeWrites.
        if isinstance(transform, _NativeWrite):
          self.supported_by_fnapi_runner = False
        if isinstance(transform, beam.ParDo):
          dofn = transform.dofn
          # The FnApiRunner does not support execution of CombineFns with
          # deferred side inputs.
          if isinstance(dofn, CombineValuesDoFn):
            args, kwargs = transform.raw_side_inputs
            args_to_check = itertools.chain(args, kwargs.values())
            if any(isinstance(arg, ArgumentPlaceholder)
                   for arg in args_to_check):
              self.supported_by_fnapi_runner = False
          if userstate.is_stateful_dofn(dofn):
            _, timer_specs = userstate.get_dofn_specs(dofn)
            for timer in timer_specs:
              if timer.time_domain == TimeDomain.REAL_TIME:
                self.supported_by_fnapi_runner = False

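    # Illustrative sketch (not executed as part of this module): the same
    # PipelineVisitor hook can be used to inspect a pipeline for other
    # purposes, e.g. counting its transforms. The class name is made up for
    # the example.
    #
    #   class _CountingVisitor(PipelineVisitor):
    #     def __init__(self):
    #       self.count = 0
    #
    #     def visit_transform(self, applied_ptransform):
    #       self.count += 1
    #
    #   counter = _CountingVisitor()
    #   pipeline.visit(counter)  # counter.count now holds the transform count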
    # If every transform in the pipeline is supported by the FnApiRunner (and
    # the pipeline does not rely on streaming-only primitives such as
    # TestStream), use it; otherwise fall back to the BundleBasedDirectRunner.
    if _FnApiRunnerSupportVisitor().accept(pipeline):
      from apache_beam.portability.api import beam_provision_api_pb2
      from apache_beam.runners.portability.fn_api_runner import fn_runner
      from apache_beam.runners.portability.portable_runner import JobServiceHandle
      all_options = options.get_all_options()
      encoded_options = JobServiceHandle.encode_pipeline_options(all_options)
      provision_info = fn_runner.ExtendedProvisionInfo(
          beam_provision_api_pb2.ProvisionInfo(
              pipeline_options=encoded_options))
      runner = fn_runner.FnApiRunner(provision_info=provision_info)
    else:
      runner = BundleBasedDirectRunner()

    return runner.run_pipeline(pipeline, options)


# Type variables.
K = typing.TypeVar('K')
V = typing.TypeVar('V')


@typehints.with_input_types(typing.Tuple[K, V])
@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]])
class _GroupByKeyOnly(PTransform):
  """A group by key transform, ignoring windows."""
  def infer_output_type(self, input_type):
    key_type, value_type = trivial_inference.key_value_types(input_type)
    return typehints.KV[key_type, typehints.Iterable[value_type]]

  def expand(self, pcoll):
    self._check_pcollection(pcoll)
    return PCollection.from_(pcoll)


@typehints.with_input_types(typing.Tuple[K, typing.Iterable[V]])
@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]])
class _GroupAlsoByWindow(ParDo):
  """The GroupAlsoByWindow transform."""
  def __init__(self, windowing):
    super().__init__(_GroupAlsoByWindowDoFn(windowing))
    self.windowing = windowing

  def expand(self, pcoll):
    self._check_pcollection(pcoll)
    return PCollection.from_(pcoll)


class _GroupAlsoByWindowDoFn(DoFn):
  # TODO(robertwb): Support combiner lifting.

  def __init__(self, windowing):
    super().__init__()
    self.windowing = windowing

  def infer_output_type(self, input_type):
    key_type, windowed_value_iter_type = trivial_inference.key_value_types(
        input_type)
    value_type = windowed_value_iter_type.inner_type.inner_type
    return typehints.KV[key_type, typehints.Iterable[value_type]]

  def start_bundle(self):
    # pylint: disable=wrong-import-order, wrong-import-position
    from apache_beam.transforms.trigger import create_trigger_driver
    # pylint: enable=wrong-import-order, wrong-import-position
    self.driver = create_trigger_driver(self.windowing, True)

  def process(self, element):
    k, vs = element
    return self.driver.process_entire_key(k, vs)


@typehints.with_input_types(typing.Tuple[K, V])
@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]])
class _StreamingGroupByKeyOnly(_GroupByKeyOnly):
  """Streaming GroupByKeyOnly placeholder for overriding in DirectRunner."""
  urn = "direct_runner:streaming_gbko:v0.1"

  # These are needed due to apply overloads.
  def to_runner_api_parameter(self, unused_context):
    return _StreamingGroupByKeyOnly.urn, None

  @staticmethod
  @PTransform.register_urn(urn, None)
  def from_runner_api_parameter(
      unused_ptransform, unused_payload, unused_context):
    return _StreamingGroupByKeyOnly()


@typehints.with_input_types(typing.Tuple[K, typing.Iterable[V]])
@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]])
class _StreamingGroupAlsoByWindow(_GroupAlsoByWindow):
  """Streaming GroupAlsoByWindow placeholder for overriding in DirectRunner."""
  urn = "direct_runner:streaming_gabw:v0.1"

  # These are needed due to apply overloads.
  def to_runner_api_parameter(self, context):
    return (
        _StreamingGroupAlsoByWindow.urn,
        wrappers_pb2.BytesValue(
            value=context.windowing_strategies.get_id(self.windowing)))

  @staticmethod
  @PTransform.register_urn(urn, wrappers_pb2.BytesValue)
  def from_runner_api_parameter(unused_ptransform, payload, context):
    return _StreamingGroupAlsoByWindow(
        context.windowing_strategies.get_by_id(payload.value))


@typehints.with_input_types(typing.Tuple[K, typing.Iterable[V]])
@typehints.with_output_types(typing.Tuple[K, typing.Iterable[V]])
class _GroupByKey(PTransform):
  """The DirectRunner GroupByKey implementation."""
  def expand(self, pcoll):
    # Imported here to avoid circular dependencies.
    # pylint: disable=wrong-import-order, wrong-import-position
    from apache_beam.coders import typecoders

    input_type = pcoll.element_type
    if input_type is not None:
      # Initialize type-hints used below to enforce type-checking and to
      # pass downstream to further PTransforms.
      key_type, value_type = trivial_inference.key_value_types(input_type)
      # Enforce that the input to a GBK has a KV element type.
      pcoll.element_type = typehints.typehints.coerce_to_kv_type(
          pcoll.element_type)
      typecoders.registry.verify_deterministic(
          typecoders.registry.get_coder(key_type),
          'GroupByKey operation "%s"' % self.label)

      reify_output_type = typehints.KV[
          key_type, typehints.WindowedValue[value_type]]  # type: ignore[misc]
      gbk_input_type = (
          typehints.KV[
              key_type,
              typehints.Iterable[typehints.WindowedValue[  # type: ignore[misc]
                  value_type]]])
      gbk_output_type = typehints.KV[key_type, typehints.Iterable[value_type]]

      # pylint: disable=bad-option-value
      return (
          pcoll
          | 'ReifyWindows' >> (
              ParDo(beam.GroupByKey.ReifyWindows()).with_output_types(
                  reify_output_type))
          | 'GroupByKey' >> (
              _GroupByKeyOnly().with_input_types(
                  reify_output_type).with_output_types(gbk_input_type))
          | (
              'GroupByWindow' >>
              _GroupAlsoByWindow(pcoll.windowing).with_input_types(
                  gbk_input_type).with_output_types(gbk_output_type)))
    else:
      # The input_type is None; run the default expansion without type hints.
      return (
          pcoll
          | 'ReifyWindows' >> ParDo(beam.GroupByKey.ReifyWindows())
          | 'GroupByKey' >> _GroupByKeyOnly()
          | 'GroupByWindow' >> _GroupAlsoByWindow(pcoll.windowing))

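# Illustrative sketch (not executed as part of this module): the DirectRunner
# expands GroupByKey into ReifyWindows | _GroupByKeyOnly | _GroupAlsoByWindow,
# so standard GroupByKey semantics still hold. The element values are made up
# for the example.
#
#   with beam.Pipeline(runner='DirectRunner') as p:
#     _ = (
#         p
#         | beam.Create([('a', 1), ('a', 2), ('b', 3)])
#         | beam.GroupByKey()  # yields ('a', [1, 2]) and ('b', [3])
#         | beam.Map(print))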

def _get_transform_overrides(pipeline_options):
  """Returns a list of PTransformOverride objects to apply before running a
  pipeline using the DirectRunner.

  Currently this only works for overrides where the input and output types do
  not change.

  For internal use only; no backwards-compatibility guarantees.
  """
  # Importing the following locally to avoid a circular dependency.
  from apache_beam.pipeline import PTransformOverride
  from apache_beam.runners.direct.helper_transforms import LiftedCombinePerKey
  from apache_beam.runners.direct.sdf_direct_runner import ProcessKeyedElementsViaKeyedWorkItemsOverride
  from apache_beam.runners.direct.sdf_direct_runner import SplittableParDoOverride

  class CombinePerKeyOverride(PTransformOverride):
    def matches(self, applied_ptransform):
      if isinstance(applied_ptransform.transform, CombinePerKey):
        return applied_ptransform.inputs[0].windowing.is_default()
      return False

    def get_replacement_transform_for_applied_ptransform(
        self, applied_ptransform):
      # TODO: Move imports to top. The Pipeline <-> Runner dependency causes
      # problems with resolving imports when they are at the top.
      # pylint: disable=wrong-import-position
      transform = applied_ptransform.transform
      try:
        return LiftedCombinePerKey(
            transform.fn, transform.args, transform.kwargs)
      except NotImplementedError:
        # Fall back to the original transform if it cannot be lifted.
        return transform

  class StreamingGroupByKeyOverride(PTransformOverride):
    def matches(self, applied_ptransform):
      # Note: we match the exact class, since we replace it with a subclass.
      return applied_ptransform.transform.__class__ == _GroupByKeyOnly

    def get_replacement_transform_for_applied_ptransform(
        self, applied_ptransform):
      # Use the specialized streaming implementation.
      transform = _StreamingGroupByKeyOnly()
      return transform

  class StreamingGroupAlsoByWindowOverride(PTransformOverride):
    def matches(self, applied_ptransform):
      # Note: we match the exact class, since we replace it with a subclass.
      transform = applied_ptransform.transform
      return (
          isinstance(applied_ptransform.transform, ParDo) and
          isinstance(transform.dofn, _GroupAlsoByWindowDoFn) and
          transform.__class__ != _StreamingGroupAlsoByWindow)

    def get_replacement_transform_for_applied_ptransform(
        self, applied_ptransform):
      # Use the specialized streaming implementation.
      transform = _StreamingGroupAlsoByWindow(
          applied_ptransform.transform.dofn.windowing)
      return transform

  class TestStreamOverride(PTransformOverride):
    def matches(self, applied_ptransform):
      from apache_beam.testing.test_stream import TestStream
      self.applied_ptransform = applied_ptransform
      return isinstance(applied_ptransform.transform, TestStream)

    def get_replacement_transform_for_applied_ptransform(
        self, applied_ptransform):
      from apache_beam.runners.direct.test_stream_impl import _ExpandableTestStream
      return _ExpandableTestStream(applied_ptransform.transform)

  class GroupByKeyPTransformOverride(PTransformOverride):
    """A ``PTransformOverride`` for ``GroupByKey``.

    This replaces the Beam GroupByKey implementation with the DirectRunner's
    primitive _GroupByKey.
    """
    def matches(self, applied_ptransform):
      # Imported here to avoid circular dependencies.
      # pylint: disable=wrong-import-order, wrong-import-position
      from apache_beam.transforms.core import GroupByKey
      return isinstance(applied_ptransform.transform, GroupByKey)

    def get_replacement_transform_for_applied_ptransform(
        self, applied_ptransform):
      return _GroupByKey()

  overrides = [
      # This needs to be both the first and the last override (it is appended
      # again below). Other overrides depend on the GroupByKey implementation
      # being composed of _GroupByKeyOnly and _GroupAlsoByWindow.
      GroupByKeyPTransformOverride(),
      SplittableParDoOverride(),
      ProcessKeyedElementsViaKeyedWorkItemsOverride(),
      CombinePerKeyOverride(),
      TestStreamOverride(),
  ]

  # Add streaming overrides, if necessary.
  if pipeline_options.view_as(StandardOptions).streaming:
    overrides.append(StreamingGroupByKeyOverride())
    overrides.append(StreamingGroupAlsoByWindowOverride())

  # Add PubSub overrides, if PubSub is available.
  try:
    from apache_beam.io.gcp import pubsub as unused_pubsub
    overrides += _get_pubsub_transform_overrides(pipeline_options)
  except ImportError:
    pass

  # This also needs to be last because other transforms apply GBKs which need
  # to be translated into a DirectRunner-compatible transform.
  overrides.append(GroupByKeyPTransformOverride())

  return overrides

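# Illustrative sketch (not executed as part of this module): the overrides
# above are applied to a pipeline via Pipeline.replace_all, exactly as
# BundleBasedDirectRunner.run_pipeline does further below.
#
#   overrides = _get_transform_overrides(options)
#   pipeline.replace_all(overrides)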

class _DirectReadFromPubSub(PTransform):
  def __init__(self, source):
    self._source = source

  def _infer_output_coder(
      self, unused_input_type=None, unused_input_coder=None):
    # type: (...) -> typing.Optional[coders.Coder]
    return coders.BytesCoder()

  def get_windowing(self, unused_inputs):
    return beam.Windowing(beam.window.GlobalWindows())

  def expand(self, pvalue):
    # This is handled as a native transform.
    return PCollection(self.pipeline, is_bounded=self._source.is_bounded())


class _DirectWriteToPubSubFn(DoFn):
  BUFFER_SIZE_ELEMENTS = 100
  FLUSH_TIMEOUT_SECS = BUFFER_SIZE_ELEMENTS * 0.5

  def __init__(self, transform):
    self.project = transform.project
    self.short_topic_name = transform.topic_name
    self.id_label = transform.id_label
    self.timestamp_attribute = transform.timestamp_attribute
    self.with_attributes = transform.with_attributes

    # TODO(https://github.com/apache/beam/issues/18939): Add support for
    # id_label and timestamp_attribute.
    if transform.id_label:
      raise NotImplementedError(
          'DirectRunner: id_label is not supported for '
          'PubSub writes')
    if transform.timestamp_attribute:
      raise NotImplementedError(
          'DirectRunner: timestamp_attribute is not '
          'supported for PubSub writes')

  def start_bundle(self):
    self._buffer = []

  def process(self, elem):
    self._buffer.append(elem)
    if len(self._buffer) >= self.BUFFER_SIZE_ELEMENTS:
      self._flush()

  def finish_bundle(self):
    self._flush()

  def _flush(self):
    from google.cloud import pubsub
    pub_client = pubsub.PublisherClient()
    topic = pub_client.topic_path(self.project, self.short_topic_name)

    if self.with_attributes:
      futures = [
          pub_client.publish(topic, elem.data, **elem.attributes)
          for elem in self._buffer
      ]
    else:
      futures = [pub_client.publish(topic, elem) for elem in self._buffer]

    timer_start = time.time()
    for future in futures:
      remaining = self.FLUSH_TIMEOUT_SECS - (time.time() - timer_start)
      future.result(remaining)
    self._buffer = []

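# Note on the flush budget above: with BUFFER_SIZE_ELEMENTS = 100,
# FLUSH_TIMEOUT_SECS = 100 * 0.5 = 50 seconds. The loop in _flush charges all
# publish futures against that single shared budget rather than giving each
# future its own timeout:
#
#   timer_start = time.time()
#   for future in futures:
#     # Each wait gets only whatever remains of the 50-second budget.
#     remaining = FLUSH_TIMEOUT_SECS - (time.time() - timer_start)
#     future.result(remaining)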

def _get_pubsub_transform_overrides(pipeline_options):
  from apache_beam.io.gcp import pubsub as beam_pubsub
  from apache_beam.pipeline import PTransformOverride

  class ReadFromPubSubOverride(PTransformOverride):
    def matches(self, applied_ptransform):
      return isinstance(
          applied_ptransform.transform, beam_pubsub.ReadFromPubSub)

    def get_replacement_transform_for_applied_ptransform(
        self, applied_ptransform):
      if not pipeline_options.view_as(StandardOptions).streaming:
        raise Exception(
            'PubSub I/O is only available in streaming mode '
            '(use the --streaming flag).')
      return _DirectReadFromPubSub(applied_ptransform.transform._source)

  class WriteToPubSubOverride(PTransformOverride):
    def matches(self, applied_ptransform):
      return isinstance(applied_ptransform.transform, beam_pubsub.WriteToPubSub)

    def get_replacement_transform_for_applied_ptransform(
        self, applied_ptransform):
      if not pipeline_options.view_as(StandardOptions).streaming:
        raise Exception(
            'PubSub I/O is only available in streaming mode '
            '(use the --streaming flag).')
      return beam.ParDo(_DirectWriteToPubSubFn(applied_ptransform.transform))

  return [ReadFromPubSubOverride(), WriteToPubSubOverride()]


class BundleBasedDirectRunner(PipelineRunner):
  """Executes a single pipeline on the local machine."""
  @staticmethod
  def is_fnapi_compatible():
    return False

  def run_pipeline(self, pipeline, options):
    """Executes the entire pipeline and returns a DirectPipelineResult."""

    # TODO: Move imports to top. The Pipeline <-> Runner dependency causes
    # problems with resolving imports when they are at the top.
    # pylint: disable=wrong-import-position
    from apache_beam.pipeline import PipelineVisitor
    from apache_beam.runners.direct.consumer_tracking_pipeline_visitor import \
      ConsumerTrackingPipelineVisitor
    from apache_beam.runners.direct.evaluation_context import EvaluationContext
    from apache_beam.runners.direct.executor import Executor
    from apache_beam.runners.direct.transform_evaluator import \
      TransformEvaluatorRegistry
    from apache_beam.testing.test_stream import TestStream

    # If the TestStream I/O is used, use a mock test clock.
    class TestStreamUsageVisitor(PipelineVisitor):
      """Visitor determining whether a Pipeline uses a TestStream."""
      def __init__(self):
        self.uses_test_stream = False

      def visit_transform(self, applied_ptransform):
        if isinstance(applied_ptransform.transform, TestStream):
          self.uses_test_stream = True

    visitor = TestStreamUsageVisitor()
    pipeline.visit(visitor)
    clock = TestClock() if visitor.uses_test_stream else RealClock()

    # Perform the configured PTransform overrides.
    pipeline.replace_all(_get_transform_overrides(options))

    _LOGGER.info('Running pipeline with DirectRunner.')
    self.consumer_tracking_visitor = ConsumerTrackingPipelineVisitor()
    pipeline.visit(self.consumer_tracking_visitor)

    evaluation_context = EvaluationContext(
        options,
        BundleFactory(
            stacked=options.view_as(
                DirectOptions).direct_runner_use_stacked_bundle),
        self.consumer_tracking_visitor.root_transforms,
        self.consumer_tracking_visitor.value_to_consumers,
        self.consumer_tracking_visitor.step_names,
        self.consumer_tracking_visitor.views,
        clock)

    executor = Executor(
        self.consumer_tracking_visitor.value_to_consumers,
        TransformEvaluatorRegistry(evaluation_context),
        evaluation_context)
    # The DirectRunner does not support injecting PipelineOptions values at
    # runtime.
    RuntimeValueProvider.set_runtime_options({})
    # Start the executor. This is a non-blocking call; it starts execution in
    # background threads and returns.
    executor.start(self.consumer_tracking_visitor.root_transforms)
    result = DirectPipelineResult(executor, evaluation_context)

    return result

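# Illustrative sketch (not executed as part of this module): forcing the
# bundle-based implementation instead of letting SwitchingDirectRunner pick.
# The element values are made up for the example.
#
#   with beam.Pipeline(runner=BundleBasedDirectRunner()) as p:
#     _ = p | beam.Create(['x', 'y']) | beam.Map(print)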

# Use the SwitchingDirectRunner as the default.
DirectRunner = SwitchingDirectRunner


class DirectPipelineResult(PipelineResult):
  """A DirectPipelineResult provides access to info about a pipeline."""
  def __init__(self, executor, evaluation_context):
    super().__init__(PipelineState.RUNNING)
    self._executor = executor
    self._evaluation_context = evaluation_context

  def __del__(self):
    if self._state == PipelineState.RUNNING:
      _LOGGER.warning(
          'The DirectPipelineResult is being garbage-collected while the '
          'DirectRunner is still running the corresponding pipeline. This may '
          'lead to incomplete execution of the pipeline if the main thread '
          'exits before pipeline completion. Consider using '
          'result.wait_until_finish() to wait for completion of pipeline '
          'execution.')

  def wait_until_finish(self, duration=None):
    if not PipelineState.is_terminal(self.state):
      if duration:
        raise NotImplementedError(
            'DirectRunner does not support duration argument.')
      try:
        self._executor.await_completion()
        self._state = PipelineState.DONE
      except:  # pylint: disable=broad-except
        self._state = PipelineState.FAILED
        raise
    return self._state

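  # Illustrative sketch (not executed as part of this module): running a
  # pipeline without the context manager and blocking on completion, as the
  # __del__ warning above recommends.
  #
  #   p = beam.Pipeline(runner='DirectRunner')
  #   _ = p | beam.Create([1, 2, 3]) | beam.Map(print)
  #   result = p.run()
  #   result.wait_until_finish()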
  def aggregated_values(self, aggregator_or_name):
    return self._evaluation_context.get_aggregator_values(aggregator_or_name)

  def metrics(self):
    return self._evaluation_context.metrics()

  def cancel(self):
    """Shuts down pipeline workers.

    For testing use only. Does not properly wait for pipeline workers to shut
    down.
    """
    self._state = PipelineState.CANCELLING
    self._executor.shutdown()
    self._state = PipelineState.CANCELLED