github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/flink/flink_streaming_impulse_source.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """
    19  A PTransform that provides an unbounded, streaming source of empty byte arrays.
    20  
    21  This can only be used with the flink runner.
    22  """
    23  # pytype: skip-file
    24  
    25  import json
    26  from typing import Any
    27  from typing import Dict
    28  
    29  from apache_beam import PTransform
    30  from apache_beam import Windowing
    31  from apache_beam import pvalue
    32  from apache_beam.transforms.window import GlobalWindows
    33  
    34  
    35  class FlinkStreamingImpulseSource(PTransform):
    36    URN = "flink:transform:streaming_impulse:v1"
    37  
    38    config = {}  # type: Dict[str, Any]
    39  
    40    def expand(self, pbegin):
    41      assert isinstance(pbegin, pvalue.PBegin), (
    42          'Input to transform must be a PBegin but found %s' % pbegin)
    43      return pvalue.PCollection(pbegin.pipeline, is_bounded=False)
    44  
    45    def get_windowing(self, unused_inputs):
    46      return Windowing(GlobalWindows())
    47  
    48    def infer_output_type(self, unused_input_type):
    49      return bytes
    50  
    51    def to_runner_api_parameter(self, context):
    52      assert isinstance(self, FlinkStreamingImpulseSource), \
    53        "expected instance of StreamingImpulseSource, but got %s" % self.__class__
    54      return (self.URN, json.dumps(self.config))
    55  
    56    def set_interval_ms(self, interval_ms):
    57      """Sets the interval (in milliseconds) between messages in the stream.
    58      """
    59      self.config["interval_ms"] = interval_ms
    60      return self
    61  
    62    def set_message_count(self, message_count):
    63      """If non-zero, the stream will produce only this many total messages.
    64      Otherwise produces an unbounded number of messages.
    65      """
    66      self.config["message_count"] = message_count
    67      return self
    68  
    69    @staticmethod
    70    @PTransform.register_urn(URN, None)
    71    def from_runner_api_parameter(_ptransform, spec_parameter, _context):
    72      if isinstance(spec_parameter, bytes):
    73        spec_parameter = spec_parameter.decode('utf-8')
    74      config = json.loads(spec_parameter)
    75      instance = FlinkStreamingImpulseSource()
    76      if "interval_ms" in config:
    77        instance.set_interval_ms(config["interval_ms"])
    78      if "message_count" in config:
    79        instance.set_message_count(config["message_count"])
    80  
    81      return instance