#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
A PTransform that provides an unbounded, streaming source of empty byte arrays.

This can only be used with the flink runner.
"""
# pytype: skip-file

import json
from typing import Any
from typing import Dict

from apache_beam import PTransform
from apache_beam import Windowing
from apache_beam import pvalue
from apache_beam.transforms.window import GlobalWindows


class FlinkStreamingImpulseSource(PTransform):
  """Root transform identified by the ``flink:transform:streaming_impulse:v1``
  URN.

  The transform carries a small configuration dict (``interval_ms`` and
  ``message_count``) that is serialized to JSON as the payload of its
  runner API spec and parsed back in ``from_runner_api_parameter``.

  The setters return ``self`` so they can be chained::

    FlinkStreamingImpulseSource().set_interval_ms(100).set_message_count(5)
  """
  URN = "flink:transform:streaming_impulse:v1"

  def __init__(self, label=None):
    """Creates a source with an empty configuration.

    Args:
      label: optional label, forwarded unchanged to ``PTransform.__init__``.
    """
    super().__init__(label)
    # The configuration must live on the instance. A dict declared in the
    # class body is shared by every instance, so the setters below would
    # overwrite the settings of all other sources in the process.
    self.config = {}  # type: Dict[str, Any]

  def expand(self, pbegin):
    """Returns an unbounded PCollection attached to ``pbegin.pipeline``.

    Args:
      pbegin: the input to this transform; must be a ``pvalue.PBegin``.

    Raises:
      AssertionError: if ``pbegin`` is not a ``pvalue.PBegin``.
    """
    assert isinstance(pbegin, pvalue.PBegin), (
        'Input to transform must be a PBegin but found %s' % pbegin)
    return pvalue.PCollection(pbegin.pipeline, is_bounded=False)

  def get_windowing(self, unused_inputs):
    """Returns a ``Windowing`` built from ``GlobalWindows``."""
    return Windowing(GlobalWindows())

  def infer_output_type(self, unused_input_type):
    """Returns ``bytes`` as the output element type."""
    return bytes

  def to_runner_api_parameter(self, context):
    """Returns the ``(URN, payload)`` pair for this transform.

    The payload is this instance's configuration dict encoded as a JSON
    string.
    """
    assert isinstance(self, FlinkStreamingImpulseSource), \
      "expected instance of StreamingImpulseSource, but got %s" % self.__class__
    return (self.URN, json.dumps(self.config))

  def set_interval_ms(self, interval_ms):
    """Sets the interval (in milliseconds) between messages in the stream.

    Returns:
      This transform, to allow chaining.
    """
    self.config["interval_ms"] = interval_ms
    return self

  def set_message_count(self, message_count):
    """If non-zero, the stream will produce only this many total messages.
    Otherwise produces an unbounded number of messages.

    Returns:
      This transform, to allow chaining.
    """
    self.config["message_count"] = message_count
    return self

  @staticmethod
  @PTransform.register_urn(URN, None)
  def from_runner_api_parameter(_ptransform, spec_parameter, _context):
    """Rebuilds a source from the JSON payload of its runner API spec.

    Args:
      _ptransform: unused.
      spec_parameter: the JSON configuration, either as ``str`` or as
        UTF-8 encoded ``bytes``.
      _context: unused.

    Returns:
      A new ``FlinkStreamingImpulseSource`` carrying whichever of
      ``interval_ms`` and ``message_count`` were present in the payload.
    """
    if isinstance(spec_parameter, bytes):
      spec_parameter = spec_parameter.decode('utf-8')
    config = json.loads(spec_parameter)
    instance = FlinkStreamingImpulseSource()
    if "interval_ms" in config:
      instance.set_interval_ms(config["interval_ms"])
    if "message_count" in config:
      instance.set_message_count(config["message_count"])

    return instance