#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Test pipeline for use by pubsub_integration_test.
"""

# pytype: skip-file

import argparse

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions


def run_pipeline(argv, with_attributes, id_label, timestamp_attribute):
  """Build and run the streaming Pub/Sub round-trip test pipeline.

  The pipeline reads from the subscription given by ``--input_subscription``,
  appends ``b'-seen'`` to every payload (and, when ``with_attributes`` is
  true, also tags the message attributes), writes the result to the topic
  given by ``--output_topic`` and then blocks until the pipeline finishes.

  Args:
    argv: Command-line arguments. ``--output_topic`` and
      ``--input_subscription`` are required; every unrecognized argument is
      forwarded to ``PipelineOptions``.
    with_attributes: Forwarded to ``ReadFromPubSub`` and ``WriteToPubSub``.
      When true the elements are processed as message objects exposing
      ``data`` and ``attributes``; otherwise as raw payloads.
    id_label: Forwarded to the Pub/Sub read and write transforms, except when
      the pipeline runner is ``TestDirectRunner``.
    timestamp_attribute: Forwarded to ``ReadFromPubSub``; also forwarded to
      ``WriteToPubSub`` except when the runner is ``TestDirectRunner``.
  """

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--output_topic',
      required=True,
      help=(
          'Output PubSub topic of the form '
          '"projects/<PROJECT>/topics/<TOPIC>".'))
  parser.add_argument(
      '--input_subscription',
      required=True,
      help=(
          'Input PubSub subscription of the form '
          '"projects/<PROJECT>/subscriptions/<SUBSCRIPTION>".'))
  known_args, pipeline_args = parser.parse_known_args(argv)

  pipeline_options = PipelineOptions(pipeline_args)
  pipeline_options.view_as(StandardOptions).streaming = True
  p = beam.Pipeline(options=pipeline_options)

  # The runner is identified by class name only, so this module does not need
  # to import the test runner class itself.
  # NOTE(review): the runner-specific options are presumably withheld because
  # TestDirectRunner does not support them -- confirm against the runner.
  is_test_direct_runner = type(p.runner).__name__ == 'TestDirectRunner'

  # Read from PubSub into a PCollection.
  read_kwargs = {
      'subscription': known_args.input_subscription,
      'with_attributes': with_attributes,
      'timestamp_attribute': timestamp_attribute,
  }
  if not is_test_direct_runner:
    read_kwargs['id_label'] = id_label
  messages = p | beam.io.ReadFromPubSub(**read_kwargs)

  def add_attribute(msg, timestamp=beam.DoFn.TimestampParam):
    """Mark a message as processed; the message is modified in place."""
    msg.data += b'-seen'
    msg.attributes['processed'] = 'IT'
    # Echo the element timestamp back out (RFC 3339) only for messages that
    # carried the timestamp attribute on input.
    if timestamp_attribute in msg.attributes:
      msg.attributes[timestamp_attribute + '_out'] = timestamp.to_rfc3339()
    return msg

  def modify_data(data):
    """Mark a raw payload as processed."""
    return data + b'-seen'

  if with_attributes:
    output = messages | 'add_attribute' >> beam.Map(add_attribute)
  else:
    output = messages | 'modify_data' >> beam.Map(modify_data)

  # Write to PubSub.
  write_kwargs = {'with_attributes': with_attributes}
  if not is_test_direct_runner:
    write_kwargs['id_label'] = id_label
    write_kwargs['timestamp_attribute'] = timestamp_attribute
  _ = output | beam.io.WriteToPubSub(known_args.output_topic, **write_kwargs)

  result = p.run()
  result.wait_until_finish()