github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/io/gcp/pubsub_it_pipeline.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """
    19  Test pipeline for use by pubsub_integration_test.
    20  """
    21  
    22  # pytype: skip-file
    23  
    24  import argparse
    25  
    26  import apache_beam as beam
    27  from apache_beam.options.pipeline_options import PipelineOptions
    28  from apache_beam.options.pipeline_options import StandardOptions
    29  
    30  
    31  def run_pipeline(argv, with_attributes, id_label, timestamp_attribute):
    32    """Build and run the pipeline."""
    33  
    34    parser = argparse.ArgumentParser()
    35    parser.add_argument(
    36        '--output_topic',
    37        required=True,
    38        help=(
    39            'Output PubSub topic of the form '
    40            '"projects/<PROJECT>/topic/<TOPIC>".'))
    41    parser.add_argument(
    42        '--input_subscription',
    43        required=True,
    44        help=(
    45            'Input PubSub subscription of the form '
    46            '"projects/<PROJECT>/subscriptions/<SUBSCRIPTION>."'))
    47    known_args, pipeline_args = parser.parse_known_args(argv)
    48  
    49    pipeline_options = PipelineOptions(pipeline_args)
    50    pipeline_options.view_as(StandardOptions).streaming = True
    51    p = beam.Pipeline(options=pipeline_options)
    52    runner_name = type(p.runner).__name__
    53  
    54    # Read from PubSub into a PCollection.
    55    if runner_name == 'TestDirectRunner':
    56      messages = p | beam.io.ReadFromPubSub(
    57          subscription=known_args.input_subscription,
    58          with_attributes=with_attributes,
    59          timestamp_attribute=timestamp_attribute)
    60    else:
    61      messages = p | beam.io.ReadFromPubSub(
    62          subscription=known_args.input_subscription,
    63          id_label=id_label,
    64          with_attributes=with_attributes,
    65          timestamp_attribute=timestamp_attribute)
    66  
    67    def add_attribute(msg, timestamp=beam.DoFn.TimestampParam):
    68      msg.data += b'-seen'
    69      msg.attributes['processed'] = 'IT'
    70      if timestamp_attribute in msg.attributes:
    71        msg.attributes[timestamp_attribute + '_out'] = timestamp.to_rfc3339()
    72      return msg
    73  
    74    def modify_data(data):
    75      return data + b'-seen'
    76  
    77    if with_attributes:
    78      output = messages | 'add_attribute' >> beam.Map(add_attribute)
    79    else:
    80      output = messages | 'modify_data' >> beam.Map(modify_data)
    81  
    82    # Write to PubSub.
    83    if runner_name == 'TestDirectRunner':
    84      _ = output | beam.io.WriteToPubSub(
    85          known_args.output_topic, with_attributes=with_attributes)
    86    else:
    87      _ = output | beam.io.WriteToPubSub(
    88          known_args.output_topic,
    89          id_label=id_label,
    90          with_attributes=with_attributes,
    91          timestamp_attribute=timestamp_attribute)
    92  
    93    result = p.run()
    94    result.wait_until_finish()