github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/streaming_wordcount_it_test.py

github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/streaming_wordcount_it_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """End-to-end test for the streaming wordcount example."""
    19  
    20  # pytype: skip-file
    21  
    22  # beam-playground:
    23  #   name: StreamingWordCountITTest
    24  #   description: Unit-test for the streaming WordCount example.
    25  #   multifile: false
    26  #   context_line: 45
    27  #   categories:
    28  #     - Streaming
    29  #     - IO
    30  #   complexity: ADVANCED
    31  #   tags:
    32  #     - streaming
    33  #     - count
    34  #     - test
    35  
    36  import logging
    37  import unittest
    38  import uuid
    39  
    40  import pytest
    41  from hamcrest.core.core.allof import all_of
    42  
    43  from apache_beam.examples import streaming_wordcount
    44  from apache_beam.io.gcp.tests.pubsub_matcher import PubSubMessageMatcher
    45  from apache_beam.runners.runner import PipelineState
    46  from apache_beam.testing import test_utils
    47  from apache_beam.testing.pipeline_verifiers import PipelineStateMatcher
    48  from apache_beam.testing.test_pipeline import TestPipeline
    49  
    50  INPUT_TOPIC = 'wc_topic_input'
    51  OUTPUT_TOPIC = 'wc_topic_output'
    52  INPUT_SUB = 'wc_subscription_input'
    53  OUTPUT_SUB = 'wc_subscription_output'
    54  
    55  DEFAULT_INPUT_NUMBERS = 500
    56  WAIT_UNTIL_FINISH_DURATION = 10 * 60 * 1000  # in milliseconds
    57  
    58  
    59  class StreamingWordCountIT(unittest.TestCase):
    60    def setUp(self):
    61      self.test_pipeline = TestPipeline(is_integration_test=True)
    62      self.project = self.test_pipeline.get_option('project')
    63      self.uuid = str(uuid.uuid4())
    64  
    65      # Set up PubSub environment.
    66      from google.cloud import pubsub
    67      self.pub_client = pubsub.PublisherClient()
    68      self.input_topic = self.pub_client.create_topic(
    69          name=self.pub_client.topic_path(self.project, INPUT_TOPIC + self.uuid))
    70      self.output_topic = self.pub_client.create_topic(
    71          name=self.pub_client.topic_path(self.project, OUTPUT_TOPIC + self.uuid))
    72  
    73      self.sub_client = pubsub.SubscriberClient()
    74      self.input_sub = self.sub_client.create_subscription(
    75          name=self.sub_client.subscription_path(
    76              self.project, INPUT_SUB + self.uuid),
    77          topic=self.input_topic.name)
    78      self.output_sub = self.sub_client.create_subscription(
    79          name=self.sub_client.subscription_path(
    80              self.project, OUTPUT_SUB + self.uuid),
    81          topic=self.output_topic.name,
    82          ack_deadline_seconds=60)
    83  
    84    def _inject_numbers(self, topic, num_messages):
    85      """Inject numbers as test data to PubSub."""
    86      logging.debug('Injecting %d numbers to topic %s', num_messages, topic.name)
    87      for n in range(num_messages):
    88        self.pub_client.publish(self.input_topic.name, str(n).encode('utf-8'))
    89  
    90    def tearDown(self):
    91      test_utils.cleanup_subscriptions(
    92          self.sub_client, [self.input_sub, self.output_sub])
    93      test_utils.cleanup_topics(
    94          self.pub_client, [self.input_topic, self.output_topic])
    95  
    96    @pytest.mark.it_postcommit
    97    def test_streaming_wordcount_it(self):
    98      # Build expected dataset.
    99      expected_msg = [('%d: 1' % num).encode('utf-8')
   100                      for num in range(DEFAULT_INPUT_NUMBERS)]
   101  
   102      # Set extra options to the pipeline for test purpose
   103      state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
   104      pubsub_msg_verifier = PubSubMessageMatcher(
   105          self.project, self.output_sub.name, expected_msg, timeout=400)
   106      extra_opts = {
   107          'input_subscription': self.input_sub.name,
   108          'output_topic': self.output_topic.name,
   109          'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
   110          'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)
   111      }
   112  
   113      # Generate input data and inject to PubSub.
   114      self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)
   115  
   116      # Get pipeline options from command argument: --test-pipeline-options,
   117      # and start pipeline job by calling pipeline main function.
   118      streaming_wordcount.run(
   119          self.test_pipeline.get_full_options_as_args(**extra_opts),
   120          save_main_session=False)
   121  
   122  
   123  if __name__ == '__main__':
   124    logging.getLogger().setLevel(logging.DEBUG)
   125    unittest.main()