# Source: github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/streaming_wordcount_it_test.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""End-to-end test for the streaming wordcount example."""

# pytype: skip-file

# beam-playground:
#   name: StreamingWordCountITTest
#   description: Unit-test for the streaming WordCount example.
#   multifile: false
#   context_line: 45
#   categories:
#     - Streaming
#     - IO
#   complexity: ADVANCED
#   tags:
#     - streaming
#     - count
#     - test

import logging
import unittest
import uuid

import pytest
from hamcrest.core.core.allof import all_of

from apache_beam.examples import streaming_wordcount
from apache_beam.io.gcp.tests.pubsub_matcher import PubSubMessageMatcher
from apache_beam.runners.runner import PipelineState
from apache_beam.testing import test_utils
from apache_beam.testing.pipeline_verifiers import PipelineStateMatcher
from apache_beam.testing.test_pipeline import TestPipeline

INPUT_TOPIC = 'wc_topic_input'
OUTPUT_TOPIC = 'wc_topic_output'
INPUT_SUB = 'wc_subscription_input'
OUTPUT_SUB = 'wc_subscription_output'

DEFAULT_INPUT_NUMBERS = 500
WAIT_UNTIL_FINISH_DURATION = 10 * 60 * 1000  # in milliseconds


class StreamingWordCountIT(unittest.TestCase):
  """Integration test for the streaming wordcount example.

  Every test gets its own pair of Pub/Sub topics and subscriptions, whose
  names are suffixed with a fresh UUID.
  """
  def setUp(self):
    """Create the test pipeline and the per-test Pub/Sub resources.

    The deletion of each topic and subscription is registered with
    ``addCleanup`` immediately after the resource is created.  ``unittest``
    does not call ``tearDown`` when ``setUp`` raises, but it does run the
    registered cleanups, so a failure partway through this method does not
    leave the already-created resources behind.  Cleanups run in
    last-in-first-out order, so the subscriptions are deleted before the
    topics.
    """
    self.test_pipeline = TestPipeline(is_integration_test=True)
    self.project = self.test_pipeline.get_option('project')
    self.uuid = str(uuid.uuid4())

    # Set up PubSub environment.
    from google.cloud import pubsub
    self.pub_client = pubsub.PublisherClient()
    self.input_topic = self.pub_client.create_topic(
        name=self.pub_client.topic_path(self.project, INPUT_TOPIC + self.uuid))
    self.addCleanup(
        test_utils.cleanup_topics, self.pub_client, [self.input_topic])
    self.output_topic = self.pub_client.create_topic(
        name=self.pub_client.topic_path(self.project, OUTPUT_TOPIC + self.uuid))
    self.addCleanup(
        test_utils.cleanup_topics, self.pub_client, [self.output_topic])

    self.sub_client = pubsub.SubscriberClient()
    self.input_sub = self.sub_client.create_subscription(
        name=self.sub_client.subscription_path(
            self.project, INPUT_SUB + self.uuid),
        topic=self.input_topic.name)
    self.addCleanup(
        test_utils.cleanup_subscriptions, self.sub_client, [self.input_sub])
    self.output_sub = self.sub_client.create_subscription(
        name=self.sub_client.subscription_path(
            self.project, OUTPUT_SUB + self.uuid),
        topic=self.output_topic.name,
        ack_deadline_seconds=60)
    self.addCleanup(
        test_utils.cleanup_subscriptions, self.sub_client, [self.output_sub])

  def _inject_numbers(self, topic, num_messages):
    """Inject numbers as test data to PubSub.

    Args:
      topic: the topic to publish to; its ``name`` attribute is used.
      num_messages: number of messages to publish.  Message ``n`` carries the
        UTF-8 encoded decimal string of ``n``, for ``n`` in
        ``range(num_messages)``.
    """
    logging.debug('Injecting %d numbers to topic %s', num_messages, topic.name)
    for n in range(num_messages):
      # Publish to the topic that was passed in (the same one that is logged
      # above) rather than unconditionally to self.input_topic.
      self.pub_client.publish(topic.name, str(n).encode('utf-8'))

  @pytest.mark.it_postcommit
  def test_streaming_wordcount_it(self):
    """Run streaming wordcount on injected numbers and verify its output.

    The on-success matcher requires both that the pipeline is in the RUNNING
    state and that one ``'<n>: 1'`` message per injected number is read from
    the output subscription.
    """
    # Build expected dataset.
    expected_msg = [('%d: 1' % num).encode('utf-8')
                    for num in range(DEFAULT_INPUT_NUMBERS)]

    # Set extra options to the pipeline for test purpose
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project, self.output_sub.name, expected_msg, timeout=400)
    extra_opts = {
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)
    }

    # Generate input data and inject to PubSub.
    self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    streaming_wordcount.run(
        self.test_pipeline.get_full_options_as_args(**extra_opts),
        save_main_session=False)


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.DEBUG)
  unittest.main()