# github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/wordcount_debugging_test.py (about)
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Test for the debugging wordcount example."""

# pytype: skip-file

import logging
import re
import unittest
import uuid

import pytest

from apache_beam.examples import wordcount_debugging
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.test_utils import create_file
from apache_beam.testing.test_utils import read_files_from_pattern


@pytest.mark.examples_postcommit
class WordCountDebuggingTest(unittest.TestCase):
  """Integration test for the debugging wordcount example."""

  # Text written to the input file by test_basics.
  SAMPLE_TEXT = 'xx yy Flourish\n zz Flourish Flourish stomach\n aa\n bb cc dd'

  def get_results(self, temp_path):
    """Collect (word, count) pairs from the pipeline's output files.

    Args:
      temp_path: Output prefix; every file matching ``temp_path + '.result*'``
        is read.

    Returns:
      A list of ``(word, count)`` tuples, one for each output line that
      contains a ``<letters>: <digits>`` pair. Lines without such a pair are
      skipped, and only the first pair on a line is used.
    """
    output_text = read_files_from_pattern(temp_path + '.result*')
    # One search per line, so at most one pair is taken from each line.
    found = (
        re.search(r'([A-Za-z]+): ([0-9]+)', row)
        for row in output_text.splitlines())
    return [(hit.group(1), int(hit.group(2))) for hit in found
            if hit is not None]

  def test_basics(self):
    """Run wordcount_debugging on SAMPLE_TEXT and check the parsed output."""
    pipeline = TestPipeline(is_integration_test=True)

    # Setup the files with expected content. A fresh UUID keeps this run's
    # input and output apart from anything else under temp_location.
    temp_location = pipeline.get_option('temp_location')
    base_path = '/'.join([temp_location, str(uuid.uuid4())])
    input_file = create_file(
        '/'.join([base_path, 'input.txt']), self.SAMPLE_TEXT)

    pipeline_args = pipeline.get_full_options_as_args(
        input=input_file, output='%s.result' % base_path)
    wordcount_debugging.run(pipeline_args, save_main_session=False)

    # Parse result file and compare.
    # Only these two words are expected even though SAMPLE_TEXT holds others;
    # presumably the example filters the rest -- verify against
    # wordcount_debugging.
    expected_words = [('Flourish', 3), ('stomach', 1)]
    actual_words = self.get_results(base_path)
    self.assertEqual(sorted(actual_words), sorted(expected_words))


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()