#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""End-to-end test for Autocomplete example."""
# pytype: skip-file

import logging
import re
import unittest
import uuid

import pytest

from apache_beam.examples.complete import autocomplete
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.test_utils import create_file
from apache_beam.testing.test_utils import read_files_from_pattern

# Shape of one output line: ``<prefix>: [<candidates>]``.
_OUTPUT_LINE_RE = re.compile(r'(.*): \[(.*)\]')

# Deletes parentheses, both quote characters and spaces in a single pass,
# leaving ``<count>,<word>`` behind for each candidate.
_CANDIDATE_NOISE = str.maketrans('', '', '()"\' ')


def _parse_candidate(candidate_string):
    """Turn one ``(count, 'word')`` fragment into an ``(int, str)`` tuple."""
    frequency, word = candidate_string.translate(_CANDIDATE_NOISE).split(',')
    return int(frequency), word


def _parse_output_line(line):
    """Turn one ``prefix: [...]`` line into ``(prefix, candidates_tuple)``."""
    match = _OUTPUT_LINE_RE.match(line)
    if match is None:
        # Fail with the offending text instead of an AttributeError on None.
        raise ValueError('Unexpected autocomplete output line: %r' % line)
    prefix, candidates = match.groups()
    # Candidates are separated by "), (", so splitting on that keeps each
    # count/word pair together; leftover brackets are removed per candidate.
    return prefix, tuple(
        _parse_candidate(candidate) for candidate in candidates.split('), ('))


def format_output_file(output_string):
    """Parse the text of the autocomplete output into comparable tuples.

    Every non-blank line is expected to look like
    ``prefix: [(count, 'word'), (count, 'word')]``. Blank lines are skipped
    so that stray empty lines in the combined output do not abort parsing.

    Args:
        output_string: Newline-separated text of the pipeline output.

    Returns:
        A list with one ``(prefix, ((count, word), ...))`` tuple per
        non-blank line, in the order the lines appear.

    Raises:
        ValueError: If a non-blank line does not have the expected shape.
    """
    return [
        _parse_output_line(line)
        for line in output_string.split('\n')
        if line.strip()
    ]


class AutocompleteIT(unittest.TestCase):
    """Integration test running the autocomplete example on a tiny input."""

    # Words written, space-separated, to the input file.
    WORDS = ['this', 'this', 'that', 'to', 'to', 'to']

    # Expected (prefix, ((count, word), ...)) entries for WORDS.
    EXPECTED_PREFIXES = [
        ('t', ((3, 'to'), (2, 'this'), (1, 'that'))),
        ('to', ((3, 'to'), )),
        ('th', ((2, 'this'), (1, 'that'))),
        ('thi', ((2, 'this'), )),
        ('this', ((2, 'this'), )),
        ('tha', ((1, 'that'), )),
        ('that', ((1, 'that'), )),
    ]

    @pytest.mark.no_xdist
    @pytest.mark.examples_postcommit
    def test_autocomplete_output_files_on_small_input(self):
        """Run the example on WORDS and compare the parsed output."""
        test_pipeline = TestPipeline(is_integration_test=True)

        # Setup the files with expected content. A fresh UUID in each path
        # keeps separate runs from sharing input or output locations.
        output_dir = \
            'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output'
        output_prefix = '/'.join([output_dir, str(uuid.uuid4()), 'result'])
        input_dir = \
            'gs://temp-storage-for-end-to-end-tests/py-it-cloud/input'
        input_path = '/'.join([input_dir, str(uuid.uuid4()), 'input.txt'])
        create_file(input_path, ' '.join(self.WORDS))
        extra_opts = {'input': input_path, 'output': output_prefix}

        autocomplete.run(test_pipeline.get_full_options_as_args(**extra_opts))

        # Load result file and compare. Both sides are sorted because the
        # order of lines in the output is not part of what is being checked.
        result = read_files_from_pattern('%s*' % output_prefix).strip()

        self.assertEqual(
            sorted(self.EXPECTED_PREFIXES),
            sorted(format_output_file(result)))


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    unittest.main()