github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/complete/autocomplete_it_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """End-to-end test for Autocomplete example."""
    19  # pytype: skip-file
    20  
    21  import logging
    22  import re
    23  import unittest
    24  import uuid
    25  
    26  import pytest
    27  
    28  from apache_beam.examples.complete import autocomplete
    29  from apache_beam.testing.test_pipeline import TestPipeline
    30  from apache_beam.testing.test_utils import create_file
    31  from apache_beam.testing.test_utils import read_files_from_pattern
    32  
    33  
    34  def format_output_file(output_string):
    35    def extract_prefix_topk_words_tuples(line):
    36      match = re.match(r'(.*): \[(.*)\]', line)
    37      prefix = match.group(1)
    38      topK_words_string = extract_top_k_words_tuples(match.group(2))
    39      return prefix, topK_words_string
    40  
    41    def extract_top_k_words_tuples(top_k_words_string):
    42      top_k_list = top_k_words_string.split("), (")
    43      return tuple(
    44          map(
    45              lambda top_k_string: tuple(format_top_k_tuples(top_k_string)),
    46              top_k_list))
    47  
    48    def format_top_k_tuples(top_k_string):
    49      (frequency, words) = top_k_string.replace('(', '').replace(')', '').replace(
    50          '\"', '').replace('\'', '').replace(' ', '').split(',')
    51      return int(frequency), words
    52  
    53    return list(
    54        map(
    55            lambda line: extract_prefix_topk_words_tuples(line),
    56            output_string.split('\n')))
    57  
    58  
    59  class AutocompleteIT(unittest.TestCase):
    60    WORDS = ['this', 'this', 'that', 'to', 'to', 'to']
    61    EXPECTED_PREFIXES = [
    62        ('t', ((3, 'to'), (2, 'this'), (1, 'that'))),
    63        ('to', ((3, 'to'), )),
    64        ('th', ((2, 'this'), (1, 'that'))),
    65        ('thi', ((2, 'this'), )),
    66        ('this', ((2, 'this'), )),
    67        ('tha', ((1, 'that'), )),
    68        ('that', ((1, 'that'), )),
    69    ]
    70  
    71    @pytest.mark.no_xdist
    72    @pytest.mark.examples_postcommit
    73    def test_autocomplete_output_files_on_small_input(self):
    74      test_pipeline = TestPipeline(is_integration_test=True)
    75      # Setup the files with expected content.
    76      OUTPUT_FILE_DIR = \
    77          'gs://temp-storage-for-end-to-end-tests/py-it-cloud/output'
    78      output = '/'.join([OUTPUT_FILE_DIR, str(uuid.uuid4()), 'result'])
    79      INPUT_FILE_DIR = \
    80          'gs://temp-storage-for-end-to-end-tests/py-it-cloud/input'
    81      input = '/'.join([INPUT_FILE_DIR, str(uuid.uuid4()), 'input.txt'])
    82      create_file(input, ' '.join(self.WORDS))
    83      extra_opts = {'input': input, 'output': output}
    84  
    85      autocomplete.run(test_pipeline.get_full_options_as_args(**extra_opts))
    86  
    87      # Load result file and compare.
    88      result = read_files_from_pattern('%s*' % output).strip()
    89  
    90      self.assertEqual(
    91          sorted(self.EXPECTED_PREFIXES), sorted(format_output_file(result)))
    92  
    93  
    94  if __name__ == '__main__':
    95    logging.getLogger().setLevel(logging.INFO)
    96    unittest.main()