github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/examples/complete/autocomplete_test.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Test for the autocomplete example."""
    19  
    20  # pytype: skip-file
    21  
    22  import unittest
    23  
    24  import pytest
    25  
    26  import apache_beam as beam
    27  from apache_beam.examples.complete import autocomplete
    28  from apache_beam.testing.test_pipeline import TestPipeline
    29  from apache_beam.testing.test_utils import compute_hash
    30  from apache_beam.testing.util import assert_that
    31  from apache_beam.testing.util import equal_to
    32  
    33  
    34  class AutocompleteTest(unittest.TestCase):
    35  
    36    WORDS = ['this', 'this', 'that', 'to', 'to', 'to']
    37    KINGLEAR_HASH_SUM = 268011785062540
    38    KINGLEAR_INPUT = 'gs://dataflow-samples/shakespeare/kinglear.txt'
    39    EXPECTED_PREFIXES = [
    40        ('t', ((3, 'to'), (2, 'this'), (1, 'that'))),
    41        ('to', ((3, 'to'), )),
    42        ('th', ((2, 'this'), (1, 'that'))),
    43        ('thi', ((2, 'this'), )),
    44        ('this', ((2, 'this'), )),
    45        ('tha', ((1, 'that'), )),
    46        ('that', ((1, 'that'), )),
    47    ]
    48  
    49    def test_top_prefixes(self):
    50      with TestPipeline() as p:
    51        words = p | beam.Create(self.WORDS)
    52        result = words | autocomplete.TopPerPrefix(5)
    53        # values must be hashable for now
    54        result = result | beam.Map(lambda k_vs: (k_vs[0], tuple(k_vs[1])))
    55        assert_that(result, equal_to(self.EXPECTED_PREFIXES))
    56  
    57    @pytest.mark.it_postcommit
    58    def test_autocomplete_it(self):
    59      with TestPipeline(is_integration_test=True) as p:
    60        words = p | beam.io.ReadFromText(self.KINGLEAR_INPUT)
    61        result = words | autocomplete.TopPerPrefix(10)
    62        # values must be hashable for now
    63        result = result | beam.Map(
    64            lambda k_vs: [k_vs[0], k_vs[1][0][0], k_vs[1][0][1]])
    65        checksum = (
    66            result
    67            | beam.Map(lambda x: int(compute_hash(x)[:8], 16))
    68            | beam.CombineGlobally(sum))
    69  
    70        assert_that(checksum, equal_to([self.KINGLEAR_HASH_SUM]))
    71  
    72  
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()