github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/ml/gcp/naturallanguageml_test_it.py

github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/ml/gcp/naturallanguageml_test_it.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  # pytype: skip-file
    18  
    19  import unittest
    20  
    21  import pytest
    22  
    23  import apache_beam as beam
    24  from apache_beam.testing.test_pipeline import TestPipeline
    25  from apache_beam.testing.util import assert_that
    26  from apache_beam.testing.util import equal_to
    27  
    28  # Protect against environments where Google Cloud Natural Language client is
    29  # not available.
    30  try:
    31    from apache_beam.ml.gcp.naturallanguageml import AnnotateText
    32    from apache_beam.ml.gcp.naturallanguageml import Document
    33    from apache_beam.ml.gcp.naturallanguageml import enums
    34    from apache_beam.ml.gcp.naturallanguageml import types
    35  except ImportError:
    36    AnnotateText = None
    37  
    38  
    39  def extract(response):
    40    yield beam.pvalue.TaggedOutput('language', response.language)
    41    yield beam.pvalue.TaggedOutput(
    42        'parts_of_speech',
    43        [
    44            enums.PartOfSpeech.Tag(x.part_of_speech.tag).name
    45            for x in response.tokens
    46        ])
    47  
    48  
    49  @pytest.mark.it_postcommit
    50  @unittest.skipIf(AnnotateText is None, 'GCP dependencies are not installed')
    51  class NaturalLanguageMlTestIT(unittest.TestCase):
    52    def test_analyzing_syntax(self):
    53      with TestPipeline(is_integration_test=True) as p:
    54        output = (
    55            p
    56            | beam.Create([Document('Unified programming model.')])
    57            | AnnotateText(
    58                types.AnnotateTextRequest.Features(extract_syntax=True))
    59            | beam.ParDo(extract).with_outputs('language', 'parts_of_speech'))
    60  
    61        assert_that(output.language, equal_to(['en']), label='verify_language')
    62        assert_that(
    63            output.parts_of_speech,
    64            equal_to([['ADJ', 'NOUN', 'NOUN', 'PUNCT']]),
    65            label='verify_parts_of_speech')
    66  
    67  
# Run this module's tests via unittest when the file is executed as a script.
if __name__ == '__main__':
  unittest.main()