github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/ml/gcp/naturallanguageml_test_it.py (about) 1 # 2 # Licensed to the Apache Software Foundation (ASF) under one or more 3 # contributor license agreements. See the NOTICE file distributed with 4 # this work for additional information regarding copyright ownership. 5 # The ASF licenses this file to You under the Apache License, Version 2.0 6 # (the "License"); you may not use this file except in compliance with 7 # the License. You may obtain a copy of the License at 8 # 9 # http://www.apache.org/licenses/LICENSE-2.0 10 # 11 # Unless required by applicable law or agreed to in writing, software 12 # distributed under the License is distributed on an "AS IS" BASIS, 13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 # See the License for the specific language governing permissions and 15 # limitations under the License. 16 # 17 # pytype: skip-file 18 19 import unittest 20 21 import pytest 22 23 import apache_beam as beam 24 from apache_beam.testing.test_pipeline import TestPipeline 25 from apache_beam.testing.util import assert_that 26 from apache_beam.testing.util import equal_to 27 28 # Protect against environments where Google Cloud Natural Language client is 29 # not available. 
try:
  from apache_beam.ml.gcp.naturallanguageml import AnnotateText
  from apache_beam.ml.gcp.naturallanguageml import Document
  from apache_beam.ml.gcp.naturallanguageml import enums
  from apache_beam.ml.gcp.naturallanguageml import types
except ImportError:
  # Sentinel checked by the skipIf decorator on the test class below.
  AnnotateText = None


def extract(response):
  """Split one annotation response into two tagged outputs.

  Yields a ``'language'`` output carrying ``response.language`` and then a
  ``'parts_of_speech'`` output carrying a list with the
  ``enums.PartOfSpeech.Tag`` name of every entry in ``response.tokens``.
  """
  tagged_output = beam.pvalue.TaggedOutput

  yield tagged_output('language', response.language)

  # Built after the first yield, so the language output is always emitted
  # before the tokens are inspected.
  tag_names = [
      enums.PartOfSpeech.Tag(token.part_of_speech.tag).name
      for token in response.tokens
  ]
  yield tagged_output('parts_of_speech', tag_names)


@pytest.mark.it_postcommit
@unittest.skipIf(AnnotateText is None, 'GCP dependencies are not installed')
class NaturalLanguageMlTestIT(unittest.TestCase):
  """Integration test that runs AnnotateText with syntax extraction."""
  def test_analyzing_syntax(self):
    # Annotate a single short document and verify the language and the
    # part-of-speech tag names produced by `extract`.
    with TestPipeline(is_integration_test=True) as pipeline:
      documents = pipeline | beam.Create(
          [Document('Unified programming model.')])
      syntax_features = types.AnnotateTextRequest.Features(
          extract_syntax=True)
      responses = documents | AnnotateText(syntax_features)
      output = responses | beam.ParDo(extract).with_outputs(
          'language', 'parts_of_speech')

      expected_language = ['en']
      expected_tags = [['ADJ', 'NOUN', 'NOUN', 'PUNCT']]

      assert_that(
          output.language,
          equal_to(expected_language),
          label='verify_language')
      assert_that(
          output.parts_of_speech,
          equal_to(expected_tags),
          label='verify_parts_of_speech')


if __name__ == '__main__':
  unittest.main()