#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Origin: github.com/apache/beam/sdks/v2@v2.48.2
#   python/apache_beam/dataframe/io_it_test.py

"""Integration tests for Dataframe sources and sinks."""
# pytype: skip-file

import logging
import unittest

import pytest

# Kept from the original module. The tests below no longer reach
# `apache_beam.dataframe.io` through this binding; they import it explicitly.
import apache_beam.io.gcp.bigquery  # pylint: disable=unused-import
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that
from apache_beam.testing.util import equal_to

_LOGGER = logging.getLogger(__name__)

try:
  from apitools.base.py.exceptions import HttpError
except ImportError:
  HttpError = None

# Coordinates of the BigQuery table every test in this module reads.
_PROJECT = 'apache-beam-testing'
_DATASET = 'beam_bigquery_io_test'
_TABLE = 'dfsqltable_3c7d6fd5_16e0460dfd0'
# Fully qualified "project:dataset.table" form of the same table.
_TABLE_SPEC = _PROJECT + ':' + _DATASET + '.' + _TABLE

# Rows the plain read tests expect back from the table.
_EXPECTED_ROWS = [
    (3, 'customer1', 'test'),
    (1, 'customer1', 'test'),
    (2, 'customer2', 'test'),
    (4, 'customer2', 'test'),
]


@unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
class ReadUsingReadGbqTests(unittest.TestCase):
  """Integration tests for reading a BigQuery table with ``read_gbq``.

  Every test builds a ``TestPipeline(is_integration_test=True)``, reads the
  same table through ``apache_beam.dataframe.io.read_gbq`` and compares the
  resulting PCollection against known rows. The whole class is skipped when
  ``apitools`` (and therefore ``HttpError``) cannot be imported.
  """
  def _assert_read_gbq_rows(self, **read_gbq_kwargs):
    """Read the table with ``read_gbq`` and assert it yields the known rows.

    Args:
      **read_gbq_kwargs: keyword arguments forwarded unchanged to
        ``apache_beam.dataframe.io.read_gbq``.
    """
    # Function-local imports, as in the original tests (presumably so the
    # module can still be collected without the dataframe extras -- confirm).
    # `io` is imported explicitly instead of relying on another module having
    # loaded the `apache_beam.dataframe.io` submodule as a side effect.
    from apache_beam.dataframe import convert
    from apache_beam.dataframe import io as dataframe_io
    with TestPipeline(is_integration_test=True) as p:
      actual_df = p | dataframe_io.read_gbq(**read_gbq_kwargs)
      assert_that(convert.to_pcollection(actual_df), equal_to(_EXPECTED_ROWS))

  @pytest.mark.it_postcommit
  def test_ReadGbq(self):
    """Read via a full table spec with ``use_bqstorage_api=False``."""
    self._assert_read_gbq_rows(table=_TABLE_SPEC, use_bqstorage_api=False)

  @pytest.mark.it_postcommit
  def test_ReadGbq_export_with_project(self):
    """Read via separate table/dataset/project, ``use_bqstorage_api=False``."""
    self._assert_read_gbq_rows(
        table=_TABLE,
        dataset=_DATASET,
        project_id=_PROJECT,
        use_bqstorage_api=False)

  @pytest.mark.it_postcommit
  def test_ReadGbq_direct_read(self):
    """Read via a full table spec with ``use_bqstorage_api=True``."""
    self._assert_read_gbq_rows(table=_TABLE_SPEC, use_bqstorage_api=True)

  @pytest.mark.it_postcommit
  def test_ReadGbq_direct_read_with_project(self):
    """Read via separate table/dataset/project, ``use_bqstorage_api=True``."""
    self._assert_read_gbq_rows(
        table=_TABLE,
        dataset=_DATASET,
        project_id=_PROJECT,
        use_bqstorage_api=True)

  @pytest.mark.it_postcommit
  def test_ReadGbq_with_computation(self):
    """Group the read dataframe by ``id``, count, and check the result.

    ``use_bqstorage_api`` is left at ``read_gbq``'s default here, and the
    index is included in the output so each tuple starts with the ``id``.
    """
    from apache_beam.dataframe import convert
    from apache_beam.dataframe import io as dataframe_io
    with TestPipeline(is_integration_test=True) as p:
      beam_df = p | dataframe_io.read_gbq(
          table=_TABLE, dataset=_DATASET, project_id=_PROJECT)
      actual_df = beam_df.groupby('id').count()
      assert_that(
          convert.to_pcollection(actual_df, include_indexes=True),
          equal_to([(1, 1, 1), (2, 1, 1), (3, 1, 1), (4, 1, 1)]))


if __name__ == '__main__':
  logging.getLogger().setLevel(logging.INFO)
  unittest.main()