github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/dataframe/io_it_test.py

github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/dataframe/io_it_test.py (about)

     1  #!/usr/bin/env python
     2  # -*- coding: utf-8 -*-
     3  #
     4  # Licensed to the Apache Software Foundation (ASF) under one or more
     5  # contributor license agreements.  See the NOTICE file distributed with
     6  # this work for additional information regarding copyright ownership.
     7  # The ASF licenses this file to You under the Apache License, Version 2.0
     8  # (the "License"); you may not use this file except in compliance with
     9  # the License.  You may obtain a copy of the License at
    10  #
    11  #    http://www.apache.org/licenses/LICENSE-2.0
    12  #
    13  # Unless required by applicable law or agreed to in writing, software
    14  # distributed under the License is distributed on an "AS IS" BASIS,
    15  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  # See the License for the specific language governing permissions and
    17  # limitations under the License.
    18  #
    19  
    20  """Integration tests for Dataframe sources and sinks."""
    21  # pytype: skip-file
    22  
    23  import logging
    24  import unittest
    25  
    26  import pytest
    27  
    28  import apache_beam.io.gcp.bigquery
    29  from apache_beam.testing.test_pipeline import TestPipeline
    30  from apache_beam.testing.util import assert_that
    31  from apache_beam.testing.util import equal_to
    32  
# Module-level logger named after this module.
_LOGGER = logging.getLogger(__name__)

# Optional dependency probe: if apitools cannot be imported, HttpError is set
# to None. The test class below uses that None value as its skip condition
# ('GCP dependencies are not installed').
try:
  from apitools.base.py.exceptions import HttpError
except ImportError:
  HttpError = None
    39  
    40  
    41  @unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
    42  class ReadUsingReadGbqTests(unittest.TestCase):
    43    @pytest.mark.it_postcommit
    44    def test_ReadGbq(self):
    45      from apache_beam.dataframe import convert
    46      with TestPipeline(is_integration_test=True) as p:
    47        actual_df = p | apache_beam.dataframe.io.read_gbq(
    48            table="apache-beam-testing:beam_bigquery_io_test."
    49            "dfsqltable_3c7d6fd5_16e0460dfd0",
    50            use_bqstorage_api=False)
    51        assert_that(
    52            convert.to_pcollection(actual_df),
    53            equal_to([(3, 'customer1', 'test'), (1, 'customer1', 'test'),
    54                      (2, 'customer2', 'test'), (4, 'customer2', 'test')]))
    55  
    56    @pytest.mark.it_postcommit
    57    def test_ReadGbq_export_with_project(self):
    58      from apache_beam.dataframe import convert
    59      with TestPipeline(is_integration_test=True) as p:
    60        actual_df = p | apache_beam.dataframe.io.read_gbq(
    61            table="dfsqltable_3c7d6fd5_16e0460dfd0",
    62            dataset="beam_bigquery_io_test",
    63            project_id="apache-beam-testing",
    64            use_bqstorage_api=False)
    65        assert_that(
    66            convert.to_pcollection(actual_df),
    67            equal_to([(3, 'customer1', 'test'), (1, 'customer1', 'test'),
    68                      (2, 'customer2', 'test'), (4, 'customer2', 'test')]))
    69  
    70    @pytest.mark.it_postcommit
    71    def test_ReadGbq_direct_read(self):
    72      from apache_beam.dataframe import convert
    73      with TestPipeline(is_integration_test=True) as p:
    74        actual_df = p | apache_beam.dataframe.io.\
    75            read_gbq(
    76            table=
    77            "apache-beam-testing:beam_bigquery_io_test."
    78            "dfsqltable_3c7d6fd5_16e0460dfd0",
    79            use_bqstorage_api=True)
    80        assert_that(
    81            convert.to_pcollection(actual_df),
    82            equal_to([(3, 'customer1', 'test'), (1, 'customer1', 'test'),
    83                      (2, 'customer2', 'test'), (4, 'customer2', 'test')]))
    84  
    85    @pytest.mark.it_postcommit
    86    def test_ReadGbq_direct_read_with_project(self):
    87      from apache_beam.dataframe import convert
    88      with TestPipeline(is_integration_test=True) as p:
    89        actual_df = p | apache_beam.dataframe.io.read_gbq(
    90            table="dfsqltable_3c7d6fd5_16e0460dfd0",
    91            dataset="beam_bigquery_io_test",
    92            project_id="apache-beam-testing",
    93            use_bqstorage_api=True)
    94        assert_that(
    95            convert.to_pcollection(actual_df),
    96            equal_to([(3, 'customer1', 'test'), (1, 'customer1', 'test'),
    97                      (2, 'customer2', 'test'), (4, 'customer2', 'test')]))
    98  
    99    @pytest.mark.it_postcommit
   100    def test_ReadGbq_with_computation(self):
   101      from apache_beam.dataframe import convert
   102      with TestPipeline(is_integration_test=True) as p:
   103        beam_df = p | apache_beam.dataframe.io.read_gbq(
   104            table="dfsqltable_3c7d6fd5_16e0460dfd0",
   105            dataset="beam_bigquery_io_test",
   106            project_id="apache-beam-testing")
   107        actual_df = beam_df.groupby('id').count()
   108        assert_that(
   109            convert.to_pcollection(actual_df, include_indexes=True),
   110            equal_to([(1, 1, 1), (2, 1, 1), (3, 1, 1), (4, 1, 1)]))
   111  
   112  
   113  if __name__ == '__main__':
   114    logging.getLogger().setLevel(logging.INFO)
   115    unittest.main()