github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/internal/gcp/auth.py (about)

     1  #
     2  # Licensed to the Apache Software Foundation (ASF) under one or more
     3  # contributor license agreements.  See the NOTICE file distributed with
     4  # this work for additional information regarding copyright ownership.
     5  # The ASF licenses this file to You under the Apache License, Version 2.0
     6  # (the "License"); you may not use this file except in compliance with
     7  # the License.  You may obtain a copy of the License at
     8  #
     9  #    http://www.apache.org/licenses/LICENSE-2.0
    10  #
    11  # Unless required by applicable law or agreed to in writing, software
    12  # distributed under the License is distributed on an "AS IS" BASIS,
    13  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  # See the License for the specific language governing permissions and
    15  # limitations under the License.
    16  #
    17  
    18  """Dataflow credentials and authentication."""
    19  
    20  # pytype: skip-file
    21  
    22  import logging
    23  import socket
    24  import threading
    25  from typing import Optional
    26  
    27  from apache_beam.options.pipeline_options import GoogleCloudOptions
    28  from apache_beam.options.pipeline_options import PipelineOptions
    29  
    30  # google.auth is only available when Beam is installed with the gcp extra.
    31  try:
    32    from google.auth import impersonated_credentials
    33    import google.auth
    34    import google_auth_httplib2
    35    _GOOGLE_AUTH_AVAILABLE = True
    36  except ImportError:
    37    _GOOGLE_AUTH_AVAILABLE = False
    38  
# Whether this process is running in GCE. When it is, we can authenticate
# with VM credentials. Starts out False; set_running_in_gce() flips it to
# True.
is_running_in_gce = False

# The project running the workflow. Stays None until set_running_in_gce()
# stores the value taken from worker startup information.
executing_project = None

# Module-level logger used for the messages emitted by this file.
_LOGGER = logging.getLogger(__name__)
    47  
    48  
    49  def set_running_in_gce(worker_executing_project):
    50    """For internal use only; no backwards-compatibility guarantees.
    51  
    52    Informs the authentication library that we are running in GCE.
    53  
    54    When we are running in GCE, we have the option of using the VM metadata
    55    credentials for authentication to Google services.
    56  
    57    Args:
    58      worker_executing_project: The project running the workflow. This information
    59        comes from worker startup information.
    60    """
    61    global is_running_in_gce
    62    global executing_project
    63    is_running_in_gce = True
    64    executing_project = worker_executing_project
    65  
    66  
    67  def get_service_credentials(pipeline_options):
    68    # type: (PipelineOptions) -> Optional[google.auth.credentials.Credentials]
    69  
    70    """For internal use only; no backwards-compatibility guarantees.
    71  
    72    Get credentials to access Google services.
    73    Args:
    74      pipeline_options: Pipeline options, used in creating credentials
    75        like impersonated credentials.
    76  
    77    Returns:
    78      A ``google.auth.credentials.Credentials`` object or None if credentials
    79      not found. Returned object is thread-safe.
    80    """
    81    return _Credentials.get_service_credentials(pipeline_options)
    82  
    83  
    84  if _GOOGLE_AUTH_AVAILABLE:
    85  
    86    class _ApitoolsCredentialsAdapter:
    87      """For internal use only; no backwards-compatibility guarantees.
    88  
    89      Adapter allowing use of google-auth credentials with apitools, which
    90      normally expects credentials from the oauth2client library. This allows
    91      upgrading the auth library used by Beam without simultaneously upgrading
    92      all the GCP client libraries (a much larger change).
    93      """
    94      def __init__(self, google_auth_credentials):
    95        self._google_auth_credentials = google_auth_credentials
    96  
    97      def authorize(self, http):
    98        """Return an http client authorized with the google-auth credentials.
    99  
   100        Args:
   101          http: httplib2.Http, an http object to be used to make the refresh
   102            request.
   103  
   104        Returns:
   105          google_auth_httplib2.AuthorizedHttp: An authorized http client.
   106        """
   107        return google_auth_httplib2.AuthorizedHttp(
   108            self._google_auth_credentials, http=http)
   109  
   110      def __getattr__(self, attr):
   111        """Delegate attribute access to underlying google-auth credentials."""
   112        return getattr(self._google_auth_credentials, attr)
   113  
   114  
   115  class _Credentials(object):
   116    _credentials_lock = threading.Lock()
   117    _credentials_init = False
   118    _credentials = None
   119  
   120    @classmethod
   121    def get_service_credentials(cls, pipeline_options):
   122      # type: (PipelineOptions) -> Optional[google.auth.credentials.Credentials]
   123      with cls._credentials_lock:
   124        if cls._credentials_init:
   125          return cls._credentials
   126  
   127        # apitools use urllib with the global timeout. Set it to 60 seconds
   128        # to prevent network related stuckness issues.
   129        if not socket.getdefaulttimeout():
   130          _LOGGER.info("Setting socket default timeout to 60 seconds.")
   131          socket.setdefaulttimeout(60)
   132        _LOGGER.info(
   133            "socket default timeout is %s seconds.", socket.getdefaulttimeout())
   134  
   135        cls._credentials = cls._get_service_credentials(pipeline_options)
   136        cls._credentials_init = True
   137  
   138      return cls._credentials
   139  
   140    @staticmethod
   141    def _get_service_credentials(pipeline_options):
   142      # type: (PipelineOptions) -> Optional[google.auth.credentials.Credentials]
   143      if not _GOOGLE_AUTH_AVAILABLE:
   144        _LOGGER.warning(
   145            'Unable to find default credentials because the google-auth library '
   146            'is not available. Install the gcp extra (apache_beam[gcp]) to use '
   147            'Google default credentials. Connecting anonymously.')
   148        return None
   149  
   150      try:
   151        # pylint: disable=c-extension-no-member
   152        credentials, _ = google.auth.default(
   153            scopes=pipeline_options.view_as(GoogleCloudOptions).gcp_oauth_scopes)
   154        credentials = _Credentials._add_impersonation_credentials(
   155            credentials, pipeline_options)
   156        credentials = _ApitoolsCredentialsAdapter(credentials)
   157        logging.debug(
   158            'Connecting using Google Application Default '
   159            'Credentials.')
   160        return credentials
   161      except Exception as e:
   162        _LOGGER.warning(
   163            'Unable to find default credentials to use: %s\n'
   164            'Connecting anonymously.',
   165            e)
   166        return None
   167  
   168    @staticmethod
   169    def _add_impersonation_credentials(credentials, pipeline_options):
   170      gcs_options = pipeline_options.view_as(GoogleCloudOptions)
   171      impersonate_service_account = gcs_options.impersonate_service_account
   172      scopes = gcs_options.gcp_oauth_scopes
   173      if impersonate_service_account:
   174        _LOGGER.info('Impersonating: %s', impersonate_service_account)
   175        impersonate_accounts = impersonate_service_account.split(',')
   176        target_principal = impersonate_accounts[-1]
   177        delegate_to = impersonate_accounts[0:-1]
   178        credentials = impersonated_credentials.Credentials(
   179            source_credentials=credentials,
   180            target_principal=target_principal,
   181            delegates=delegate_to,
   182            target_scopes=scopes,
   183        )
   184      return credentials