# Source: github.com/apache/beam/sdks/v2@v2.48.2/python/apache_beam/internal/gcp/auth.py
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Dataflow credentials and authentication."""

# pytype: skip-file

import logging
import socket
import threading
from typing import Optional

from apache_beam.options.pipeline_options import GoogleCloudOptions
from apache_beam.options.pipeline_options import PipelineOptions

# google.auth is only available when Beam is installed with the gcp extra.
try:
  from google.auth import impersonated_credentials
  import google.auth
  import google_auth_httplib2
  _GOOGLE_AUTH_AVAILABLE = True
except ImportError:
  _GOOGLE_AUTH_AVAILABLE = False

# When we are running in GCE, we can authenticate with VM credentials.
is_running_in_gce = False

# When we are running in GCE, this value is set based on worker startup
# information.
executing_project = None

_LOGGER = logging.getLogger(__name__)


def set_running_in_gce(worker_executing_project):
  """For internal use only; no backwards-compatibility guarantees.

  Informs the authentication library that we are running in GCE.

  When we are running in GCE, we have the option of using the VM metadata
  credentials for authentication to Google services.

  Args:
    worker_executing_project: The project running the workflow. This information
      comes from worker startup information.
  """
  global is_running_in_gce
  global executing_project
  is_running_in_gce = True
  executing_project = worker_executing_project


def get_service_credentials(pipeline_options):
  # type: (PipelineOptions) -> Optional[google.auth.credentials.Credentials]
  """For internal use only; no backwards-compatibility guarantees.

  Get credentials to access Google services.

  The lookup is delegated to ``_Credentials.get_service_credentials``, which
  computes the credentials once and returns the cached result afterwards.

  Args:
    pipeline_options: Pipeline options, used in creating credentials
      like impersonated credentials.

  Returns:
    A ``google.auth.credentials.Credentials`` object or None if credentials
    not found. Returned object is thread-safe.
  """
  return _Credentials.get_service_credentials(pipeline_options)


if _GOOGLE_AUTH_AVAILABLE:

  class _ApitoolsCredentialsAdapter:
    """For internal use only; no backwards-compatibility guarantees.

    Adapter allowing use of google-auth credentials with apitools, which
    normally expects credentials from the oauth2client library. This allows
    upgrading the auth library used by Beam without simultaneously upgrading
    all the GCP client libraries (a much larger change).
    """
    def __init__(self, google_auth_credentials):
      """Wrap the given google-auth credentials object."""
      self._google_auth_credentials = google_auth_credentials

    def authorize(self, http):
      """Return an http client authorized with the google-auth credentials.

      Args:
        http: httplib2.Http, an http object to be used to make the refresh
          request.

      Returns:
        google_auth_httplib2.AuthorizedHttp: An authorized http client.
      """
      return google_auth_httplib2.AuthorizedHttp(
          self._google_auth_credentials, http=http)

    def __getattr__(self, attr):
      """Delegate attribute access to underlying google-auth credentials.

      Raises:
        AttributeError: if the wrapped credentials object itself has not been
          set on this instance, or if it lacks the requested attribute.
      """
      # __getattr__ only runs when normal lookup fails. If the wrapped
      # credentials attribute itself is missing (e.g. an instance created
      # without running __init__, as copy/pickle do), delegating would re-enter
      # this method without bound; fail with AttributeError instead.
      if attr == '_google_auth_credentials':
        raise AttributeError(attr)
      return getattr(self._google_auth_credentials, attr)


class _Credentials(object):
  """Class-level, lock-protected cache of the service credentials.

  The credentials are resolved on the first call to
  ``get_service_credentials`` and the result (including ``None``) is reused
  for every later call.
  """
  # Guards the one-time initialization of the cached credentials.
  _credentials_lock = threading.Lock()
  # True once a lookup has been attempted, even if it produced None.
  _credentials_init = False
  # Cached result of the lookup; None until initialized or when unavailable.
  _credentials = None

  @classmethod
  def get_service_credentials(cls, pipeline_options):
    # type: (PipelineOptions) -> Optional[google.auth.credentials.Credentials]
    """Return the cached credentials, resolving them on first use.

    Args:
      pipeline_options: Pipeline options used to build the credentials. Only
        the options passed on the first call are consulted; later calls
        return the cached value regardless of their argument.

    Returns:
      The cached credentials object, or None if none could be obtained.
    """
    with cls._credentials_lock:
      if cls._credentials_init:
        return cls._credentials

      # apitools use urllib with the global timeout. Set it to 60 seconds
      # to prevent network related stuckness issues.
      if not socket.getdefaulttimeout():
        _LOGGER.info("Setting socket default timeout to 60 seconds.")
        socket.setdefaulttimeout(60)
      _LOGGER.info(
          "socket default timeout is %s seconds.", socket.getdefaulttimeout())

      cls._credentials = cls._get_service_credentials(pipeline_options)
      cls._credentials_init = True

    return cls._credentials

  @staticmethod
  def _get_service_credentials(pipeline_options):
    # type: (PipelineOptions) -> Optional[google.auth.credentials.Credentials]
    """Build credentials from Google Application Default Credentials.

    Args:
      pipeline_options: Pipeline options supplying the OAuth scopes and the
        optional service account impersonation chain.

    Returns:
      The default credentials, with impersonation applied when configured,
      wrapped in ``_ApitoolsCredentialsAdapter``; or None when google-auth is
      not installed or the lookup raises.
    """
    if not _GOOGLE_AUTH_AVAILABLE:
      _LOGGER.warning(
          'Unable to find default credentials because the google-auth library '
          'is not available. Install the gcp extra (apache_beam[gcp]) to use '
          'Google default credentials. Connecting anonymously.')
      return None

    try:
      # pylint: disable=c-extension-no-member
      credentials, _ = google.auth.default(
          scopes=pipeline_options.view_as(GoogleCloudOptions).gcp_oauth_scopes)
      credentials = _Credentials._add_impersonation_credentials(
          credentials, pipeline_options)
      credentials = _ApitoolsCredentialsAdapter(credentials)
      # Use the module logger, like every other message in this file, so the
      # record is attributed to this module rather than the root logger.
      _LOGGER.debug(
          'Connecting using Google Application Default '
          'Credentials.')
      return credentials
    except Exception as e:
      # Deliberately broad: any failure to obtain credentials falls back to
      # anonymous access instead of aborting the caller.
      _LOGGER.warning(
          'Unable to find default credentials to use: %s\n'
          'Connecting anonymously.',
          e)
      return None

  @staticmethod
  def _add_impersonation_credentials(credentials, pipeline_options):
    """Wrap credentials for service account impersonation when requested.

    The ``impersonate_service_account`` option is read as a comma-separated
    list: the last entry is the target principal and the preceding entries
    are passed as the delegation chain.

    Args:
      credentials: The source credentials.
      pipeline_options: Pipeline options supplying
        ``impersonate_service_account`` and ``gcp_oauth_scopes``.

    Returns:
      Impersonated credentials when the option is set; otherwise the
      ``credentials`` argument unchanged.
    """
    gcs_options = pipeline_options.view_as(GoogleCloudOptions)
    impersonate_service_account = gcs_options.impersonate_service_account
    scopes = gcs_options.gcp_oauth_scopes
    if impersonate_service_account:
      _LOGGER.info('Impersonating: %s', impersonate_service_account)
      # Strip whitespace around each entry so "a@x, b@y" is handled the same
      # as "a@x,b@y".
      impersonate_accounts = [
          account.strip() for account in impersonate_service_account.split(',')
      ]
      target_principal = impersonate_accounts[-1]
      delegate_to = impersonate_accounts[0:-1]
      credentials = impersonated_credentials.Credentials(
          source_credentials=credentials,
          target_principal=target_principal,
          delegates=delegate_to,
          target_scopes=scopes,
      )
    return credentials