go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/auth/oauth.go (about)

     1  // Copyright 2017 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package auth
    16  
    17  import (
    18  	"context"
    19  	"crypto/sha256"
    20  	"encoding/hex"
    21  	"encoding/json"
    22  	"fmt"
    23  	"net/http"
    24  	"sort"
    25  	"strings"
    26  	"time"
    27  
    28  	"golang.org/x/oauth2"
    29  
    30  	"go.chromium.org/luci/auth/identity"
    31  	"go.chromium.org/luci/common/clock"
    32  	"go.chromium.org/luci/common/errors"
    33  	"go.chromium.org/luci/common/gcloud/googleoauth"
    34  	"go.chromium.org/luci/common/logging"
    35  	"go.chromium.org/luci/common/retry"
    36  	"go.chromium.org/luci/common/retry/transient"
    37  	"go.chromium.org/luci/grpc/grpcutil"
    38  
    39  	"go.chromium.org/luci/server/auth/internal"
    40  	"go.chromium.org/luci/server/caching/layered"
    41  )
    42  
var (
	// ErrBadOAuthToken is returned by GoogleOAuth2Method if the access token it
	// checks is either totally invalid, expired or lacks the required scopes.
	//
	// It is tagged with grpcutil.UnauthenticatedTag.
	ErrBadOAuthToken = errors.New("oauth: bad access token", grpcutil.UnauthenticatedTag)

	// ErrBadAuthorizationHeader is returned by GoogleOAuth2Method if it doesn't
	// recognize the format of the Authorization header.
	//
	// It is tagged with grpcutil.UnauthenticatedTag.
	ErrBadAuthorizationHeader = errors.New("oauth: bad Authorization header", grpcutil.UnauthenticatedTag)
)
    52  
// tokenValidationOutcome is returned by validateAccessToken and cached in
// oauthValidationCache.
//
// It contains either the info extracted from the token (Email, ClientID,
// Scopes and Expiry are populated, Error is empty) or an error message if the
// token is invalid (only Error is populated).
//
// ClientID holds the value of the token's 'aud' field as reported by the
// token info endpoint.
type tokenValidationOutcome struct {
	Email    string   `json:"email,omitempty"`
	ClientID string   `json:"client_id,omitempty"`
	Scopes   []string `json:"scopes,omitempty"` // sorted alphabetically
	Expiry   int64    `json:"expiry,omitempty"` // unix timestamp (seconds)
	Error    string   `json:"error,omitempty"`
}
    65  
    66  // SHA256(access token) => JSON-marshalled *tokenValidationOutcome.
    67  var oauthValidationCache = layered.RegisterCache(layered.Parameters[*tokenValidationOutcome]{
    68  	ProcessCacheCapacity: 65536,
    69  	GlobalNamespace:      "oauth_validation_v1",
    70  	Marshal: func(item *tokenValidationOutcome) ([]byte, error) {
    71  		return json.Marshal(item)
    72  	},
    73  	Unmarshal: func(blob []byte) (*tokenValidationOutcome, error) {
    74  		tok := &tokenValidationOutcome{}
    75  		if err := json.Unmarshal(blob, tok); err != nil {
    76  			return nil, err
    77  		}
    78  		return tok, nil
    79  	},
    80  })
    81  
// GoogleOAuth2Method implements Method via Google's OAuth2 token info endpoint.
//
// Note that it uses the endpoint which "has no SLA and is not intended for
// production use". The closest alternative is the /userinfo endpoint, but it
// doesn't return the token expiration time (so we can't cache the result of
// the check), nor the list of OAuth scopes the token has, nor the client ID to
// check against an allowlist.
//
// Google's general recommendation is to use access tokens only for accessing
// Google APIs and to use OpenID Connect Identity tokens for authentication in
// your own services instead (they are locally verifiable JWTs).
//
// Unfortunately, using OpenID tokens for LUCI services and OAuth2 access tokens
// for Google services significantly complicates clients, especially in
// non-trivial cases (like authenticating from a Swarming job): they now must
// support two token kinds and know which one to use when.
//
// There's currently no solution that preserves all of correctness,
// performance, usability and availability:
//   - Using /tokeninfo (as is done currently) sacrifices availability.
//   - Using /userinfo sacrifices correctness (no client ID or scopes check).
//   - Using OpenID ID tokens sacrifices usability for the clients.
type GoogleOAuth2Method struct {
	// Scopes is a list of OAuth scopes to check when authenticating the token.
	//
	// If it is empty, Authenticate treats the method as not applicable.
	Scopes []string

	// tokenInfoEndpoint is used in unit tests to mock the production endpoint.
	//
	// An empty string means "use the default endpoint".
	tokenInfoEndpoint string
}

// Compile-time check that *GoogleOAuth2Method implements UserCredentialsGetter.
var _ UserCredentialsGetter = (*GoogleOAuth2Method)(nil)
   114  
   115  // Authenticate implements Method.
   116  func (m *GoogleOAuth2Method) Authenticate(ctx context.Context, r RequestMetadata) (*User, Session, error) {
   117  	// Extract the access token from the Authorization header.
   118  	header := r.Header("Authorization")
   119  	if header == "" || len(m.Scopes) == 0 {
   120  		return nil, nil, nil // this method is not applicable
   121  	}
   122  	accessToken, err := accessTokenFromHeader(header)
   123  	if err != nil {
   124  		return nil, nil, err
   125  	}
   126  
   127  	// Store only the token hash in the cache, so that if a memory or cache dump
   128  	// ever occurs, the tokens themselves aren't included in it.
   129  	h := sha256.Sum256([]byte(accessToken))
   130  	cacheKey := hex.EncodeToString(h[:])
   131  
   132  	// Verify the token using /tokeninfo endpoint or grab a result of the previous
   133  	// verification. We cache both good and bad tokens for extra 10 min to avoid
   134  	// uselessly rechecking them all the time. Note that a bad token can't turn
   135  	// into a good one with the passage of time, so its OK to cache it. And a good
   136  	// token can turn into a bad one only when it expires (we check it below), so
   137  	// it is also OK to cache it.
   138  	//
   139  	// TODO(vadimsh): Strictly speaking we need to store bad tokens in a separate
   140  	// cache, so a flood of bad tokens (which are very easy to produce, compared
   141  	// to good tokens) doesn't evict good tokens from the process cache.
   142  	outcome, err := oauthValidationCache.GetOrCreate(ctx, cacheKey, func() (*tokenValidationOutcome, time.Duration, error) {
   143  		logging.Infof(ctx, "oauth: validating access token SHA256=%q", cacheKey)
   144  		outcome, expiresIn, err := validateAccessToken(ctx, accessToken, m.tokenInfoEndpoint)
   145  		if err != nil {
   146  			return nil, 0, err
   147  		}
   148  		return outcome, 10*time.Minute + expiresIn, nil
   149  	})
   150  	if err != nil {
   151  		return nil, nil, err // the check itself failed
   152  	}
   153  
   154  	// Fail if the token was never valid.
   155  	if outcome.Error != "" {
   156  		logging.Warningf(ctx, "oauth: access token SHA256=%q: %s", cacheKey, outcome.Error)
   157  		return nil, nil, ErrBadOAuthToken
   158  	}
   159  
   160  	// Fail if the token was once valid but has expired since.
   161  	if expired := clock.Now(ctx).Unix() - outcome.Expiry; expired > 0 {
   162  		logging.Warningf(ctx, "oauth: access token SHA256=%q from %s expired %d sec ago",
   163  			cacheKey, outcome.Email, expired)
   164  		return nil, nil, ErrBadOAuthToken
   165  	}
   166  
   167  	// Fail if the token doesn't have all required scopes.
   168  	var missingScopes []string
   169  	for _, s := range m.Scopes {
   170  		idx := sort.SearchStrings(outcome.Scopes, s)
   171  		if idx == len(outcome.Scopes) || outcome.Scopes[idx] != s {
   172  			missingScopes = append(missingScopes, s)
   173  		}
   174  	}
   175  	if len(missingScopes) != 0 {
   176  		logging.Warningf(ctx, "oauth: access token SHA256=%q from %s doesn't have scopes %q, it has %q",
   177  			cacheKey, outcome.Email, missingScopes, outcome.Scopes)
   178  		return nil, nil, ErrBadOAuthToken
   179  	}
   180  
   181  	// OAuth2 access token representing service accounts have essentially
   182  	// service account's uint64 user ID as an audience. It makes no sense to
   183  	// check it against OAuth2 client ID allowlist (it will basically require us
   184  	// to centrally allowlist every service account ever: we already use groups
   185  	// with service account emails for that).
   186  	if strings.HasSuffix(outcome.Email, ".gserviceaccount.com") {
   187  		outcome.ClientID = ""
   188  	}
   189  
   190  	return &User{
   191  		Identity: identity.Identity("user:" + outcome.Email),
   192  		Email:    outcome.Email,
   193  		ClientID: outcome.ClientID,
   194  	}, nil, nil
   195  }
   196  
   197  // GetUserCredentials implements UserCredentialsGetter.
   198  func (m *GoogleOAuth2Method) GetUserCredentials(ctx context.Context, r RequestMetadata) (*oauth2.Token, error) {
   199  	accessToken, err := accessTokenFromHeader(r.Header("Authorization"))
   200  	if err != nil {
   201  		return nil, err
   202  	}
   203  	return &oauth2.Token{
   204  		AccessToken: accessToken,
   205  		TokenType:   "Bearer",
   206  	}, nil
   207  }
   208  
   209  // accessTokenFromHeader parses Authorization header.
   210  func accessTokenFromHeader(header string) (string, error) {
   211  	typ, tok := internal.SplitAuthHeader(header)
   212  	if typ != "bearer" && typ != "oauth" {
   213  		return "", ErrBadAuthorizationHeader
   214  	}
   215  	return tok, nil
   216  }
   217  
   218  // validateAccessToken uses OAuth2 tokeninfo endpoint to validate an access
   219  // token.
   220  //
   221  // Returns its outcome as tokenValidationOutcome. It either contains a token
   222  // info or an error message if the token is invalid. If the token is valid,
   223  // also returns the duration until it expires.
   224  //
   225  // Returns an error if the check itself fails, e.g. we couldn't make the
   226  // request. Such errors may be transient (network flakes) or fatal
   227  // (auth library misconfiguration).
   228  func validateAccessToken(ctx context.Context, accessToken, tokenInfoEndpoint string) (*tokenValidationOutcome, time.Duration, error) {
   229  	tr, err := GetRPCTransport(ctx, NoAuth)
   230  	if err != nil {
   231  		return nil, 0, err
   232  	}
   233  
   234  	tokenInfo, err := queryTokenInfoEndpoint(ctx, googleoauth.TokenInfoParams{
   235  		AccessToken: accessToken,
   236  		Client:      &http.Client{Transport: tr},
   237  		Endpoint:    tokenInfoEndpoint, // "" means "use default"
   238  	})
   239  	if err != nil {
   240  		if err == googleoauth.ErrBadToken {
   241  			return &tokenValidationOutcome{Error: err.Error()}, 0, nil
   242  		}
   243  		return nil, 0, errors.Annotate(err, "oauth: transient error when validating the token").Tag(transient.Tag).Err()
   244  	}
   245  
   246  	// Verify the token contains all necessary fields.
   247  	errorMsg := ""
   248  	switch {
   249  	case tokenInfo.Email == "":
   250  		errorMsg = "the token is not associated with an email"
   251  	case !tokenInfo.EmailVerified:
   252  		errorMsg = fmt.Sprintf("the email %s in the token is not verified", tokenInfo.Email)
   253  	case tokenInfo.ExpiresIn <= 0:
   254  		errorMsg = fmt.Sprintf("in a token from %s 'expires_in' %d is not a positive integer", tokenInfo.Email, tokenInfo.ExpiresIn)
   255  	case tokenInfo.Aud == "":
   256  		errorMsg = fmt.Sprintf("in a token from %s 'aud' field is empty", tokenInfo.Email)
   257  	case tokenInfo.Scope == "":
   258  		errorMsg = fmt.Sprintf("in a token from %s 'scope' field is empty", tokenInfo.Scope)
   259  	}
   260  	if errorMsg != "" {
   261  		return &tokenValidationOutcome{Error: errorMsg}, 0, nil
   262  	}
   263  
   264  	// Verify the email passes our regexp check.
   265  	if _, err := identity.MakeIdentity("user:" + tokenInfo.Email); err != nil {
   266  		return &tokenValidationOutcome{Error: err.Error()}, 0, nil
   267  	}
   268  
   269  	// Sort scopes alphabetically to speed up lookups in Authenticate.
   270  	scopes := strings.Split(tokenInfo.Scope, " ")
   271  	sort.Strings(scopes)
   272  
   273  	// The token is good.
   274  	expiresIn := time.Duration(tokenInfo.ExpiresIn) * time.Second
   275  	return &tokenValidationOutcome{
   276  		Email:    tokenInfo.Email,
   277  		ClientID: tokenInfo.Aud,
   278  		Scopes:   scopes,
   279  		Expiry:   clock.Now(ctx).Add(expiresIn).Unix(),
   280  	}, expiresIn, nil
   281  }
   282  
   283  // queryTokenInfoEndpoint calls the token info endpoint with retries.
   284  func queryTokenInfoEndpoint(ctx context.Context, params googleoauth.TokenInfoParams) (info *googleoauth.TokenInfo, err error) {
   285  	ctx = clock.Tag(ctx, "oauth-tokeninfo-retry")
   286  
   287  	retryParams := func() retry.Iterator {
   288  		return &retry.ExponentialBackoff{
   289  			Limited: retry.Limited{
   290  				Delay:   10 * time.Millisecond,
   291  				Retries: 5,
   292  			},
   293  		}
   294  	}
   295  
   296  	err = retry.Retry(ctx, transient.Only(retryParams), func() (err error) {
   297  		ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
   298  		defer cancel()
   299  
   300  		start := clock.Now(ctx)
   301  		outcome := "ERROR"
   302  
   303  		switch info, err = googleoauth.GetTokenInfo(ctx, params); {
   304  		case err == nil:
   305  			outcome = "OK"
   306  		case err == googleoauth.ErrBadToken:
   307  			outcome = "BAD_TOKEN"
   308  		case errors.Unwrap(err) == context.DeadlineExceeded:
   309  			outcome = "DEADLINE"
   310  		}
   311  
   312  		tokenInfoCallDuration.Add(ctx, float64(clock.Since(ctx, start).Nanoseconds()/1000), outcome)
   313  
   314  		return err
   315  	}, retry.LogCallback(ctx, "tokeninfo"))
   316  
   317  	return info, err
   318  }