go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/auth/authdb/dump/dump.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package dump implements loading AuthDB from dumps in Google Storage.
    16  package dump
    17  
    18  import (
    19  	"bytes"
    20  	"context"
    21  	"crypto/sha512"
    22  	"fmt"
    23  	"io"
    24  	"net/http"
    25  	"time"
    26  
    27  	"github.com/golang/protobuf/jsonpb"
    28  	"google.golang.org/protobuf/proto"
    29  
    30  	"go.chromium.org/luci/common/errors"
    31  	"go.chromium.org/luci/common/logging"
    32  	"go.chromium.org/luci/common/retry"
    33  	"go.chromium.org/luci/common/retry/transient"
    34  
    35  	"go.chromium.org/luci/server/auth"
    36  	"go.chromium.org/luci/server/auth/authdb"
    37  	"go.chromium.org/luci/server/auth/service"
    38  	"go.chromium.org/luci/server/auth/service/protocol"
    39  	"go.chromium.org/luci/server/auth/signing"
    40  )
    41  
    42  // Fetcher can fetch AuthDB snapshots from GCS dumps, requesting access through
    43  // Auth Service if necessary.
    44  //
    45  // It's designed not to depend on Auth Service availability at all if everything
    46  // is already setup (i.e. the access to AuthDB snapshot is granted). For that
    47  // reason it requires the location of GCS dump and name of Auth Service's
    48  // signing account to be provided as static configuration (since we don't want
    49  // to make RPCs to potentially unavailable Auth Service to discover them).
    50  //
    51  // The only time Auth Service is directly hit is when GCS returns permission
    52  // errors. When this happens, Fetcher tries to authorize itself through the
    53  // Auth Service API call and then retries the fetch.
    54  type Fetcher struct {
    55  	StorageDumpPath    string   // GCS storage path to the dump "<bucket>/<object>"
    56  	AuthServiceURL     string   // URL of the auth service "https://..."
    57  	AuthServiceAccount string   // service account name that signed the blob
    58  	OAuthScopes        []string // scopes to use when making OAuth tokens
    59  
    60  	testRetryPolicy   func() retry.Iterator       // how to retry, mocked in tests
    61  	testStorageURL    string                      // Google Storage URL, mocked in tests
    62  	testStorageClient *http.Client                // client to access Google Storage, mocked in tests
    63  	testSigningCerts  *signing.PublicCertificates // certs to use to check signature, mocked in tests
    64  }
    65  
    66  // FetchAuthDB checks whether there's a newer version of AuthDB available in
    67  // GCS and fetches it if so. If 'cur' is already up-to-date, returns it as is.
    68  //
    69  // Logs and retries errors internally until the context cancellation or timeout.
    70  func (f *Fetcher) FetchAuthDB(ctx context.Context, cur *authdb.SnapshotDB) (fresh *authdb.SnapshotDB, err error) {
    71  	client := f.testStorageClient
    72  	if client == nil {
    73  		t, err := auth.GetRPCTransport(ctx, auth.AsSelf, auth.WithScopes(f.OAuthScopes...))
    74  		if err != nil {
    75  			return nil, errors.Reason("can't get authenticating transport").Err()
    76  		}
    77  		client = &http.Client{Transport: t}
    78  	}
    79  
    80  	retryPolicy := f.testRetryPolicy
    81  	if retryPolicy == nil {
    82  		retryPolicy = transient.Only(indefiniteRetry)
    83  	}
    84  
    85  	err = retry.Retry(ctx, retryPolicy, func() (err error) {
    86  		fresh, err = f.doFetchAttempt(ctx, cur, client)
    87  		return err
    88  	}, func(err error, wait time.Duration) {
    89  		logging.Warningf(ctx, "Failed to fetch AuthDB dump, will retry in %s: %s", wait, err)
    90  	})
    91  	return
    92  }
    93  
    94  // indefiniteRetry is retry.Iterator that retries indefinitely.
    95  func indefiniteRetry() retry.Iterator {
    96  	return &retry.ExponentialBackoff{
    97  		Limited: retry.Limited{
    98  			Retries: -1,
    99  			Delay:   500 * time.Millisecond,
   100  		},
   101  		MaxDelay: 30 * time.Second,
   102  	}
   103  }
   104  
   105  // doFetchAttempt is one iteration of FetchAuthDB retry loop.
   106  func (f *Fetcher) doFetchAttempt(ctx context.Context, cur *authdb.SnapshotDB, client *http.Client) (*authdb.SnapshotDB, error) {
   107  	// Fetch a tiny latest.json. In most cases this is the only RPC we'll do.
   108  	latestRev, needAccess, err := f.fetchLatestRev(ctx, client)
   109  	if err != nil {
   110  		return nil, err
   111  	}
   112  
   113  	// If have no access, ask for it and immediately try again.
   114  	if needAccess {
   115  		if err := f.requestAccess(ctx); err != nil {
   116  			return nil, err
   117  		}
   118  		switch latestRev, needAccess, err = f.fetchLatestRev(ctx, client); {
   119  		case err != nil:
   120  			return nil, err
   121  		case needAccess: // this should not be happening
   122  			return nil, errors.Reason("still no access to GCS").Tag(transient.Tag).Err()
   123  		}
   124  	}
   125  
   126  	// Skip the rest if we already have same or more recent revision.
   127  	if cur != nil && cur.Rev >= latestRev {
   128  		if cur.Rev > latestRev {
   129  			logging.Warningf(ctx, "AuthDB dump revision went back in time (we have %d, the dump is %d)", cur.Rev, latestRev)
   130  		}
   131  		return cur, nil
   132  	}
   133  
   134  	// Fetch and validate the new snapshot.
   135  	logging.Infof(ctx, "AuthDB rev %d is available, fetching it...", latestRev)
   136  	signed, err := f.fetchSignedAuthDB(ctx, client)
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  	if err := f.checkSignature(ctx, signed); err != nil {
   141  		return nil, err
   142  	}
   143  	fresh, err := f.deserializeAuthDB(ctx, signed.AuthDbBlob)
   144  	if err != nil {
   145  		return nil, err
   146  	}
   147  
   148  	// Make sure we don't switch to an older revisions no matter what. This should
   149  	// not be happening.
   150  	if cur != nil && fresh.Rev <= cur.Rev {
   151  		logging.Errorf(ctx, "Unexpectedly got an older snapshot (%d <= %d), ignoring it", fresh.Rev, cur.Rev)
   152  		return cur, nil
   153  	}
   154  	return fresh, nil
   155  }
   156  
   157  // fetchLatestRev returns the revision of the latest AuthDB dump in the storage.
   158  //
   159  // On access errors returns (0, true, nil). All other errors are considered
   160  // transient.
   161  func (f *Fetcher) fetchLatestRev(ctx context.Context, client *http.Client) (rev int64, needAccess bool, err error) {
   162  	switch code, resp, err := f.fetchFromGCS(ctx, client, "latest.json"); {
   163  	case err != nil:
   164  		return 0, false, transient.Tag.Apply(err)
   165  	case code == http.StatusOK:
   166  		rev := protocol.AuthDBRevision{}
   167  		if err := jsonpb.Unmarshal(bytes.NewReader(resp), &rev); err != nil {
   168  			return 0, false, errors.Annotate(err, "failed to unmarshal AuthDBRevision").Err()
   169  		}
   170  		return rev.AuthDbRev, false, nil
   171  	case code == http.StatusForbidden || code == http.StatusNotFound:
   172  		logging.Errorf(ctx, "Permission errors when fetching latest.json")
   173  		return 0, true, nil
   174  	default:
   175  		return 0, false, errors.Reason("got HTTP %d when fetching latest.json:\n%s", code, resp).Tag(transient.Tag).Err()
   176  	}
   177  }
   178  
   179  // fetchSignedAuthDB fetches SignedAuthDB from GCS and deserializes it.
   180  func (f *Fetcher) fetchSignedAuthDB(ctx context.Context, client *http.Client) (*protocol.SignedAuthDB, error) {
   181  	code, resp, err := f.fetchFromGCS(ctx, client, "latest.db")
   182  	switch {
   183  	case err != nil:
   184  		return nil, transient.Tag.Apply(err)
   185  	case code == http.StatusOK:
   186  		logging.Infof(ctx, "Fetched AuthDB snapshot (%.1f Kb)", float32(len(resp))/1024)
   187  		db := protocol.SignedAuthDB{}
   188  		if err := proto.Unmarshal(resp, &db); err != nil {
   189  			return nil, errors.Annotate(err, "failed to unmarshal SignedAuthDB").Err()
   190  		}
   191  		return &db, nil
   192  	default:
   193  		return nil, errors.Reason("got HTTP %d when fetching latest.json:\n%s", code, resp).Tag(transient.Tag).Err()
   194  	}
   195  }
   196  
   197  // checkSignature checks the signature in SignedAuthDB.
   198  func (f *Fetcher) checkSignature(ctx context.Context, s *protocol.SignedAuthDB) error {
   199  	if s.SignerId != f.AuthServiceAccount {
   200  		return errors.Reason("the snapshot is signed by %q, but we accept only %q", s.SignerId, f.AuthServiceAccount).Err()
   201  	}
   202  
   203  	certs := f.testSigningCerts
   204  	if certs == nil {
   205  		var err error
   206  		if certs, err = signing.FetchCertificatesForServiceAccount(ctx, s.SignerId); err != nil {
   207  			return errors.Annotate(err, "failed to fetch certs of %q", s.SignerId).Tag(transient.Tag).Err()
   208  		}
   209  	}
   210  
   211  	hash := sha512.Sum512(s.AuthDbBlob)
   212  	if err := certs.CheckSignature(s.SigningKeyId, hash[:], s.Signature); err != nil {
   213  		return errors.Annotate(err, "failed to verify that AuthDB was signed by %q", s.SignerId).Err()
   214  	}
   215  	return nil
   216  }
   217  
   218  // deserializeAuthDB unmarshals and validates AuthDB stored in serialized
   219  // ReplicationPushRequest.
   220  func (f *Fetcher) deserializeAuthDB(ctx context.Context, blob []byte) (*authdb.SnapshotDB, error) {
   221  	m := protocol.ReplicationPushRequest{}
   222  	if err := proto.Unmarshal(blob, &m); err != nil {
   223  		return nil, errors.Annotate(err, "failed to unmarshal ReplicationPushRequest").Err()
   224  	}
   225  	rev := m.Revision.AuthDbRev
   226  	logging.Infof(ctx,
   227  		"AuthDB snapshot rev %d generated by %s (using components.auth v%s)",
   228  		rev, m.Revision.PrimaryId, m.AuthCodeVersion)
   229  	snap, err := authdb.NewSnapshotDB(m.AuthDb, f.AuthServiceURL, rev, true)
   230  	if err != nil {
   231  		return nil, errors.Annotate(err, "snapshot at rev %d fails validation", rev).Err()
   232  	}
   233  	return snap, nil
   234  }
   235  
   236  // requestAccess asks Auth Service to grant us access to the AuthDB dump.
   237  func (f *Fetcher) requestAccess(ctx context.Context) error {
   238  	svc := service.AuthService{
   239  		URL:         f.AuthServiceURL,
   240  		OAuthScopes: f.OAuthScopes, // use same scopes as for GCS to reuse the cached token
   241  	}
   242  	logging.Warningf(ctx, "Asking %s to grant us access to read %q...", f.AuthServiceURL, f.StorageDumpPath)
   243  	switch info, err := svc.RequestAccess(ctx); {
   244  	case err != nil:
   245  		return transient.Tag.Apply(err)
   246  	case info.StorageDumpPath == "":
   247  		return errors.Reason("service %s is not configured to upload AuthDB to GCS", f.AuthServiceURL).Err()
   248  	case info.StorageDumpPath != f.StorageDumpPath:
   249  		// Note: we can't just dynamically "fix" f.StorageDumpPath. It is important
   250  		// that original configuration (e.g. CLI flag) is fixed too, otherwise after
   251  		// restart we'll resume looking at the wrong place. So treat this situation
   252  		// as a fatal error.
   253  		return errors.Reason("wrong configuration: service %s uploads AuthDB to %q, but we are looking at %q",
   254  			f.AuthServiceURL, info.StorageDumpPath, f.StorageDumpPath).Err()
   255  	default:
   256  		logging.Infof(ctx, "Access granted")
   257  		return nil
   258  	}
   259  }
   260  
   261  // fetchFromGCS fetches gs://<StorageDumpPath>/<rel> file into memory.
   262  func (f *Fetcher) fetchFromGCS(ctx context.Context, client *http.Client, rel string) (statusCode int, body []byte, err error) {
   263  	storageURL := "https://storage.googleapis.com"
   264  	if f.testStorageURL != "" {
   265  		storageURL = f.testStorageURL
   266  	}
   267  	url := fmt.Sprintf("%s/%s/%s", storageURL, f.StorageDumpPath, rel)
   268  
   269  	req, _ := http.NewRequest("GET", url, nil)
   270  	resp, err := client.Do(req.WithContext(ctx))
   271  	if err != nil {
   272  		return 0, nil, errors.Annotate(err, "GET %s", url).Err()
   273  	}
   274  	defer resp.Body.Close()
   275  
   276  	blob, err := io.ReadAll(resp.Body)
   277  	if err != nil {
   278  		return 0, nil, errors.Annotate(err, "GET %s", url).Err()
   279  	}
   280  	return resp.StatusCode, blob, nil
   281  }