go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/appengine/gaeauth/server/internal/authdbimpl/authdb.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package authdbimpl
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"crypto/sha256"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"strings"
    24  	"time"
    25  
    26  	ds "go.chromium.org/luci/gae/service/datastore"
    27  
    28  	"go.chromium.org/luci/common/clock"
    29  	"go.chromium.org/luci/common/errors"
    30  	"go.chromium.org/luci/common/logging"
    31  	"go.chromium.org/luci/common/retry/transient"
    32  	"go.chromium.org/luci/server/auth/service"
    33  	"go.chromium.org/luci/server/auth/service/protocol"
    34  )
    35  
// maxShardSize is a limit on a blob size to store in a single entity.
//
// It is the value fetchSnapshot passes to storeDeflated: a deflated AuthDB
// smaller than this is stored inline in the Snapshot entity, otherwise it is
// split into SnapshotShard entities of at most this many bytes each.
const maxShardSize = 1020 * 1024 // 1020 KiB
    38  
// SnapshotInfo identifies some concrete AuthDB snapshot.
//
// Singleton entity. Serves as a pointer to a blob with corresponding AuthDB
// proto message (stored in separate Snapshot entity, see GetSnapshotID).
type SnapshotInfo struct {
	AuthServiceURL string `gae:",noindex"` // URL of the auth service the snapshot comes from
	Rev            int64  `gae:",noindex"` // revision number of the AuthDB snapshot

	// Datastore metadata: the entity kind and the fixed ID of the singleton.
	_kind string `gae:"$kind,gaeauth.SnapshotInfo"`
	_id   int64  `gae:"$id,1"`
}
    50  
    51  // GetSnapshotID returns datastore ID of the corresponding Snapshot entity.
    52  func (si *SnapshotInfo) GetSnapshotID() string {
    53  	if strings.IndexByte(si.AuthServiceURL, ',') != -1 {
    54  		panic(fmt.Errorf("forbidden symbol ',' in URL %q", si.AuthServiceURL))
    55  	}
    56  	return fmt.Sprintf("v1,%s,%d", si.AuthServiceURL, si.Rev)
    57  }
    58  
// Snapshot is serialized deflated AuthDB blob with some minimal metadata.
//
// Root entity. Immutable. Key has the form "v1,<AuthServiceURL>,<Revision>",
// it's generated by SnapshotInfo.GetSnapshotID(). It is a globally unique
// version identifier, since it includes the URL of an auth service.
// AuthServiceURL should not be very long (~< 250 chars) for this to work.
//
// Currently does not get garbage collected.
type Snapshot struct {
	// ID is the entity ID, as produced by SnapshotInfo.GetSnapshotID().
	ID string `gae:"$id"`

	// AuthDBDeflated is zlib-compressed serialized AuthDB protobuf message.
	//
	// If it is too big, it is stored in a bunch of SnapshotShard entities
	// referenced by ShardIDs field below.
	//
	// Note: if the old version of this code tries to load a new Snapshot entity
	// with ShardIDs field populated, it would abort with an error because old
	// code doesn't know about ShardIDs field (it is not in the old Snapshot
	// entity struct). This is desirable: the new sharded data structure is not
	// (and can't be made) compatible with old code, so it is good that it breaks
	// as soon as possible.
	AuthDBDeflated []byte `gae:",noindex"`

	// ShardIDs is a list of IDs of SnapshotShard entities to fetch.
	//
	// The shards are concatenated in the order listed here to reconstruct the
	// deflated blob. Empty when the blob is stored inline in AuthDBDeflated.
	ShardIDs []string `gae:",noindex"`

	CreatedAt time.Time // when it was created on Auth service
	FetchedAt time.Time // when it was fetched and put into the datastore

	_kind string `gae:"$kind,gaeauth.Snapshot"`
}
    91  
// SnapshotShard holds a shard of a deflated AuthDB.
//
// Shards are written by shardAuthDB and read back by unshardAuthDB. They are
// referenced from the ShardIDs field of the Snapshot entity.
type SnapshotShard struct {
	// ID is "<Snapshot ID>:<shard hash>", where the hash is the hex-encoded
	// SHA256 digest of the shard data.
	ID string `gae:"$id"`
	// Shard is the actual data.
	Shard []byte `gae:",noindex"`

	_kind string `gae:"$kind,gaeauth.SnapshotShard"`
}
   101  
   102  // GetLatestSnapshotInfo fetches SnapshotInfo singleton entity.
   103  //
   104  // If no such entity is stored, returns (nil, nil).
   105  func GetLatestSnapshotInfo(ctx context.Context) (*SnapshotInfo, error) {
   106  	report := durationReporter(ctx, latestSnapshotInfoDuration)
   107  	logging.Debugf(ctx, "Fetching AuthDB snapshot info from the datastore")
   108  	ctx = ds.WithoutTransaction(defaultNS(ctx))
   109  	info := SnapshotInfo{}
   110  	switch err := ds.Get(ctx, &info); {
   111  	case err == ds.ErrNoSuchEntity:
   112  		report("SUCCESS")
   113  		return nil, nil
   114  	case err != nil:
   115  		report("ERROR_TRANSIENT")
   116  		return nil, transient.Tag.Apply(err)
   117  	default:
   118  		report("SUCCESS")
   119  		return &info, nil
   120  	}
   121  }
   122  
   123  // deleteSnapshotInfo removes SnapshotInfo entity from the datastore.
   124  //
   125  // Used to detach the service from auth_service.
   126  func deleteSnapshotInfo(ctx context.Context) error {
   127  	ctx = ds.WithoutTransaction(ctx)
   128  	return ds.Delete(ctx, ds.KeyForObj(ctx, &SnapshotInfo{}))
   129  }
   130  
   131  // GetAuthDBSnapshot fetches, inflates and deserializes AuthDB snapshot.
   132  func GetAuthDBSnapshot(ctx context.Context, id string) (*protocol.AuthDB, error) {
   133  	report := durationReporter(ctx, getSnapshotDuration)
   134  	logging.Debugf(ctx, "Fetching AuthDB snapshot from the datastore")
   135  	defer logging.Debugf(ctx, "AuthDB snapshot fetched")
   136  
   137  	blob, code, err := fetchDeflated(ctx, id)
   138  	if err != nil {
   139  		report(code)
   140  		return nil, err
   141  	}
   142  
   143  	db, err := service.InflateAuthDB(blob)
   144  	if err != nil {
   145  		report("ERROR_INFLATION")
   146  		return nil, err
   147  	}
   148  
   149  	report("SUCCESS")
   150  	return db, nil
   151  }
   152  
   153  // fetchDeflated fetches a deflated AuthDB from datastore, perhaps reassembling
   154  // it from shards.
   155  //
   156  // See also storeDeflated.
   157  func fetchDeflated(ctx context.Context, id string) (blob []byte, code string, err error) {
   158  	ctx = ds.WithoutTransaction(defaultNS(ctx))
   159  
   160  	snap := Snapshot{ID: id}
   161  
   162  	switch err = ds.Get(ctx, &snap); {
   163  	case err == ds.ErrNoSuchEntity:
   164  		return nil, "ERROR_NO_SNAPSHOT", err // not transient
   165  	case err != nil:
   166  		return nil, "ERROR_TRANSIENT", transient.Tag.Apply(err)
   167  	}
   168  
   169  	if len(snap.ShardIDs) != 0 {
   170  		logging.Infof(ctx, "Reconstructing from %d shards", len(snap.ShardIDs))
   171  		switch snap.AuthDBDeflated, err = unshardAuthDB(ctx, snap.ShardIDs); {
   172  		case transient.Tag.In(err):
   173  			return nil, "ERROR_SHARDS_TRANSIENT", err
   174  		case err != nil:
   175  			// We apply the transient tag here to return Internal code
   176  			// instead of Unauthenticated code. The Unauthenticated code
   177  			// is misleading when we encountered an error in unshardAuthDB.
   178  			// https://source.chromium.org/chromium/infra/infra/+/main:go/src/go.chromium.org/luci/server/auth/auth.go;l=272
   179  			return nil, "ERROR_SHARDS_MISSING", transient.Tag.Apply(err)
   180  		}
   181  	}
   182  
   183  	return snap.AuthDBDeflated, "SUCCESS", nil
   184  }
   185  
   186  // ConfigureAuthService makes initial fetch of AuthDB snapshot from the auth
   187  // service and sets up PubSub subscription.
   188  //
   189  // `baseURL` is root URL of currently running service, will be used to derive
   190  // PubSub push endpoint URL.
   191  //
   192  // If `authServiceURL` is blank, disables the fetching.
   193  func ConfigureAuthService(ctx context.Context, baseURL, authServiceURL string) error {
   194  	logging.Infof(ctx, "Reconfiguring AuthDB to be fetched from %q", authServiceURL)
   195  	ctx = defaultNS(ctx)
   196  
   197  	// If switching auth services, need to grab URL of a currently configured
   198  	// auth service to unsubscribe from its PubSub stream.
   199  	prevAuthServiceURL := ""
   200  	switch existing, err := GetLatestSnapshotInfo(ctx); {
   201  	case err != nil:
   202  		return err
   203  	case existing != nil:
   204  		prevAuthServiceURL = existing.AuthServiceURL
   205  	}
   206  
   207  	// Stopping synchronization completely?
   208  	if authServiceURL == "" {
   209  		if prevAuthServiceURL != "" {
   210  			if err := killPubSub(ctx, prevAuthServiceURL); err != nil {
   211  				return err
   212  			}
   213  		}
   214  		return deleteSnapshotInfo(ctx)
   215  	}
   216  
   217  	// Fetch latest AuthDB snapshot and store it in the datastore, thus verifying
   218  	// authServiceURL works end-to-end.
   219  	srv := getAuthService(ctx, authServiceURL)
   220  	latestRev, err := srv.GetLatestSnapshotRevision(ctx)
   221  	if err != nil {
   222  		return err
   223  	}
   224  	info := &SnapshotInfo{
   225  		AuthServiceURL: authServiceURL,
   226  		Rev:            latestRev,
   227  	}
   228  	if err := fetchSnapshot(ctx, info); err != nil {
   229  		logging.Errorf(ctx, "Failed to fetch latest snapshot from %s - %s", authServiceURL, err)
   230  		return err
   231  	}
   232  
   233  	// Configure PubSub subscription to receive future updates.
   234  	if err := setupPubSub(ctx, baseURL, authServiceURL); err != nil {
   235  		logging.Errorf(ctx, "Failed to configure pubsub subscription - %s", err)
   236  		return err
   237  	}
   238  
   239  	// All is configured. Switch SnapshotInfo entity to point to new snapshot.
   240  	// It makes syncAuthDB fetch changes from `authServiceURL`, thus promoting
   241  	// `authServiceURL` to the status of main auth service.
   242  	if err := ds.Put(ds.WithoutTransaction(ctx), info); err != nil {
   243  		return transient.Tag.Apply(err)
   244  	}
   245  
   246  	// Stop getting notifications from previously used auth service.
   247  	if prevAuthServiceURL != "" && prevAuthServiceURL != authServiceURL {
   248  		return killPubSub(ctx, prevAuthServiceURL)
   249  	}
   250  
   251  	return nil
   252  }
   253  
   254  // fetchSnapshot fetches AuthDB snapshot specified by `info` and puts it into
   255  // the datastore.
   256  //
   257  // Idempotent. Doesn't touch SnapshotInfo entity itself, and thus always safe
   258  // to call.
   259  func fetchSnapshot(ctx context.Context, info *SnapshotInfo) error {
   260  	srv := getAuthService(ctx, info.AuthServiceURL)
   261  	snap, err := srv.GetSnapshot(ctx, info.Rev)
   262  	if err != nil {
   263  		return err
   264  	}
   265  	blob, err := service.DeflateAuthDB(snap.AuthDB)
   266  	if err != nil {
   267  		return err
   268  	}
   269  	if err := storeDeflated(ctx, info.GetSnapshotID(), blob, snap.Created, maxShardSize); err != nil {
   270  		return err
   271  	}
   272  	logging.Infof(ctx, "Lag: %s", clock.Now(ctx).Sub(snap.Created))
   273  	return nil
   274  }
   275  
   276  // storeDeflated stores a deflated AuthDB into datastore, perhaps splitting it
   277  // into shards.
   278  //
   279  // See also fetchDeflated.
   280  func storeDeflated(ctx context.Context, id string, blob []byte, created time.Time, maxShardSize int) error {
   281  	ctx = ds.WithoutTransaction(defaultNS(ctx))
   282  
   283  	snapshot := Snapshot{
   284  		ID:        id,
   285  		CreatedAt: created.UTC(),
   286  		FetchedAt: clock.Now(ctx).UTC(),
   287  	}
   288  
   289  	// If we are able to store AuthDB inline in the Snapshot, do it. That way
   290  	// older versions of this code can still successfully read it. If it doesn't
   291  	// fit, there's nothing we can do other than to store it separately in shards.
   292  	// The old code will see unrecognized ShardIDs field and will fail.
   293  	if len(blob) < maxShardSize {
   294  		snapshot.AuthDBDeflated = blob
   295  	} else {
   296  		var err error
   297  		if snapshot.ShardIDs, err = shardAuthDB(ctx, id, blob, maxShardSize); err != nil {
   298  			return err
   299  		}
   300  		logging.Infof(ctx, "Split into %d shards", len(snapshot.ShardIDs))
   301  	}
   302  
   303  	return transient.Tag.Apply(ds.Put(ctx, &snapshot))
   304  }
   305  
   306  // syncAuthDB fetches latest AuthDB snapshot from the configured auth service,
   307  // puts it into the datastore and updates SnapshotInfo entity to point to it.
   308  //
   309  // Expects authenticating transport to be in the context. Called when receiving
   310  // PubSub notifications.
   311  //
   312  // Returns SnapshotInfo of the most recent snapshot.
   313  func syncAuthDB(ctx context.Context) (*SnapshotInfo, error) {
   314  	report := durationReporter(ctx, syncAuthDBDuration)
   315  
   316  	// `info` is what we have in the datastore now.
   317  	info, err := GetLatestSnapshotInfo(ctx)
   318  	if err != nil {
   319  		report("ERROR_GET_LATEST_INFO")
   320  		return nil, err
   321  	}
   322  	if info == nil {
   323  		report("ERROR_NOT_CONFIGURED")
   324  		return nil, errors.New("auth_service URL is not configured")
   325  	}
   326  
   327  	// Grab revision number of the latest snapshot on the server.
   328  	srv := getAuthService(ctx, info.AuthServiceURL)
   329  	latestRev, err := srv.GetLatestSnapshotRevision(ctx)
   330  	if err != nil {
   331  		report("ERROR_GET_LATEST_REVISION")
   332  		return nil, err
   333  	}
   334  
   335  	// Nothing new?
   336  	if info.Rev == latestRev {
   337  		logging.Infof(ctx, "AuthDB is up-to-date at revision %d", latestRev)
   338  		report("SUCCESS_UP_TO_DATE")
   339  		return info, nil
   340  	}
   341  
   342  	// Auth service traveled back in time?
   343  	if info.Rev > latestRev {
   344  		logging.Errorf(
   345  			ctx, "Latest AuthDB revision on server is %d, we have %d. It should not happen",
   346  			latestRev, info.Rev)
   347  		report("SUCCESS_NEWER_ALREADY")
   348  		return info, nil
   349  	}
   350  
   351  	// Fetch the actual snapshot from the server and put it into the datastore.
   352  	info.Rev = latestRev
   353  	if err = fetchSnapshot(ctx, info); err != nil {
   354  		logging.Errorf(ctx, "Failed to fetch snapshot %d from %q - %s", info.Rev, info.AuthServiceURL, err)
   355  		report("ERROR_FETCHING")
   356  		return nil, err
   357  	}
   358  
   359  	// Move pointer to the latest snapshot only if it is more recent than what is
   360  	// already in the datastore.
   361  	var latest *SnapshotInfo
   362  	err = ds.RunInTransaction(ds.WithoutTransaction(ctx), func(ctx context.Context) error {
   363  		latest = &SnapshotInfo{}
   364  		switch err := ds.Get(ctx, latest); {
   365  		case err == ds.ErrNoSuchEntity:
   366  			logging.Warningf(ctx, "No longer need to fetch AuthDB, not configured anymore")
   367  			return nil
   368  		case err != nil:
   369  			return err
   370  		case latest.AuthServiceURL != info.AuthServiceURL:
   371  			logging.Warningf(
   372  				ctx, "No longer need to fetch AuthDB from %q, %q is primary now",
   373  				info.AuthServiceURL, latest.AuthServiceURL)
   374  			return nil
   375  		case latest.Rev >= info.Rev:
   376  			logging.Warningf(ctx, "Already have rev %d", info.Rev)
   377  			return nil
   378  		}
   379  		latest = info
   380  		return ds.Put(ctx, info)
   381  	}, nil)
   382  
   383  	if err != nil {
   384  		report("ERROR_COMMITTING")
   385  		return nil, transient.Tag.Apply(err)
   386  	}
   387  
   388  	report("SUCCESS_UPDATED")
   389  	return latest, nil
   390  }
   391  
   392  // shardAuthDB splits an AuthDB blob into multiple SnapshotShard entities.
   393  func shardAuthDB(ctx context.Context, id string, blob []byte, maxSize int) ([]string, error) {
   394  	var ids []string
   395  
   396  	var shard []byte
   397  	for len(blob) != 0 {
   398  		shardSize := maxSize
   399  		if shardSize > len(blob) {
   400  			shardSize = len(blob)
   401  		}
   402  		shard, blob = blob[:shardSize], blob[shardSize:]
   403  
   404  		digest := sha256.Sum256(shard)
   405  		shardID := fmt.Sprintf("%s:%s", id, hex.EncodeToString(digest[:]))
   406  		ids = append(ids, shardID)
   407  
   408  		// Store shards sequentially to avoid allocating RAM to store full `blob` in
   409  		// RPC buffers. There's no requirement for this code to be performant, it
   410  		// executes in a background job.
   411  		err := ds.Put(ctx, &SnapshotShard{ID: shardID, Shard: shard})
   412  		if err != nil {
   413  			return nil, transient.Tag.Apply(err)
   414  		}
   415  	}
   416  
   417  	return ids, nil
   418  }
   419  
   420  // unshardAuthDB fetches SnapshotShard entities and reassembles the AuthDB blob.
   421  func unshardAuthDB(ctx context.Context, shardIDs []string) ([]byte, error) {
   422  	shards := make([]SnapshotShard, len(shardIDs))
   423  	for idx, id := range shardIDs {
   424  		shards[idx].ID = id
   425  	}
   426  
   427  	if err := ds.Get(ctx, shards); err != nil {
   428  		if merr, ok := err.(errors.MultiError); ok {
   429  			for _, inner := range merr {
   430  				if inner == ds.ErrNoSuchEntity {
   431  					return nil, err // fatal
   432  				}
   433  			}
   434  			return nil, transient.Tag.Apply(err)
   435  		} else {
   436  			// Overall RPC error.
   437  			return nil, transient.Tag.Apply(err)
   438  		}
   439  	}
   440  
   441  	slices := make([][]byte, len(shards))
   442  	for idx, shard := range shards {
   443  		slices[idx] = shard.Shard
   444  	}
   445  	return bytes.Join(slices, nil), nil
   446  }