github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/internal/datastore/spanner/spanner.go (about)

     1  package spanner
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"regexp"
     8  	"strconv"
     9  	"sync"
    10  	"time"
    11  
    12  	"cloud.google.com/go/spanner"
    13  	ocprom "contrib.go.opencensus.io/exporter/prometheus"
    14  	sq "github.com/Masterminds/squirrel"
    15  	"github.com/prometheus/client_golang/prometheus"
    16  	"go.opencensus.io/plugin/ocgrpc"
    17  	"go.opencensus.io/stats/view"
    18  	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
    19  	"go.opentelemetry.io/otel"
    20  	"go.opentelemetry.io/otel/attribute"
    21  	"go.opentelemetry.io/otel/trace"
    22  	"google.golang.org/api/option"
    23  	"google.golang.org/grpc"
    24  	"google.golang.org/grpc/codes"
    25  
    26  	"github.com/authzed/spicedb/internal/datastore/common"
    27  	"github.com/authzed/spicedb/internal/datastore/revisions"
    28  	"github.com/authzed/spicedb/internal/datastore/spanner/migrations"
    29  	log "github.com/authzed/spicedb/internal/logging"
    30  	"github.com/authzed/spicedb/pkg/datastore"
    31  	"github.com/authzed/spicedb/pkg/datastore/options"
    32  	core "github.com/authzed/spicedb/pkg/proto/core/v1"
    33  )
    34  
// init appends this package's Engine name to datastore.Engines when the
// package is loaded.
func init() {
	datastore.Engines = append(datastore.Engines, Engine)
}
    38  
const (
	// Engine is the name under which this datastore is added to
	// datastore.Engines.
	Engine = "spanner"

	// errUnableToInstantiate is the message used when the datastore cannot be
	// constructed. Unlike the constants below it carries no %w verb; it is
	// handed to common.RedactAndLogSensitiveConnString together with the
	// underlying error.
	errUnableToInstantiate = "unable to instantiate spanner client"

	// errRevision is a wrapping format string for revision failures.
	errRevision = "unable to load revision: %w"

	// Wrapping format strings for relationship mutations.
	errUnableToWriteRelationships    = "unable to write relationships: %w"
	errUnableToBulkLoadRelationships = "unable to bulk load relationships: %w"
	errUnableToDeleteRelationships   = "unable to delete relationships: %w"

	// Wrapping format strings for namespace configuration operations.
	errUnableToWriteConfig    = "unable to write namespace config: %w"
	errUnableToReadConfig     = "unable to read namespace config: %w"
	errUnableToDeleteConfig   = "unable to delete namespace config: %w"
	errUnableToListNamespaces = "unable to list namespaces: %w"

	// Wrapping format strings for caveat operations.
	errUnableToReadCaveat   = "unable to read caveat: %w"
	errUnableToWriteCaveat  = "unable to write caveat: %w"
	errUnableToListCaveats  = "unable to list caveats: %w"
	errUnableToDeleteCaveat = "unable to delete caveat: %w"

	// defaultChangeStreamRetention is passed as the first argument to
	// revisions.NewRemoteClockRevisions when the datastore is constructed.
	// NOTE(review): presumably mirrors the retention period of the Spanner
	// change stream described in the links below; confirm against the
	// migration that creates the stream.
	//
	// See https://cloud.google.com/spanner/docs/change-streams#data-retention
	// See https://github.com/authzed/spicedb/issues/1457
	defaultChangeStreamRetention = 24 * time.Hour
)
    64  
// tableSizesStatsTable is the table name that NewSpannerDatastore stores in
// spannerDatastore.tableSizesStatsTable.
// NOTE(review): presumably the Spanner system table queried for size
// statistics; the query itself is not in this file section.
const tableSizesStatsTable = "spanner_sys.table_sizes_stats_1hour"
    66  
var (
	// sql is the squirrel statement builder used by this package, configured
	// with the sq.AtP placeholder format. statementFromSQL binds positional
	// arguments under the names p1, p2, ... to match those placeholders.
	sql = sq.StatementBuilder.PlaceholderFormat(sq.AtP)

	// tracer creates the spans started by this package.
	tracer = otel.Tracer("spicedb/internal/datastore/spanner")

	// alreadyExistsRegex matches the error description reported for a
	// duplicate row in the relation_tuple table and captures the six string
	// values listed in the message. convertToWriteConstraintError maps the
	// captures, in order, to resource namespace, object ID and relation
	// followed by subject namespace, object ID and relation.
	alreadyExistsRegex = regexp.MustCompile(`^Table relation_tuple: Row {String\("([^\"]+)"\), String\("([^\"]+)"\), String\("([^\"]+)"\), String\("([^\"]+)"\), String\("([^\"]+)"\), String\("([^\"]+)"\)} already exists.$`)
)
    73  
// spannerDatastore is the datastore implementation returned by
// NewSpannerDatastore. It embeds the remote-clock revision helper and the
// common revision decoder (configured for timestamp revisions).
type spannerDatastore struct {
	*revisions.RemoteClockRevisions
	revisions.CommonDecoder

	// Watch settings, copied from the configuration at construction time.
	watchBufferLength       uint16
	watchBufferWriteTimeout time.Duration

	// client is the Spanner client used for all reads and transactions;
	// Close shuts it down.
	client *spanner.Client
	// config holds the options the datastore was built with.
	config spannerOptions
	// database is the database string passed to NewSpannerDatastore.
	database string

	// NOTE(review): presumably a cached size estimate guarded by the lock
	// declared next to it; the readers and writers are outside this view.
	cachedEstimatedBytesPerRelationshipLock sync.RWMutex
	cachedEstimatedBytesPerRelationship     uint64

	// tableSizesStatsTable is initialised from the package-level constant of
	// the same name.
	tableSizesStatsTable string
}
    90  
    91  // NewSpannerDatastore returns a datastore backed by cloud spanner
    92  func NewSpannerDatastore(ctx context.Context, database string, opts ...Option) (datastore.Datastore, error) {
    93  	config, err := generateConfig(opts)
    94  	if err != nil {
    95  		return nil, common.RedactAndLogSensitiveConnString(ctx, errUnableToInstantiate, err, database)
    96  	}
    97  
    98  	if config.migrationPhase != "" {
    99  		log.Info().
   100  			Str("phase", config.migrationPhase).
   101  			Msg("spanner configured to use intermediate migration phase")
   102  	}
   103  
   104  	if len(config.emulatorHost) > 0 {
   105  		if err := os.Setenv("SPANNER_EMULATOR_HOST", config.emulatorHost); err != nil {
   106  			log.Error().Err(err).Msg("failed to set SPANNER_EMULATOR_HOST env variable")
   107  		}
   108  	}
   109  	if len(os.Getenv("SPANNER_EMULATOR_HOST")) > 0 {
   110  		log.Info().Str("spanner-emulator-host", os.Getenv("SPANNER_EMULATOR_HOST")).Msg("running against spanner emulator")
   111  	}
   112  
   113  	// TODO(jschorr): Replace with OpenTelemetry instrumentation once available.
   114  	err = spanner.EnableStatViews() // nolint: staticcheck
   115  	if err != nil {
   116  		return nil, fmt.Errorf("failed to enable spanner session metrics: %w", err)
   117  	}
   118  	err = spanner.EnableGfeLatencyAndHeaderMissingCountViews() // nolint: staticcheck
   119  	if err != nil {
   120  		return nil, fmt.Errorf("failed to enable spanner GFE metrics: %w", err)
   121  	}
   122  
   123  	// Register Spanner client gRPC metrics (include round-trip latency, received/sent bytes...)
   124  	if err := view.Register(ocgrpc.DefaultClientViews...); err != nil {
   125  		return nil, fmt.Errorf("failed to enable gRPC metrics for Spanner client: %w", err)
   126  	}
   127  
   128  	_, err = ocprom.NewExporter(ocprom.Options{
   129  		Namespace:  "spicedb",
   130  		Registerer: prometheus.DefaultRegisterer,
   131  	})
   132  	if err != nil {
   133  		return nil, fmt.Errorf("failed to enable spanner GFE latency stats: %w", err)
   134  	}
   135  
   136  	cfg := spanner.DefaultSessionPoolConfig
   137  	cfg.MinOpened = config.minSessions
   138  	cfg.MaxOpened = config.maxSessions
   139  	client, err := spanner.NewClientWithConfig(context.Background(), database,
   140  		spanner.ClientConfig{SessionPoolConfig: cfg},
   141  		option.WithCredentialsFile(config.credentialsFilePath),
   142  		option.WithGRPCConnectionPool(max(config.readMaxOpen, config.writeMaxOpen)),
   143  		option.WithGRPCDialOption(
   144  			grpc.WithStatsHandler(otelgrpc.NewClientHandler()),
   145  		),
   146  	)
   147  	if err != nil {
   148  		return nil, common.RedactAndLogSensitiveConnString(ctx, errUnableToInstantiate, err, database)
   149  	}
   150  
   151  	maxRevisionStaleness := time.Duration(float64(config.revisionQuantization.Nanoseconds())*
   152  		config.maxRevisionStalenessPercent) * time.Nanosecond
   153  
   154  	ds := &spannerDatastore{
   155  		RemoteClockRevisions: revisions.NewRemoteClockRevisions(
   156  			defaultChangeStreamRetention,
   157  			maxRevisionStaleness,
   158  			config.followerReadDelay,
   159  			config.revisionQuantization,
   160  		),
   161  		CommonDecoder: revisions.CommonDecoder{
   162  			Kind: revisions.Timestamp,
   163  		},
   164  		client:                                  client,
   165  		config:                                  config,
   166  		database:                                database,
   167  		watchBufferWriteTimeout:                 config.watchBufferWriteTimeout,
   168  		watchBufferLength:                       config.watchBufferLength,
   169  		cachedEstimatedBytesPerRelationship:     0,
   170  		cachedEstimatedBytesPerRelationshipLock: sync.RWMutex{},
   171  		tableSizesStatsTable:                    tableSizesStatsTable,
   172  	}
   173  	ds.RemoteClockRevisions.SetNowFunc(ds.headRevisionInternal)
   174  
   175  	return ds, nil
   176  }
   177  
// traceableRTX wraps a readTX and annotates the span found in the call's
// context with details of each read before forwarding the call to delegate.
type traceableRTX struct {
	// delegate is the transaction that actually performs the reads.
	delegate readTX
}
   181  
   182  func (t *traceableRTX) ReadRow(ctx context.Context, table string, key spanner.Key, columns []string) (*spanner.Row, error) {
   183  	trace.SpanFromContext(ctx).SetAttributes(
   184  		attribute.String("spannerAPI", "ReadOnlyTransaction.ReadRow"),
   185  		attribute.String("table", table),
   186  		attribute.String("key", key.String()),
   187  		attribute.StringSlice("columns", columns))
   188  
   189  	return t.delegate.ReadRow(ctx, table, key, columns)
   190  }
   191  
   192  func (t *traceableRTX) Read(ctx context.Context, table string, keys spanner.KeySet, columns []string) *spanner.RowIterator {
   193  	trace.SpanFromContext(ctx).SetAttributes(
   194  		attribute.String("spannerAPI", "ReadOnlyTransaction.Read"),
   195  		attribute.String("table", table),
   196  		attribute.StringSlice("columns", columns))
   197  
   198  	return t.delegate.Read(ctx, table, keys, columns)
   199  }
   200  
   201  func (t *traceableRTX) Query(ctx context.Context, statement spanner.Statement) *spanner.RowIterator {
   202  	trace.SpanFromContext(ctx).SetAttributes(
   203  		attribute.String("spannerAPI", "ReadOnlyTransaction.Query"),
   204  		attribute.String("statement", statement.SQL))
   205  
   206  	return t.delegate.Query(ctx, statement)
   207  }
   208  
   209  func (sd *spannerDatastore) SnapshotReader(revisionRaw datastore.Revision) datastore.Reader {
   210  	r := revisionRaw.(revisions.TimestampRevision)
   211  
   212  	txSource := func() readTX {
   213  		return &traceableRTX{delegate: sd.client.Single().WithTimestampBound(spanner.ReadTimestamp(r.Time()))}
   214  	}
   215  	executor := common.QueryExecutor{Executor: queryExecutor(txSource)}
   216  	return spannerReader{executor, txSource}
   217  }
   218  
   219  func (sd *spannerDatastore) ReadWriteTx(ctx context.Context, fn datastore.TxUserFunc, opts ...options.RWTOptionsOption) (datastore.Revision, error) {
   220  	config := options.NewRWTOptionsWithOptions(opts...)
   221  
   222  	ctx, span := tracer.Start(ctx, "ReadWriteTx")
   223  	defer span.End()
   224  
   225  	ctx, cancel := context.WithCancel(ctx)
   226  	ts, err := sd.client.ReadWriteTransaction(ctx, func(ctx context.Context, spannerRWT *spanner.ReadWriteTransaction) error {
   227  		txSource := func() readTX {
   228  			return &traceableRTX{delegate: spannerRWT}
   229  		}
   230  
   231  		executor := common.QueryExecutor{Executor: queryExecutor(txSource)}
   232  		rwt := spannerReadWriteTXN{
   233  			spannerReader{executor, txSource},
   234  			spannerRWT,
   235  		}
   236  		err := func() error {
   237  			innerCtx, innerSpan := tracer.Start(ctx, "TxUserFunc")
   238  			defer innerSpan.End()
   239  
   240  			return fn(innerCtx, rwt)
   241  		}()
   242  		if err != nil {
   243  			if config.DisableRetries {
   244  				defer cancel()
   245  			}
   246  			return err
   247  		}
   248  
   249  		return nil
   250  	})
   251  	if err != nil {
   252  		if cerr := convertToWriteConstraintError(err); cerr != nil {
   253  			return datastore.NoRevision, cerr
   254  		}
   255  		return datastore.NoRevision, err
   256  	}
   257  
   258  	return revisions.NewForTime(ts), nil
   259  }
   260  
   261  func (sd *spannerDatastore) ReadyState(ctx context.Context) (datastore.ReadyState, error) {
   262  	headMigration, err := migrations.SpannerMigrations.HeadRevision()
   263  	if err != nil {
   264  		return datastore.ReadyState{}, fmt.Errorf("invalid head migration found for spanner: %w", err)
   265  	}
   266  
   267  	checker := migrations.NewSpannerVersionChecker(sd.client)
   268  	version, err := checker.Version(ctx)
   269  	if err != nil {
   270  		return datastore.ReadyState{}, err
   271  	}
   272  
   273  	// TODO: once phased migration is complete, remove the extra allowed version
   274  	if version == headMigration || version == "register-combined-change-stream" {
   275  		return datastore.ReadyState{IsReady: true}, nil
   276  	}
   277  
   278  	return datastore.ReadyState{
   279  		Message: fmt.Sprintf(
   280  			"datastore is not migrated: currently at revision `%s`, but requires `%s`. Please run `spicedb migrate`.",
   281  			version,
   282  			headMigration,
   283  		),
   284  		IsReady: false,
   285  	}, nil
   286  }
   287  
   288  func (sd *spannerDatastore) Features(_ context.Context) (*datastore.Features, error) {
   289  	return &datastore.Features{Watch: datastore.Feature{Enabled: true}}, nil
   290  }
   291  
// Close closes the underlying Spanner client. It always returns nil.
func (sd *spannerDatastore) Close() error {
	sd.client.Close()
	return nil
}
   296  
   297  func statementFromSQL(sql string, args []any) spanner.Statement {
   298  	params := make(map[string]any, len(args))
   299  	for index, arg := range args {
   300  		params["p"+strconv.Itoa(index+1)] = arg
   301  	}
   302  
   303  	return spanner.Statement{
   304  		SQL:    sql,
   305  		Params: params,
   306  	}
   307  }
   308  
   309  func convertToWriteConstraintError(err error) error {
   310  	if spanner.ErrCode(err) == codes.AlreadyExists {
   311  		description := spanner.ErrDesc(err)
   312  		found := alreadyExistsRegex.FindStringSubmatch(description)
   313  		if found != nil {
   314  			return common.NewCreateRelationshipExistsError(&core.RelationTuple{
   315  				ResourceAndRelation: &core.ObjectAndRelation{
   316  					Namespace: found[1],
   317  					ObjectId:  found[2],
   318  					Relation:  found[3],
   319  				},
   320  				Subject: &core.ObjectAndRelation{
   321  					Namespace: found[4],
   322  					ObjectId:  found[5],
   323  					Relation:  found[6],
   324  				},
   325  			})
   326  		}
   327  
   328  		return common.NewCreateRelationshipExistsError(nil)
   329  	}
   330  	return nil
   331  }