github.com/authzed/spicedb@v1.32.1-0.20240520085336-ebda56537386/internal/datastore/postgres/postgres.go

package postgres

import (
	"context"
	dbsql "database/sql"
	"errors"
	"fmt"
	"os"
	"sync/atomic"
	"time"

	"github.com/IBM/pgxpoolprometheus"
	sq "github.com/Masterminds/squirrel"
	"github.com/jackc/pgx/v5"
	"github.com/jackc/pgx/v5/pgconn"
	"github.com/jackc/pgx/v5/pgxpool"
	"github.com/jackc/pgx/v5/stdlib"
	"github.com/mattn/go-isatty"
	"github.com/ngrok/sqlmw"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/schollz/progressbar/v3"
	"go.opentelemetry.io/otel"
	"golang.org/x/sync/errgroup"

	datastoreinternal "github.com/authzed/spicedb/internal/datastore"
	"github.com/authzed/spicedb/internal/datastore/common"
	pgxcommon "github.com/authzed/spicedb/internal/datastore/postgres/common"
	"github.com/authzed/spicedb/internal/datastore/postgres/migrations"
	"github.com/authzed/spicedb/internal/datastore/revisions"
	log "github.com/authzed/spicedb/internal/logging"
	"github.com/authzed/spicedb/pkg/datastore"
	"github.com/authzed/spicedb/pkg/datastore/options"
)

func init() {
	datastore.Engines = append(datastore.Engines, Engine)
}

const (
	Engine           = "postgres"
	tableNamespace   = "namespace_config"
	tableTransaction = "relation_tuple_transaction"
	tableTuple       = "relation_tuple"
	tableCaveat      = "caveat"

	colXID               = "xid"
	colTimestamp         = "timestamp"
	colNamespace         = "namespace"
	colConfig            = "serialized_config"
	colCreatedXid        = "created_xid"
	colDeletedXid        = "deleted_xid"
	colSnapshot          = "snapshot"
	colObjectID          = "object_id"
	colRelation          = "relation"
	colUsersetNamespace  = "userset_namespace"
	colUsersetObjectID   = "userset_object_id"
	colUsersetRelation   = "userset_relation"
	colCaveatName        = "name"
	colCaveatDefinition  = "definition"
	colCaveatContextName = "caveat_name"
	colCaveatContext     = "caveat_context"

	errUnableToInstantiate = "unable to instantiate datastore"

	// The parameter to this format string is:
	// 1: the created_xid or deleted_xid column name
	//
	// The placeholders are the snapshot and the expected boolean value respectively.
	snapshotAlive = "pg_visible_in_snapshot(%[1]s, ?) = ?"

	// This is the largest positive integer possible in PostgreSQL.
	liveDeletedTxnID = uint64(9223372036854775807)

	tracingDriverName = "postgres-tracing"

	gcBatchDeleteSize = 1000
)

var livingTupleConstraints = []string{"uq_relation_tuple_living_xid", "pk_relation_tuple"}

func init() {
	dbsql.Register(tracingDriverName, sqlmw.Driver(stdlib.GetDefaultDriver(), new(traceInterceptor)))
}

var (
	psql = sq.StatementBuilder.PlaceholderFormat(sq.Dollar)

	getRevision = psql.
			Select(colXID, colSnapshot).
			From(tableTransaction).
			OrderByClause(fmt.Sprintf("%s DESC", colXID)).
			Limit(1)

	createTxn = fmt.Sprintf(
		"INSERT INTO %s DEFAULT VALUES RETURNING %s, %s",
		tableTransaction,
		colXID,
		colSnapshot,
	)

	getNow = psql.Select("NOW()")

	tracer = otel.Tracer("spicedb/internal/datastore/postgres")
)

type sqlFilter interface {
	ToSql() (string, []interface{}, error)
}
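
// For illustration: after squirrel converts the ? placeholders to dollar
// placeholders, a living-rows filter built from snapshotAlive (see
// buildLivingObjectFilterForRevision below) renders as SQL of the shape
//
//	pg_visible_in_snapshot(created_xid, $1) = $2 -- args: snapshot, true
//	pg_visible_in_snapshot(deleted_xid, $3) = $4 -- args: snapshot, false
//
// i.e. a row is alive at a snapshot when its creating transaction is visible
// in that snapshot and its deleting transaction is not.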

// NewPostgresDatastore initializes a SpiceDB datastore that uses a PostgreSQL
// database by leveraging manual book-keeping to implement revisioning.
//
// This datastore is also tested to be compatible with CockroachDB.
func NewPostgresDatastore(
	ctx context.Context,
	url string,
	options ...Option,
) (datastore.Datastore, error) {
	ds, err := newPostgresDatastore(ctx, url, options...)
	if err != nil {
		return nil, err
	}

	return datastoreinternal.NewSeparatingContextDatastoreProxy(ds), nil
}

func newPostgresDatastore(
	ctx context.Context,
	pgURL string,
	options ...Option,
) (datastore.Datastore, error) {
	config, err := generateConfig(options)
	if err != nil {
		return nil, common.RedactAndLogSensitiveConnString(ctx, errUnableToInstantiate, err, pgURL)
	}

	// Parse the DB URI into configuration.
	parsedConfig, err := pgxpool.ParseConfig(pgURL)
	if err != nil {
		return nil, common.RedactAndLogSensitiveConnString(ctx, errUnableToInstantiate, err, pgURL)
	}

	// Setup the default custom plan setting, if applicable.
	pgConfig, err := defaultCustomPlan(parsedConfig)
	if err != nil {
		return nil, common.RedactAndLogSensitiveConnString(ctx, errUnableToInstantiate, err, pgURL)
	}

	// Setup the credentials provider.
	var credentialsProvider datastore.CredentialsProvider
	if config.credentialsProviderName != "" {
		credentialsProvider, err = datastore.NewCredentialsProvider(ctx, config.credentialsProviderName)
		if err != nil {
			return nil, err
		}
	}

	// Setup the config for each of the read and write pools.
	readPoolConfig := pgConfig.Copy()
	config.readPoolOpts.ConfigurePgx(readPoolConfig)

	readPoolConfig.AfterConnect = func(ctx context.Context, conn *pgx.Conn) error {
		RegisterTypes(conn.TypeMap())
		return nil
	}

	writePoolConfig := pgConfig.Copy()
	config.writePoolOpts.ConfigurePgx(writePoolConfig)

	writePoolConfig.AfterConnect = func(ctx context.Context, conn *pgx.Conn) error {
		RegisterTypes(conn.TypeMap())
		return nil
	}

	if credentialsProvider != nil {
		// Add BeforeConnect callbacks that fetch fresh credentials (e.g. an
		// auth token) from the provider before each new connection is opened.
		getToken := func(ctx context.Context, config *pgx.ConnConfig) error {
			config.User, config.Password, err = credentialsProvider.Get(ctx, fmt.Sprintf("%s:%d", config.Host, config.Port), config.User)
			return err
		}
		readPoolConfig.BeforeConnect = getToken
		writePoolConfig.BeforeConnect = getToken
	}

	if config.migrationPhase != "" {
		log.Info().
			Str("phase", config.migrationPhase).
			Msg("postgres configured to use intermediate migration phase")
	}

	initializationContext, cancelInit := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancelInit()

	readPool, err := pgxpool.NewWithConfig(initializationContext, readPoolConfig)
	if err != nil {
		return nil, common.RedactAndLogSensitiveConnString(ctx, errUnableToInstantiate, err, pgURL)
	}

	writePool, err := pgxpool.NewWithConfig(initializationContext, writePoolConfig)
	if err != nil {
		return nil, common.RedactAndLogSensitiveConnString(ctx, errUnableToInstantiate, err, pgURL)
	}
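
	// Note: track_commit_timestamp is a server-level setting; enabling it
	// (e.g. ALTER SYSTEM SET track_commit_timestamp = on;) takes effect only
	// after a PostgreSQL restart, at which point the check below will allow
	// the Watch API to be enabled.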

	// Verify that the server supports commit timestamps.
	var trackTSOn string
	if err := readPool.
		QueryRow(initializationContext, "SHOW track_commit_timestamp;").
		Scan(&trackTSOn); err != nil {
		return nil, err
	}

	watchEnabled := trackTSOn == "on"
	if !watchEnabled {
		log.Warn().Msg("watch API disabled, postgres must be run with track_commit_timestamp=on")
	}

	if config.enablePrometheusStats {
		if err := prometheus.Register(pgxpoolprometheus.NewCollector(readPool, map[string]string{
			"db_name":    "spicedb",
			"pool_usage": "read",
		})); err != nil {
			return nil, err
		}
		if err := prometheus.Register(pgxpoolprometheus.NewCollector(writePool, map[string]string{
			"db_name":    "spicedb",
			"pool_usage": "write",
		})); err != nil {
			return nil, err
		}
		if err := common.RegisterGCMetrics(); err != nil {
			return nil, err
		}
	}

	gcCtx, cancelGc := context.WithCancel(context.Background())

	quantizationPeriodNanos := config.revisionQuantization.Nanoseconds()
	if quantizationPeriodNanos < 1 {
		quantizationPeriodNanos = 1
	}
	revisionQuery := fmt.Sprintf(
		querySelectRevision,
		colXID,
		tableTransaction,
		colTimestamp,
		quantizationPeriodNanos,
		colSnapshot,
	)

	validTransactionQuery := fmt.Sprintf(
		queryValidTransaction,
		colXID,
		tableTransaction,
		colTimestamp,
		config.gcWindow.Seconds(),
		colSnapshot,
	)

	maxRevisionStaleness := time.Duration(float64(config.revisionQuantization.Nanoseconds())*
		config.maxRevisionStalenessPercent) * time.Nanosecond

	datastore := &pgDatastore{
		CachedOptimizedRevisions: revisions.NewCachedOptimizedRevisions(
			maxRevisionStaleness,
		),
		dburl:                   pgURL,
		readPool:                pgxcommon.MustNewInterceptorPooler(readPool, config.queryInterceptor),
		writePool:               pgxcommon.MustNewInterceptorPooler(writePool, config.queryInterceptor),
		watchBufferLength:       config.watchBufferLength,
		watchBufferWriteTimeout: config.watchBufferWriteTimeout,
		optimizedRevisionQuery:  revisionQuery,
		validTransactionQuery:   validTransactionQuery,
		gcWindow:                config.gcWindow,
		gcInterval:              config.gcInterval,
		gcTimeout:               config.gcMaxOperationTime,
		analyzeBeforeStatistics: config.analyzeBeforeStatistics,
		watchEnabled:            watchEnabled,
		gcCtx:                   gcCtx,
		cancelGc:                cancelGc,
		readTxOptions:           pgx.TxOptions{IsoLevel: pgx.RepeatableRead, AccessMode: pgx.ReadOnly},
		maxRetries:              config.maxRetries,
		credentialsProvider:     credentialsProvider,
	}

	datastore.SetOptimizedRevisionFunc(datastore.optimizedRevisionFunc)
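
	// Illustrative numbers: with revisionQuantization = 5s and
	// maxRevisionStalenessPercent = 0.1, maxRevisionStaleness above works out
	// to 500ms, meaning a cached quantized revision may continue to be served
	// for up to half a second past its quantization window.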

	// Start a goroutine for garbage collection.
	if datastore.gcInterval > 0*time.Minute && config.gcEnabled {
		datastore.gcGroup, datastore.gcCtx = errgroup.WithContext(datastore.gcCtx)
		datastore.gcGroup.Go(func() error {
			return common.StartGarbageCollector(
				datastore.gcCtx,
				datastore,
				datastore.gcInterval,
				datastore.gcWindow,
				datastore.gcTimeout,
			)
		})
	} else {
		log.Warn().Msg("datastore background garbage collection disabled")
	}

	return datastore, nil
}

type pgDatastore struct {
	*revisions.CachedOptimizedRevisions

	dburl                   string
	readPool, writePool     pgxcommon.ConnPooler
	watchBufferLength       uint16
	watchBufferWriteTimeout time.Duration
	optimizedRevisionQuery  string
	validTransactionQuery   string
	gcWindow                time.Duration
	gcInterval              time.Duration
	gcTimeout               time.Duration
	analyzeBeforeStatistics bool
	readTxOptions           pgx.TxOptions
	maxRetries              uint8
	watchEnabled            bool

	credentialsProvider datastore.CredentialsProvider

	gcGroup  *errgroup.Group
	gcCtx    context.Context
	cancelGc context.CancelFunc
	gcHasRun atomic.Bool
}

func (pgd *pgDatastore) SnapshotReader(revRaw datastore.Revision) datastore.Reader {
	rev := revRaw.(postgresRevision)

	queryFuncs := pgxcommon.QuerierFuncsFor(pgd.readPool)
	executor := common.QueryExecutor{
		Executor: pgxcommon.NewPGXExecutor(queryFuncs),
	}

	return &pgReader{
		queryFuncs,
		executor,
		buildLivingObjectFilterForRevision(rev),
	}
}
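
// ReadWriteTx (below) runs the supplied function inside a SERIALIZABLE
// transaction, retrying up to maxRetries times on retryable errors. Because
// the function may run more than once, it must be safe to re-execute. A
// sketch of a caller, assuming the standard datastore.TxUserFunc signature:
//
//	rev, err := ds.ReadWriteTx(ctx, func(ctx context.Context, rwt datastore.ReadWriteTransaction) error {
//		// reads and writes performed via rwt are committed atomically
//		return nil
//	})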

// ReadWriteTx starts a read/write transaction, which will be committed if no error is
// returned and rolled back if an error is returned.
func (pgd *pgDatastore) ReadWriteTx(
	ctx context.Context,
	fn datastore.TxUserFunc,
	opts ...options.RWTOptionsOption,
) (datastore.Revision, error) {
	config := options.NewRWTOptionsWithOptions(opts...)

	var err error
	for i := uint8(0); i <= pgd.maxRetries; i++ {
		var newXID xid8
		var newSnapshot pgSnapshot
		err = wrapError(pgx.BeginTxFunc(ctx, pgd.writePool, pgx.TxOptions{IsoLevel: pgx.Serializable}, func(tx pgx.Tx) error {
			var err error
			newXID, newSnapshot, err = createNewTransaction(ctx, tx)
			if err != nil {
				return err
			}

			queryFuncs := pgxcommon.QuerierFuncsFor(pgd.readPool)
			executor := common.QueryExecutor{
				Executor: pgxcommon.NewPGXExecutor(queryFuncs),
			}

			rwt := &pgReadWriteTXN{
				&pgReader{
					queryFuncs,
					executor,
					currentlyLivingObjects,
				},
				tx,
				newXID,
			}

			return fn(ctx, rwt)
		}))
		if err != nil {
			if !config.DisableRetries && errorRetryable(err) {
				pgxcommon.SleepOnErr(ctx, err, i)
				continue
			}

			return datastore.NoRevision, err
		}

		if i > 0 {
			log.Debug().Uint8("retries", i).Msg("transaction succeeded after retry")
		}

		return postgresRevision{newSnapshot.markComplete(newXID.Uint64)}, nil
	}

	if !config.DisableRetries {
		err = fmt.Errorf("max retries exceeded: %w", err)
	}

	return datastore.NoRevision, err
}

const repairTransactionIDsOperation = "transaction-ids"

func (pgd *pgDatastore) Repair(ctx context.Context, operationName string, outputProgress bool) error {
	switch operationName {
	case repairTransactionIDsOperation:
		return pgd.repairTransactionIDs(ctx, outputProgress)

	default:
		return fmt.Errorf("unknown operation")
	}
}

const batchSize = 10000

func (pgd *pgDatastore) repairTransactionIDs(ctx context.Context, outputProgress bool) error {
	conn, err := pgx.Connect(ctx, pgd.dburl)
	if err != nil {
		return err
	}
	defer conn.Close(ctx)

	// Get the current transaction ID.
	currentMaximumID := 0
	if err := conn.QueryRow(ctx, queryCurrentTransactionID).Scan(&currentMaximumID); err != nil {
		if !errors.Is(err, pgx.ErrNoRows) {
			return err
		}
	}

	// Find the maximum transaction ID referenced in the transactions table.
	referencedMaximumID := 0
	if err := conn.QueryRow(ctx, queryLatestXID).Scan(&referencedMaximumID); err != nil {
		if !errors.Is(err, pgx.ErrNoRows) {
			return err
		}
	}
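
	// To advance the counter, the loop below repeatedly begins a transaction,
	// calls pg_current_xact_id() (which forces a transaction ID to be
	// assigned), and rolls back; each iteration therefore consumes exactly
	// one transaction ID, and batching keeps the network round trips low.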

	// The delta is what this needs to fill in.
	log.Ctx(ctx).Info().Int64("current-maximum", int64(currentMaximumID)).Int64("referenced-maximum", int64(referencedMaximumID)).Msg("found transactions")
	counterDelta := referencedMaximumID - currentMaximumID
	if counterDelta < 0 {
		return nil
	}

	var bar *progressbar.ProgressBar
	if isatty.IsTerminal(os.Stderr.Fd()) && outputProgress {
		bar = progressbar.Default(int64(counterDelta), "updating transactions counter")
	}

	for i := 0; i < counterDelta; i++ {
		var batch pgx.Batch

		batchCount := min(batchSize, counterDelta-i)
		for j := 0; j < batchCount; j++ {
			batch.Queue("begin;")
			batch.Queue("select pg_current_xact_id();")
			batch.Queue("rollback;")
		}

		br := conn.SendBatch(ctx, &batch)
		if err := br.Close(); err != nil {
			return err
		}

		i += batchCount - 1
		if bar != nil {
			if err := bar.Add(batchCount); err != nil {
				return err
			}
		}
	}

	if bar != nil {
		if err := bar.Close(); err != nil {
			return err
		}
	}

	log.Ctx(ctx).Info().Msg("completed revisions repair")
	return nil
}

// RepairOperations returns the available repair operations for the datastore.
func (pgd *pgDatastore) RepairOperations() []datastore.RepairOperation {
	return []datastore.RepairOperation{
		{
			Name:        repairTransactionIDsOperation,
			Description: "Brings the Postgres database up to the expected transaction ID (Postgres v15+ only)",
		},
	}
}
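
// wrapError (below) normalizes low-level pgx errors into datastore error
// types; the pgxcommon helpers are assumed to match on the standard
// PostgreSQL SQLSTATEs, unique_violation (23505) for the constraint case and
// serialization_failure (40001) for the serialization case.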

func wrapError(err error) error {
	// If a unique constraint violation is returned, then it's likely that the
	// cause was an existing relationship given as a CREATE.
	if cerr := pgxcommon.ConvertToWriteConstraintError(livingTupleConstraints, err); cerr != nil {
		return cerr
	}

	if pgxcommon.IsSerializationError(err) {
		return common.NewSerializationError(err)
	}

	// hack: pgx asyncClose usually happens after cancellation, but the reason
	// the connection was closed is not propagated; all we see is a failure to
	// perform an operation on a closed connection. This keeps the same error,
	// but wraps it with a cancellation so that:
	// - the pgx logger does not log it
	// - the response is sent back to the client as canceled
	if err != nil && err.Error() == "conn closed" {
		return errors.Join(err, context.Canceled)
	}

	return err
}

func (pgd *pgDatastore) Close() error {
	pgd.cancelGc()

	if pgd.gcGroup != nil {
		err := pgd.gcGroup.Wait()
		log.Warn().Err(err).Msg("completed shutdown of postgres datastore")
	}

	pgd.readPool.Close()
	pgd.writePool.Close()
	return nil
}

func errorRetryable(err error) bool {
	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
		return false
	}

	if pgconn.SafeToRetry(err) {
		return true
	}

	if pgxcommon.IsSerializationError(err) {
		return true
	}

	log.Warn().Err(err).Msg("unable to determine if pgx error is retryable")
	return false
}

func (pgd *pgDatastore) ReadyState(ctx context.Context) (datastore.ReadyState, error) {
	headMigration, err := migrations.DatabaseMigrations.HeadRevision()
	if err != nil {
		return datastore.ReadyState{}, fmt.Errorf("invalid head migration found for postgres: %w", err)
	}

	pgDriver, err := migrations.NewAlembicPostgresDriver(ctx, pgd.dburl, pgd.credentialsProvider)
	if err != nil {
		return datastore.ReadyState{}, err
	}
	defer pgDriver.Close(ctx)

	version, err := pgDriver.Version(ctx)
	if err != nil {
		return datastore.ReadyState{}, err
	}

	if version == headMigration {
		// Ensure a datastore ID is present. This ensures the tables have not been truncated.
		uniqueID, err := pgd.datastoreUniqueID(ctx)
		if err != nil {
			return datastore.ReadyState{}, fmt.Errorf("database validation failed: %w; if you have previously run `TRUNCATE`, this database is no longer valid and must be remigrated. See: https://spicedb.dev/d/truncate-unsupported", err)
		}

		log.Trace().Str("unique_id", uniqueID).Msg("postgres datastore unique ID")
		return datastore.ReadyState{IsReady: true}, nil
	}

	return datastore.ReadyState{
		Message: fmt.Sprintf(
			"datastore is not migrated: currently at revision `%s`, but requires `%s`. Please run `spicedb migrate`. If you have previously run `TRUNCATE`, this database is no longer valid and must be remigrated. See: https://spicedb.dev/d/truncate-unsupported",
			version,
			headMigration,
		),
		IsReady: false,
	}, nil
}

func (pgd *pgDatastore) Features(_ context.Context) (*datastore.Features, error) {
	return &datastore.Features{Watch: datastore.Feature{Enabled: pgd.watchEnabled}}, nil
}

func buildLivingObjectFilterForRevision(revision postgresRevision) queryFilterer {
	createdBeforeTXN := sq.Expr(fmt.Sprintf(
		snapshotAlive,
		colCreatedXid,
	), revision.snapshot, true)

	deletedAfterTXN := sq.Expr(fmt.Sprintf(
		snapshotAlive,
		colDeletedXid,
	), revision.snapshot, false)

	return func(original sq.SelectBuilder) sq.SelectBuilder {
		return original.Where(createdBeforeTXN).Where(deletedAfterTXN)
	}
}

func currentlyLivingObjects(original sq.SelectBuilder) sq.SelectBuilder {
	return original.Where(sq.Eq{colDeletedXid: liveDeletedTxnID})
}
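
// A row that has never been deleted carries deleted_xid = liveDeletedTxnID
// (the int64 maximum), so the currently-living filter above is a simple
// equality check rather than a snapshot-visibility call; presumably the
// uq_relation_tuple_living_xid constraint listed in livingTupleConstraints
// keys on the same sentinel.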

// defaultCustomPlan parses a Postgres URI and determines if a plan_cache_mode
// has been specified. If not, it defaults to "force_custom_plan".
// This works around a bug impacting performance documented here:
// https://spicedb.dev/d/force-custom-plan.
func defaultCustomPlan(poolConfig *pgxpool.Config) (*pgxpool.Config, error) {
	if existing, ok := poolConfig.ConnConfig.Config.RuntimeParams["plan_cache_mode"]; ok {
		log.Info().
			Str("plan_cache_mode", existing).
			Msg("found plan_cache_mode in DB URI; leaving as-is")
		return poolConfig, nil
	}

	poolConfig.ConnConfig.Config.RuntimeParams["plan_cache_mode"] = "force_custom_plan"
	log.Warn().
		Str("details-url", "https://spicedb.dev/d/force-custom-plan").
		Str("plan_cache_mode", "force_custom_plan").
		Msg("defaulting value in Postgres DB URI")

	return poolConfig, nil
}

var _ datastore.Datastore = &pgDatastore{}
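
// A minimal construction sketch (the connection string is illustrative):
//
//	ds, err := NewPostgresDatastore(ctx, "postgres://spicedb:password@localhost:5432/spicedb?sslmode=disable")
//	if err != nil {
//		log.Fatal().Err(err).Msg("unable to initialize datastore")
//	}
//	defer ds.Close()
//
// Because pgx passes unrecognized URI query parameters through as runtime
// params, plan_cache_mode can also be pinned in the URI itself (e.g.
// ?plan_cache_mode=auto) to opt out of the force_custom_plan default above.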