github.com/weaviate/weaviate@v1.24.6/usecases/cluster/transactions_write.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package cluster
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"sync"
    18  	"time"
    19  
    20  	enterrors "github.com/weaviate/weaviate/entities/errors"
    21  
    22  	"github.com/google/uuid"
    23  	"github.com/pkg/errors"
    24  	"github.com/sirupsen/logrus"
    25  	"golang.org/x/exp/slices"
    26  )
    27  
    28  type TransactionType string
    29  
    30  var (
    31  	ErrConcurrentTransaction = errors.New("concurrent transaction")
    32  	ErrInvalidTransaction    = errors.New("invalid transaction")
    33  	ErrExpiredTransaction    = errors.New("transaction TTL expired")
    34  	ErrNotReady              = errors.New("server is not ready: either starting up or shutting down")
    35  )
    36  
    37  type Remote interface {
    38  	BroadcastTransaction(ctx context.Context, tx *Transaction) error
    39  	BroadcastAbortTransaction(ctx context.Context, tx *Transaction) error
    40  	BroadcastCommitTransaction(ctx context.Context, tx *Transaction) error
    41  }
    42  
    43  type (
    44  	CommitFn   func(ctx context.Context, tx *Transaction) error
    45  	ResponseFn func(ctx context.Context, tx *Transaction) ([]byte, error)
    46  )
    47  
    48  type TxManager struct {
    49  	sync.Mutex
    50  	logger logrus.FieldLogger
    51  
    52  	currentTransaction        *Transaction
    53  	currentTransactionContext context.Context
    54  	clearTransaction          func()
    55  
    56  	// any time we start working on a commit, we need to add to this WaitGroup.
    57  	// It will block shutdwon until the commit has completed to make sure that we
    58  	// can't accidentally shutdown while a tx is committing.
    59  	ongoingCommits sync.WaitGroup
    60  
    61  	// when a shutdown signal has been received, we will no longer accept any new
    62  	// tx's or commits
    63  	acceptIncoming bool
    64  
    65  	// read transactions that need to run at start can still be served, they have
    66  	// no side-effects on the node that accepts them.
    67  	//
    68  	// If we disallowed them completely, then two unready nodes would be in a
    69  	// deadlock as they each require information from the other(s) who can't
    70  	// answerbecause they're not ready.
    71  	allowUnready []TransactionType
    72  
    73  	remote     Remote
    74  	commitFn   CommitFn
    75  	responseFn ResponseFn
    76  
    77  	// keep the ids of expired transactions around. This way, we can return a
    78  	// nicer error message to the user. Instead of just an "invalid transaction"
    79  	// which no longer exists, they will get an explicit error message mentioning
    80  	// the timeout.
    81  	expiredTxIDs []string
    82  
    83  	persistence Persistence
    84  }
    85  
    86  func newDummyCommitResponseFn() func(ctx context.Context, tx *Transaction) error {
    87  	return func(ctx context.Context, tx *Transaction) error {
    88  		return nil
    89  	}
    90  }
    91  
    92  func newDummyResponseFn() func(ctx context.Context, tx *Transaction) ([]byte, error) {
    93  	return func(ctx context.Context, tx *Transaction) ([]byte, error) {
    94  		return nil, nil
    95  	}
    96  }
    97  
    98  func NewTxManager(remote Remote, persistence Persistence,
    99  	logger logrus.FieldLogger,
   100  ) *TxManager {
   101  	return &TxManager{
   102  		remote: remote,
   103  
   104  		// by setting dummy fns that do nothing on default it is possible to run
   105  		// the tx manager with only one set of functions. For example, if the
   106  		// specific Tx is only ever used for broadcasting writes, there is no need
   107  		// to set a responseFn. However, if the fn was nil, we'd panic. Thus a
   108  		// dummy function is a reasonable default - and much cleaner than a
   109  		// nil-check on every call.
   110  		commitFn:    newDummyCommitResponseFn(),
   111  		responseFn:  newDummyResponseFn(),
   112  		logger:      logger,
   113  		persistence: persistence,
   114  
   115  		// ready to serve incoming requests
   116  		acceptIncoming: false,
   117  	}
   118  }
   119  
   120  func (c *TxManager) StartAcceptIncoming() {
   121  	c.Lock()
   122  	defer c.Unlock()
   123  
   124  	c.acceptIncoming = true
   125  }
   126  
   127  func (c *TxManager) SetAllowUnready(types []TransactionType) {
   128  	c.Lock()
   129  	defer c.Unlock()
   130  
   131  	c.allowUnready = types
   132  }
   133  
   134  // HaveDanglingTxs is a way to check if there are any uncommitted transactions
   135  // in the durable storage. This can be used to make decisions about whether a
   136  // failed schema check can be temporarily ignored - with the assumption that
   137  // applying the dangling txs will fix the issue.
   138  func (c *TxManager) HaveDanglingTxs(ctx context.Context,
   139  	allowedTypes []TransactionType,
   140  ) (found bool) {
   141  	c.persistence.IterateAll(context.Background(), func(tx *Transaction) {
   142  		if !slices.Contains(allowedTypes, tx.Type) {
   143  			return
   144  		}
   145  		found = true
   146  	})
   147  
   148  	return
   149  }
   150  
   151  // TryResumeDanglingTxs loops over the existing transactions and applies them.
   152  // It only does so if the transaction type is explicitly listed as allowed.
   153  // This is because - at the time of creating this - we were not sure if all
   154  // transaction commit functions are idempotent. If one would not be, then
   155  // reapplying a tx or tx commit could potentially be dangerous, as we don't
   156  // know if it was already applied prior to the node death.
   157  //
   158  // For example, think of a "add property 'foo'" tx, that does nothing but
   159  // append the property to the schema. If this ran twice, we might now end up
   160  // with two duplicate properties with the name 'foo' which could in turn create
   161  // other problems. To make sure all txs are resumable (which is what we want
   162  // because that's the only way to avoid schema issues), we need to make sure
   163  // that every single tx is idempotent, then add them to the allow list.
   164  //
   165  // One other limitation is that this method currently does nothing to check if
   166  // a tx was really committed or not. In an ideal world, the node would contact
   167  // the other nodes and ask. However, this sipmler implementation does not do
   168  // this check. Instead [HaveDanglingTxs] is used in combination with the schema
   169  // check. If the schema is not out of sync in the first place, no txs will be
   170  // applied. This does not cover all edge cases, but it seems to work for now.
   171  // This should be improved in the future.
   172  func (c *TxManager) TryResumeDanglingTxs(ctx context.Context,
   173  	allowedTypes []TransactionType,
   174  ) (applied bool, err error) {
   175  	c.persistence.IterateAll(context.Background(), func(tx *Transaction) {
   176  		if !slices.Contains(allowedTypes, tx.Type) {
   177  			c.logger.WithField("action", "resume_transaction").
   178  				WithField("transaction_id", tx.ID).
   179  				WithField("transaction_type", tx.Type).
   180  				Warnf("dangling transaction %q of type %q is not known to be resumable - skipping",
   181  					tx.ID, tx.Type)
   182  
   183  			return
   184  		}
   185  		if err = c.commitFn(ctx, tx); err != nil {
   186  			return
   187  		}
   188  
   189  		applied = true
   190  		c.logger.WithField("action", "resume_transaction").
   191  			WithField("transaction_id", tx.ID).
   192  			WithField("transaction_type", tx.Type).
   193  			Infof("successfully resumed dangling transaction %q of type %q",
   194  				tx.ID, tx.Type)
   195  	})
   196  
   197  	return
   198  }
   199  
   200  func (c *TxManager) resetTxExpiry(ttl time.Duration, id string) {
   201  	cancel := func() {}
   202  	ctx := context.Background()
   203  	if ttl == 0 {
   204  		c.currentTransactionContext = context.Background()
   205  	} else {
   206  		ctx, cancel = context.WithTimeout(ctx, ttl)
   207  		c.currentTransactionContext = ctx
   208  	}
   209  
   210  	// to prevent a goroutine leak for the new routine we're spawning here,
   211  	// register a way to terminate it in case the explicit cancel is called
   212  	// before the context's done channel fires.
   213  	clearCancelListener := make(chan struct{}, 1)
   214  
   215  	c.clearTransaction = func() {
   216  		c.currentTransaction = nil
   217  		c.currentTransactionContext = nil
   218  		c.clearTransaction = func() {}
   219  
   220  		clearCancelListener <- struct{}{}
   221  		close(clearCancelListener)
   222  	}
   223  
   224  	f := func() {
   225  		ctxDone := ctx.Done()
   226  		select {
   227  		case <-clearCancelListener:
   228  			cancel()
   229  			return
   230  		case <-ctxDone:
   231  			c.Lock()
   232  			defer c.Unlock()
   233  			c.expiredTxIDs = append(c.expiredTxIDs, id)
   234  
   235  			if c.currentTransaction == nil {
   236  				// tx is already cleaned up, for example from a successful commit. Nothing to do for us
   237  				return
   238  			}
   239  
   240  			if c.currentTransaction.ID != id {
   241  				// tx was already cleaned up, then a new tx was started. Any action from
   242  				// us would be destructive, as we'd accidentally destroy a perfectly valid
   243  				// tx
   244  				return
   245  			}
   246  
   247  			c.clearTransaction()
   248  		}
   249  	}
   250  	enterrors.GoWrapper(f, c.logger)
   251  }
   252  
   253  // expired is a helper to return a more meaningful error message to the user.
   254  // Instead of just telling the user that an ID does not exist, this tracks that
   255  // it once existed, but has been cleared because it expired.
   256  //
   257  // This method is not thread-safe as the assumption is that it is called from a
   258  // thread-safe environment where a lock would already be held
   259  func (c *TxManager) expired(id string) bool {
   260  	for _, expired := range c.expiredTxIDs {
   261  		if expired == id {
   262  			return true
   263  		}
   264  	}
   265  
   266  	return false
   267  }
   268  
   269  // SetCommitFn sets a function that is used in Write Transactions, you can
   270  // read from the transaction payload and use that state to alter your local
   271  // state
   272  func (c *TxManager) SetCommitFn(fn CommitFn) {
   273  	c.commitFn = fn
   274  }
   275  
   276  // SetResponseFn sets a function that is used in Read Transactions. The
   277  // function sets the local state (by writing it into the Tx Payload). It can
   278  // then be sent to other nodes. Consensus is not part of the ResponseFn. The
   279  // coordinator - who initiated the Tx - is responsible for coming up with
   280  // consensus. Deciding on Consensus requires insights into business logic, as
   281  // from the TX's perspective payloads are opaque.
   282  func (c *TxManager) SetResponseFn(fn ResponseFn) {
   283  	c.responseFn = fn
   284  }
   285  
   286  // Begin a Transaction with the specified type and payload. Transactions expire
   287  // after the specified TTL. For a transaction that does not ever expire, pass
   288  // in a ttl of 0. When choosing TTLs keep in mind that clocks might be slightly
   289  // skewed in the cluster, therefore set your TTL for desiredTTL +
   290  // toleratedClockSkew
   291  //
   292  // Regular transactions cannot be opened if the cluster is not considered
   293  // healthy.
   294  func (c *TxManager) BeginTransaction(ctx context.Context, trType TransactionType,
   295  	payload interface{}, ttl time.Duration,
   296  ) (*Transaction, error) {
   297  	return c.beginTransaction(ctx, trType, payload, ttl, false)
   298  }
   299  
   300  // Begin a Transaction that does not require the whole cluster to be healthy.
   301  // This can be used for example in bootstrapping situations when not all nodes
   302  // are present yet, or in disaster recovery situations when a node needs to run
   303  // a transaction in order to re-join a cluster.
   304  func (c *TxManager) BeginTransactionTolerateNodeFailures(ctx context.Context, trType TransactionType,
   305  	payload interface{}, ttl time.Duration,
   306  ) (*Transaction, error) {
   307  	return c.beginTransaction(ctx, trType, payload, ttl, true)
   308  }
   309  
   310  func (c *TxManager) beginTransaction(ctx context.Context, trType TransactionType,
   311  	payload interface{}, ttl time.Duration, tolerateNodeFailures bool,
   312  ) (*Transaction, error) {
   313  	c.Lock()
   314  
   315  	if c.currentTransaction != nil {
   316  		c.Unlock()
   317  		return nil, ErrConcurrentTransaction
   318  	}
   319  
   320  	tx := &Transaction{
   321  		Type:                 trType,
   322  		ID:                   uuid.New().String(),
   323  		Payload:              payload,
   324  		TolerateNodeFailures: tolerateNodeFailures,
   325  	}
   326  	if ttl > 0 {
   327  		tx.Deadline = time.Now().Add(ttl)
   328  	} else {
   329  		// UnixTime == 0 represents unlimited
   330  		tx.Deadline = time.UnixMilli(0)
   331  	}
   332  	c.currentTransaction = tx
   333  	c.Unlock()
   334  
   335  	c.resetTxExpiry(ttl, c.currentTransaction.ID)
   336  
   337  	if err := c.remote.BroadcastTransaction(ctx, tx); err != nil {
   338  		// we could not open the transaction on every node, therefore we need to
   339  		// abort it everywhere.
   340  
   341  		if err := c.remote.BroadcastAbortTransaction(ctx, tx); err != nil {
   342  			c.logger.WithFields(logrus.Fields{
   343  				"action": "broadcast_abort_transaction",
   344  				// before https://github.com/weaviate/weaviate/issues/2625 the next
   345  				// line would read
   346  				//
   347  				// "id": c.currentTransaction.ID
   348  				//
   349  				// which had the potential for races. The tx itself is immutable and
   350  				// therefore always thread-safe. However, the association between the tx
   351  				// manager and the current tx is mutable, therefore the
   352  				// c.currentTransaction pointer could be nil (nil pointer panic) or
   353  				// point to another tx (incorrect log).
   354  				"id": tx.ID,
   355  			}).WithError(err).Errorf("broadcast tx abort failed")
   356  		}
   357  
   358  		c.Lock()
   359  		c.clearTransaction()
   360  		c.Unlock()
   361  
   362  		return nil, errors.Wrap(err, "broadcast open transaction")
   363  	}
   364  
   365  	c.Lock()
   366  	defer c.Unlock()
   367  	return c.currentTransaction, nil
   368  }
   369  
   370  func (c *TxManager) CommitWriteTransaction(ctx context.Context,
   371  	tx *Transaction,
   372  ) error {
   373  	c.Lock()
   374  
   375  	if !c.acceptIncoming {
   376  		c.Unlock()
   377  		return ErrNotReady
   378  	}
   379  
   380  	if c.currentTransaction == nil || c.currentTransaction.ID != tx.ID {
   381  		expired := c.expired(tx.ID)
   382  		c.Unlock()
   383  		if expired {
   384  			return ErrExpiredTransaction
   385  		}
   386  		return ErrInvalidTransaction
   387  	}
   388  
   389  	c.Unlock()
   390  
   391  	// now that we know we are dealing with a valid transaction: no  matter the
   392  	// outcome, after this call, we should not have a local transaction anymore
   393  	defer func() {
   394  		c.Lock()
   395  		c.clearTransaction()
   396  		c.Unlock()
   397  	}()
   398  
   399  	if err := c.remote.BroadcastCommitTransaction(ctx, tx); err != nil {
   400  		// the broadcast failed, but we can't do anything about it. If we would
   401  		// broadcast an "abort" now (as a previous version did) we'd likely run
   402  		// into an inconsistency down the line. Network requests have variable
   403  		// time, so there's a chance some nodes would see the abort before the
   404  		// commit and vice-versa. Given enough nodes, we would end up with an
   405  		// inconsistent state.
   406  		//
   407  		// A failed commit means the node that didn't receive the commit needs to
   408  		// figure out itself how to get back to the correct state (e.g. by
   409  		// recovering from a persisted tx), don't jeopardize all the other nodes as
   410  		// a result!
   411  		return errors.Wrap(err, "broadcast commit transaction")
   412  	}
   413  
   414  	return nil
   415  }
   416  
   417  func (c *TxManager) IncomingBeginTransaction(ctx context.Context,
   418  	tx *Transaction,
   419  ) ([]byte, error) {
   420  	c.Lock()
   421  	defer c.Unlock()
   422  
   423  	if !c.acceptIncoming && !slices.Contains(c.allowUnready, tx.Type) {
   424  		return nil, ErrNotReady
   425  	}
   426  
   427  	if c.currentTransaction != nil && c.currentTransaction.ID != tx.ID {
   428  		return nil, ErrConcurrentTransaction
   429  	}
   430  
   431  	if err := c.persistence.StoreTx(ctx, tx); err != nil {
   432  		return nil, fmt.Errorf("make tx durable: %w", err)
   433  	}
   434  
   435  	c.currentTransaction = tx
   436  	data, err := c.responseFn(ctx, tx)
   437  	if err != nil {
   438  		return nil, err
   439  	}
   440  	var ttl time.Duration
   441  	if tx.Deadline.UnixMilli() != 0 {
   442  		ttl = time.Until(tx.Deadline)
   443  	}
   444  	c.resetTxExpiry(ttl, tx.ID)
   445  
   446  	return data, nil
   447  }
   448  
   449  func (c *TxManager) IncomingAbortTransaction(ctx context.Context,
   450  	tx *Transaction,
   451  ) {
   452  	c.Lock()
   453  	defer c.Unlock()
   454  
   455  	if c.currentTransaction == nil || c.currentTransaction.ID != tx.ID {
   456  		// don't do anything
   457  		return
   458  	}
   459  
   460  	c.currentTransaction = nil
   461  	if err := c.persistence.DeleteTx(ctx, tx.ID); err != nil {
   462  		c.logger.WithError(err).Errorf("abort tx: %s", err)
   463  	}
   464  }
   465  
   466  func (c *TxManager) IncomingCommitTransaction(ctx context.Context,
   467  	tx *Transaction,
   468  ) error {
   469  	c.ongoingCommits.Add(1)
   470  	defer c.ongoingCommits.Done()
   471  
   472  	// requires locking because it accesses c.currentTransaction
   473  	txCopy, err := c.incomingCommitTxValidate(ctx, tx)
   474  	if err != nil {
   475  		return err
   476  	}
   477  
   478  	// cannot use locking because of risk of deadlock, see comment inside method
   479  	if err := c.incomingTxCommitApplyCommitFn(ctx, txCopy); err != nil {
   480  		return err
   481  	}
   482  
   483  	// requires locking because it accesses c.currentTransaction
   484  	return c.incomingTxCommitCleanup(ctx, tx)
   485  }
   486  
   487  func (c *TxManager) incomingCommitTxValidate(
   488  	ctx context.Context, tx *Transaction,
   489  ) (*Transaction, error) {
   490  	c.Lock()
   491  	defer c.Unlock()
   492  
   493  	if !c.acceptIncoming {
   494  		return nil, ErrNotReady
   495  	}
   496  
   497  	if c.currentTransaction == nil || c.currentTransaction.ID != tx.ID {
   498  		expired := c.expired(tx.ID)
   499  		if expired {
   500  			return nil, ErrExpiredTransaction
   501  		}
   502  		return nil, ErrInvalidTransaction
   503  	}
   504  
   505  	txCopy := *c.currentTransaction
   506  	return &txCopy, nil
   507  }
   508  
   509  func (c *TxManager) incomingTxCommitApplyCommitFn(
   510  	ctx context.Context, tx *Transaction,
   511  ) error {
   512  	// Important: Do not hold the c.Lock() while applying the commitFn. The
   513  	// c.Lock() is only meant to make access to c.currentTransaction thread-safe.
   514  	// If we would hold it during apply, there is a risk for a deadlock because
   515  	// apply will likely lock the schema Manager. The schema Manager itself
   516  	// however, might be waiting for the TxManager in case of concurrent
   517  	// requests.
   518  	// See https://github.com/weaviate/weaviate/issues/4312 for steps on how to
   519  	// reproduce
   520  	//
   521  	// use transaction from cache, not passed in for two reason: a. protect
   522  	// against the transaction being manipulated after being created, b. allow
   523  	// an "empty" transaction that only contains the id for less network overhead
   524  	// (we don't need to pass the payload around anymore, after it's successfully
   525  	// opened - every node has a copy of the payload now)
   526  	return c.commitFn(ctx, tx)
   527  }
   528  
   529  func (c *TxManager) incomingTxCommitCleanup(
   530  	ctx context.Context, tx *Transaction,
   531  ) error {
   532  	// TODO: only clean up on success - does this make sense?
   533  	c.Lock()
   534  	defer c.Unlock()
   535  	c.currentTransaction = nil
   536  
   537  	if err := c.persistence.DeleteTx(ctx, tx.ID); err != nil {
   538  		return fmt.Errorf("close tx on disk: %w", err)
   539  	}
   540  
   541  	return nil
   542  }
   543  
   544  func (c *TxManager) Shutdown() {
   545  	c.Lock()
   546  	c.acceptIncoming = false
   547  	c.Unlock()
   548  
   549  	c.ongoingCommits.Wait()
   550  }
   551  
   552  type Transaction struct {
   553  	ID       string
   554  	Type     TransactionType
   555  	Payload  interface{}
   556  	Deadline time.Time
   557  
   558  	// If TolerateNodeFailures is false (the default) a transaction cannot be
   559  	// opened or committed if a node is confirmed dead. If a node is only
   560  	// suspected dead, the TxManager will try, but abort unless all nodes ACK.
   561  	TolerateNodeFailures bool
   562  }
   563  
   564  type Persistence interface {
   565  	StoreTx(ctx context.Context, tx *Transaction) error
   566  	DeleteTx(ctx context.Context, txID string) error
   567  	IterateAll(ctx context.Context, cb func(tx *Transaction)) error
   568  }