github.com/weaviate/weaviate@v1.24.6/usecases/cluster/transactions_write.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package cluster 13 14 import ( 15 "context" 16 "fmt" 17 "sync" 18 "time" 19 20 enterrors "github.com/weaviate/weaviate/entities/errors" 21 22 "github.com/google/uuid" 23 "github.com/pkg/errors" 24 "github.com/sirupsen/logrus" 25 "golang.org/x/exp/slices" 26 ) 27 28 type TransactionType string 29 30 var ( 31 ErrConcurrentTransaction = errors.New("concurrent transaction") 32 ErrInvalidTransaction = errors.New("invalid transaction") 33 ErrExpiredTransaction = errors.New("transaction TTL expired") 34 ErrNotReady = errors.New("server is not ready: either starting up or shutting down") 35 ) 36 37 type Remote interface { 38 BroadcastTransaction(ctx context.Context, tx *Transaction) error 39 BroadcastAbortTransaction(ctx context.Context, tx *Transaction) error 40 BroadcastCommitTransaction(ctx context.Context, tx *Transaction) error 41 } 42 43 type ( 44 CommitFn func(ctx context.Context, tx *Transaction) error 45 ResponseFn func(ctx context.Context, tx *Transaction) ([]byte, error) 46 ) 47 48 type TxManager struct { 49 sync.Mutex 50 logger logrus.FieldLogger 51 52 currentTransaction *Transaction 53 currentTransactionContext context.Context 54 clearTransaction func() 55 56 // any time we start working on a commit, we need to add to this WaitGroup. 57 // It will block shutdwon until the commit has completed to make sure that we 58 // can't accidentally shutdown while a tx is committing. 59 ongoingCommits sync.WaitGroup 60 61 // when a shutdown signal has been received, we will no longer accept any new 62 // tx's or commits 63 acceptIncoming bool 64 65 // read transactions that need to run at start can still be served, they have 66 // no side-effects on the node that accepts them. 67 // 68 // If we disallowed them completely, then two unready nodes would be in a 69 // deadlock as they each require information from the other(s) who can't 70 // answerbecause they're not ready. 71 allowUnready []TransactionType 72 73 remote Remote 74 commitFn CommitFn 75 responseFn ResponseFn 76 77 // keep the ids of expired transactions around. This way, we can return a 78 // nicer error message to the user. Instead of just an "invalid transaction" 79 // which no longer exists, they will get an explicit error message mentioning 80 // the timeout. 81 expiredTxIDs []string 82 83 persistence Persistence 84 } 85 86 func newDummyCommitResponseFn() func(ctx context.Context, tx *Transaction) error { 87 return func(ctx context.Context, tx *Transaction) error { 88 return nil 89 } 90 } 91 92 func newDummyResponseFn() func(ctx context.Context, tx *Transaction) ([]byte, error) { 93 return func(ctx context.Context, tx *Transaction) ([]byte, error) { 94 return nil, nil 95 } 96 } 97 98 func NewTxManager(remote Remote, persistence Persistence, 99 logger logrus.FieldLogger, 100 ) *TxManager { 101 return &TxManager{ 102 remote: remote, 103 104 // by setting dummy fns that do nothing on default it is possible to run 105 // the tx manager with only one set of functions. For example, if the 106 // specific Tx is only ever used for broadcasting writes, there is no need 107 // to set a responseFn. However, if the fn was nil, we'd panic. Thus a 108 // dummy function is a reasonable default - and much cleaner than a 109 // nil-check on every call. 110 commitFn: newDummyCommitResponseFn(), 111 responseFn: newDummyResponseFn(), 112 logger: logger, 113 persistence: persistence, 114 115 // ready to serve incoming requests 116 acceptIncoming: false, 117 } 118 } 119 120 func (c *TxManager) StartAcceptIncoming() { 121 c.Lock() 122 defer c.Unlock() 123 124 c.acceptIncoming = true 125 } 126 127 func (c *TxManager) SetAllowUnready(types []TransactionType) { 128 c.Lock() 129 defer c.Unlock() 130 131 c.allowUnready = types 132 } 133 134 // HaveDanglingTxs is a way to check if there are any uncommitted transactions 135 // in the durable storage. This can be used to make decisions about whether a 136 // failed schema check can be temporarily ignored - with the assumption that 137 // applying the dangling txs will fix the issue. 138 func (c *TxManager) HaveDanglingTxs(ctx context.Context, 139 allowedTypes []TransactionType, 140 ) (found bool) { 141 c.persistence.IterateAll(context.Background(), func(tx *Transaction) { 142 if !slices.Contains(allowedTypes, tx.Type) { 143 return 144 } 145 found = true 146 }) 147 148 return 149 } 150 151 // TryResumeDanglingTxs loops over the existing transactions and applies them. 152 // It only does so if the transaction type is explicitly listed as allowed. 153 // This is because - at the time of creating this - we were not sure if all 154 // transaction commit functions are idempotent. If one would not be, then 155 // reapplying a tx or tx commit could potentially be dangerous, as we don't 156 // know if it was already applied prior to the node death. 157 // 158 // For example, think of a "add property 'foo'" tx, that does nothing but 159 // append the property to the schema. If this ran twice, we might now end up 160 // with two duplicate properties with the name 'foo' which could in turn create 161 // other problems. To make sure all txs are resumable (which is what we want 162 // because that's the only way to avoid schema issues), we need to make sure 163 // that every single tx is idempotent, then add them to the allow list. 164 // 165 // One other limitation is that this method currently does nothing to check if 166 // a tx was really committed or not. In an ideal world, the node would contact 167 // the other nodes and ask. However, this sipmler implementation does not do 168 // this check. Instead [HaveDanglingTxs] is used in combination with the schema 169 // check. If the schema is not out of sync in the first place, no txs will be 170 // applied. This does not cover all edge cases, but it seems to work for now. 171 // This should be improved in the future. 172 func (c *TxManager) TryResumeDanglingTxs(ctx context.Context, 173 allowedTypes []TransactionType, 174 ) (applied bool, err error) { 175 c.persistence.IterateAll(context.Background(), func(tx *Transaction) { 176 if !slices.Contains(allowedTypes, tx.Type) { 177 c.logger.WithField("action", "resume_transaction"). 178 WithField("transaction_id", tx.ID). 179 WithField("transaction_type", tx.Type). 180 Warnf("dangling transaction %q of type %q is not known to be resumable - skipping", 181 tx.ID, tx.Type) 182 183 return 184 } 185 if err = c.commitFn(ctx, tx); err != nil { 186 return 187 } 188 189 applied = true 190 c.logger.WithField("action", "resume_transaction"). 191 WithField("transaction_id", tx.ID). 192 WithField("transaction_type", tx.Type). 193 Infof("successfully resumed dangling transaction %q of type %q", 194 tx.ID, tx.Type) 195 }) 196 197 return 198 } 199 200 func (c *TxManager) resetTxExpiry(ttl time.Duration, id string) { 201 cancel := func() {} 202 ctx := context.Background() 203 if ttl == 0 { 204 c.currentTransactionContext = context.Background() 205 } else { 206 ctx, cancel = context.WithTimeout(ctx, ttl) 207 c.currentTransactionContext = ctx 208 } 209 210 // to prevent a goroutine leak for the new routine we're spawning here, 211 // register a way to terminate it in case the explicit cancel is called 212 // before the context's done channel fires. 213 clearCancelListener := make(chan struct{}, 1) 214 215 c.clearTransaction = func() { 216 c.currentTransaction = nil 217 c.currentTransactionContext = nil 218 c.clearTransaction = func() {} 219 220 clearCancelListener <- struct{}{} 221 close(clearCancelListener) 222 } 223 224 f := func() { 225 ctxDone := ctx.Done() 226 select { 227 case <-clearCancelListener: 228 cancel() 229 return 230 case <-ctxDone: 231 c.Lock() 232 defer c.Unlock() 233 c.expiredTxIDs = append(c.expiredTxIDs, id) 234 235 if c.currentTransaction == nil { 236 // tx is already cleaned up, for example from a successful commit. Nothing to do for us 237 return 238 } 239 240 if c.currentTransaction.ID != id { 241 // tx was already cleaned up, then a new tx was started. Any action from 242 // us would be destructive, as we'd accidentally destroy a perfectly valid 243 // tx 244 return 245 } 246 247 c.clearTransaction() 248 } 249 } 250 enterrors.GoWrapper(f, c.logger) 251 } 252 253 // expired is a helper to return a more meaningful error message to the user. 254 // Instead of just telling the user that an ID does not exist, this tracks that 255 // it once existed, but has been cleared because it expired. 256 // 257 // This method is not thread-safe as the assumption is that it is called from a 258 // thread-safe environment where a lock would already be held 259 func (c *TxManager) expired(id string) bool { 260 for _, expired := range c.expiredTxIDs { 261 if expired == id { 262 return true 263 } 264 } 265 266 return false 267 } 268 269 // SetCommitFn sets a function that is used in Write Transactions, you can 270 // read from the transaction payload and use that state to alter your local 271 // state 272 func (c *TxManager) SetCommitFn(fn CommitFn) { 273 c.commitFn = fn 274 } 275 276 // SetResponseFn sets a function that is used in Read Transactions. The 277 // function sets the local state (by writing it into the Tx Payload). It can 278 // then be sent to other nodes. Consensus is not part of the ResponseFn. The 279 // coordinator - who initiated the Tx - is responsible for coming up with 280 // consensus. Deciding on Consensus requires insights into business logic, as 281 // from the TX's perspective payloads are opaque. 282 func (c *TxManager) SetResponseFn(fn ResponseFn) { 283 c.responseFn = fn 284 } 285 286 // Begin a Transaction with the specified type and payload. Transactions expire 287 // after the specified TTL. For a transaction that does not ever expire, pass 288 // in a ttl of 0. When choosing TTLs keep in mind that clocks might be slightly 289 // skewed in the cluster, therefore set your TTL for desiredTTL + 290 // toleratedClockSkew 291 // 292 // Regular transactions cannot be opened if the cluster is not considered 293 // healthy. 294 func (c *TxManager) BeginTransaction(ctx context.Context, trType TransactionType, 295 payload interface{}, ttl time.Duration, 296 ) (*Transaction, error) { 297 return c.beginTransaction(ctx, trType, payload, ttl, false) 298 } 299 300 // Begin a Transaction that does not require the whole cluster to be healthy. 301 // This can be used for example in bootstrapping situations when not all nodes 302 // are present yet, or in disaster recovery situations when a node needs to run 303 // a transaction in order to re-join a cluster. 304 func (c *TxManager) BeginTransactionTolerateNodeFailures(ctx context.Context, trType TransactionType, 305 payload interface{}, ttl time.Duration, 306 ) (*Transaction, error) { 307 return c.beginTransaction(ctx, trType, payload, ttl, true) 308 } 309 310 func (c *TxManager) beginTransaction(ctx context.Context, trType TransactionType, 311 payload interface{}, ttl time.Duration, tolerateNodeFailures bool, 312 ) (*Transaction, error) { 313 c.Lock() 314 315 if c.currentTransaction != nil { 316 c.Unlock() 317 return nil, ErrConcurrentTransaction 318 } 319 320 tx := &Transaction{ 321 Type: trType, 322 ID: uuid.New().String(), 323 Payload: payload, 324 TolerateNodeFailures: tolerateNodeFailures, 325 } 326 if ttl > 0 { 327 tx.Deadline = time.Now().Add(ttl) 328 } else { 329 // UnixTime == 0 represents unlimited 330 tx.Deadline = time.UnixMilli(0) 331 } 332 c.currentTransaction = tx 333 c.Unlock() 334 335 c.resetTxExpiry(ttl, c.currentTransaction.ID) 336 337 if err := c.remote.BroadcastTransaction(ctx, tx); err != nil { 338 // we could not open the transaction on every node, therefore we need to 339 // abort it everywhere. 340 341 if err := c.remote.BroadcastAbortTransaction(ctx, tx); err != nil { 342 c.logger.WithFields(logrus.Fields{ 343 "action": "broadcast_abort_transaction", 344 // before https://github.com/weaviate/weaviate/issues/2625 the next 345 // line would read 346 // 347 // "id": c.currentTransaction.ID 348 // 349 // which had the potential for races. The tx itself is immutable and 350 // therefore always thread-safe. However, the association between the tx 351 // manager and the current tx is mutable, therefore the 352 // c.currentTransaction pointer could be nil (nil pointer panic) or 353 // point to another tx (incorrect log). 354 "id": tx.ID, 355 }).WithError(err).Errorf("broadcast tx abort failed") 356 } 357 358 c.Lock() 359 c.clearTransaction() 360 c.Unlock() 361 362 return nil, errors.Wrap(err, "broadcast open transaction") 363 } 364 365 c.Lock() 366 defer c.Unlock() 367 return c.currentTransaction, nil 368 } 369 370 func (c *TxManager) CommitWriteTransaction(ctx context.Context, 371 tx *Transaction, 372 ) error { 373 c.Lock() 374 375 if !c.acceptIncoming { 376 c.Unlock() 377 return ErrNotReady 378 } 379 380 if c.currentTransaction == nil || c.currentTransaction.ID != tx.ID { 381 expired := c.expired(tx.ID) 382 c.Unlock() 383 if expired { 384 return ErrExpiredTransaction 385 } 386 return ErrInvalidTransaction 387 } 388 389 c.Unlock() 390 391 // now that we know we are dealing with a valid transaction: no matter the 392 // outcome, after this call, we should not have a local transaction anymore 393 defer func() { 394 c.Lock() 395 c.clearTransaction() 396 c.Unlock() 397 }() 398 399 if err := c.remote.BroadcastCommitTransaction(ctx, tx); err != nil { 400 // the broadcast failed, but we can't do anything about it. If we would 401 // broadcast an "abort" now (as a previous version did) we'd likely run 402 // into an inconsistency down the line. Network requests have variable 403 // time, so there's a chance some nodes would see the abort before the 404 // commit and vice-versa. Given enough nodes, we would end up with an 405 // inconsistent state. 406 // 407 // A failed commit means the node that didn't receive the commit needs to 408 // figure out itself how to get back to the correct state (e.g. by 409 // recovering from a persisted tx), don't jeopardize all the other nodes as 410 // a result! 411 return errors.Wrap(err, "broadcast commit transaction") 412 } 413 414 return nil 415 } 416 417 func (c *TxManager) IncomingBeginTransaction(ctx context.Context, 418 tx *Transaction, 419 ) ([]byte, error) { 420 c.Lock() 421 defer c.Unlock() 422 423 if !c.acceptIncoming && !slices.Contains(c.allowUnready, tx.Type) { 424 return nil, ErrNotReady 425 } 426 427 if c.currentTransaction != nil && c.currentTransaction.ID != tx.ID { 428 return nil, ErrConcurrentTransaction 429 } 430 431 if err := c.persistence.StoreTx(ctx, tx); err != nil { 432 return nil, fmt.Errorf("make tx durable: %w", err) 433 } 434 435 c.currentTransaction = tx 436 data, err := c.responseFn(ctx, tx) 437 if err != nil { 438 return nil, err 439 } 440 var ttl time.Duration 441 if tx.Deadline.UnixMilli() != 0 { 442 ttl = time.Until(tx.Deadline) 443 } 444 c.resetTxExpiry(ttl, tx.ID) 445 446 return data, nil 447 } 448 449 func (c *TxManager) IncomingAbortTransaction(ctx context.Context, 450 tx *Transaction, 451 ) { 452 c.Lock() 453 defer c.Unlock() 454 455 if c.currentTransaction == nil || c.currentTransaction.ID != tx.ID { 456 // don't do anything 457 return 458 } 459 460 c.currentTransaction = nil 461 if err := c.persistence.DeleteTx(ctx, tx.ID); err != nil { 462 c.logger.WithError(err).Errorf("abort tx: %s", err) 463 } 464 } 465 466 func (c *TxManager) IncomingCommitTransaction(ctx context.Context, 467 tx *Transaction, 468 ) error { 469 c.ongoingCommits.Add(1) 470 defer c.ongoingCommits.Done() 471 472 // requires locking because it accesses c.currentTransaction 473 txCopy, err := c.incomingCommitTxValidate(ctx, tx) 474 if err != nil { 475 return err 476 } 477 478 // cannot use locking because of risk of deadlock, see comment inside method 479 if err := c.incomingTxCommitApplyCommitFn(ctx, txCopy); err != nil { 480 return err 481 } 482 483 // requires locking because it accesses c.currentTransaction 484 return c.incomingTxCommitCleanup(ctx, tx) 485 } 486 487 func (c *TxManager) incomingCommitTxValidate( 488 ctx context.Context, tx *Transaction, 489 ) (*Transaction, error) { 490 c.Lock() 491 defer c.Unlock() 492 493 if !c.acceptIncoming { 494 return nil, ErrNotReady 495 } 496 497 if c.currentTransaction == nil || c.currentTransaction.ID != tx.ID { 498 expired := c.expired(tx.ID) 499 if expired { 500 return nil, ErrExpiredTransaction 501 } 502 return nil, ErrInvalidTransaction 503 } 504 505 txCopy := *c.currentTransaction 506 return &txCopy, nil 507 } 508 509 func (c *TxManager) incomingTxCommitApplyCommitFn( 510 ctx context.Context, tx *Transaction, 511 ) error { 512 // Important: Do not hold the c.Lock() while applying the commitFn. The 513 // c.Lock() is only meant to make access to c.currentTransaction thread-safe. 514 // If we would hold it during apply, there is a risk for a deadlock because 515 // apply will likely lock the schema Manager. The schema Manager itself 516 // however, might be waiting for the TxManager in case of concurrent 517 // requests. 518 // See https://github.com/weaviate/weaviate/issues/4312 for steps on how to 519 // reproduce 520 // 521 // use transaction from cache, not passed in for two reason: a. protect 522 // against the transaction being manipulated after being created, b. allow 523 // an "empty" transaction that only contains the id for less network overhead 524 // (we don't need to pass the payload around anymore, after it's successfully 525 // opened - every node has a copy of the payload now) 526 return c.commitFn(ctx, tx) 527 } 528 529 func (c *TxManager) incomingTxCommitCleanup( 530 ctx context.Context, tx *Transaction, 531 ) error { 532 // TODO: only clean up on success - does this make sense? 533 c.Lock() 534 defer c.Unlock() 535 c.currentTransaction = nil 536 537 if err := c.persistence.DeleteTx(ctx, tx.ID); err != nil { 538 return fmt.Errorf("close tx on disk: %w", err) 539 } 540 541 return nil 542 } 543 544 func (c *TxManager) Shutdown() { 545 c.Lock() 546 c.acceptIncoming = false 547 c.Unlock() 548 549 c.ongoingCommits.Wait() 550 } 551 552 type Transaction struct { 553 ID string 554 Type TransactionType 555 Payload interface{} 556 Deadline time.Time 557 558 // If TolerateNodeFailures is false (the default) a transaction cannot be 559 // opened or committed if a node is confirmed dead. If a node is only 560 // suspected dead, the TxManager will try, but abort unless all nodes ACK. 561 TolerateNodeFailures bool 562 } 563 564 type Persistence interface { 565 StoreTx(ctx context.Context, tx *Transaction) error 566 DeleteTx(ctx context.Context, txID string) error 567 IterateAll(ctx context.Context, cb func(tx *Transaction)) error 568 }