github.com/koko1123/flow-go-1@v0.29.6/ledger/complete/compactor.go

package complete

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/rs/zerolog"
	"go.uber.org/atomic"
	"golang.org/x/sync/semaphore"

	"github.com/koko1123/flow-go-1/ledger"
	"github.com/koko1123/flow-go-1/ledger/complete/mtrie/trie"
	realWAL "github.com/koko1123/flow-go-1/ledger/complete/wal"
	"github.com/koko1123/flow-go-1/module/lifecycle"
	"github.com/koko1123/flow-go-1/module/observable"
)

// WALTrieUpdate is a message communicated through a channel between Ledger and Compactor.
type WALTrieUpdate struct {
	Update   *ledger.TrieUpdate // Update data needs to be encoded and saved in WAL.
	ResultCh chan<- error       // ResultCh channel is used to send the WAL update result from Compactor to Ledger.
	TrieCh   <-chan *trie.MTrie // TrieCh channel is used to send the new trie from Ledger to Compactor.
}

// checkpointResult is a message to communicate the checkpoint number and error, if any.
type checkpointResult struct {
	num int
	err error
}

// Compactor is a long-running goroutine responsible for:
// - writing WAL records from trie updates,
// - starting checkpointing asynchronously when enough segments are finalized.
//
// Compactor communicates with Ledger through channels
// to ensure that by the end of any trie update processing,
// the update is written to the WAL and the new trie is pushed to the trie queue.
//
// Compactor stores pointers to tries in ledger state in a fixed-size
// checkpointing queue (FIFO). The checkpointing queue is decoupled from
// the main ledger state to allow separate optimization, looser coupling, etc.
// CAUTION: If the forest LRU Cache is used for the main state,
// then the ledger state and the checkpointing queue may contain different tries.
// This will be resolved automatically after the forest LRU Cache
// (code outside checkpointing) is replaced by something like a FIFO queue.
type Compactor struct {
	checkpointer                         *realWAL.Checkpointer
	wal                                  realWAL.LedgerWAL
	trieQueue                            *realWAL.TrieQueue
	logger                               zerolog.Logger
	lm                                   *lifecycle.LifecycleManager
	observers                            map[observable.Observer]struct{}
	checkpointDistance                   uint
	checkpointsToKeep                    uint
	stopCh                               chan chan struct{}
	trieUpdateCh                         <-chan *WALTrieUpdate
	triggerCheckpointOnNextSegmentFinish *atomic.Bool // to trigger checkpoint manually
}

// NewCompactor creates a new Compactor which writes WAL records and triggers
// checkpointing asynchronously when enough segments are finalized.
// checkpointDistance specifies how many segments need to be finalized to
// trigger checkpointing. However, if a prior checkpointing run is still in
// progress, more segments than specified could be accumulated for the new
// checkpointing run (to reduce memory).
// All returned errors indicate that the Compactor can't be created.
// Since failure to create the Compactor will end up blocking ledger updates,
// the caller should handle all returned errors as unrecoverable.
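//
// A minimal usage sketch (hypothetical caller code; the ledger, WAL, and
// parameter values below are illustrative assumptions, not part of this file):
//
//	compactor, err := NewCompactor(l, diskWAL, logger, 500, 100, 1, atomic.NewBool(false))
//	if err != nil {
//		return fmt.Errorf("cannot create compactor: %w", err) // unrecoverable
//	}
//	<-compactor.Ready()                   // starts the Compactor goroutine
//	defer func() { <-compactor.Done() }() // stops it and shuts down the WAL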
func NewCompactor(
	l *Ledger,
	w realWAL.LedgerWAL,
	logger zerolog.Logger,
	checkpointCapacity uint,
	checkpointDistance uint,
	checkpointsToKeep uint,
	triggerCheckpointOnNextSegmentFinish *atomic.Bool,
) (*Compactor, error) {
	if checkpointDistance < 1 {
		checkpointDistance = 1
	}

	checkpointer, err := w.NewCheckpointer()
	if err != nil {
		return nil, err
	}

	// Get the trieUpdateCh channel used to communicate the trieUpdate, the WAL result,
	// and the new trie created from the update.
	trieUpdateCh := l.TrieUpdateChan()
	if trieUpdateCh == nil {
		return nil, errors.New("failed to get valid trie update channel from ledger")
	}

	// Get all tries from ledger state.
	tries, err := l.Tries()
	if err != nil {
		return nil, err
	}

	// Create trieQueue with initial values from ledger state.
	trieQueue := realWAL.NewTrieQueueWithValues(checkpointCapacity, tries)

	return &Compactor{
		checkpointer:                         checkpointer,
		wal:                                  w,
		trieQueue:                            trieQueue,
		logger:                               logger.With().Str("ledger_mod", "compactor").Logger(),
		stopCh:                               make(chan chan struct{}),
		trieUpdateCh:                         trieUpdateCh,
		observers:                            make(map[observable.Observer]struct{}),
		lm:                                   lifecycle.NewLifecycleManager(),
		checkpointDistance:                   checkpointDistance,
		checkpointsToKeep:                    checkpointsToKeep,
		triggerCheckpointOnNextSegmentFinish: triggerCheckpointOnNextSegmentFinish,
	}, nil
}

// Subscribe subscribes observer to Compactor.
func (c *Compactor) Subscribe(observer observable.Observer) {
	var void struct{}
	c.observers[observer] = void
}

// Unsubscribe unsubscribes observer from Compactor.
func (c *Compactor) Unsubscribe(observer observable.Observer) {
	delete(c.observers, observer)
}

// Ready returns a channel that is closed when the Compactor goroutine has started.
func (c *Compactor) Ready() <-chan struct{} {
	c.lm.OnStart(func() {
		go c.run()
	})
	return c.lm.Started()
}

// Done returns a channel that is closed when the Compactor goroutine has exited.
func (c *Compactor) Done() <-chan struct{} {
	c.lm.OnStop(func() {
		// Signal Compactor goroutine to stop
		doneCh := make(chan struct{})
		c.stopCh <- doneCh

		// Wait for Compactor goroutine to stop
		<-doneCh

		// Shut down the WAL component.
		// Only shut down the WAL after the Compactor has stopped, in case
		// it is still writing to WAL files.
		<-c.wal.Done()

		// Notify observers
		for observer := range c.observers {
			observer.OnComplete()
		}
	})
	return c.lm.Stopped()
}

// run writes WAL records from trie updates and starts checkpointing
// asynchronously when enough segments are finalized.
func (c *Compactor) run() {

	// checkpointSem is used to limit checkpointing to one run at a time.
	// If the previous checkpointing run isn't finished when enough segments
	// are finalized for the next one, retry checkpointing when the next
	// segment is finalized.
	// This avoids having more tries in memory than needed.
	checkpointSem := semaphore.NewWeighted(1)

	checkpointResultCh := make(chan checkpointResult, 1)

	// Get the active segment number (the open segment that new records are written to).
	// activeSegmentNum is updated when a record is written to a new segment.
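	// If the active segment number can't be determined here, activeSegmentNum is
	// set to -1 as a sentinel below; processTrieUpdate recovers from that case by
	// adopting the segment number of the first successfully written record.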
	_, activeSegmentNum, err := c.wal.Segments()
	if err != nil {
		c.logger.Error().Err(err).Msg("compactor failed to get active segment number")
		activeSegmentNum = -1
	}

	lastCheckpointNum, err := c.checkpointer.LatestCheckpoint()
	if err != nil {
		c.logger.Error().Err(err).Msg("compactor failed to get last checkpoint number")
		lastCheckpointNum = -1
	}

	// Compute next checkpoint number.
	// nextCheckpointNum is updated when checkpointing starts, fails to start, or fails.
	// NOTE: next checkpoint number must be >= active segment number.
	nextCheckpointNum := lastCheckpointNum + int(c.checkpointDistance)
	if activeSegmentNum > nextCheckpointNum {
		nextCheckpointNum = activeSegmentNum
	}

	ctx, cancel := context.WithCancel(context.Background())

Loop:
	for {
		select {

		case doneCh := <-c.stopCh:
			defer close(doneCh)
			cancel()
			break Loop

		case checkpointResult := <-checkpointResultCh:
			if checkpointResult.err != nil {
				c.logger.Error().Err(checkpointResult.err).Msg(
					"compactor failed to create or remove checkpoint",
				)
				var createError *createCheckpointError
				if errors.As(checkpointResult.err, &createError) {
					// Retry checkpointing when the active segment is finalized.
					nextCheckpointNum = activeSegmentNum
				}
			}

		case update, ok := <-c.trieUpdateCh:
			if !ok {
				// trieUpdateCh channel is closed.
				// Wait for stop signal from c.stopCh.
				continue
			}

			// Listen for a signal from the admin tool to trigger a checkpoint
			// when the current segment file is finished.
			if c.triggerCheckpointOnNextSegmentFinish.CompareAndSwap(true, false) {
				// Sanity check: normally nextCheckpointNum is a future segment number;
				// a checkpoint is triggered once the active segment reaches
				// nextCheckpointNum and is finalized.
				if nextCheckpointNum >= activeSegmentNum {
					originalNextCheckpointNum := nextCheckpointNum
					nextCheckpointNum = activeSegmentNum
					c.logger.Info().Msgf("compactor will trigger a checkpoint once it finishes writing segment %v, originalNextCheckpointNum: %v", nextCheckpointNum, originalNextCheckpointNum)
				} else {
					c.logger.Warn().Msgf("could not force triggering checkpoint, nextCheckpointNum %v is smaller than activeSegmentNum %v", nextCheckpointNum, activeSegmentNum)
				}
			}

			var checkpointNum int
			var checkpointTries []*trie.MTrie
			activeSegmentNum, checkpointNum, checkpointTries =
				c.processTrieUpdate(update, c.trieQueue, activeSegmentNum, nextCheckpointNum)

			if checkpointTries == nil {
				// Not enough segments for checkpointing (nextCheckpointNum >= activeSegmentNum)
				continue
			}

			// Try to checkpoint
			if checkpointSem.TryAcquire(1) {

				// Compute next checkpoint number
				nextCheckpointNum = checkpointNum + int(c.checkpointDistance)

				go func() {
					defer checkpointSem.Release(1)
					err := c.checkpoint(ctx, checkpointTries, checkpointNum)
					checkpointResultCh <- checkpointResult{checkpointNum, err}
				}()
			} else {
				// Failed to acquire the semaphore because a checkpointing run is in progress.
				// Try again when the active segment is finalized.
				c.logger.Info().Msgf("compactor delayed checkpoint %d because prior checkpointing is ongoing", nextCheckpointNum)
				nextCheckpointNum = activeSegmentNum
			}
		}
	}

	// Drain and process remaining trie updates in the channel.
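	// Draining matters because Ledger may still be blocked sending on trieUpdateCh;
	// each remaining update is written to the WAL and its result is delivered with a
	// non-blocking send so the sender can make progress.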
	c.logger.Info().Msg("starting to drain trie update channel in compactor on shutdown")
	for update := range c.trieUpdateCh {
		_, _, err := c.wal.RecordUpdate(update.Update)
		select {
		case update.ResultCh <- err:
		default:
		}
	}
	c.logger.Info().Msg("finished draining trie update channel in compactor on shutdown")

	// Don't wait for checkpointing to finish because it might take too long.
}

// checkpoint creates a checkpoint of the given tries snapshot,
// deletes prior checkpoint files (if needed), and notifies observers.
// Errors indicate that the checkpoint file can't be created or prior checkpoints can't be removed.
// The caller should handle returned errors by retrying checkpointing when appropriate.
// Since this function only performs checkpointing, the Compactor itself isn't affected by a returned error.
func (c *Compactor) checkpoint(ctx context.Context, tries []*trie.MTrie, checkpointNum int) error {

	err := createCheckpoint(c.checkpointer, c.logger, tries, checkpointNum)
	if err != nil {
		return &createCheckpointError{num: checkpointNum, err: err}
	}

	// Return if context is canceled.
	select {
	case <-ctx.Done():
		return nil
	default:
	}

	err = cleanupCheckpoints(c.checkpointer, int(c.checkpointsToKeep))
	if err != nil {
		return &removeCheckpointError{err: err}
	}

	if checkpointNum > 0 {
		for observer := range c.observers {
			// Don't notify observer if context is canceled.
			// observer.OnComplete() is called when Compactor starts shutting down,
			// which may close the channel that observer.OnNext() uses to send data.
			select {
			case <-ctx.Done():
				return nil
			default:
				observer.OnNext(checkpointNum)
			}
		}
	}

	return nil
}

// createCheckpoint creates a checkpoint with the given checkpointNum and tries.
// Errors indicate that the checkpoint file can't be created.
// The caller should handle returned errors by retrying checkpointing when appropriate.
func createCheckpoint(checkpointer *realWAL.Checkpointer, logger zerolog.Logger, tries []*trie.MTrie, checkpointNum int) error {

	logger.Info().Msgf("serializing checkpoint %d with %v tries", checkpointNum, len(tries))

	startTime := time.Now()

	fileName := realWAL.NumberToFilename(checkpointNum)
	err := realWAL.StoreCheckpointV6SingleThread(tries, checkpointer.Dir(), fileName, &logger)
	if err != nil {
		return fmt.Errorf("error serializing checkpoint (%d): %w", checkpointNum, err)
	}

	duration := time.Since(startTime)
	logger.Info().Float64("total_time_s", duration.Seconds()).Msgf("created checkpoint %d", checkpointNum)

	return nil
}

// cleanupCheckpoints deletes prior checkpoint files if needed.
// Failures are harmless to the caller: at worst, more checkpoint files than
// intended remain on disk.
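//
// For example (illustrative numbers, assuming Checkpoints() returns checkpoint
// numbers in ascending order): with checkpointsToKeep = 3 and checkpoints
// [8, 9, 10, 11, 12] on disk, the two oldest checkpoints (8 and 9) are removed.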
func cleanupCheckpoints(checkpointer *realWAL.Checkpointer, checkpointsToKeep int) error {
	// Don't list checkpoints if we keep them all
	if checkpointsToKeep == 0 {
		return nil
	}
	checkpoints, err := checkpointer.Checkpoints()
	if err != nil {
		return fmt.Errorf("cannot list checkpoints: %w", err)
	}
	if len(checkpoints) > int(checkpointsToKeep) {
		// The if condition above guarantees this slice expression never fails.
		checkpointsToRemove := checkpoints[:len(checkpoints)-int(checkpointsToKeep)]

		for _, checkpoint := range checkpointsToRemove {
			err := checkpointer.RemoveCheckpoint(checkpoint)
			if err != nil {
				return fmt.Errorf("cannot remove checkpoint %d: %w", checkpoint, err)
			}
		}
	}
	return nil
}

// processTrieUpdate writes the trie update to the WAL, updates activeSegmentNum,
// and returns tries for checkpointing if needed.
// It sends the WAL update result, receives the updated trie, and pushes the updated trie to trieQueue.
// When this function returns, the WAL update is in sync with the trieQueue update.
func (c *Compactor) processTrieUpdate(
	update *WALTrieUpdate,
	trieQueue *realWAL.TrieQueue,
	activeSegmentNum int,
	nextCheckpointNum int,
) (
	_activeSegmentNum int,
	checkpointNum int,
	checkpointTries []*trie.MTrie,
) {

	// RecordUpdate returns the segment number the record was written to.
	// The returned segment number (>= 0) can be
	// - the same as the previous segment number (same segment), or
	// - incremented by 1 from the previous segment number (new segment)
	segmentNum, skipped, updateErr := c.wal.RecordUpdate(update.Update)

	// Send result of WAL update
	update.ResultCh <- updateErr

	// This ensures that the updated trie matches the WAL update.
	defer func() {
		// Wait for updated trie
		trie := <-update.TrieCh
		if trie == nil {
			c.logger.Error().Msg("compactor failed to get updated trie")
			return
		}

		trieQueue.Push(trie)
	}()

	if activeSegmentNum == -1 {
		// Recover from failure to get active segment number at initialization.
		return segmentNum, -1, nil
	}

	if updateErr != nil || skipped || segmentNum == activeSegmentNum {
		return activeSegmentNum, -1, nil
	}

	// In the remaining code: segmentNum > activeSegmentNum,
	// i.e. the previously active segment has been finalized.

	// Check that the new segment number is incremented by 1.
	if segmentNum != activeSegmentNum+1 {
		c.logger.Error().Msgf("compactor got unexpected new segment number %d, want %d", segmentNum, activeSegmentNum+1)
	}

	// Update activeSegmentNum
	prevSegmentNum := activeSegmentNum
	activeSegmentNum = segmentNum

	c.logger.Info().Msgf("finished writing segment file %v, trie updates are now writing to segment file %v, checkpoint will trigger when segment %v is finished",
		prevSegmentNum, activeSegmentNum, nextCheckpointNum)

	if nextCheckpointNum > prevSegmentNum {
		// Not enough segments for checkpointing
		return activeSegmentNum, -1, nil
	}

	// In the remaining code: nextCheckpointNum == prevSegmentNum,
	// i.e. enough segments have been created for checkpointing.

	// Get tries from the checkpointing queue.
	// At this point, the checkpointing queue contains tries up to
	// the last update (the last record in the finalized segment).
	// It doesn't include the trie for this update
	// until the updated trie is received and added to trieQueue.
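	// Because the updated trie is pushed to trieQueue only in the deferred function
	// above (after Tries() below is taken), the snapshot covers exactly the records
	// in the finalized segments: this update's WAL record was already written to the
	// new active segment.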
	tries := trieQueue.Tries()

	checkpointNum = nextCheckpointNum

	return activeSegmentNum, checkpointNum, tries
}

// createCheckpointError indicates that a checkpoint couldn't be created.
type createCheckpointError struct {
	num int
	err error
}

func (e *createCheckpointError) Error() string {
	return fmt.Sprintf("cannot create checkpoint %d: %s", e.num, e.err)
}

func (e *createCheckpointError) Unwrap() error { return e.err }

// removeCheckpointError indicates that prior checkpoints couldn't be removed.
type removeCheckpointError struct {
	err error
}

func (e *removeCheckpointError) Error() string {
	return fmt.Sprintf("cannot cleanup checkpoints: %s", e.err)
}

func (e *removeCheckpointError) Unwrap() error { return e.err }
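// A minimal sketch of how these error types are intended to be consumed,
// mirroring the handling in run() (the err variable below is hypothetical):
//
//	var createErr *createCheckpointError
//	if errors.As(err, &createErr) {
//		// Checkpoint file creation failed; retry when the active segment is finalized.
//	}
//	var removeErr *removeCheckpointError
//	if errors.As(err, &removeErr) {
//		// Stale checkpoints weren't removed; extra checkpoint files remain on disk.
//	}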