github.com/vipernet-xyz/tm@v0.34.24/statesync/chunks.go (about)

     1  package statesync
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"strconv"
     9  	"time"
    10  
    11  	tmsync "github.com/vipernet-xyz/tm/libs/sync"
    12  	"github.com/vipernet-xyz/tm/p2p"
    13  )
    14  
    15  // errDone is returned by chunkQueue.Next() when all chunks have been returned.
    16  var errDone = errors.New("chunk queue has completed")
    17  
    18  // chunk contains data for a chunk.
    19  type chunk struct {
    20  	Height uint64
    21  	Format uint32
    22  	Index  uint32
    23  	Chunk  []byte
    24  	Sender p2p.ID
    25  }
    26  
    27  // chunkQueue manages chunks for a state sync process, ordering them if requested. It acts as an
    28  // iterator over all chunks, but callers can request chunks to be retried, optionally after
    29  // refetching.
    30  type chunkQueue struct {
    31  	tmsync.Mutex
    32  	snapshot       *snapshot                  // if this is nil, the queue has been closed
    33  	dir            string                     // temp dir for on-disk chunk storage
    34  	chunkFiles     map[uint32]string          // path to temporary chunk file
    35  	chunkSenders   map[uint32]p2p.ID          // the peer who sent the given chunk
    36  	chunkAllocated map[uint32]bool            // chunks that have been allocated via Allocate()
    37  	chunkReturned  map[uint32]bool            // chunks returned via Next()
    38  	waiters        map[uint32][]chan<- uint32 // signals WaitFor() waiters about chunk arrival
    39  }
    40  
    41  // newChunkQueue creates a new chunk queue for a snapshot, using a temp dir for storage.
    42  // Callers must call Close() when done.
    43  func newChunkQueue(snapshot *snapshot, tempDir string) (*chunkQueue, error) {
    44  	dir, err := os.MkdirTemp(tempDir, "tm-statesync")
    45  	if err != nil {
    46  		return nil, fmt.Errorf("unable to create temp dir for state sync chunks: %w", err)
    47  	}
    48  	if snapshot.Chunks == 0 {
    49  		return nil, errors.New("snapshot has no chunks")
    50  	}
    51  	return &chunkQueue{
    52  		snapshot:       snapshot,
    53  		dir:            dir,
    54  		chunkFiles:     make(map[uint32]string, snapshot.Chunks),
    55  		chunkSenders:   make(map[uint32]p2p.ID, snapshot.Chunks),
    56  		chunkAllocated: make(map[uint32]bool, snapshot.Chunks),
    57  		chunkReturned:  make(map[uint32]bool, snapshot.Chunks),
    58  		waiters:        make(map[uint32][]chan<- uint32),
    59  	}, nil
    60  }
    61  
    62  // Add adds a chunk to the queue. It ignores chunks that already exist, returning false.
    63  func (q *chunkQueue) Add(chunk *chunk) (bool, error) {
    64  	if chunk == nil || chunk.Chunk == nil {
    65  		return false, errors.New("cannot add nil chunk")
    66  	}
    67  	q.Lock()
    68  	defer q.Unlock()
    69  	if q.snapshot == nil {
    70  		return false, nil // queue is closed
    71  	}
    72  	if chunk.Height != q.snapshot.Height {
    73  		return false, fmt.Errorf("invalid chunk height %v, expected %v", chunk.Height, q.snapshot.Height)
    74  	}
    75  	if chunk.Format != q.snapshot.Format {
    76  		return false, fmt.Errorf("invalid chunk format %v, expected %v", chunk.Format, q.snapshot.Format)
    77  	}
    78  	if chunk.Index >= q.snapshot.Chunks {
    79  		return false, fmt.Errorf("received unexpected chunk %v", chunk.Index)
    80  	}
    81  	if q.chunkFiles[chunk.Index] != "" {
    82  		return false, nil
    83  	}
    84  
    85  	path := filepath.Join(q.dir, strconv.FormatUint(uint64(chunk.Index), 10))
    86  	err := os.WriteFile(path, chunk.Chunk, 0o600)
    87  	if err != nil {
    88  		return false, fmt.Errorf("failed to save chunk %v to file %v: %w", chunk.Index, path, err)
    89  	}
    90  	q.chunkFiles[chunk.Index] = path
    91  	q.chunkSenders[chunk.Index] = chunk.Sender
    92  
    93  	// Signal any waiters that the chunk has arrived.
    94  	for _, waiter := range q.waiters[chunk.Index] {
    95  		waiter <- chunk.Index
    96  		close(waiter)
    97  	}
    98  	delete(q.waiters, chunk.Index)
    99  
   100  	return true, nil
   101  }
   102  
   103  // Allocate allocates a chunk to the caller, making it responsible for fetching it. Returns
   104  // errDone once no chunks are left or the queue is closed.
   105  func (q *chunkQueue) Allocate() (uint32, error) {
   106  	q.Lock()
   107  	defer q.Unlock()
   108  	if q.snapshot == nil {
   109  		return 0, errDone
   110  	}
   111  	if uint32(len(q.chunkAllocated)) >= q.snapshot.Chunks {
   112  		return 0, errDone
   113  	}
   114  	for i := uint32(0); i < q.snapshot.Chunks; i++ {
   115  		if !q.chunkAllocated[i] {
   116  			q.chunkAllocated[i] = true
   117  			return i, nil
   118  		}
   119  	}
   120  	return 0, errDone
   121  }
   122  
   123  // Close closes the chunk queue, cleaning up all temporary files.
   124  func (q *chunkQueue) Close() error {
   125  	q.Lock()
   126  	defer q.Unlock()
   127  	if q.snapshot == nil {
   128  		return nil
   129  	}
   130  	for _, waiters := range q.waiters {
   131  		for _, waiter := range waiters {
   132  			close(waiter)
   133  		}
   134  	}
   135  	q.waiters = nil
   136  	q.snapshot = nil
   137  	err := os.RemoveAll(q.dir)
   138  	if err != nil {
   139  		return fmt.Errorf("failed to clean up state sync tempdir %v: %w", q.dir, err)
   140  	}
   141  	return nil
   142  }
   143  
   144  // Discard discards a chunk. It will be removed from the queue, available for allocation, and can
   145  // be added and returned via Next() again. If the chunk is not already in the queue this does
   146  // nothing, to avoid it being allocated to multiple fetchers.
   147  func (q *chunkQueue) Discard(index uint32) error {
   148  	q.Lock()
   149  	defer q.Unlock()
   150  	return q.discard(index)
   151  }
   152  
   153  // discard discards a chunk, scheduling it for refetching. The caller must hold the mutex lock.
   154  func (q *chunkQueue) discard(index uint32) error {
   155  	if q.snapshot == nil {
   156  		return nil
   157  	}
   158  	path := q.chunkFiles[index]
   159  	if path == "" {
   160  		return nil
   161  	}
   162  	err := os.Remove(path)
   163  	if err != nil {
   164  		return fmt.Errorf("failed to remove chunk %v: %w", index, err)
   165  	}
   166  	delete(q.chunkFiles, index)
   167  	delete(q.chunkReturned, index)
   168  	delete(q.chunkAllocated, index)
   169  	return nil
   170  }
   171  
   172  // DiscardSender discards all *unreturned* chunks from a given sender. If the caller wants to
   173  // discard already returned chunks, this can be done via Discard().
   174  func (q *chunkQueue) DiscardSender(peerID p2p.ID) error {
   175  	q.Lock()
   176  	defer q.Unlock()
   177  
   178  	for index, sender := range q.chunkSenders {
   179  		if sender == peerID && !q.chunkReturned[index] {
   180  			err := q.discard(index)
   181  			if err != nil {
   182  				return err
   183  			}
   184  			delete(q.chunkSenders, index)
   185  		}
   186  	}
   187  	return nil
   188  }
   189  
   190  // GetSender returns the sender of the chunk with the given index, or empty if not found.
   191  func (q *chunkQueue) GetSender(index uint32) p2p.ID {
   192  	q.Lock()
   193  	defer q.Unlock()
   194  	return q.chunkSenders[index]
   195  }
   196  
   197  // Has checks whether a chunk exists in the queue.
   198  func (q *chunkQueue) Has(index uint32) bool {
   199  	q.Lock()
   200  	defer q.Unlock()
   201  	return q.chunkFiles[index] != ""
   202  }
   203  
   204  // load loads a chunk from disk, or nil if the chunk is not in the queue. The caller must hold the
   205  // mutex lock.
   206  func (q *chunkQueue) load(index uint32) (*chunk, error) {
   207  	path, ok := q.chunkFiles[index]
   208  	if !ok {
   209  		return nil, nil
   210  	}
   211  	body, err := os.ReadFile(path)
   212  	if err != nil {
   213  		return nil, fmt.Errorf("failed to load chunk %v: %w", index, err)
   214  	}
   215  	return &chunk{
   216  		Height: q.snapshot.Height,
   217  		Format: q.snapshot.Format,
   218  		Index:  index,
   219  		Chunk:  body,
   220  		Sender: q.chunkSenders[index],
   221  	}, nil
   222  }
   223  
   224  // Next returns the next chunk from the queue, or errDone if all chunks have been returned. It
   225  // blocks until the chunk is available. Concurrent Next() calls may return the same chunk.
   226  func (q *chunkQueue) Next() (*chunk, error) {
   227  	q.Lock()
   228  	var chunk *chunk
   229  	index, err := q.nextUp()
   230  	if err == nil {
   231  		chunk, err = q.load(index)
   232  		if err == nil {
   233  			q.chunkReturned[index] = true
   234  		}
   235  	}
   236  	q.Unlock()
   237  	if chunk != nil || err != nil {
   238  		return chunk, err
   239  	}
   240  
   241  	select {
   242  	case _, ok := <-q.WaitFor(index):
   243  		if !ok {
   244  			return nil, errDone // queue closed
   245  		}
   246  	case <-time.After(chunkTimeout):
   247  		return nil, errTimeout
   248  	}
   249  
   250  	q.Lock()
   251  	defer q.Unlock()
   252  	chunk, err = q.load(index)
   253  	if err != nil {
   254  		return nil, err
   255  	}
   256  	q.chunkReturned[index] = true
   257  	return chunk, nil
   258  }
   259  
   260  // nextUp returns the next chunk to be returned, or errDone if all chunks have been returned. The
   261  // caller must hold the mutex lock.
   262  func (q *chunkQueue) nextUp() (uint32, error) {
   263  	if q.snapshot == nil {
   264  		return 0, errDone
   265  	}
   266  	for i := uint32(0); i < q.snapshot.Chunks; i++ {
   267  		if !q.chunkReturned[i] {
   268  			return i, nil
   269  		}
   270  	}
   271  	return 0, errDone
   272  }
   273  
   274  // Retry schedules a chunk to be retried, without refetching it.
   275  func (q *chunkQueue) Retry(index uint32) {
   276  	q.Lock()
   277  	defer q.Unlock()
   278  	delete(q.chunkReturned, index)
   279  }
   280  
   281  // RetryAll schedules all chunks to be retried, without refetching them.
   282  func (q *chunkQueue) RetryAll() {
   283  	q.Lock()
   284  	defer q.Unlock()
   285  	q.chunkReturned = make(map[uint32]bool)
   286  }
   287  
   288  // Size returns the total number of chunks for the snapshot and queue, or 0 when closed.
   289  func (q *chunkQueue) Size() uint32 {
   290  	q.Lock()
   291  	defer q.Unlock()
   292  	if q.snapshot == nil {
   293  		return 0
   294  	}
   295  	return q.snapshot.Chunks
   296  }
   297  
   298  // WaitFor returns a channel that receives a chunk index when it arrives in the queue, or
   299  // immediately if it has already arrived. The channel is closed without a value if the queue is
   300  // closed or if the chunk index is not valid.
   301  func (q *chunkQueue) WaitFor(index uint32) <-chan uint32 {
   302  	q.Lock()
   303  	defer q.Unlock()
   304  	ch := make(chan uint32, 1)
   305  	switch {
   306  	case q.snapshot == nil:
   307  		close(ch)
   308  	case index >= q.snapshot.Chunks:
   309  		close(ch)
   310  	case q.chunkFiles[index] != "":
   311  		ch <- index
   312  		close(ch)
   313  	default:
   314  		if q.waiters[index] == nil {
   315  			q.waiters[index] = make([]chan<- uint32, 0)
   316  		}
   317  		q.waiters[index] = append(q.waiters[index], ch)
   318  	}
   319  	return ch
   320  }