github.com/DFWallet/tendermint-cosmos@v0.0.2/statesync/chunks.go (about)

     1  package statesync
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"os"
     8  	"path/filepath"
     9  	"strconv"
    10  	"time"
    11  
    12  	tmsync "github.com/DFWallet/tendermint-cosmos/libs/sync"
    13  	"github.com/DFWallet/tendermint-cosmos/p2p"
    14  )
    15  
// errDone is the sentinel error returned by chunkQueue.Next() when all chunks have been returned
// or the queue has been closed, and by chunkQueue.Allocate() when no chunks are left to allocate
// or the queue has been closed.
var errDone = errors.New("chunk queue has completed")
    18  
// chunk contains data for a chunk, together with the metadata identifying the snapshot it belongs
// to and the peer that supplied it.
type chunk struct {
	Height uint64 // snapshot height; Add() rejects chunks whose height differs from the queue's snapshot
	Format uint32 // snapshot format; Add() rejects chunks whose format differs from the queue's snapshot
	Index  uint32 // zero-based chunk index; must be below the snapshot's chunk count
	Chunk  []byte // chunk contents; Add() rejects a nil slice
	Sender p2p.ID // the peer who sent the chunk
}
    27  
// chunkQueue manages chunks for a state sync process, ordering them if requested. It acts as an
// iterator over all chunks, but callers can request chunks to be retried, optionally after
// refetching. Chunk bodies are kept in files under dir rather than in memory; the maps below only
// track per-index bookkeeping. The embedded mutex guards every field, and exported methods take it
// themselves, while the unexported helpers expect the caller to already hold it.
type chunkQueue struct {
	tmsync.Mutex
	snapshot       *snapshot                  // if this is nil, the queue has been closed
	dir            string                     // temp dir for on-disk chunk storage, removed by Close()
	chunkFiles     map[uint32]string          // path to temporary chunk file, keyed by chunk index
	chunkSenders   map[uint32]p2p.ID          // the peer who sent the given chunk
	chunkAllocated map[uint32]bool            // chunks that have been allocated via Allocate()
	chunkReturned  map[uint32]bool            // chunks returned via Next()
	waiters        map[uint32][]chan<- uint32 // signals WaitFor() waiters about chunk arrival
}
    41  
    42  // newChunkQueue creates a new chunk queue for a snapshot, using a temp dir for storage.
    43  // Callers must call Close() when done.
    44  func newChunkQueue(snapshot *snapshot, tempDir string) (*chunkQueue, error) {
    45  	dir, err := ioutil.TempDir(tempDir, "tm-statesync")
    46  	if err != nil {
    47  		return nil, fmt.Errorf("unable to create temp dir for state sync chunks: %w", err)
    48  	}
    49  	if snapshot.Chunks == 0 {
    50  		return nil, errors.New("snapshot has no chunks")
    51  	}
    52  	return &chunkQueue{
    53  		snapshot:       snapshot,
    54  		dir:            dir,
    55  		chunkFiles:     make(map[uint32]string, snapshot.Chunks),
    56  		chunkSenders:   make(map[uint32]p2p.ID, snapshot.Chunks),
    57  		chunkAllocated: make(map[uint32]bool, snapshot.Chunks),
    58  		chunkReturned:  make(map[uint32]bool, snapshot.Chunks),
    59  		waiters:        make(map[uint32][]chan<- uint32),
    60  	}, nil
    61  }
    62  
    63  // Add adds a chunk to the queue. It ignores chunks that already exist, returning false.
    64  func (q *chunkQueue) Add(chunk *chunk) (bool, error) {
    65  	if chunk == nil || chunk.Chunk == nil {
    66  		return false, errors.New("cannot add nil chunk")
    67  	}
    68  	q.Lock()
    69  	defer q.Unlock()
    70  	if q.snapshot == nil {
    71  		return false, nil // queue is closed
    72  	}
    73  	if chunk.Height != q.snapshot.Height {
    74  		return false, fmt.Errorf("invalid chunk height %v, expected %v", chunk.Height, q.snapshot.Height)
    75  	}
    76  	if chunk.Format != q.snapshot.Format {
    77  		return false, fmt.Errorf("invalid chunk format %v, expected %v", chunk.Format, q.snapshot.Format)
    78  	}
    79  	if chunk.Index >= q.snapshot.Chunks {
    80  		return false, fmt.Errorf("received unexpected chunk %v", chunk.Index)
    81  	}
    82  	if q.chunkFiles[chunk.Index] != "" {
    83  		return false, nil
    84  	}
    85  
    86  	path := filepath.Join(q.dir, strconv.FormatUint(uint64(chunk.Index), 10))
    87  	err := ioutil.WriteFile(path, chunk.Chunk, 0600)
    88  	if err != nil {
    89  		return false, fmt.Errorf("failed to save chunk %v to file %v: %w", chunk.Index, path, err)
    90  	}
    91  	q.chunkFiles[chunk.Index] = path
    92  	q.chunkSenders[chunk.Index] = chunk.Sender
    93  
    94  	// Signal any waiters that the chunk has arrived.
    95  	for _, waiter := range q.waiters[chunk.Index] {
    96  		waiter <- chunk.Index
    97  		close(waiter)
    98  	}
    99  	delete(q.waiters, chunk.Index)
   100  
   101  	return true, nil
   102  }
   103  
   104  // Allocate allocates a chunk to the caller, making it responsible for fetching it. Returns
   105  // errDone once no chunks are left or the queue is closed.
   106  func (q *chunkQueue) Allocate() (uint32, error) {
   107  	q.Lock()
   108  	defer q.Unlock()
   109  	if q.snapshot == nil {
   110  		return 0, errDone
   111  	}
   112  	if uint32(len(q.chunkAllocated)) >= q.snapshot.Chunks {
   113  		return 0, errDone
   114  	}
   115  	for i := uint32(0); i < q.snapshot.Chunks; i++ {
   116  		if !q.chunkAllocated[i] {
   117  			q.chunkAllocated[i] = true
   118  			return i, nil
   119  		}
   120  	}
   121  	return 0, errDone
   122  }
   123  
   124  // Close closes the chunk queue, cleaning up all temporary files.
   125  func (q *chunkQueue) Close() error {
   126  	q.Lock()
   127  	defer q.Unlock()
   128  	if q.snapshot == nil {
   129  		return nil
   130  	}
   131  	for _, waiters := range q.waiters {
   132  		for _, waiter := range waiters {
   133  			close(waiter)
   134  		}
   135  	}
   136  	q.waiters = nil
   137  	q.snapshot = nil
   138  	err := os.RemoveAll(q.dir)
   139  	if err != nil {
   140  		return fmt.Errorf("failed to clean up state sync tempdir %v: %w", q.dir, err)
   141  	}
   142  	return nil
   143  }
   144  
   145  // Discard discards a chunk. It will be removed from the queue, available for allocation, and can
   146  // be added and returned via Next() again. If the chunk is not already in the queue this does
   147  // nothing, to avoid it being allocated to multiple fetchers.
   148  func (q *chunkQueue) Discard(index uint32) error {
   149  	q.Lock()
   150  	defer q.Unlock()
   151  	return q.discard(index)
   152  }
   153  
   154  // discard discards a chunk, scheduling it for refetching. The caller must hold the mutex lock.
   155  func (q *chunkQueue) discard(index uint32) error {
   156  	if q.snapshot == nil {
   157  		return nil
   158  	}
   159  	path := q.chunkFiles[index]
   160  	if path == "" {
   161  		return nil
   162  	}
   163  	err := os.Remove(path)
   164  	if err != nil {
   165  		return fmt.Errorf("failed to remove chunk %v: %w", index, err)
   166  	}
   167  	delete(q.chunkFiles, index)
   168  	delete(q.chunkReturned, index)
   169  	delete(q.chunkAllocated, index)
   170  	return nil
   171  }
   172  
   173  // DiscardSender discards all *unreturned* chunks from a given sender. If the caller wants to
   174  // discard already returned chunks, this can be done via Discard().
   175  func (q *chunkQueue) DiscardSender(peerID p2p.ID) error {
   176  	q.Lock()
   177  	defer q.Unlock()
   178  
   179  	for index, sender := range q.chunkSenders {
   180  		if sender == peerID && !q.chunkReturned[index] {
   181  			err := q.discard(index)
   182  			if err != nil {
   183  				return err
   184  			}
   185  			delete(q.chunkSenders, index)
   186  		}
   187  	}
   188  	return nil
   189  }
   190  
   191  // GetSender returns the sender of the chunk with the given index, or empty if not found.
   192  func (q *chunkQueue) GetSender(index uint32) p2p.ID {
   193  	q.Lock()
   194  	defer q.Unlock()
   195  	return q.chunkSenders[index]
   196  }
   197  
   198  // Has checks whether a chunk exists in the queue.
   199  func (q *chunkQueue) Has(index uint32) bool {
   200  	q.Lock()
   201  	defer q.Unlock()
   202  	return q.chunkFiles[index] != ""
   203  }
   204  
   205  // load loads a chunk from disk, or nil if the chunk is not in the queue. The caller must hold the
   206  // mutex lock.
   207  func (q *chunkQueue) load(index uint32) (*chunk, error) {
   208  	path, ok := q.chunkFiles[index]
   209  	if !ok {
   210  		return nil, nil
   211  	}
   212  	body, err := ioutil.ReadFile(path)
   213  	if err != nil {
   214  		return nil, fmt.Errorf("failed to load chunk %v: %w", index, err)
   215  	}
   216  	return &chunk{
   217  		Height: q.snapshot.Height,
   218  		Format: q.snapshot.Format,
   219  		Index:  index,
   220  		Chunk:  body,
   221  		Sender: q.chunkSenders[index],
   222  	}, nil
   223  }
   224  
   225  // Next returns the next chunk from the queue, or errDone if all chunks have been returned. It
   226  // blocks until the chunk is available. Concurrent Next() calls may return the same chunk.
   227  func (q *chunkQueue) Next() (*chunk, error) {
   228  	q.Lock()
   229  	var chunk *chunk
   230  	index, err := q.nextUp()
   231  	if err == nil {
   232  		chunk, err = q.load(index)
   233  		if err == nil {
   234  			q.chunkReturned[index] = true
   235  		}
   236  	}
   237  	q.Unlock()
   238  	if chunk != nil || err != nil {
   239  		return chunk, err
   240  	}
   241  
   242  	select {
   243  	case _, ok := <-q.WaitFor(index):
   244  		if !ok {
   245  			return nil, errDone // queue closed
   246  		}
   247  	case <-time.After(chunkTimeout):
   248  		return nil, errTimeout
   249  	}
   250  
   251  	q.Lock()
   252  	defer q.Unlock()
   253  	chunk, err = q.load(index)
   254  	if err != nil {
   255  		return nil, err
   256  	}
   257  	q.chunkReturned[index] = true
   258  	return chunk, nil
   259  }
   260  
   261  // nextUp returns the next chunk to be returned, or errDone if all chunks have been returned. The
   262  // caller must hold the mutex lock.
   263  func (q *chunkQueue) nextUp() (uint32, error) {
   264  	if q.snapshot == nil {
   265  		return 0, errDone
   266  	}
   267  	for i := uint32(0); i < q.snapshot.Chunks; i++ {
   268  		if !q.chunkReturned[i] {
   269  			return i, nil
   270  		}
   271  	}
   272  	return 0, errDone
   273  }
   274  
   275  // Retry schedules a chunk to be retried, without refetching it.
   276  func (q *chunkQueue) Retry(index uint32) {
   277  	q.Lock()
   278  	defer q.Unlock()
   279  	delete(q.chunkReturned, index)
   280  }
   281  
   282  // RetryAll schedules all chunks to be retried, without refetching them.
   283  func (q *chunkQueue) RetryAll() {
   284  	q.Lock()
   285  	defer q.Unlock()
   286  	q.chunkReturned = make(map[uint32]bool)
   287  }
   288  
   289  // Size returns the total number of chunks for the snapshot and queue, or 0 when closed.
   290  func (q *chunkQueue) Size() uint32 {
   291  	q.Lock()
   292  	defer q.Unlock()
   293  	if q.snapshot == nil {
   294  		return 0
   295  	}
   296  	return q.snapshot.Chunks
   297  }
   298  
   299  // WaitFor returns a channel that receives a chunk index when it arrives in the queue, or
   300  // immediately if it has already arrived. The channel is closed without a value if the queue is
   301  // closed or if the chunk index is not valid.
   302  func (q *chunkQueue) WaitFor(index uint32) <-chan uint32 {
   303  	q.Lock()
   304  	defer q.Unlock()
   305  	ch := make(chan uint32, 1)
   306  	switch {
   307  	case q.snapshot == nil:
   308  		close(ch)
   309  	case index >= q.snapshot.Chunks:
   310  		close(ch)
   311  	case q.chunkFiles[index] != "":
   312  		ch <- index
   313  		close(ch)
   314  	default:
   315  		if q.waiters[index] == nil {
   316  			q.waiters[index] = make([]chan<- uint32, 0)
   317  		}
   318  		q.waiters[index] = append(q.waiters[index], ch)
   319  	}
   320  	return ch
   321  }