// Source: github.com/DFWallet/tendermint-cosmos@v0.0.2/statesync/chunks.go

package statesync

import (
	"errors"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"time"

	tmsync "github.com/DFWallet/tendermint-cosmos/libs/sync"
	"github.com/DFWallet/tendermint-cosmos/p2p"
)

// errDone is returned by chunkQueue.Next() when all chunks have been returned. It is also
// returned by Allocate() once no chunks are left to allocate, and by both once the queue has been
// closed.
var errDone = errors.New("chunk queue has completed")

// chunk contains data for a chunk.
type chunk struct {
	Height uint64 // must match the snapshot height for the chunk to be accepted by Add()
	Format uint32 // must match the snapshot format for the chunk to be accepted by Add()
	Index  uint32 // position of the chunk within the snapshot; must be below snapshot.Chunks
	Chunk  []byte // raw chunk contents; Add() rejects a nil slice
	Sender p2p.ID // the peer the chunk was received from
}

// chunkQueue manages chunks for a state sync process, ordering them if requested. It acts as an
// iterator over all chunks, but callers can request chunks to be retried, optionally after
// refetching.
//
// Chunk bodies are kept in files below dir rather than in memory; the maps only track
// bookkeeping state per chunk index. All fields are guarded by the embedded mutex.
type chunkQueue struct {
	tmsync.Mutex
	snapshot       *snapshot                  // if this is nil, the queue has been closed
	dir            string                     // temp dir for on-disk chunk storage
	chunkFiles     map[uint32]string          // path to temporary chunk file
	chunkSenders   map[uint32]p2p.ID          // the peer who sent the given chunk
	chunkAllocated map[uint32]bool            // chunks that have been allocated via Allocate()
	chunkReturned  map[uint32]bool            // chunks returned via Next()
	waiters        map[uint32][]chan<- uint32 // signals WaitFor() waiters about chunk arrival
}

// newChunkQueue creates a new chunk queue for a snapshot, using a temp dir for storage.
// Callers must call Close() when done.
44 func newChunkQueue(snapshot *snapshot, tempDir string) (*chunkQueue, error) { 45 dir, err := ioutil.TempDir(tempDir, "tm-statesync") 46 if err != nil { 47 return nil, fmt.Errorf("unable to create temp dir for state sync chunks: %w", err) 48 } 49 if snapshot.Chunks == 0 { 50 return nil, errors.New("snapshot has no chunks") 51 } 52 return &chunkQueue{ 53 snapshot: snapshot, 54 dir: dir, 55 chunkFiles: make(map[uint32]string, snapshot.Chunks), 56 chunkSenders: make(map[uint32]p2p.ID, snapshot.Chunks), 57 chunkAllocated: make(map[uint32]bool, snapshot.Chunks), 58 chunkReturned: make(map[uint32]bool, snapshot.Chunks), 59 waiters: make(map[uint32][]chan<- uint32), 60 }, nil 61 } 62 63 // Add adds a chunk to the queue. It ignores chunks that already exist, returning false. 64 func (q *chunkQueue) Add(chunk *chunk) (bool, error) { 65 if chunk == nil || chunk.Chunk == nil { 66 return false, errors.New("cannot add nil chunk") 67 } 68 q.Lock() 69 defer q.Unlock() 70 if q.snapshot == nil { 71 return false, nil // queue is closed 72 } 73 if chunk.Height != q.snapshot.Height { 74 return false, fmt.Errorf("invalid chunk height %v, expected %v", chunk.Height, q.snapshot.Height) 75 } 76 if chunk.Format != q.snapshot.Format { 77 return false, fmt.Errorf("invalid chunk format %v, expected %v", chunk.Format, q.snapshot.Format) 78 } 79 if chunk.Index >= q.snapshot.Chunks { 80 return false, fmt.Errorf("received unexpected chunk %v", chunk.Index) 81 } 82 if q.chunkFiles[chunk.Index] != "" { 83 return false, nil 84 } 85 86 path := filepath.Join(q.dir, strconv.FormatUint(uint64(chunk.Index), 10)) 87 err := ioutil.WriteFile(path, chunk.Chunk, 0600) 88 if err != nil { 89 return false, fmt.Errorf("failed to save chunk %v to file %v: %w", chunk.Index, path, err) 90 } 91 q.chunkFiles[chunk.Index] = path 92 q.chunkSenders[chunk.Index] = chunk.Sender 93 94 // Signal any waiters that the chunk has arrived. 
95 for _, waiter := range q.waiters[chunk.Index] { 96 waiter <- chunk.Index 97 close(waiter) 98 } 99 delete(q.waiters, chunk.Index) 100 101 return true, nil 102 } 103 104 // Allocate allocates a chunk to the caller, making it responsible for fetching it. Returns 105 // errDone once no chunks are left or the queue is closed. 106 func (q *chunkQueue) Allocate() (uint32, error) { 107 q.Lock() 108 defer q.Unlock() 109 if q.snapshot == nil { 110 return 0, errDone 111 } 112 if uint32(len(q.chunkAllocated)) >= q.snapshot.Chunks { 113 return 0, errDone 114 } 115 for i := uint32(0); i < q.snapshot.Chunks; i++ { 116 if !q.chunkAllocated[i] { 117 q.chunkAllocated[i] = true 118 return i, nil 119 } 120 } 121 return 0, errDone 122 } 123 124 // Close closes the chunk queue, cleaning up all temporary files. 125 func (q *chunkQueue) Close() error { 126 q.Lock() 127 defer q.Unlock() 128 if q.snapshot == nil { 129 return nil 130 } 131 for _, waiters := range q.waiters { 132 for _, waiter := range waiters { 133 close(waiter) 134 } 135 } 136 q.waiters = nil 137 q.snapshot = nil 138 err := os.RemoveAll(q.dir) 139 if err != nil { 140 return fmt.Errorf("failed to clean up state sync tempdir %v: %w", q.dir, err) 141 } 142 return nil 143 } 144 145 // Discard discards a chunk. It will be removed from the queue, available for allocation, and can 146 // be added and returned via Next() again. If the chunk is not already in the queue this does 147 // nothing, to avoid it being allocated to multiple fetchers. 148 func (q *chunkQueue) Discard(index uint32) error { 149 q.Lock() 150 defer q.Unlock() 151 return q.discard(index) 152 } 153 154 // discard discards a chunk, scheduling it for refetching. The caller must hold the mutex lock. 
155 func (q *chunkQueue) discard(index uint32) error { 156 if q.snapshot == nil { 157 return nil 158 } 159 path := q.chunkFiles[index] 160 if path == "" { 161 return nil 162 } 163 err := os.Remove(path) 164 if err != nil { 165 return fmt.Errorf("failed to remove chunk %v: %w", index, err) 166 } 167 delete(q.chunkFiles, index) 168 delete(q.chunkReturned, index) 169 delete(q.chunkAllocated, index) 170 return nil 171 } 172 173 // DiscardSender discards all *unreturned* chunks from a given sender. If the caller wants to 174 // discard already returned chunks, this can be done via Discard(). 175 func (q *chunkQueue) DiscardSender(peerID p2p.ID) error { 176 q.Lock() 177 defer q.Unlock() 178 179 for index, sender := range q.chunkSenders { 180 if sender == peerID && !q.chunkReturned[index] { 181 err := q.discard(index) 182 if err != nil { 183 return err 184 } 185 delete(q.chunkSenders, index) 186 } 187 } 188 return nil 189 } 190 191 // GetSender returns the sender of the chunk with the given index, or empty if not found. 192 func (q *chunkQueue) GetSender(index uint32) p2p.ID { 193 q.Lock() 194 defer q.Unlock() 195 return q.chunkSenders[index] 196 } 197 198 // Has checks whether a chunk exists in the queue. 199 func (q *chunkQueue) Has(index uint32) bool { 200 q.Lock() 201 defer q.Unlock() 202 return q.chunkFiles[index] != "" 203 } 204 205 // load loads a chunk from disk, or nil if the chunk is not in the queue. The caller must hold the 206 // mutex lock. 
207 func (q *chunkQueue) load(index uint32) (*chunk, error) { 208 path, ok := q.chunkFiles[index] 209 if !ok { 210 return nil, nil 211 } 212 body, err := ioutil.ReadFile(path) 213 if err != nil { 214 return nil, fmt.Errorf("failed to load chunk %v: %w", index, err) 215 } 216 return &chunk{ 217 Height: q.snapshot.Height, 218 Format: q.snapshot.Format, 219 Index: index, 220 Chunk: body, 221 Sender: q.chunkSenders[index], 222 }, nil 223 } 224 225 // Next returns the next chunk from the queue, or errDone if all chunks have been returned. It 226 // blocks until the chunk is available. Concurrent Next() calls may return the same chunk. 227 func (q *chunkQueue) Next() (*chunk, error) { 228 q.Lock() 229 var chunk *chunk 230 index, err := q.nextUp() 231 if err == nil { 232 chunk, err = q.load(index) 233 if err == nil { 234 q.chunkReturned[index] = true 235 } 236 } 237 q.Unlock() 238 if chunk != nil || err != nil { 239 return chunk, err 240 } 241 242 select { 243 case _, ok := <-q.WaitFor(index): 244 if !ok { 245 return nil, errDone // queue closed 246 } 247 case <-time.After(chunkTimeout): 248 return nil, errTimeout 249 } 250 251 q.Lock() 252 defer q.Unlock() 253 chunk, err = q.load(index) 254 if err != nil { 255 return nil, err 256 } 257 q.chunkReturned[index] = true 258 return chunk, nil 259 } 260 261 // nextUp returns the next chunk to be returned, or errDone if all chunks have been returned. The 262 // caller must hold the mutex lock. 263 func (q *chunkQueue) nextUp() (uint32, error) { 264 if q.snapshot == nil { 265 return 0, errDone 266 } 267 for i := uint32(0); i < q.snapshot.Chunks; i++ { 268 if !q.chunkReturned[i] { 269 return i, nil 270 } 271 } 272 return 0, errDone 273 } 274 275 // Retry schedules a chunk to be retried, without refetching it. 276 func (q *chunkQueue) Retry(index uint32) { 277 q.Lock() 278 defer q.Unlock() 279 delete(q.chunkReturned, index) 280 } 281 282 // RetryAll schedules all chunks to be retried, without refetching them. 
283 func (q *chunkQueue) RetryAll() { 284 q.Lock() 285 defer q.Unlock() 286 q.chunkReturned = make(map[uint32]bool) 287 } 288 289 // Size returns the total number of chunks for the snapshot and queue, or 0 when closed. 290 func (q *chunkQueue) Size() uint32 { 291 q.Lock() 292 defer q.Unlock() 293 if q.snapshot == nil { 294 return 0 295 } 296 return q.snapshot.Chunks 297 } 298 299 // WaitFor returns a channel that receives a chunk index when it arrives in the queue, or 300 // immediately if it has already arrived. The channel is closed without a value if the queue is 301 // closed or if the chunk index is not valid. 302 func (q *chunkQueue) WaitFor(index uint32) <-chan uint32 { 303 q.Lock() 304 defer q.Unlock() 305 ch := make(chan uint32, 1) 306 switch { 307 case q.snapshot == nil: 308 close(ch) 309 case index >= q.snapshot.Chunks: 310 close(ch) 311 case q.chunkFiles[index] != "": 312 ch <- index 313 close(ch) 314 default: 315 if q.waiters[index] == nil { 316 q.waiters[index] = make([]chan<- uint32, 0) 317 } 318 q.waiters[index] = append(q.waiters[index], ch) 319 } 320 return ch 321 }