github.com/Team-Kujira/tendermint@v0.34.24-indexer/statesync/chunks.go

package statesync

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"time"

	tmsync "github.com/tendermint/tendermint/libs/sync"
	"github.com/tendermint/tendermint/p2p"
)

// errDone is returned by chunkQueue.Next() when all chunks have been returned.
var errDone = errors.New("chunk queue has completed")

// chunk contains data for a chunk.
type chunk struct {
	Height uint64
	Format uint32
	Index  uint32
	Chunk  []byte
	Sender p2p.ID
}

// chunkQueue manages chunks for a state sync process, ordering them if requested. It acts as an
// iterator over all chunks, but callers can request chunks to be retried, optionally after
// refetching.
type chunkQueue struct {
	tmsync.Mutex
	snapshot       *snapshot                  // if this is nil, the queue has been closed
	dir            string                     // temp dir for on-disk chunk storage
	chunkFiles     map[uint32]string          // path to temporary chunk file
	chunkSenders   map[uint32]p2p.ID          // the peer who sent the given chunk
	chunkAllocated map[uint32]bool            // chunks that have been allocated via Allocate()
	chunkReturned  map[uint32]bool            // chunks returned via Next()
	waiters        map[uint32][]chan<- uint32 // signals WaitFor() waiters about chunk arrival
}

// newChunkQueue creates a new chunk queue for a snapshot, using a temp dir for storage.
// Callers must call Close() when done.
func newChunkQueue(snapshot *snapshot, tempDir string) (*chunkQueue, error) {
	dir, err := os.MkdirTemp(tempDir, "tm-statesync")
	if err != nil {
		return nil, fmt.Errorf("unable to create temp dir for state sync chunks: %w", err)
	}
	if snapshot.Chunks == 0 {
		return nil, errors.New("snapshot has no chunks")
	}
	return &chunkQueue{
		snapshot:       snapshot,
		dir:            dir,
		chunkFiles:     make(map[uint32]string, snapshot.Chunks),
		chunkSenders:   make(map[uint32]p2p.ID, snapshot.Chunks),
		chunkAllocated: make(map[uint32]bool, snapshot.Chunks),
		chunkReturned:  make(map[uint32]bool, snapshot.Chunks),
		waiters:        make(map[uint32][]chan<- uint32),
	}, nil
}

// Add adds a chunk to the queue. It ignores chunks that already exist, returning false.
func (q *chunkQueue) Add(chunk *chunk) (bool, error) {
	if chunk == nil || chunk.Chunk == nil {
		return false, errors.New("cannot add nil chunk")
	}
	q.Lock()
	defer q.Unlock()
	if q.snapshot == nil {
		return false, nil // queue is closed
	}
	if chunk.Height != q.snapshot.Height {
		return false, fmt.Errorf("invalid chunk height %v, expected %v", chunk.Height, q.snapshot.Height)
	}
	if chunk.Format != q.snapshot.Format {
		return false, fmt.Errorf("invalid chunk format %v, expected %v", chunk.Format, q.snapshot.Format)
	}
	if chunk.Index >= q.snapshot.Chunks {
		return false, fmt.Errorf("received unexpected chunk %v", chunk.Index)
	}
	if q.chunkFiles[chunk.Index] != "" {
		return false, nil
	}

	path := filepath.Join(q.dir, strconv.FormatUint(uint64(chunk.Index), 10))
	err := os.WriteFile(path, chunk.Chunk, 0o600)
	if err != nil {
		return false, fmt.Errorf("failed to save chunk %v to file %v: %w", chunk.Index, path, err)
	}
	q.chunkFiles[chunk.Index] = path
	q.chunkSenders[chunk.Index] = chunk.Sender

	// Signal any waiters that the chunk has arrived.
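	// Each waiter channel is created with a buffer of 1 in WaitFor(), so this
	// send cannot block while the mutex is held.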
	for _, waiter := range q.waiters[chunk.Index] {
		waiter <- chunk.Index
		close(waiter)
	}
	delete(q.waiters, chunk.Index)

	return true, nil
}

// Allocate allocates a chunk to the caller, making it responsible for fetching it. Returns
// errDone once no chunks are left or the queue is closed.
func (q *chunkQueue) Allocate() (uint32, error) {
	q.Lock()
	defer q.Unlock()
	if q.snapshot == nil {
		return 0, errDone
	}
	if uint32(len(q.chunkAllocated)) >= q.snapshot.Chunks {
		return 0, errDone
	}
	for i := uint32(0); i < q.snapshot.Chunks; i++ {
		if !q.chunkAllocated[i] {
			q.chunkAllocated[i] = true
			return i, nil
		}
	}
	return 0, errDone
}

// Close closes the chunk queue, cleaning up all temporary files.
func (q *chunkQueue) Close() error {
	q.Lock()
	defer q.Unlock()
	if q.snapshot == nil {
		return nil
	}
	for _, waiters := range q.waiters {
		for _, waiter := range waiters {
			close(waiter)
		}
	}
	q.waiters = nil
	q.snapshot = nil
	err := os.RemoveAll(q.dir)
	if err != nil {
		return fmt.Errorf("failed to clean up state sync tempdir %v: %w", q.dir, err)
	}
	return nil
}

// Discard discards a chunk. It will be removed from the queue, available for allocation, and can
// be added and returned via Next() again. If the chunk is not already in the queue this does
// nothing, to avoid it being allocated to multiple fetchers.
func (q *chunkQueue) Discard(index uint32) error {
	q.Lock()
	defer q.Unlock()
	return q.discard(index)
}

// discard discards a chunk, scheduling it for refetching. The caller must hold the mutex lock.
func (q *chunkQueue) discard(index uint32) error {
	if q.snapshot == nil {
		return nil
	}
	path := q.chunkFiles[index]
	if path == "" {
		return nil
	}
	err := os.Remove(path)
	if err != nil {
		return fmt.Errorf("failed to remove chunk %v: %w", index, err)
	}
	delete(q.chunkFiles, index)
	delete(q.chunkReturned, index)
	delete(q.chunkAllocated, index)
	return nil
}

// DiscardSender discards all *unreturned* chunks from a given sender. If the caller wants to
// discard already returned chunks, this can be done via Discard().
func (q *chunkQueue) DiscardSender(peerID p2p.ID) error {
	q.Lock()
	defer q.Unlock()

	for index, sender := range q.chunkSenders {
		if sender == peerID && !q.chunkReturned[index] {
			err := q.discard(index)
			if err != nil {
				return err
			}
			delete(q.chunkSenders, index)
		}
	}
	return nil
}

// GetSender returns the sender of the chunk with the given index, or empty if not found.
func (q *chunkQueue) GetSender(index uint32) p2p.ID {
	q.Lock()
	defer q.Unlock()
	return q.chunkSenders[index]
}

// Has checks whether a chunk exists in the queue.
func (q *chunkQueue) Has(index uint32) bool {
	q.Lock()
	defer q.Unlock()
	return q.chunkFiles[index] != ""
}

// load loads a chunk from disk, or returns nil if the chunk is not in the queue. The caller must
// hold the mutex lock.
func (q *chunkQueue) load(index uint32) (*chunk, error) {
	path, ok := q.chunkFiles[index]
	if !ok {
		return nil, nil
	}
	body, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("failed to load chunk %v: %w", index, err)
	}
	return &chunk{
		Height: q.snapshot.Height,
		Format: q.snapshot.Format,
		Index:  index,
		Chunk:  body,
		Sender: q.chunkSenders[index],
	}, nil
}

// Next returns the next chunk from the queue, or errDone if all chunks have been returned. It
// blocks until the chunk is available. Concurrent Next() calls may return the same chunk.
func (q *chunkQueue) Next() (*chunk, error) {
	q.Lock()
	var chunk *chunk
	index, err := q.nextUp()
	if err == nil {
		chunk, err = q.load(index)
		if err == nil {
			q.chunkReturned[index] = true
		}
	}
	q.Unlock()
	if chunk != nil || err != nil {
		return chunk, err
	}

	select {
	case _, ok := <-q.WaitFor(index):
		if !ok {
			return nil, errDone // queue closed
		}
	case <-time.After(chunkTimeout):
		return nil, errTimeout
	}

	q.Lock()
	defer q.Unlock()
	chunk, err = q.load(index)
	if err != nil {
		return nil, err
	}
	q.chunkReturned[index] = true
	return chunk, nil
}

// nextUp returns the next chunk to be returned, or errDone if all chunks have been returned. The
// caller must hold the mutex lock.
func (q *chunkQueue) nextUp() (uint32, error) {
	if q.snapshot == nil {
		return 0, errDone
	}
	for i := uint32(0); i < q.snapshot.Chunks; i++ {
		if !q.chunkReturned[i] {
			return i, nil
		}
	}
	return 0, errDone
}

// Retry schedules a chunk to be retried, without refetching it.
func (q *chunkQueue) Retry(index uint32) {
	q.Lock()
	defer q.Unlock()
	delete(q.chunkReturned, index)
}

// RetryAll schedules all chunks to be retried, without refetching them.
func (q *chunkQueue) RetryAll() {
	q.Lock()
	defer q.Unlock()
	q.chunkReturned = make(map[uint32]bool)
}

// Size returns the total number of chunks for the snapshot and queue, or 0 when closed.
func (q *chunkQueue) Size() uint32 {
	q.Lock()
	defer q.Unlock()
	if q.snapshot == nil {
		return 0
	}
	return q.snapshot.Chunks
}

// WaitFor returns a channel that receives a chunk index when it arrives in the queue, or
// immediately if it has already arrived. The channel is closed without a value if the queue is
// closed or if the chunk index is not valid.
func (q *chunkQueue) WaitFor(index uint32) <-chan uint32 {
	q.Lock()
	defer q.Unlock()
	ch := make(chan uint32, 1)
	switch {
	case q.snapshot == nil:
		close(ch)
	case index >= q.snapshot.Chunks:
		close(ch)
	case q.chunkFiles[index] != "":
		ch <- index
		close(ch)
	default:
		if q.waiters[index] == nil {
			q.waiters[index] = make([]chan<- uint32, 0)
		}
		q.waiters[index] = append(q.waiters[index], ch)
	}
	return ch
}
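
// The function below is an illustrative sketch of how a caller might drive the
// queue (the actual driver is the state sync syncer, not this file): one
// goroutine allocates chunk indexes and adds the fetched data, while the main
// loop consumes chunks in index order via Next() until errDone. The fetchChunk
// callback is a hypothetical stand-in for requesting a chunk from a peer.
func drainChunkQueue(queue *chunkQueue, fetchChunk func(index uint32) (*chunk, error)) error {
	defer queue.Close()

	// Fetch chunks concurrently as they are allocated.
	go func() {
		for {
			index, err := queue.Allocate()
			if err != nil {
				return // errDone: every chunk has been allocated, or the queue was closed
			}
			c, err := fetchChunk(index)
			if err != nil {
				return // give up; Next() will time out waiting for the missing chunk
			}
			if _, err := queue.Add(c); err != nil {
				return
			}
		}
	}()

	// Consume chunks in index order until the queue reports completion.
	for {
		c, err := queue.Next()
		if err == errDone {
			return nil
		}
		if err != nil {
			return err
		}
		// Apply c here. On failure, Retry(c.Index) re-queues the same data,
		// while Discard(c.Index) drops it so a fresh copy can be fetched.
		_ = c
	}
}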