github.com/aakash4dev/cometbft@v0.38.2/statesync/reactor.go

package statesync

import (
	"context"
	"errors"
	"fmt"
	"sort"
	"time"

	abci "github.com/aakash4dev/cometbft/abci/types"
	"github.com/aakash4dev/cometbft/config"
	cmtsync "github.com/aakash4dev/cometbft/libs/sync"
	"github.com/aakash4dev/cometbft/p2p"
	ssproto "github.com/aakash4dev/cometbft/proto/tendermint/statesync"
	"github.com/aakash4dev/cometbft/proxy"
	sm "github.com/aakash4dev/cometbft/state"
	"github.com/aakash4dev/cometbft/types"
)

const (
	// SnapshotChannel exchanges snapshot metadata.
	SnapshotChannel = byte(0x60)
	// ChunkChannel exchanges chunk contents.
	ChunkChannel = byte(0x61)
	// recentSnapshots is the number of recent snapshots to send and receive per peer.
	recentSnapshots = 10
)

// Reactor handles state sync, both restoring snapshots for the local node and serving snapshots
// for other nodes.
type Reactor struct {
	p2p.BaseReactor

	cfg       config.StateSyncConfig
	conn      proxy.AppConnSnapshot
	connQuery proxy.AppConnQuery
	tempDir   string
	metrics   *Metrics

	// This will only be set when a state sync is in progress. It is used to feed received
	// snapshots and chunks into the sync.
	mtx    cmtsync.RWMutex
	syncer *syncer
}

// NewReactor creates a new state sync reactor.
func NewReactor(
	cfg config.StateSyncConfig,
	conn proxy.AppConnSnapshot,
	connQuery proxy.AppConnQuery,
	metrics *Metrics,
) *Reactor {
	r := &Reactor{
		cfg:       cfg,
		conn:      conn,
		connQuery: connQuery,
		metrics:   metrics,
	}
	r.BaseReactor = *p2p.NewBaseReactor("StateSync", r)

	return r
}

// GetChannels implements p2p.Reactor.
func (r *Reactor) GetChannels() []*p2p.ChannelDescriptor {
	return []*p2p.ChannelDescriptor{
		{
			ID:                  SnapshotChannel,
			Priority:            5,
			SendQueueCapacity:   10,
			RecvMessageCapacity: snapshotMsgSize,
			MessageType:         &ssproto.Message{},
		},
		{
			ID:                  ChunkChannel,
			Priority:            3,
			SendQueueCapacity:   10,
			RecvMessageCapacity: chunkMsgSize,
			MessageType:         &ssproto.Message{},
		},
	}
}

// OnStart implements p2p.Reactor.
func (r *Reactor) OnStart() error {
	return nil
}

// AddPeer implements p2p.Reactor.
func (r *Reactor) AddPeer(peer p2p.Peer) {
	r.mtx.RLock()
	defer r.mtx.RUnlock()
	if r.syncer != nil {
		r.syncer.AddPeer(peer)
	}
}

// RemovePeer implements p2p.Reactor.
func (r *Reactor) RemovePeer(peer p2p.Peer, _ interface{}) {
	r.mtx.RLock()
	defer r.mtx.RUnlock()
	if r.syncer != nil {
		r.syncer.RemovePeer(peer)
	}
}
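
// Illustrative sketch (not part of the original file): constructing the
// reactor and registering it on a p2p switch, roughly as cometbft's node
// package does. The names cfg, proxyApp, ssMetrics, and sw are assumed to
// exist in the caller.
//
//	reactor := statesync.NewReactor(*cfg.StateSync, proxyApp.Snapshot(), proxyApp.Query(), ssMetrics)
//	sw.AddReactor("STATESYNC", reactor)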

// Receive implements p2p.Reactor.
func (r *Reactor) Receive(e p2p.Envelope) {
	if !r.IsRunning() {
		return
	}

	err := validateMsg(e.Message)
	if err != nil {
		r.Logger.Error("Invalid message", "peer", e.Src, "msg", e.Message, "err", err)
		r.Switch.StopPeerForError(e.Src, err)
		return
	}

	switch e.ChannelID {
	case SnapshotChannel:
		switch msg := e.Message.(type) {
		case *ssproto.SnapshotsRequest:
			snapshots, err := r.recentSnapshots(recentSnapshots)
			if err != nil {
				r.Logger.Error("Failed to fetch snapshots", "err", err)
				return
			}
			for _, snapshot := range snapshots {
				r.Logger.Debug("Advertising snapshot", "height", snapshot.Height,
					"format", snapshot.Format, "peer", e.Src.ID())
				e.Src.Send(p2p.Envelope{
					ChannelID: e.ChannelID,
					Message: &ssproto.SnapshotsResponse{
						Height:   snapshot.Height,
						Format:   snapshot.Format,
						Chunks:   snapshot.Chunks,
						Hash:     snapshot.Hash,
						Metadata: snapshot.Metadata,
					},
				})
			}

		case *ssproto.SnapshotsResponse:
			r.mtx.RLock()
			defer r.mtx.RUnlock()
			if r.syncer == nil {
				r.Logger.Debug("Received unexpected snapshot, no state sync in progress")
				return
			}
			r.Logger.Debug("Received snapshot", "height", msg.Height, "format", msg.Format, "peer", e.Src.ID())
			_, err := r.syncer.AddSnapshot(e.Src, &snapshot{
				Height:   msg.Height,
				Format:   msg.Format,
				Chunks:   msg.Chunks,
				Hash:     msg.Hash,
				Metadata: msg.Metadata,
			})
			// TODO: We may want to consider punishing the peer for certain errors
			if err != nil {
				r.Logger.Error("Failed to add snapshot", "height", msg.Height, "format", msg.Format,
					"peer", e.Src.ID(), "err", err)
				return
			}

		default:
			r.Logger.Error(fmt.Sprintf("Received unknown message %T", msg))
		}

	case ChunkChannel:
		switch msg := e.Message.(type) {
		case *ssproto.ChunkRequest:
			r.Logger.Debug("Received chunk request", "height", msg.Height, "format", msg.Format,
				"chunk", msg.Index, "peer", e.Src.ID())
			resp, err := r.conn.LoadSnapshotChunk(context.TODO(), &abci.RequestLoadSnapshotChunk{
				Height: msg.Height,
				Format: msg.Format,
				Chunk:  msg.Index,
			})
			if err != nil {
				r.Logger.Error("Failed to load chunk", "height", msg.Height, "format", msg.Format,
					"chunk", msg.Index, "err", err)
				return
			}
			r.Logger.Debug("Sending chunk", "height", msg.Height, "format", msg.Format,
				"chunk", msg.Index, "peer", e.Src.ID())
			e.Src.Send(p2p.Envelope{
				ChannelID: ChunkChannel,
				Message: &ssproto.ChunkResponse{
					Height:  msg.Height,
					Format:  msg.Format,
					Index:   msg.Index,
					Chunk:   resp.Chunk,
					Missing: resp.Chunk == nil,
				},
			})

		case *ssproto.ChunkResponse:
			r.mtx.RLock()
			defer r.mtx.RUnlock()
			if r.syncer == nil {
				r.Logger.Debug("Received unexpected chunk, no state sync in progress", "peer", e.Src.ID())
				return
			}
			r.Logger.Debug("Received chunk, adding to sync", "height", msg.Height, "format", msg.Format,
				"chunk", msg.Index, "peer", e.Src.ID())
			_, err := r.syncer.AddChunk(&chunk{
				Height: msg.Height,
				Format: msg.Format,
				Index:  msg.Index,
				Chunk:  msg.Chunk,
				Sender: e.Src.ID(),
			})
			if err != nil {
				r.Logger.Error("Failed to add chunk", "height", msg.Height, "format", msg.Format,
					"chunk", msg.Index, "err", err)
				return
			}

		default:
			r.Logger.Error(fmt.Sprintf("Received unknown message %T", msg))
		}

	default:
		r.Logger.Error(fmt.Sprintf("Received message on invalid channel %x", e.ChannelID))
	}
}
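
// Illustrative sketch (not part of the original file): the ChunkRequest
// handler above is only as useful as the application's ABCI snapshot
// implementation behind r.conn. A minimal application-side LoadSnapshotChunk
// might look like the following; the snapshotStore field and its ReadChunk
// method are hypothetical names used for illustration.
//
//	func (app *App) LoadSnapshotChunk(
//		ctx context.Context, req *abci.RequestLoadSnapshotChunk,
//	) (*abci.ResponseLoadSnapshotChunk, error) {
//		chunk, err := app.snapshotStore.ReadChunk(req.Height, req.Format, req.Chunk)
//		if err != nil {
//			// An empty response leaves Chunk nil, which the reactor
//			// reports to the requesting peer as Missing=true.
//			return &abci.ResponseLoadSnapshotChunk{}, nil
//		}
//		return &abci.ResponseLoadSnapshotChunk{Chunk: chunk}, nil
//	}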

// recentSnapshots fetches the n most recent snapshots from the app.
func (r *Reactor) recentSnapshots(n uint32) ([]*snapshot, error) {
	resp, err := r.conn.ListSnapshots(context.TODO(), &abci.RequestListSnapshots{})
	if err != nil {
		return nil, err
	}
	// Sort in descending order by height, breaking ties by preferring higher
	// formats, so the most recent snapshots come first.
	sort.Slice(resp.Snapshots, func(i, j int) bool {
		a := resp.Snapshots[i]
		b := resp.Snapshots[j]
		switch {
		case a.Height > b.Height:
			return true
		case a.Height == b.Height && a.Format > b.Format:
			return true
		default:
			return false
		}
	})
	snapshots := make([]*snapshot, 0, n)
	for i, s := range resp.Snapshots {
		if i >= int(n) {
			break
		}
		snapshots = append(snapshots, &snapshot{
			Height:   s.Height,
			Format:   s.Format,
			Chunks:   s.Chunks,
			Hash:     s.Hash,
			Metadata: s.Metadata,
		})
	}
	return snapshots, nil
}

// Sync runs a state sync, returning the new state and last commit at the snapshot height.
// The caller must store the state and commit in the state database and block store.
func (r *Reactor) Sync(stateProvider StateProvider, discoveryTime time.Duration) (sm.State, *types.Commit, error) {
	r.mtx.Lock()
	if r.syncer != nil {
		r.mtx.Unlock()
		return sm.State{}, nil, errors.New("a state sync is already in progress")
	}
	r.metrics.Syncing.Set(1)
	r.syncer = newSyncer(r.cfg, r.Logger, r.conn, r.connQuery, stateProvider, r.tempDir)
	r.mtx.Unlock()

	hook := func() {
		r.Logger.Debug("Requesting snapshots from known peers")
		// Request snapshots from all currently connected peers
		r.Switch.Broadcast(p2p.Envelope{
			ChannelID: SnapshotChannel,
			Message:   &ssproto.SnapshotsRequest{},
		})
	}

	hook()

	state, commit, err := r.syncer.SyncAny(discoveryTime, hook)

	r.mtx.Lock()
	r.syncer = nil
	r.metrics.Syncing.Set(0)
	r.mtx.Unlock()
	return state, commit, err
}
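
// Illustrative sketch (not part of the original file): how a caller might
// drive Sync and persist its result, per the doc comment above. stateStore,
// blockStore, stateProvider, and the 15-second discovery time are assumptions
// for illustration; cometbft's node package performs a similar bootstrap
// after state sync completes.
//
//	state, commit, err := reactor.Sync(stateProvider, 15*time.Second)
//	if err != nil {
//		return err
//	}
//	if err := stateStore.Bootstrap(state); err != nil {
//		return err
//	}
//	if err := blockStore.SaveSeenCommit(state.LastBlockHeight, commit); err != nil {
//		return err
//	}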