github.com/hechain20/hechain@v0.0.0-20220316014945-b544036ba106/orderer/consensus/etcdraft/storage.go (about) 1 /* 2 Copyright hechain. All Rights Reserved. 3 4 SPDX-License-Identifier: Apache-2.0 5 */ 6 7 package etcdraft 8 9 import ( 10 "fmt" 11 "io" 12 "os" 13 "path/filepath" 14 "sort" 15 "strings" 16 17 "github.com/hechain20/hechain/common/flogging" 18 "github.com/pkg/errors" 19 "go.etcd.io/etcd/etcdserver/api/snap" 20 "go.etcd.io/etcd/pkg/fileutil" 21 "go.etcd.io/etcd/raft" 22 "go.etcd.io/etcd/raft/raftpb" 23 "go.etcd.io/etcd/wal" 24 "go.etcd.io/etcd/wal/walpb" 25 ) 26 27 // MaxSnapshotFiles defines max number of etcd/raft snapshot files to retain 28 // on filesystem. Snapshot files are read from newest to oldest, until first 29 // intact file is found. The more snapshot files we keep around, the more we 30 // mitigate the impact of a corrupted snapshots. This is exported for testing 31 // purpose. This MUST be greater equal than 1. 32 var MaxSnapshotFiles = 4 33 34 // MemoryStorage is currently backed by etcd/raft.MemoryStorage. This interface is 35 // defined to expose dependencies of fsm so that it may be swapped in the 36 // future. TODO(jay) Add other necessary methods to this interface once we need 37 // them in implementation, e.g. ApplySnapshot. 38 type MemoryStorage interface { 39 raft.Storage 40 Append(entries []raftpb.Entry) error 41 SetHardState(st raftpb.HardState) error 42 CreateSnapshot(i uint64, cs *raftpb.ConfState, data []byte) (raftpb.Snapshot, error) 43 Compact(compactIndex uint64) error 44 ApplySnapshot(snap raftpb.Snapshot) error 45 } 46 47 // RaftStorage encapsulates storages needed for etcd/raft data, i.e. memory, wal 48 type RaftStorage struct { 49 SnapshotCatchUpEntries uint64 50 51 walDir string 52 snapDir string 53 54 lg *flogging.FabricLogger 55 56 ram MemoryStorage 57 wal *wal.WAL 58 snap *snap.Snapshotter 59 60 // a queue that keeps track of indices of snapshots on disk 61 snapshotIndex []uint64 62 } 63 64 // CreateStorage attempts to create a storage to persist etcd/raft data. 65 // If data presents in specified disk, they are loaded to reconstruct storage state. 66 func CreateStorage( 67 lg *flogging.FabricLogger, 68 walDir string, 69 snapDir string, 70 ram MemoryStorage, 71 ) (*RaftStorage, error) { 72 sn, err := createSnapshotter(lg, snapDir) 73 if err != nil { 74 return nil, err 75 } 76 77 snapshot, err := sn.Load() 78 if err != nil { 79 if err == snap.ErrNoSnapshot { 80 lg.Debugf("No snapshot found at %s", snapDir) 81 } else { 82 return nil, errors.Errorf("failed to load snapshot: %s", err) 83 } 84 } else { 85 // snapshot found 86 lg.Debugf("Loaded snapshot at Term %d and Index %d, Nodes: %+v", 87 snapshot.Metadata.Term, snapshot.Metadata.Index, snapshot.Metadata.ConfState.Nodes) 88 } 89 90 w, st, ents, err := createOrReadWAL(lg, walDir, snapshot) 91 if err != nil { 92 return nil, errors.Errorf("failed to create or read WAL: %s", err) 93 } 94 95 if snapshot != nil { 96 lg.Debugf("Applying snapshot to raft MemoryStorage") 97 if err := ram.ApplySnapshot(*snapshot); err != nil { 98 return nil, errors.Errorf("Failed to apply snapshot to memory: %s", err) 99 } 100 } 101 102 lg.Debugf("Setting HardState to {Term: %d, Commit: %d}", st.Term, st.Commit) 103 ram.SetHardState(st) // MemoryStorage.SetHardState always returns nil 104 105 lg.Debugf("Appending %d entries to memory storage", len(ents)) 106 ram.Append(ents) // MemoryStorage.Append always return nil 107 108 return &RaftStorage{ 109 lg: lg, 110 ram: ram, 111 wal: w, 112 snap: sn, 113 walDir: walDir, 114 snapDir: snapDir, 115 snapshotIndex: ListSnapshots(lg, snapDir), 116 }, nil 117 } 118 119 // ListSnapshots returns a list of RaftIndex of snapshots stored on disk. 120 // If a file is corrupted, rename the file. 121 func ListSnapshots(logger *flogging.FabricLogger, snapDir string) []uint64 { 122 dir, err := os.Open(snapDir) 123 if err != nil { 124 logger.Errorf("Failed to open snapshot directory %s: %s", snapDir, err) 125 return nil 126 } 127 defer dir.Close() 128 129 filenames, err := dir.Readdirnames(-1) 130 if err != nil { 131 logger.Errorf("Failed to read snapshot files: %s", err) 132 return nil 133 } 134 135 snapfiles := []string{} 136 for i := range filenames { 137 if strings.HasSuffix(filenames[i], ".snap") { 138 snapfiles = append(snapfiles, filenames[i]) 139 } 140 } 141 sort.Strings(snapfiles) 142 143 var snapshots []uint64 144 for _, snapfile := range snapfiles { 145 fpath := filepath.Join(snapDir, snapfile) 146 s, err := snap.Read(logger.Zap(), fpath) 147 if err != nil { 148 logger.Errorf("Snapshot file %s is corrupted: %s", fpath, err) 149 150 broken := fpath + ".broken" 151 if err = os.Rename(fpath, broken); err != nil { 152 logger.Errorf("Failed to rename corrupted snapshot file %s to %s: %s", fpath, broken, err) 153 } else { 154 logger.Debugf("Renaming corrupted snapshot file %s to %s", fpath, broken) 155 } 156 157 continue 158 } 159 160 snapshots = append(snapshots, s.Metadata.Index) 161 } 162 163 return snapshots 164 } 165 166 func createSnapshotter(logger *flogging.FabricLogger, snapDir string) (*snap.Snapshotter, error) { 167 if err := os.MkdirAll(snapDir, os.ModePerm); err != nil { 168 return nil, errors.Errorf("failed to mkdir '%s' for snapshot: %s", snapDir, err) 169 } 170 171 return snap.New(logger.Zap(), snapDir), nil 172 } 173 174 func createOrReadWAL(lg *flogging.FabricLogger, walDir string, snapshot *raftpb.Snapshot) (w *wal.WAL, st raftpb.HardState, ents []raftpb.Entry, err error) { 175 if !wal.Exist(walDir) { 176 lg.Infof("No WAL data found, creating new WAL at path '%s'", walDir) 177 // TODO(jay_guo) add metadata to be persisted with wal once we need it. 178 // use case could be data dump and restore on a new node. 179 w, err := wal.Create(lg.Zap(), walDir, nil) 180 if err == os.ErrExist { 181 lg.Fatalf("programming error, we've just checked that WAL does not exist") 182 } 183 184 if err != nil { 185 return nil, st, nil, errors.Errorf("failed to initialize WAL: %s", err) 186 } 187 188 if err = w.Close(); err != nil { 189 return nil, st, nil, errors.Errorf("failed to close the WAL just created: %s", err) 190 } 191 } else { 192 lg.Infof("Found WAL data at path '%s', replaying it", walDir) 193 } 194 195 walsnap := walpb.Snapshot{} 196 if snapshot != nil { 197 walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term 198 } 199 200 lg.Debugf("Loading WAL at Term %d and Index %d", walsnap.Term, walsnap.Index) 201 202 var repaired bool 203 for { 204 if w, err = wal.Open(lg.Zap(), walDir, walsnap); err != nil { 205 return nil, st, nil, errors.Errorf("failed to open WAL: %s", err) 206 } 207 208 if _, st, ents, err = w.ReadAll(); err != nil { 209 lg.Warnf("Failed to read WAL: %s", err) 210 211 if errc := w.Close(); errc != nil { 212 return nil, st, nil, errors.Errorf("failed to close erroneous WAL: %s", errc) 213 } 214 215 // only repair UnexpectedEOF and only repair once 216 if repaired || err != io.ErrUnexpectedEOF { 217 return nil, st, nil, errors.Errorf("failed to read WAL and cannot repair: %s", err) 218 } 219 220 if !wal.Repair(lg.Zap(), walDir) { 221 return nil, st, nil, errors.Errorf("failed to repair WAL: %s", err) 222 } 223 224 repaired = true 225 // next loop should be able to open WAL and return 226 continue 227 } 228 229 // successfully opened WAL and read all entries, break 230 break 231 } 232 233 return w, st, ents, nil 234 } 235 236 // Snapshot returns the latest snapshot stored in memory 237 func (rs *RaftStorage) Snapshot() raftpb.Snapshot { 238 sn, _ := rs.ram.Snapshot() // Snapshot always returns nil error 239 return sn 240 } 241 242 // Store persists etcd/raft data 243 func (rs *RaftStorage) Store(entries []raftpb.Entry, hardstate raftpb.HardState, snapshot raftpb.Snapshot) error { 244 if err := rs.wal.Save(hardstate, entries); err != nil { 245 return err 246 } 247 248 if !raft.IsEmptySnap(snapshot) { 249 if err := rs.saveSnap(snapshot); err != nil { 250 return err 251 } 252 253 if err := rs.ram.ApplySnapshot(snapshot); err != nil { 254 if err == raft.ErrSnapOutOfDate { 255 rs.lg.Warnf("Attempted to apply out-of-date snapshot at Term %d and Index %d", 256 snapshot.Metadata.Term, snapshot.Metadata.Index) 257 } else { 258 rs.lg.Fatalf("Unexpected programming error: %s", err) 259 } 260 } 261 } 262 263 if err := rs.ram.Append(entries); err != nil { 264 return err 265 } 266 267 return nil 268 } 269 270 func (rs *RaftStorage) saveSnap(snap raftpb.Snapshot) error { 271 rs.lg.Infof("Persisting snapshot (term: %d, index: %d) to WAL and disk", snap.Metadata.Term, snap.Metadata.Index) 272 273 // must save the snapshot index to the WAL before saving the 274 // snapshot to maintain the invariant that we only Open the 275 // wal at previously-saved snapshot indexes. 276 walsnap := walpb.Snapshot{ 277 Index: snap.Metadata.Index, 278 Term: snap.Metadata.Term, 279 } 280 281 if err := rs.wal.SaveSnapshot(walsnap); err != nil { 282 return errors.Errorf("failed to save snapshot to WAL: %s", err) 283 } 284 285 if err := rs.snap.SaveSnap(snap); err != nil { 286 return errors.Errorf("failed to save snapshot to disk: %s", err) 287 } 288 289 rs.lg.Debugf("Releasing lock to wal files prior to %d", snap.Metadata.Index) 290 if err := rs.wal.ReleaseLockTo(snap.Metadata.Index); err != nil { 291 return err 292 } 293 294 return nil 295 } 296 297 // TakeSnapshot takes a snapshot at index i from MemoryStorage, and persists it to wal and disk. 298 func (rs *RaftStorage) TakeSnapshot(i uint64, cs raftpb.ConfState, data []byte) error { 299 rs.lg.Debugf("Creating snapshot at index %d from MemoryStorage", i) 300 snap, err := rs.ram.CreateSnapshot(i, &cs, data) 301 if err != nil { 302 return errors.Errorf("failed to create snapshot from MemoryStorage: %s", err) 303 } 304 305 if err = rs.saveSnap(snap); err != nil { 306 return err 307 } 308 309 rs.snapshotIndex = append(rs.snapshotIndex, snap.Metadata.Index) 310 311 // Keep some entries in memory for slow followers to catchup 312 if i > rs.SnapshotCatchUpEntries { 313 compacti := i - rs.SnapshotCatchUpEntries 314 rs.lg.Debugf("Purging in-memory raft entries prior to %d", compacti) 315 if err = rs.ram.Compact(compacti); err != nil { 316 if err == raft.ErrCompacted { 317 rs.lg.Warnf("Raft entries prior to %d are already purged", compacti) 318 } else { 319 rs.lg.Fatalf("Failed to purge raft entries: %s", err) 320 } 321 } 322 } 323 324 rs.lg.Infof("Snapshot is taken at index %d", i) 325 326 rs.gc() 327 return nil 328 } 329 330 // gc collects etcd/raft garbage files, namely wal and snapshot files 331 func (rs *RaftStorage) gc() { 332 if len(rs.snapshotIndex) < MaxSnapshotFiles { 333 rs.lg.Debugf("Snapshots on disk (%d) < limit (%d), no need to purge wal/snapshot", 334 len(rs.snapshotIndex), MaxSnapshotFiles) 335 return 336 } 337 338 rs.snapshotIndex = rs.snapshotIndex[len(rs.snapshotIndex)-MaxSnapshotFiles:] 339 340 rs.purgeWAL() 341 rs.purgeSnap() 342 } 343 344 func (rs *RaftStorage) purgeWAL() { 345 retain := rs.snapshotIndex[0] 346 347 var files []string 348 err := filepath.Walk(rs.walDir, func(path string, info os.FileInfo, err error) error { 349 if err != nil { 350 return err 351 } 352 if !strings.HasSuffix(path, ".wal") { 353 return nil 354 } 355 356 var seq, index uint64 357 _, f := filepath.Split(path) 358 fmt.Sscanf(f, "%016x-%016x.wal", &seq, &index) 359 360 // Only purge WAL with index lower than oldest snapshot. 361 // filepath.SkipDir seizes Walk without returning error. 362 if index >= retain { 363 return filepath.SkipDir 364 } 365 366 files = append(files, path) 367 return nil 368 }) 369 if err != nil { 370 rs.lg.Errorf("Failed to read WAL directory %s: %s", rs.walDir, err) 371 } 372 373 if len(files) <= 1 { 374 // we need to keep one wal segment with index smaller than snapshot. 375 // see comment on wal.ReleaseLockTo for the more details. 376 return 377 } 378 379 rs.purge(files[:len(files)-1]) 380 } 381 382 func (rs *RaftStorage) purgeSnap() { 383 var files []string 384 err := filepath.Walk(rs.snapDir, func(path string, info os.FileInfo, err error) error { 385 if err != nil { 386 return err 387 } 388 if strings.HasSuffix(path, ".snap") { 389 files = append(files, path) 390 } else if strings.HasSuffix(path, ".broken") { 391 rs.lg.Warnf("Found broken snapshot file %s, it can be removed manually", path) 392 } 393 394 return nil 395 }) 396 if err != nil { 397 rs.lg.Errorf("Failed to read Snapshot directory %s: %s", rs.snapDir, err) 398 return 399 } 400 401 l := len(files) 402 if l <= MaxSnapshotFiles { 403 return 404 } 405 406 rs.purge(files[:l-MaxSnapshotFiles]) // retain last MaxSnapshotFiles snapshot files 407 } 408 409 func (rs *RaftStorage) purge(files []string) { 410 for _, file := range files { 411 l, err := fileutil.TryLockFile(file, os.O_WRONLY, fileutil.PrivateFileMode) 412 if err != nil { 413 rs.lg.Debugf("Failed to lock %s, abort purging", file) 414 break 415 } 416 417 if err = os.Remove(file); err != nil { 418 rs.lg.Errorf("Failed to remove %s: %s", file, err) 419 } else { 420 rs.lg.Debugf("Purged file %s", file) 421 } 422 423 if err = l.Close(); err != nil { 424 rs.lg.Errorf("Failed to close file lock %s: %s", l.Name(), err) 425 } 426 } 427 } 428 429 // ApplySnapshot applies snapshot to local memory storage 430 func (rs *RaftStorage) ApplySnapshot(snap raftpb.Snapshot) { 431 if err := rs.ram.ApplySnapshot(snap); err != nil { 432 if err == raft.ErrSnapOutOfDate { 433 rs.lg.Warnf("Attempted to apply out-of-date snapshot at Term %d and Index %d", 434 snap.Metadata.Term, snap.Metadata.Index) 435 } else { 436 rs.lg.Fatalf("Unexpected programming error: %s", err) 437 } 438 } 439 } 440 441 // Close closes storage 442 func (rs *RaftStorage) Close() error { 443 if err := rs.wal.Close(); err != nil { 444 return err 445 } 446 447 return nil 448 }