github.com/defanghe/fabric@v2.1.1+incompatible/orderer/consensus/etcdraft/storage.go (about) 1 /* 2 Copyright IBM Corp. All Rights Reserved. 3 4 SPDX-License-Identifier: Apache-2.0 5 */ 6 7 package etcdraft 8 9 import ( 10 "fmt" 11 "io" 12 "os" 13 "path/filepath" 14 "sort" 15 "strings" 16 17 "github.com/hyperledger/fabric/common/flogging" 18 "github.com/pkg/errors" 19 "go.etcd.io/etcd/etcdserver/api/snap" 20 "go.etcd.io/etcd/pkg/fileutil" 21 "go.etcd.io/etcd/raft" 22 "go.etcd.io/etcd/raft/raftpb" 23 "go.etcd.io/etcd/wal" 24 "go.etcd.io/etcd/wal/walpb" 25 ) 26 27 // MaxSnapshotFiles defines max number of etcd/raft snapshot files to retain 28 // on filesystem. Snapshot files are read from newest to oldest, until first 29 // intact file is found. The more snapshot files we keep around, the more we 30 // mitigate the impact of a corrupted snapshots. This is exported for testing 31 // purpose. This MUST be greater equal than 1. 32 var MaxSnapshotFiles = 4 33 34 // MemoryStorage is currently backed by etcd/raft.MemoryStorage. This interface is 35 // defined to expose dependencies of fsm so that it may be swapped in the 36 // future. TODO(jay) Add other necessary methods to this interface once we need 37 // them in implementation, e.g. ApplySnapshot. 38 type MemoryStorage interface { 39 raft.Storage 40 Append(entries []raftpb.Entry) error 41 SetHardState(st raftpb.HardState) error 42 CreateSnapshot(i uint64, cs *raftpb.ConfState, data []byte) (raftpb.Snapshot, error) 43 Compact(compactIndex uint64) error 44 ApplySnapshot(snap raftpb.Snapshot) error 45 } 46 47 // RaftStorage encapsulates storages needed for etcd/raft data, i.e. memory, wal 48 type RaftStorage struct { 49 SnapshotCatchUpEntries uint64 50 51 walDir string 52 snapDir string 53 54 lg *flogging.FabricLogger 55 56 ram MemoryStorage 57 wal *wal.WAL 58 snap *snap.Snapshotter 59 60 // a queue that keeps track of indices of snapshots on disk 61 snapshotIndex []uint64 62 } 63 64 // CreateStorage attempts to create a storage to persist etcd/raft data. 65 // If data presents in specified disk, they are loaded to reconstruct storage state. 66 func CreateStorage( 67 lg *flogging.FabricLogger, 68 walDir string, 69 snapDir string, 70 ram MemoryStorage, 71 ) (*RaftStorage, error) { 72 73 sn, err := createSnapshotter(lg, snapDir) 74 if err != nil { 75 return nil, err 76 } 77 78 snapshot, err := sn.Load() 79 if err != nil { 80 if err == snap.ErrNoSnapshot { 81 lg.Debugf("No snapshot found at %s", snapDir) 82 } else { 83 return nil, errors.Errorf("failed to load snapshot: %s", err) 84 } 85 } else { 86 // snapshot found 87 lg.Debugf("Loaded snapshot at Term %d and Index %d, Nodes: %+v", 88 snapshot.Metadata.Term, snapshot.Metadata.Index, snapshot.Metadata.ConfState.Nodes) 89 } 90 91 w, st, ents, err := createOrReadWAL(lg, walDir, snapshot) 92 if err != nil { 93 return nil, errors.Errorf("failed to create or read WAL: %s", err) 94 } 95 96 if snapshot != nil { 97 lg.Debugf("Applying snapshot to raft MemoryStorage") 98 if err := ram.ApplySnapshot(*snapshot); err != nil { 99 return nil, errors.Errorf("Failed to apply snapshot to memory: %s", err) 100 } 101 } 102 103 lg.Debugf("Setting HardState to {Term: %d, Commit: %d}", st.Term, st.Commit) 104 ram.SetHardState(st) // MemoryStorage.SetHardState always returns nil 105 106 lg.Debugf("Appending %d entries to memory storage", len(ents)) 107 ram.Append(ents) // MemoryStorage.Append always return nil 108 109 return &RaftStorage{ 110 lg: lg, 111 ram: ram, 112 wal: w, 113 snap: sn, 114 walDir: walDir, 115 snapDir: snapDir, 116 snapshotIndex: ListSnapshots(lg, snapDir), 117 }, nil 118 } 119 120 // ListSnapshots returns a list of RaftIndex of snapshots stored on disk. 121 // If a file is corrupted, rename the file. 122 func ListSnapshots(logger *flogging.FabricLogger, snapDir string) []uint64 { 123 dir, err := os.Open(snapDir) 124 if err != nil { 125 logger.Errorf("Failed to open snapshot directory %s: %s", snapDir, err) 126 return nil 127 } 128 defer dir.Close() 129 130 filenames, err := dir.Readdirnames(-1) 131 if err != nil { 132 logger.Errorf("Failed to read snapshot files: %s", err) 133 return nil 134 } 135 136 snapfiles := []string{} 137 for i := range filenames { 138 if strings.HasSuffix(filenames[i], ".snap") { 139 snapfiles = append(snapfiles, filenames[i]) 140 } 141 } 142 sort.Sort(sort.StringSlice(snapfiles)) 143 144 var snapshots []uint64 145 for _, snapfile := range snapfiles { 146 fpath := filepath.Join(snapDir, snapfile) 147 s, err := snap.Read(logger.Zap(), fpath) 148 if err != nil { 149 logger.Errorf("Snapshot file %s is corrupted: %s", fpath, err) 150 151 broken := fpath + ".broken" 152 if err = os.Rename(fpath, broken); err != nil { 153 logger.Errorf("Failed to rename corrupted snapshot file %s to %s: %s", fpath, broken, err) 154 } else { 155 logger.Debugf("Renaming corrupted snapshot file %s to %s", fpath, broken) 156 } 157 158 continue 159 } 160 161 snapshots = append(snapshots, s.Metadata.Index) 162 } 163 164 return snapshots 165 } 166 167 func createSnapshotter(logger *flogging.FabricLogger, snapDir string) (*snap.Snapshotter, error) { 168 if err := os.MkdirAll(snapDir, os.ModePerm); err != nil { 169 return nil, errors.Errorf("failed to mkdir '%s' for snapshot: %s", snapDir, err) 170 } 171 172 return snap.New(logger.Zap(), snapDir), nil 173 } 174 175 func createOrReadWAL(lg *flogging.FabricLogger, walDir string, snapshot *raftpb.Snapshot) (w *wal.WAL, st raftpb.HardState, ents []raftpb.Entry, err error) { 176 if !wal.Exist(walDir) { 177 lg.Infof("No WAL data found, creating new WAL at path '%s'", walDir) 178 // TODO(jay_guo) add metadata to be persisted with wal once we need it. 179 // use case could be data dump and restore on a new node. 180 w, err := wal.Create(lg.Zap(), walDir, nil) 181 if err == os.ErrExist { 182 lg.Fatalf("programming error, we've just checked that WAL does not exist") 183 } 184 185 if err != nil { 186 return nil, st, nil, errors.Errorf("failed to initialize WAL: %s", err) 187 } 188 189 if err = w.Close(); err != nil { 190 return nil, st, nil, errors.Errorf("failed to close the WAL just created: %s", err) 191 } 192 } else { 193 lg.Infof("Found WAL data at path '%s', replaying it", walDir) 194 } 195 196 walsnap := walpb.Snapshot{} 197 if snapshot != nil { 198 walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term 199 } 200 201 lg.Debugf("Loading WAL at Term %d and Index %d", walsnap.Term, walsnap.Index) 202 203 var repaired bool 204 for { 205 if w, err = wal.Open(lg.Zap(), walDir, walsnap); err != nil { 206 return nil, st, nil, errors.Errorf("failed to open WAL: %s", err) 207 } 208 209 if _, st, ents, err = w.ReadAll(); err != nil { 210 lg.Warnf("Failed to read WAL: %s", err) 211 212 if errc := w.Close(); errc != nil { 213 return nil, st, nil, errors.Errorf("failed to close erroneous WAL: %s", errc) 214 } 215 216 // only repair UnexpectedEOF and only repair once 217 if repaired || err != io.ErrUnexpectedEOF { 218 return nil, st, nil, errors.Errorf("failed to read WAL and cannot repair: %s", err) 219 } 220 221 if !wal.Repair(lg.Zap(), walDir) { 222 return nil, st, nil, errors.Errorf("failed to repair WAL: %s", err) 223 } 224 225 repaired = true 226 // next loop should be able to open WAL and return 227 continue 228 } 229 230 // successfully opened WAL and read all entries, break 231 break 232 } 233 234 return w, st, ents, nil 235 } 236 237 // Snapshot returns the latest snapshot stored in memory 238 func (rs *RaftStorage) Snapshot() raftpb.Snapshot { 239 sn, _ := rs.ram.Snapshot() // Snapshot always returns nil error 240 return sn 241 } 242 243 // Store persists etcd/raft data 244 func (rs *RaftStorage) Store(entries []raftpb.Entry, hardstate raftpb.HardState, snapshot raftpb.Snapshot) error { 245 if err := rs.wal.Save(hardstate, entries); err != nil { 246 return err 247 } 248 249 if !raft.IsEmptySnap(snapshot) { 250 if err := rs.saveSnap(snapshot); err != nil { 251 return err 252 } 253 254 if err := rs.ram.ApplySnapshot(snapshot); err != nil { 255 if err == raft.ErrSnapOutOfDate { 256 rs.lg.Warnf("Attempted to apply out-of-date snapshot at Term %d and Index %d", 257 snapshot.Metadata.Term, snapshot.Metadata.Index) 258 } else { 259 rs.lg.Fatalf("Unexpected programming error: %s", err) 260 } 261 } 262 } 263 264 if err := rs.ram.Append(entries); err != nil { 265 return err 266 } 267 268 return nil 269 } 270 271 func (rs *RaftStorage) saveSnap(snap raftpb.Snapshot) error { 272 rs.lg.Infof("Persisting snapshot (term: %d, index: %d) to WAL and disk", snap.Metadata.Term, snap.Metadata.Index) 273 274 // must save the snapshot index to the WAL before saving the 275 // snapshot to maintain the invariant that we only Open the 276 // wal at previously-saved snapshot indexes. 277 walsnap := walpb.Snapshot{ 278 Index: snap.Metadata.Index, 279 Term: snap.Metadata.Term, 280 } 281 282 if err := rs.wal.SaveSnapshot(walsnap); err != nil { 283 return errors.Errorf("failed to save snapshot to WAL: %s", err) 284 } 285 286 if err := rs.snap.SaveSnap(snap); err != nil { 287 return errors.Errorf("failed to save snapshot to disk: %s", err) 288 } 289 290 rs.lg.Debugf("Releasing lock to wal files prior to %d", snap.Metadata.Index) 291 if err := rs.wal.ReleaseLockTo(snap.Metadata.Index); err != nil { 292 return err 293 } 294 295 return nil 296 } 297 298 // TakeSnapshot takes a snapshot at index i from MemoryStorage, and persists it to wal and disk. 299 func (rs *RaftStorage) TakeSnapshot(i uint64, cs raftpb.ConfState, data []byte) error { 300 rs.lg.Debugf("Creating snapshot at index %d from MemoryStorage", i) 301 snap, err := rs.ram.CreateSnapshot(i, &cs, data) 302 if err != nil { 303 return errors.Errorf("failed to create snapshot from MemoryStorage: %s", err) 304 } 305 306 if err = rs.saveSnap(snap); err != nil { 307 return err 308 } 309 310 rs.snapshotIndex = append(rs.snapshotIndex, snap.Metadata.Index) 311 312 // Keep some entries in memory for slow followers to catchup 313 if i > rs.SnapshotCatchUpEntries { 314 compacti := i - rs.SnapshotCatchUpEntries 315 rs.lg.Debugf("Purging in-memory raft entries prior to %d", compacti) 316 if err = rs.ram.Compact(compacti); err != nil { 317 if err == raft.ErrCompacted { 318 rs.lg.Warnf("Raft entries prior to %d are already purged", compacti) 319 } else { 320 rs.lg.Fatalf("Failed to purge raft entries: %s", err) 321 } 322 } 323 } 324 325 rs.lg.Infof("Snapshot is taken at index %d", i) 326 327 rs.gc() 328 return nil 329 } 330 331 // gc collects etcd/raft garbage files, namely wal and snapshot files 332 func (rs *RaftStorage) gc() { 333 if len(rs.snapshotIndex) < MaxSnapshotFiles { 334 rs.lg.Debugf("Snapshots on disk (%d) < limit (%d), no need to purge wal/snapshot", 335 len(rs.snapshotIndex), MaxSnapshotFiles) 336 return 337 } 338 339 rs.snapshotIndex = rs.snapshotIndex[len(rs.snapshotIndex)-MaxSnapshotFiles:] 340 341 rs.purgeWAL() 342 rs.purgeSnap() 343 } 344 345 func (rs *RaftStorage) purgeWAL() { 346 retain := rs.snapshotIndex[0] 347 348 var files []string 349 err := filepath.Walk(rs.walDir, func(path string, info os.FileInfo, err error) error { 350 if err != nil { 351 return err 352 } 353 if !strings.HasSuffix(path, ".wal") { 354 return nil 355 } 356 357 var seq, index uint64 358 _, f := filepath.Split(path) 359 fmt.Sscanf(f, "%016x-%016x.wal", &seq, &index) 360 361 // Only purge WAL with index lower than oldest snapshot. 362 // filepath.SkipDir seizes Walk without returning error. 363 if index >= retain { 364 return filepath.SkipDir 365 } 366 367 files = append(files, path) 368 return nil 369 }) 370 if err != nil { 371 rs.lg.Errorf("Failed to read WAL directory %s: %s", rs.walDir, err) 372 } 373 374 if len(files) <= 1 { 375 // we need to keep one wal segment with index smaller than snapshot. 376 // see comment on wal.ReleaseLockTo for the more details. 377 return 378 } 379 380 rs.purge(files[:len(files)-1]) 381 } 382 383 func (rs *RaftStorage) purgeSnap() { 384 var files []string 385 err := filepath.Walk(rs.snapDir, func(path string, info os.FileInfo, err error) error { 386 if err != nil { 387 return err 388 } 389 if strings.HasSuffix(path, ".snap") { 390 files = append(files, path) 391 } else if strings.HasSuffix(path, ".broken") { 392 rs.lg.Warnf("Found broken snapshot file %s, it can be removed manually", path) 393 } 394 395 return nil 396 }) 397 if err != nil { 398 rs.lg.Errorf("Failed to read Snapshot directory %s: %s", rs.snapDir, err) 399 return 400 } 401 402 l := len(files) 403 if l <= MaxSnapshotFiles { 404 return 405 } 406 407 rs.purge(files[:l-MaxSnapshotFiles]) // retain last MaxSnapshotFiles snapshot files 408 } 409 410 func (rs *RaftStorage) purge(files []string) { 411 for _, file := range files { 412 l, err := fileutil.TryLockFile(file, os.O_WRONLY, fileutil.PrivateFileMode) 413 if err != nil { 414 rs.lg.Debugf("Failed to lock %s, abort purging", file) 415 break 416 } 417 418 if err = os.Remove(file); err != nil { 419 rs.lg.Errorf("Failed to remove %s: %s", file, err) 420 } else { 421 rs.lg.Debugf("Purged file %s", file) 422 } 423 424 if err = l.Close(); err != nil { 425 rs.lg.Errorf("Failed to close file lock %s: %s", l.Name(), err) 426 } 427 } 428 } 429 430 // ApplySnapshot applies snapshot to local memory storage 431 func (rs *RaftStorage) ApplySnapshot(snap raftpb.Snapshot) { 432 if err := rs.ram.ApplySnapshot(snap); err != nil { 433 if err == raft.ErrSnapOutOfDate { 434 rs.lg.Warnf("Attempted to apply out-of-date snapshot at Term %d and Index %d", 435 snap.Metadata.Term, snap.Metadata.Index) 436 } else { 437 rs.lg.Fatalf("Unexpected programming error: %s", err) 438 } 439 } 440 } 441 442 // Close closes storage 443 func (rs *RaftStorage) Close() error { 444 if err := rs.wal.Close(); err != nil { 445 return err 446 } 447 448 return nil 449 }