github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/state/raft/storage/storage.go (about) 1 package storage 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "sync" 9 10 "github.com/coreos/etcd/pkg/fileutil" 11 "github.com/coreos/etcd/raft/raftpb" 12 "github.com/coreos/etcd/snap" 13 "github.com/coreos/etcd/wal" 14 "github.com/coreos/etcd/wal/walpb" 15 "github.com/docker/swarmkit/log" 16 "github.com/docker/swarmkit/manager/encryption" 17 "github.com/pkg/errors" 18 ) 19 20 // ErrNoWAL is returned if there are no WALs on disk 21 var ErrNoWAL = errors.New("no WAL present") 22 23 type walSnapDirs struct { 24 wal string 25 snap string 26 } 27 28 // the wal/snap directories in decreasing order of preference/version 29 var versionedWALSnapDirs = []walSnapDirs{ 30 {wal: "wal-v3-encrypted", snap: "snap-v3-encrypted"}, 31 {wal: "wal-v3", snap: "snap-v3"}, 32 {wal: "wal", snap: "snap"}, 33 } 34 35 // EncryptedRaftLogger saves raft data to disk 36 type EncryptedRaftLogger struct { 37 StateDir string 38 EncryptionKey []byte 39 40 // FIPS specifies whether the encryption should be FIPS-compliant 41 FIPS bool 42 43 // mutex is locked for writing only when we need to replace the wal object and snapshotter 44 // object, not when we're writing snapshots or wals (in which case it's locked for reading) 45 encoderMu sync.RWMutex 46 wal WAL 47 snapshotter Snapshotter 48 } 49 50 // BootstrapFromDisk creates a new snapshotter and wal, and also reads the latest snapshot and WALs from disk 51 func (e *EncryptedRaftLogger) BootstrapFromDisk(ctx context.Context, oldEncryptionKeys ...[]byte) (*raftpb.Snapshot, WALData, error) { 52 e.encoderMu.Lock() 53 defer e.encoderMu.Unlock() 54 55 walDir := e.walDir() 56 snapDir := e.snapDir() 57 58 encrypter, decrypter := encryption.Defaults(e.EncryptionKey, e.FIPS) 59 if oldEncryptionKeys != nil { 60 decrypters := []encryption.Decrypter{decrypter} 61 for _, key := range oldEncryptionKeys { 62 _, d := encryption.Defaults(key, e.FIPS) 63 decrypters = append(decrypters, d) 64 } 65 decrypter = encryption.NewMultiDecrypter(decrypters...) 66 } 67 68 snapFactory := NewSnapFactory(encrypter, decrypter) 69 70 if !fileutil.Exist(snapDir) { 71 // If snapshots created by the etcd-v2 code exist, or by swarmkit development version, 72 // read the latest snapshot and write it encoded to the new path. The new path 73 // prevents etc-v2 creating snapshots that are visible to us, but not encoded and 74 // out of sync with our WALs, after a downgrade. 75 for _, dirs := range versionedWALSnapDirs[1:] { 76 legacySnapDir := filepath.Join(e.StateDir, dirs.snap) 77 if fileutil.Exist(legacySnapDir) { 78 if err := MigrateSnapshot(legacySnapDir, snapDir, OriginalSnap, snapFactory); err != nil { 79 return nil, WALData{}, err 80 } 81 break 82 } 83 } 84 } 85 // ensure the new directory exists 86 if err := os.MkdirAll(snapDir, 0700); err != nil { 87 return nil, WALData{}, errors.Wrap(err, "failed to create snapshot directory") 88 } 89 90 var ( 91 snapshotter Snapshotter 92 walObj WAL 93 err error 94 ) 95 96 // Create a snapshotter and load snapshot data 97 snapshotter = snapFactory.New(snapDir) 98 snapshot, err := snapshotter.Load() 99 if err != nil && err != snap.ErrNoSnapshot { 100 return nil, WALData{}, err 101 } 102 103 walFactory := NewWALFactory(encrypter, decrypter) 104 var walsnap walpb.Snapshot 105 if snapshot != nil { 106 walsnap.Index = snapshot.Metadata.Index 107 walsnap.Term = snapshot.Metadata.Term 108 } 109 110 if !wal.Exist(walDir) { 111 var walExists bool 112 // If wals created by the etcd-v2 wal code exist, read the latest ones based 113 // on this snapshot and encode them to wals in the new path to avoid adding 114 // backwards-incompatible entries to those files. 115 for _, dirs := range versionedWALSnapDirs[1:] { 116 legacyWALDir := filepath.Join(e.StateDir, dirs.wal) 117 if !wal.Exist(legacyWALDir) { 118 continue 119 } 120 if err = MigrateWALs(ctx, legacyWALDir, walDir, OriginalWAL, walFactory, walsnap); err != nil { 121 return nil, WALData{}, err 122 } 123 walExists = true 124 break 125 } 126 if !walExists { 127 return nil, WALData{}, ErrNoWAL 128 } 129 } 130 131 walObj, waldata, err := ReadRepairWAL(ctx, walDir, walsnap, walFactory) 132 if err != nil { 133 return nil, WALData{}, err 134 } 135 136 e.snapshotter = snapshotter 137 e.wal = walObj 138 139 return snapshot, waldata, nil 140 } 141 142 // BootstrapNew creates a new snapshotter and WAL writer, expecting that there is nothing on disk 143 func (e *EncryptedRaftLogger) BootstrapNew(metadata []byte) error { 144 e.encoderMu.Lock() 145 defer e.encoderMu.Unlock() 146 encrypter, decrypter := encryption.Defaults(e.EncryptionKey, e.FIPS) 147 walFactory := NewWALFactory(encrypter, decrypter) 148 149 for _, dirpath := range []string{filepath.Dir(e.walDir()), e.snapDir()} { 150 if err := os.MkdirAll(dirpath, 0700); err != nil { 151 return errors.Wrapf(err, "failed to create %s", dirpath) 152 } 153 } 154 var err error 155 // the wal directory must not already exist upon creation 156 e.wal, err = walFactory.Create(e.walDir(), metadata) 157 if err != nil { 158 return errors.Wrap(err, "failed to create WAL") 159 } 160 161 e.snapshotter = NewSnapFactory(encrypter, decrypter).New(e.snapDir()) 162 return nil 163 } 164 165 func (e *EncryptedRaftLogger) walDir() string { 166 return filepath.Join(e.StateDir, versionedWALSnapDirs[0].wal) 167 } 168 169 func (e *EncryptedRaftLogger) snapDir() string { 170 return filepath.Join(e.StateDir, versionedWALSnapDirs[0].snap) 171 } 172 173 // RotateEncryptionKey swaps out the encoders and decoders used by the wal and snapshotter 174 func (e *EncryptedRaftLogger) RotateEncryptionKey(newKey []byte) { 175 e.encoderMu.Lock() 176 defer e.encoderMu.Unlock() 177 178 if e.wal != nil { // if the wal exists, the snapshotter exists 179 // We don't want to have to close the WAL, because we can't open a new one. 180 // We need to know the previous snapshot, because when you open a WAL you 181 // have to read out all the entries from a particular snapshot, or you can't 182 // write. So just rotate the encoders out from under it. We already 183 // have a lock on writing to snapshots and WALs. 184 wrapped, ok := e.wal.(*wrappedWAL) 185 if !ok { 186 panic(fmt.Errorf("EncryptedRaftLogger's WAL is not a wrappedWAL")) 187 } 188 189 wrapped.encrypter, wrapped.decrypter = encryption.Defaults(newKey, e.FIPS) 190 191 e.snapshotter = NewSnapFactory(wrapped.encrypter, wrapped.decrypter).New(e.snapDir()) 192 } 193 e.EncryptionKey = newKey 194 } 195 196 // SaveSnapshot actually saves a given snapshot to both the WAL and the snapshot. 197 func (e *EncryptedRaftLogger) SaveSnapshot(snapshot raftpb.Snapshot) error { 198 199 walsnap := walpb.Snapshot{ 200 Index: snapshot.Metadata.Index, 201 Term: snapshot.Metadata.Term, 202 } 203 204 e.encoderMu.RLock() 205 if err := e.wal.SaveSnapshot(walsnap); err != nil { 206 e.encoderMu.RUnlock() 207 return err 208 } 209 210 snapshotter := e.snapshotter 211 e.encoderMu.RUnlock() 212 213 if err := snapshotter.SaveSnap(snapshot); err != nil { 214 return err 215 } 216 return e.wal.ReleaseLockTo(snapshot.Metadata.Index) 217 } 218 219 // GC garbage collects snapshots and wals older than the provided index and term 220 func (e *EncryptedRaftLogger) GC(index uint64, term uint64, keepOldSnapshots uint64) error { 221 // Delete any older snapshots 222 curSnapshot := fmt.Sprintf("%016x-%016x%s", term, index, ".snap") 223 224 snapshots, err := ListSnapshots(e.snapDir()) 225 if err != nil { 226 return err 227 } 228 229 // Ignore any snapshots that are older than the current snapshot. 230 // Delete the others. Rather than doing lexical comparisons, we look 231 // at what exists before/after the current snapshot in the slice. 232 // This means that if the current snapshot doesn't appear in the 233 // directory for some strange reason, we won't delete anything, which 234 // is the safe behavior. 235 curSnapshotIdx := -1 236 var ( 237 removeErr error 238 oldestSnapshot string 239 ) 240 241 for i, snapFile := range snapshots { 242 if curSnapshotIdx >= 0 && i > curSnapshotIdx { 243 if uint64(i-curSnapshotIdx) > keepOldSnapshots { 244 err := os.Remove(filepath.Join(e.snapDir(), snapFile)) 245 if err != nil && removeErr == nil { 246 removeErr = err 247 } 248 continue 249 } 250 } else if snapFile == curSnapshot { 251 curSnapshotIdx = i 252 } 253 oldestSnapshot = snapFile 254 } 255 256 if removeErr != nil { 257 return removeErr 258 } 259 260 // Remove any WAL files that only contain data from before the oldest 261 // remaining snapshot. 262 263 if oldestSnapshot == "" { 264 return nil 265 } 266 267 // Parse index out of oldest snapshot's filename 268 var snapTerm, snapIndex uint64 269 _, err = fmt.Sscanf(oldestSnapshot, "%016x-%016x.snap", &snapTerm, &snapIndex) 270 if err != nil { 271 return errors.Wrapf(err, "malformed snapshot filename %s", oldestSnapshot) 272 } 273 274 wals, err := ListWALs(e.walDir()) 275 if err != nil { 276 return err 277 } 278 279 found := false 280 deleteUntil := -1 281 282 for i, walName := range wals { 283 var walSeq, walIndex uint64 284 _, err = fmt.Sscanf(walName, "%016x-%016x.wal", &walSeq, &walIndex) 285 if err != nil { 286 return errors.Wrapf(err, "could not parse WAL name %s", walName) 287 } 288 289 if walIndex >= snapIndex { 290 deleteUntil = i - 1 291 found = true 292 break 293 } 294 } 295 296 // If all WAL files started with indices below the oldest snapshot's 297 // index, we can delete all but the newest WAL file. 298 if !found && len(wals) != 0 { 299 deleteUntil = len(wals) - 1 300 } 301 302 for i := 0; i < deleteUntil; i++ { 303 walPath := filepath.Join(e.walDir(), wals[i]) 304 l, err := fileutil.TryLockFile(walPath, os.O_WRONLY, fileutil.PrivateFileMode) 305 if err != nil { 306 return errors.Wrapf(err, "could not lock old WAL file %s for removal", wals[i]) 307 } 308 err = os.Remove(walPath) 309 l.Close() 310 if err != nil { 311 return errors.Wrapf(err, "error removing old WAL file %s", wals[i]) 312 } 313 } 314 315 return nil 316 } 317 318 // SaveEntries saves only entries to disk 319 func (e *EncryptedRaftLogger) SaveEntries(st raftpb.HardState, entries []raftpb.Entry) error { 320 e.encoderMu.RLock() 321 defer e.encoderMu.RUnlock() 322 323 if e.wal == nil { 324 return fmt.Errorf("raft WAL has either been closed or has never been created") 325 } 326 return e.wal.Save(st, entries) 327 } 328 329 // Close closes the logger - it will have to be bootstrapped again to start writing 330 func (e *EncryptedRaftLogger) Close(ctx context.Context) { 331 e.encoderMu.Lock() 332 defer e.encoderMu.Unlock() 333 334 if e.wal != nil { 335 if err := e.wal.Close(); err != nil { 336 log.G(ctx).WithError(err).Error("error closing raft WAL") 337 } 338 } 339 340 e.wal = nil 341 e.snapshotter = nil 342 } 343 344 // Clear closes the existing WAL and removes the WAL and snapshot. 345 func (e *EncryptedRaftLogger) Clear(ctx context.Context) error { 346 e.encoderMu.Lock() 347 defer e.encoderMu.Unlock() 348 349 if e.wal != nil { 350 if err := e.wal.Close(); err != nil { 351 log.G(ctx).WithError(err).Error("error closing raft WAL") 352 } 353 } 354 e.snapshotter = nil 355 356 os.RemoveAll(e.walDir()) 357 os.RemoveAll(e.snapDir()) 358 return nil 359 }