go.etcd.io/etcd@v3.3.27+incompatible/etcdctl/ctlv3/command/snapshot_command.go (about) 1 // Copyright 2016 The etcd Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package command 16 17 import ( 18 "context" 19 "crypto/sha256" 20 "encoding/binary" 21 "encoding/json" 22 "fmt" 23 "hash/crc32" 24 "io" 25 "math" 26 "os" 27 "path/filepath" 28 "reflect" 29 "strings" 30 31 "github.com/coreos/etcd/etcdserver" 32 "github.com/coreos/etcd/etcdserver/etcdserverpb" 33 "github.com/coreos/etcd/etcdserver/membership" 34 "github.com/coreos/etcd/lease" 35 "github.com/coreos/etcd/mvcc" 36 "github.com/coreos/etcd/mvcc/backend" 37 "github.com/coreos/etcd/pkg/fileutil" 38 "github.com/coreos/etcd/pkg/types" 39 "github.com/coreos/etcd/raft" 40 "github.com/coreos/etcd/raft/raftpb" 41 "github.com/coreos/etcd/snap" 42 "github.com/coreos/etcd/store" 43 "github.com/coreos/etcd/wal" 44 "github.com/coreos/etcd/wal/walpb" 45 46 bolt "github.com/coreos/bbolt" 47 "github.com/spf13/cobra" 48 ) 49 50 const ( 51 defaultName = "default" 52 defaultInitialAdvertisePeerURLs = "http://localhost:2380" 53 ) 54 55 var ( 56 restoreCluster string 57 restoreClusterToken string 58 restoreDataDir string 59 restoreWalDir string 60 restorePeerURLs string 61 restoreName string 62 skipHashCheck bool 63 ) 64 65 // NewSnapshotCommand returns the cobra command for "snapshot". 66 func NewSnapshotCommand() *cobra.Command { 67 cmd := &cobra.Command{ 68 Use: "snapshot <subcommand>", 69 Short: "Manages etcd node snapshots", 70 } 71 cmd.AddCommand(NewSnapshotSaveCommand()) 72 cmd.AddCommand(NewSnapshotRestoreCommand()) 73 cmd.AddCommand(newSnapshotStatusCommand()) 74 return cmd 75 } 76 77 func NewSnapshotSaveCommand() *cobra.Command { 78 return &cobra.Command{ 79 Use: "save <filename>", 80 Short: "Stores an etcd node backend snapshot to a given file", 81 Run: snapshotSaveCommandFunc, 82 } 83 } 84 85 func newSnapshotStatusCommand() *cobra.Command { 86 return &cobra.Command{ 87 Use: "status <filename>", 88 Short: "Gets backend snapshot status of a given file", 89 Long: `When --write-out is set to simple, this command prints out comma-separated status lists for each endpoint. 90 The items in the lists are hash, revision, total keys, total size. 91 `, 92 Run: snapshotStatusCommandFunc, 93 } 94 } 95 96 func NewSnapshotRestoreCommand() *cobra.Command { 97 cmd := &cobra.Command{ 98 Use: "restore <filename> [options]", 99 Short: "Restores an etcd member snapshot to an etcd directory", 100 Run: snapshotRestoreCommandFunc, 101 } 102 cmd.Flags().StringVar(&restoreDataDir, "data-dir", "", "Path to the data directory") 103 cmd.Flags().StringVar(&restoreWalDir, "wal-dir", "", "Path to the WAL directory (use --data-dir if none given)") 104 cmd.Flags().StringVar(&restoreCluster, "initial-cluster", initialClusterFromName(defaultName), "Initial cluster configuration for restore bootstrap") 105 cmd.Flags().StringVar(&restoreClusterToken, "initial-cluster-token", "etcd-cluster", "Initial cluster token for the etcd cluster during restore bootstrap") 106 cmd.Flags().StringVar(&restorePeerURLs, "initial-advertise-peer-urls", defaultInitialAdvertisePeerURLs, "List of this member's peer URLs to advertise to the rest of the cluster") 107 cmd.Flags().StringVar(&restoreName, "name", defaultName, "Human-readable name for this member") 108 cmd.Flags().BoolVar(&skipHashCheck, "skip-hash-check", false, "Ignore snapshot integrity hash value (required if copied from data directory)") 109 110 return cmd 111 } 112 113 func snapshotSaveCommandFunc(cmd *cobra.Command, args []string) { 114 if len(args) != 1 { 115 err := fmt.Errorf("snapshot save expects one argument") 116 ExitWithError(ExitBadArgs, err) 117 } 118 119 path := args[0] 120 121 partpath := path + ".part" 122 f, err := os.Create(partpath) 123 124 if err != nil { 125 exiterr := fmt.Errorf("could not open %s (%v)", partpath, err) 126 ExitWithError(ExitBadArgs, exiterr) 127 } 128 129 c := mustClientFromCmd(cmd) 130 r, serr := c.Snapshot(context.TODO()) 131 if serr != nil { 132 os.RemoveAll(partpath) 133 ExitWithError(ExitInterrupted, serr) 134 } 135 if _, rerr := io.Copy(f, r); rerr != nil { 136 os.RemoveAll(partpath) 137 ExitWithError(ExitInterrupted, rerr) 138 } 139 140 fileutil.Fsync(f) 141 142 f.Close() 143 144 if rerr := os.Rename(partpath, path); rerr != nil { 145 exiterr := fmt.Errorf("could not rename %s to %s (%v)", partpath, path, rerr) 146 ExitWithError(ExitIO, exiterr) 147 } 148 fmt.Printf("Snapshot saved at %s\n", path) 149 } 150 151 func snapshotStatusCommandFunc(cmd *cobra.Command, args []string) { 152 if len(args) != 1 { 153 err := fmt.Errorf("snapshot status requires exactly one argument") 154 ExitWithError(ExitBadArgs, err) 155 } 156 initDisplayFromCmd(cmd) 157 ds := dbStatus(args[0]) 158 display.DBStatus(ds) 159 } 160 161 func snapshotRestoreCommandFunc(cmd *cobra.Command, args []string) { 162 if len(args) != 1 { 163 err := fmt.Errorf("snapshot restore requires exactly one argument") 164 ExitWithError(ExitBadArgs, err) 165 } 166 167 urlmap, uerr := types.NewURLsMap(restoreCluster) 168 if uerr != nil { 169 ExitWithError(ExitBadArgs, uerr) 170 } 171 172 cfg := etcdserver.ServerConfig{ 173 InitialClusterToken: restoreClusterToken, 174 InitialPeerURLsMap: urlmap, 175 PeerURLs: types.MustNewURLs(strings.Split(restorePeerURLs, ",")), 176 Name: restoreName, 177 } 178 if err := cfg.VerifyBootstrap(); err != nil { 179 ExitWithError(ExitBadArgs, err) 180 } 181 182 cl, cerr := membership.NewClusterFromURLsMap(restoreClusterToken, urlmap) 183 if cerr != nil { 184 ExitWithError(ExitBadArgs, cerr) 185 } 186 187 basedir := restoreDataDir 188 if basedir == "" { 189 basedir = restoreName + ".etcd" 190 } 191 192 waldir := restoreWalDir 193 if waldir == "" { 194 waldir = filepath.Join(basedir, "member", "wal") 195 } 196 snapdir := filepath.Join(basedir, "member", "snap") 197 198 if _, err := os.Stat(basedir); err == nil { 199 ExitWithError(ExitInvalidInput, fmt.Errorf("data-dir %q exists", basedir)) 200 } 201 202 makeDB(snapdir, args[0], len(cl.Members())) 203 makeWALAndSnap(waldir, snapdir, cl) 204 } 205 206 func initialClusterFromName(name string) string { 207 n := name 208 if name == "" { 209 n = defaultName 210 } 211 return fmt.Sprintf("%s=http://localhost:2380", n) 212 } 213 214 // makeWAL creates a WAL for the initial cluster 215 func makeWALAndSnap(waldir, snapdir string, cl *membership.RaftCluster) { 216 if err := fileutil.CreateDirAll(waldir); err != nil { 217 ExitWithError(ExitIO, err) 218 } 219 220 // add members again to persist them to the store we create. 221 st := store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix) 222 cl.SetStore(st) 223 for _, m := range cl.Members() { 224 cl.AddMember(m) 225 } 226 227 m := cl.MemberByName(restoreName) 228 md := &etcdserverpb.Metadata{NodeID: uint64(m.ID), ClusterID: uint64(cl.ID())} 229 metadata, merr := md.Marshal() 230 if merr != nil { 231 ExitWithError(ExitInvalidInput, merr) 232 } 233 234 w, walerr := wal.Create(waldir, metadata) 235 if walerr != nil { 236 ExitWithError(ExitIO, walerr) 237 } 238 defer w.Close() 239 240 peers := make([]raft.Peer, len(cl.MemberIDs())) 241 for i, id := range cl.MemberIDs() { 242 ctx, err := json.Marshal((*cl).Member(id)) 243 if err != nil { 244 ExitWithError(ExitInvalidInput, err) 245 } 246 peers[i] = raft.Peer{ID: uint64(id), Context: ctx} 247 } 248 249 ents := make([]raftpb.Entry, len(peers)) 250 nodeIDs := make([]uint64, len(peers)) 251 for i, p := range peers { 252 nodeIDs[i] = p.ID 253 cc := raftpb.ConfChange{ 254 Type: raftpb.ConfChangeAddNode, 255 NodeID: p.ID, 256 Context: p.Context} 257 d, err := cc.Marshal() 258 if err != nil { 259 ExitWithError(ExitInvalidInput, err) 260 } 261 e := raftpb.Entry{ 262 Type: raftpb.EntryConfChange, 263 Term: 1, 264 Index: uint64(i + 1), 265 Data: d, 266 } 267 ents[i] = e 268 } 269 270 commit, term := uint64(len(ents)), uint64(1) 271 272 if err := w.Save(raftpb.HardState{ 273 Term: term, 274 Vote: peers[0].ID, 275 Commit: commit}, ents); err != nil { 276 ExitWithError(ExitIO, err) 277 } 278 279 b, berr := st.Save() 280 if berr != nil { 281 ExitWithError(ExitError, berr) 282 } 283 284 raftSnap := raftpb.Snapshot{ 285 Data: b, 286 Metadata: raftpb.SnapshotMetadata{ 287 Index: commit, 288 Term: term, 289 ConfState: raftpb.ConfState{ 290 Nodes: nodeIDs, 291 }, 292 }, 293 } 294 snapshotter := snap.New(snapdir) 295 if err := snapshotter.SaveSnap(raftSnap); err != nil { 296 panic(err) 297 } 298 299 if err := w.SaveSnapshot(walpb.Snapshot{Index: commit, Term: term}); err != nil { 300 ExitWithError(ExitIO, err) 301 } 302 } 303 304 // initIndex implements ConsistentIndexGetter so the snapshot won't block 305 // the new raft instance by waiting for a future raft index. 306 type initIndex int 307 308 func (i *initIndex) ConsistentIndex() uint64 { return uint64(*i) } 309 310 // makeDB copies the database snapshot to the snapshot directory 311 func makeDB(snapdir, dbfile string, commit int) { 312 f, ferr := os.OpenFile(dbfile, os.O_RDONLY, 0600) 313 if ferr != nil { 314 ExitWithError(ExitInvalidInput, ferr) 315 } 316 defer f.Close() 317 318 // get snapshot integrity hash 319 if _, err := f.Seek(-sha256.Size, io.SeekEnd); err != nil { 320 ExitWithError(ExitIO, err) 321 } 322 sha := make([]byte, sha256.Size) 323 if _, err := f.Read(sha); err != nil { 324 ExitWithError(ExitIO, err) 325 } 326 if _, err := f.Seek(0, io.SeekStart); err != nil { 327 ExitWithError(ExitIO, err) 328 } 329 330 if err := fileutil.CreateDirAll(snapdir); err != nil { 331 ExitWithError(ExitIO, err) 332 } 333 334 dbpath := filepath.Join(snapdir, "db") 335 db, dberr := os.OpenFile(dbpath, os.O_RDWR|os.O_CREATE, 0600) 336 if dberr != nil { 337 ExitWithError(ExitIO, dberr) 338 } 339 if _, err := io.Copy(db, f); err != nil { 340 ExitWithError(ExitIO, err) 341 } 342 343 // truncate away integrity hash, if any. 344 off, serr := db.Seek(0, io.SeekEnd) 345 if serr != nil { 346 ExitWithError(ExitIO, serr) 347 } 348 hasHash := (off % 512) == sha256.Size 349 if hasHash { 350 if err := db.Truncate(off - sha256.Size); err != nil { 351 ExitWithError(ExitIO, err) 352 } 353 } 354 355 if !hasHash && !skipHashCheck { 356 err := fmt.Errorf("snapshot missing hash but --skip-hash-check=false") 357 ExitWithError(ExitBadArgs, err) 358 } 359 360 if hasHash && !skipHashCheck { 361 // check for match 362 if _, err := db.Seek(0, io.SeekStart); err != nil { 363 ExitWithError(ExitIO, err) 364 } 365 h := sha256.New() 366 if _, err := io.Copy(h, db); err != nil { 367 ExitWithError(ExitIO, err) 368 } 369 dbsha := h.Sum(nil) 370 if !reflect.DeepEqual(sha, dbsha) { 371 err := fmt.Errorf("expected sha256 %v, got %v", sha, dbsha) 372 ExitWithError(ExitInvalidInput, err) 373 } 374 } 375 376 // db hash is OK, can now modify DB so it can be part of a new cluster 377 db.Close() 378 379 // update consistentIndex so applies go through on etcdserver despite 380 // having a new raft instance 381 be := backend.NewDefaultBackend(dbpath) 382 // a lessor never timeouts leases 383 lessor := lease.NewLessor(be, math.MaxInt64) 384 s := mvcc.NewStore(be, lessor, (*initIndex)(&commit)) 385 txn := s.Write() 386 btx := be.BatchTx() 387 del := func(k, v []byte) error { 388 txn.DeleteRange(k, nil) 389 return nil 390 } 391 392 // delete stored members from old cluster since using new members 393 btx.UnsafeForEach([]byte("members"), del) 394 // todo: add back new members when we start to deprecate old snap file. 395 btx.UnsafeForEach([]byte("members_removed"), del) 396 // trigger write-out of new consistent index 397 txn.End() 398 s.Commit() 399 s.Close() 400 be.Close() 401 } 402 403 type dbstatus struct { 404 Hash uint32 `json:"hash"` 405 Revision int64 `json:"revision"` 406 TotalKey int `json:"totalKey"` 407 TotalSize int64 `json:"totalSize"` 408 } 409 410 func dbStatus(p string) dbstatus { 411 if _, err := os.Stat(p); err != nil { 412 ExitWithError(ExitError, err) 413 } 414 415 ds := dbstatus{} 416 417 db, err := bolt.Open(p, 0400, &bolt.Options{ReadOnly: true}) 418 if err != nil { 419 ExitWithError(ExitError, err) 420 } 421 defer db.Close() 422 423 h := crc32.New(crc32.MakeTable(crc32.Castagnoli)) 424 425 err = db.View(func(tx *bolt.Tx) error { 426 // check snapshot file integrity first 427 var dbErrStrings []string 428 for dbErr := range tx.Check() { 429 dbErrStrings = append(dbErrStrings, dbErr.Error()) 430 } 431 if len(dbErrStrings) > 0 { 432 return fmt.Errorf("snapshot file integrity check failed. %d errors found.\n"+strings.Join(dbErrStrings, "\n"), len(dbErrStrings)) 433 } 434 ds.TotalSize = tx.Size() 435 c := tx.Cursor() 436 for next, _ := c.First(); next != nil; next, _ = c.Next() { 437 b := tx.Bucket(next) 438 if b == nil { 439 return fmt.Errorf("cannot get hash of bucket %s", string(next)) 440 } 441 h.Write(next) 442 iskeyb := (string(next) == "key") 443 b.ForEach(func(k, v []byte) error { 444 h.Write(k) 445 h.Write(v) 446 if iskeyb { 447 rev := bytesToRev(k) 448 ds.Revision = rev.main 449 } 450 ds.TotalKey++ 451 return nil 452 }) 453 } 454 return nil 455 }) 456 457 if err != nil { 458 ExitWithError(ExitError, err) 459 } 460 461 ds.Hash = h.Sum32() 462 return ds 463 } 464 465 type revision struct { 466 main int64 467 sub int64 468 } 469 470 func bytesToRev(bytes []byte) revision { 471 return revision{ 472 main: int64(binary.BigEndian.Uint64(bytes[0:8])), 473 sub: int64(binary.BigEndian.Uint64(bytes[9:])), 474 } 475 }