go.etcd.io/etcd@v3.3.27+incompatible/etcdctl/ctlv3/command/snapshot_command.go (about)

     1  // Copyright 2016 The etcd Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package command
    16  
    17  import (
    18  	"context"
    19  	"crypto/sha256"
    20  	"encoding/binary"
    21  	"encoding/json"
    22  	"fmt"
    23  	"hash/crc32"
    24  	"io"
    25  	"math"
    26  	"os"
    27  	"path/filepath"
    28  	"reflect"
    29  	"strings"
    30  
    31  	"github.com/coreos/etcd/etcdserver"
    32  	"github.com/coreos/etcd/etcdserver/etcdserverpb"
    33  	"github.com/coreos/etcd/etcdserver/membership"
    34  	"github.com/coreos/etcd/lease"
    35  	"github.com/coreos/etcd/mvcc"
    36  	"github.com/coreos/etcd/mvcc/backend"
    37  	"github.com/coreos/etcd/pkg/fileutil"
    38  	"github.com/coreos/etcd/pkg/types"
    39  	"github.com/coreos/etcd/raft"
    40  	"github.com/coreos/etcd/raft/raftpb"
    41  	"github.com/coreos/etcd/snap"
    42  	"github.com/coreos/etcd/store"
    43  	"github.com/coreos/etcd/wal"
    44  	"github.com/coreos/etcd/wal/walpb"
    45  
    46  	bolt "github.com/coreos/bbolt"
    47  	"github.com/spf13/cobra"
    48  )
    49  
    50  const (
    51  	defaultName                     = "default"
    52  	defaultInitialAdvertisePeerURLs = "http://localhost:2380"
    53  )
    54  
    55  var (
    56  	restoreCluster      string
    57  	restoreClusterToken string
    58  	restoreDataDir      string
    59  	restoreWalDir       string
    60  	restorePeerURLs     string
    61  	restoreName         string
    62  	skipHashCheck       bool
    63  )
    64  
    65  // NewSnapshotCommand returns the cobra command for "snapshot".
    66  func NewSnapshotCommand() *cobra.Command {
    67  	cmd := &cobra.Command{
    68  		Use:   "snapshot <subcommand>",
    69  		Short: "Manages etcd node snapshots",
    70  	}
    71  	cmd.AddCommand(NewSnapshotSaveCommand())
    72  	cmd.AddCommand(NewSnapshotRestoreCommand())
    73  	cmd.AddCommand(newSnapshotStatusCommand())
    74  	return cmd
    75  }
    76  
    77  func NewSnapshotSaveCommand() *cobra.Command {
    78  	return &cobra.Command{
    79  		Use:   "save <filename>",
    80  		Short: "Stores an etcd node backend snapshot to a given file",
    81  		Run:   snapshotSaveCommandFunc,
    82  	}
    83  }
    84  
    85  func newSnapshotStatusCommand() *cobra.Command {
    86  	return &cobra.Command{
    87  		Use:   "status <filename>",
    88  		Short: "Gets backend snapshot status of a given file",
    89  		Long: `When --write-out is set to simple, this command prints out comma-separated status lists for each endpoint.
    90  The items in the lists are hash, revision, total keys, total size.
    91  `,
    92  		Run: snapshotStatusCommandFunc,
    93  	}
    94  }
    95  
    96  func NewSnapshotRestoreCommand() *cobra.Command {
    97  	cmd := &cobra.Command{
    98  		Use:   "restore <filename> [options]",
    99  		Short: "Restores an etcd member snapshot to an etcd directory",
   100  		Run:   snapshotRestoreCommandFunc,
   101  	}
   102  	cmd.Flags().StringVar(&restoreDataDir, "data-dir", "", "Path to the data directory")
   103  	cmd.Flags().StringVar(&restoreWalDir, "wal-dir", "", "Path to the WAL directory (use --data-dir if none given)")
   104  	cmd.Flags().StringVar(&restoreCluster, "initial-cluster", initialClusterFromName(defaultName), "Initial cluster configuration for restore bootstrap")
   105  	cmd.Flags().StringVar(&restoreClusterToken, "initial-cluster-token", "etcd-cluster", "Initial cluster token for the etcd cluster during restore bootstrap")
   106  	cmd.Flags().StringVar(&restorePeerURLs, "initial-advertise-peer-urls", defaultInitialAdvertisePeerURLs, "List of this member's peer URLs to advertise to the rest of the cluster")
   107  	cmd.Flags().StringVar(&restoreName, "name", defaultName, "Human-readable name for this member")
   108  	cmd.Flags().BoolVar(&skipHashCheck, "skip-hash-check", false, "Ignore snapshot integrity hash value (required if copied from data directory)")
   109  
   110  	return cmd
   111  }
   112  
   113  func snapshotSaveCommandFunc(cmd *cobra.Command, args []string) {
   114  	if len(args) != 1 {
   115  		err := fmt.Errorf("snapshot save expects one argument")
   116  		ExitWithError(ExitBadArgs, err)
   117  	}
   118  
   119  	path := args[0]
   120  
   121  	partpath := path + ".part"
   122  	f, err := os.Create(partpath)
   123  
   124  	if err != nil {
   125  		exiterr := fmt.Errorf("could not open %s (%v)", partpath, err)
   126  		ExitWithError(ExitBadArgs, exiterr)
   127  	}
   128  
   129  	c := mustClientFromCmd(cmd)
   130  	r, serr := c.Snapshot(context.TODO())
   131  	if serr != nil {
   132  		os.RemoveAll(partpath)
   133  		ExitWithError(ExitInterrupted, serr)
   134  	}
   135  	if _, rerr := io.Copy(f, r); rerr != nil {
   136  		os.RemoveAll(partpath)
   137  		ExitWithError(ExitInterrupted, rerr)
   138  	}
   139  
   140  	fileutil.Fsync(f)
   141  
   142  	f.Close()
   143  
   144  	if rerr := os.Rename(partpath, path); rerr != nil {
   145  		exiterr := fmt.Errorf("could not rename %s to %s (%v)", partpath, path, rerr)
   146  		ExitWithError(ExitIO, exiterr)
   147  	}
   148  	fmt.Printf("Snapshot saved at %s\n", path)
   149  }
   150  
   151  func snapshotStatusCommandFunc(cmd *cobra.Command, args []string) {
   152  	if len(args) != 1 {
   153  		err := fmt.Errorf("snapshot status requires exactly one argument")
   154  		ExitWithError(ExitBadArgs, err)
   155  	}
   156  	initDisplayFromCmd(cmd)
   157  	ds := dbStatus(args[0])
   158  	display.DBStatus(ds)
   159  }
   160  
   161  func snapshotRestoreCommandFunc(cmd *cobra.Command, args []string) {
   162  	if len(args) != 1 {
   163  		err := fmt.Errorf("snapshot restore requires exactly one argument")
   164  		ExitWithError(ExitBadArgs, err)
   165  	}
   166  
   167  	urlmap, uerr := types.NewURLsMap(restoreCluster)
   168  	if uerr != nil {
   169  		ExitWithError(ExitBadArgs, uerr)
   170  	}
   171  
   172  	cfg := etcdserver.ServerConfig{
   173  		InitialClusterToken: restoreClusterToken,
   174  		InitialPeerURLsMap:  urlmap,
   175  		PeerURLs:            types.MustNewURLs(strings.Split(restorePeerURLs, ",")),
   176  		Name:                restoreName,
   177  	}
   178  	if err := cfg.VerifyBootstrap(); err != nil {
   179  		ExitWithError(ExitBadArgs, err)
   180  	}
   181  
   182  	cl, cerr := membership.NewClusterFromURLsMap(restoreClusterToken, urlmap)
   183  	if cerr != nil {
   184  		ExitWithError(ExitBadArgs, cerr)
   185  	}
   186  
   187  	basedir := restoreDataDir
   188  	if basedir == "" {
   189  		basedir = restoreName + ".etcd"
   190  	}
   191  
   192  	waldir := restoreWalDir
   193  	if waldir == "" {
   194  		waldir = filepath.Join(basedir, "member", "wal")
   195  	}
   196  	snapdir := filepath.Join(basedir, "member", "snap")
   197  
   198  	if _, err := os.Stat(basedir); err == nil {
   199  		ExitWithError(ExitInvalidInput, fmt.Errorf("data-dir %q exists", basedir))
   200  	}
   201  
   202  	makeDB(snapdir, args[0], len(cl.Members()))
   203  	makeWALAndSnap(waldir, snapdir, cl)
   204  }
   205  
   206  func initialClusterFromName(name string) string {
   207  	n := name
   208  	if name == "" {
   209  		n = defaultName
   210  	}
   211  	return fmt.Sprintf("%s=http://localhost:2380", n)
   212  }
   213  
   214  // makeWAL creates a WAL for the initial cluster
   215  func makeWALAndSnap(waldir, snapdir string, cl *membership.RaftCluster) {
   216  	if err := fileutil.CreateDirAll(waldir); err != nil {
   217  		ExitWithError(ExitIO, err)
   218  	}
   219  
   220  	// add members again to persist them to the store we create.
   221  	st := store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix)
   222  	cl.SetStore(st)
   223  	for _, m := range cl.Members() {
   224  		cl.AddMember(m)
   225  	}
   226  
   227  	m := cl.MemberByName(restoreName)
   228  	md := &etcdserverpb.Metadata{NodeID: uint64(m.ID), ClusterID: uint64(cl.ID())}
   229  	metadata, merr := md.Marshal()
   230  	if merr != nil {
   231  		ExitWithError(ExitInvalidInput, merr)
   232  	}
   233  
   234  	w, walerr := wal.Create(waldir, metadata)
   235  	if walerr != nil {
   236  		ExitWithError(ExitIO, walerr)
   237  	}
   238  	defer w.Close()
   239  
   240  	peers := make([]raft.Peer, len(cl.MemberIDs()))
   241  	for i, id := range cl.MemberIDs() {
   242  		ctx, err := json.Marshal((*cl).Member(id))
   243  		if err != nil {
   244  			ExitWithError(ExitInvalidInput, err)
   245  		}
   246  		peers[i] = raft.Peer{ID: uint64(id), Context: ctx}
   247  	}
   248  
   249  	ents := make([]raftpb.Entry, len(peers))
   250  	nodeIDs := make([]uint64, len(peers))
   251  	for i, p := range peers {
   252  		nodeIDs[i] = p.ID
   253  		cc := raftpb.ConfChange{
   254  			Type:    raftpb.ConfChangeAddNode,
   255  			NodeID:  p.ID,
   256  			Context: p.Context}
   257  		d, err := cc.Marshal()
   258  		if err != nil {
   259  			ExitWithError(ExitInvalidInput, err)
   260  		}
   261  		e := raftpb.Entry{
   262  			Type:  raftpb.EntryConfChange,
   263  			Term:  1,
   264  			Index: uint64(i + 1),
   265  			Data:  d,
   266  		}
   267  		ents[i] = e
   268  	}
   269  
   270  	commit, term := uint64(len(ents)), uint64(1)
   271  
   272  	if err := w.Save(raftpb.HardState{
   273  		Term:   term,
   274  		Vote:   peers[0].ID,
   275  		Commit: commit}, ents); err != nil {
   276  		ExitWithError(ExitIO, err)
   277  	}
   278  
   279  	b, berr := st.Save()
   280  	if berr != nil {
   281  		ExitWithError(ExitError, berr)
   282  	}
   283  
   284  	raftSnap := raftpb.Snapshot{
   285  		Data: b,
   286  		Metadata: raftpb.SnapshotMetadata{
   287  			Index: commit,
   288  			Term:  term,
   289  			ConfState: raftpb.ConfState{
   290  				Nodes: nodeIDs,
   291  			},
   292  		},
   293  	}
   294  	snapshotter := snap.New(snapdir)
   295  	if err := snapshotter.SaveSnap(raftSnap); err != nil {
   296  		panic(err)
   297  	}
   298  
   299  	if err := w.SaveSnapshot(walpb.Snapshot{Index: commit, Term: term}); err != nil {
   300  		ExitWithError(ExitIO, err)
   301  	}
   302  }
   303  
   304  // initIndex implements ConsistentIndexGetter so the snapshot won't block
   305  // the new raft instance by waiting for a future raft index.
   306  type initIndex int
   307  
   308  func (i *initIndex) ConsistentIndex() uint64 { return uint64(*i) }
   309  
   310  // makeDB copies the database snapshot to the snapshot directory
   311  func makeDB(snapdir, dbfile string, commit int) {
   312  	f, ferr := os.OpenFile(dbfile, os.O_RDONLY, 0600)
   313  	if ferr != nil {
   314  		ExitWithError(ExitInvalidInput, ferr)
   315  	}
   316  	defer f.Close()
   317  
   318  	// get snapshot integrity hash
   319  	if _, err := f.Seek(-sha256.Size, io.SeekEnd); err != nil {
   320  		ExitWithError(ExitIO, err)
   321  	}
   322  	sha := make([]byte, sha256.Size)
   323  	if _, err := f.Read(sha); err != nil {
   324  		ExitWithError(ExitIO, err)
   325  	}
   326  	if _, err := f.Seek(0, io.SeekStart); err != nil {
   327  		ExitWithError(ExitIO, err)
   328  	}
   329  
   330  	if err := fileutil.CreateDirAll(snapdir); err != nil {
   331  		ExitWithError(ExitIO, err)
   332  	}
   333  
   334  	dbpath := filepath.Join(snapdir, "db")
   335  	db, dberr := os.OpenFile(dbpath, os.O_RDWR|os.O_CREATE, 0600)
   336  	if dberr != nil {
   337  		ExitWithError(ExitIO, dberr)
   338  	}
   339  	if _, err := io.Copy(db, f); err != nil {
   340  		ExitWithError(ExitIO, err)
   341  	}
   342  
   343  	// truncate away integrity hash, if any.
   344  	off, serr := db.Seek(0, io.SeekEnd)
   345  	if serr != nil {
   346  		ExitWithError(ExitIO, serr)
   347  	}
   348  	hasHash := (off % 512) == sha256.Size
   349  	if hasHash {
   350  		if err := db.Truncate(off - sha256.Size); err != nil {
   351  			ExitWithError(ExitIO, err)
   352  		}
   353  	}
   354  
   355  	if !hasHash && !skipHashCheck {
   356  		err := fmt.Errorf("snapshot missing hash but --skip-hash-check=false")
   357  		ExitWithError(ExitBadArgs, err)
   358  	}
   359  
   360  	if hasHash && !skipHashCheck {
   361  		// check for match
   362  		if _, err := db.Seek(0, io.SeekStart); err != nil {
   363  			ExitWithError(ExitIO, err)
   364  		}
   365  		h := sha256.New()
   366  		if _, err := io.Copy(h, db); err != nil {
   367  			ExitWithError(ExitIO, err)
   368  		}
   369  		dbsha := h.Sum(nil)
   370  		if !reflect.DeepEqual(sha, dbsha) {
   371  			err := fmt.Errorf("expected sha256 %v, got %v", sha, dbsha)
   372  			ExitWithError(ExitInvalidInput, err)
   373  		}
   374  	}
   375  
   376  	// db hash is OK, can now modify DB so it can be part of a new cluster
   377  	db.Close()
   378  
   379  	// update consistentIndex so applies go through on etcdserver despite
   380  	// having a new raft instance
   381  	be := backend.NewDefaultBackend(dbpath)
   382  	// a lessor never timeouts leases
   383  	lessor := lease.NewLessor(be, math.MaxInt64)
   384  	s := mvcc.NewStore(be, lessor, (*initIndex)(&commit))
   385  	txn := s.Write()
   386  	btx := be.BatchTx()
   387  	del := func(k, v []byte) error {
   388  		txn.DeleteRange(k, nil)
   389  		return nil
   390  	}
   391  
   392  	// delete stored members from old cluster since using new members
   393  	btx.UnsafeForEach([]byte("members"), del)
   394  	// todo: add back new members when we start to deprecate old snap file.
   395  	btx.UnsafeForEach([]byte("members_removed"), del)
   396  	// trigger write-out of new consistent index
   397  	txn.End()
   398  	s.Commit()
   399  	s.Close()
   400  	be.Close()
   401  }
   402  
   403  type dbstatus struct {
   404  	Hash      uint32 `json:"hash"`
   405  	Revision  int64  `json:"revision"`
   406  	TotalKey  int    `json:"totalKey"`
   407  	TotalSize int64  `json:"totalSize"`
   408  }
   409  
   410  func dbStatus(p string) dbstatus {
   411  	if _, err := os.Stat(p); err != nil {
   412  		ExitWithError(ExitError, err)
   413  	}
   414  
   415  	ds := dbstatus{}
   416  
   417  	db, err := bolt.Open(p, 0400, &bolt.Options{ReadOnly: true})
   418  	if err != nil {
   419  		ExitWithError(ExitError, err)
   420  	}
   421  	defer db.Close()
   422  
   423  	h := crc32.New(crc32.MakeTable(crc32.Castagnoli))
   424  
   425  	err = db.View(func(tx *bolt.Tx) error {
   426  		// check snapshot file integrity first
   427  		var dbErrStrings []string
   428  		for dbErr := range tx.Check() {
   429  			dbErrStrings = append(dbErrStrings, dbErr.Error())
   430  		}
   431  		if len(dbErrStrings) > 0 {
   432  			return fmt.Errorf("snapshot file integrity check failed. %d errors found.\n"+strings.Join(dbErrStrings, "\n"), len(dbErrStrings))
   433  		}
   434  		ds.TotalSize = tx.Size()
   435  		c := tx.Cursor()
   436  		for next, _ := c.First(); next != nil; next, _ = c.Next() {
   437  			b := tx.Bucket(next)
   438  			if b == nil {
   439  				return fmt.Errorf("cannot get hash of bucket %s", string(next))
   440  			}
   441  			h.Write(next)
   442  			iskeyb := (string(next) == "key")
   443  			b.ForEach(func(k, v []byte) error {
   444  				h.Write(k)
   445  				h.Write(v)
   446  				if iskeyb {
   447  					rev := bytesToRev(k)
   448  					ds.Revision = rev.main
   449  				}
   450  				ds.TotalKey++
   451  				return nil
   452  			})
   453  		}
   454  		return nil
   455  	})
   456  
   457  	if err != nil {
   458  		ExitWithError(ExitError, err)
   459  	}
   460  
   461  	ds.Hash = h.Sum32()
   462  	return ds
   463  }
   464  
   465  type revision struct {
   466  	main int64
   467  	sub  int64
   468  }
   469  
   470  func bytesToRev(bytes []byte) revision {
   471  	return revision{
   472  		main: int64(binary.BigEndian.Uint64(bytes[0:8])),
   473  		sub:  int64(binary.BigEndian.Uint64(bytes[9:])),
   474  	}
   475  }