// go.etcd.io/etcd@v3.3.27+incompatible/snapshot/v3_snapshot.go

// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package snapshot

import (
	"context"
	"crypto/sha256"
	"encoding/json"
	"fmt"
	"hash/crc32"
	"io"
	"math"
	"os"
	"path/filepath"
	"reflect"
	"time"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/etcdserver"
	"github.com/coreos/etcd/etcdserver/etcdserverpb"
	"github.com/coreos/etcd/etcdserver/membership"
	"github.com/coreos/etcd/lease"
	"github.com/coreos/etcd/mvcc"
	"github.com/coreos/etcd/mvcc/backend"
	"github.com/coreos/etcd/pkg/fileutil"
	"github.com/coreos/etcd/pkg/types"
	"github.com/coreos/etcd/raft"
	"github.com/coreos/etcd/raft/raftpb"
	"github.com/coreos/etcd/snap"
	"github.com/coreos/etcd/store"
	"github.com/coreos/etcd/wal"
	"github.com/coreos/etcd/wal/walpb"

	bolt "github.com/coreos/bbolt"
	"go.uber.org/zap"
)

// Manager defines snapshot methods.
type Manager interface {
	// Save fetches a snapshot from a remote etcd server and saves the data
	// to the target path. If the context "ctx" is canceled or times out,
	// the snapshot save stream errors out (e.g. context.Canceled,
	// context.DeadlineExceeded). Make sure to specify only one endpoint
	// in the client configuration: the snapshot API must be requested of
	// one selected node, and the saved snapshot is the point-in-time state
	// of that node.
	Save(ctx context.Context, cfg clientv3.Config, dbPath string) error

	// Status returns the snapshot file information.
	Status(dbPath string) (Status, error)

	// Restore restores a new etcd data directory from a given snapshot
	// file. It returns an error if the specified data directory already
	// exists, to prevent unintended data directory overwrites.
	Restore(cfg RestoreConfig) error
}

// NewV3 returns a new snapshot Manager for v3.x snapshots.
func NewV3(lg *zap.Logger) Manager {
	if lg == nil {
		lg = zap.NewExample()
	}
	return &v3Manager{lg: lg}
}

type v3Manager struct {
	lg *zap.Logger

	name    string
	dbPath  string
	walDir  string
	snapDir string
	cl      *membership.RaftCluster

	skipHashCheck bool
}
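// The sketch below is an editorial illustration, not part of the original
// file: it shows one way application code might call Manager.Save. The
// endpoint, timeout, and output path are hypothetical placeholders.

// exampleSave saves a point-in-time snapshot from a single etcd endpoint.
func exampleSave() {
	sp := NewV3(zap.NewExample())

	// The client config must contain exactly one endpoint; Save rejects
	// configs with multiple endpoints.
	cfg := clientv3.Config{
		Endpoints:   []string{"http://127.0.0.1:2379"},
		DialTimeout: 5 * time.Second,
	}

	// Bound the transfer so a stalled stream fails with DeadlineExceeded.
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	if err := sp.Save(ctx, cfg, "backup.db"); err != nil {
		fmt.Println("snapshot save failed:", err)
	}
}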
// Save fetches a snapshot from a remote etcd server and saves the data to the target path.
func (s *v3Manager) Save(ctx context.Context, cfg clientv3.Config, dbPath string) error {
	if len(cfg.Endpoints) != 1 {
		return fmt.Errorf("snapshot must be requested to one selected node, not multiple %v", cfg.Endpoints)
	}
	cli, err := clientv3.New(cfg)
	if err != nil {
		return err
	}
	defer cli.Close()

	partpath := dbPath + ".part"
	defer os.RemoveAll(partpath)

	var f *os.File
	f, err = os.Create(partpath)
	if err != nil {
		return fmt.Errorf("could not open %s (%v)", partpath, err)
	}
	s.lg.Info(
		"created temporary db file",
		zap.String("path", partpath),
	)

	now := time.Now()
	var rd io.ReadCloser
	rd, err = cli.Snapshot(ctx)
	if err != nil {
		return err
	}
	s.lg.Info(
		"fetching snapshot",
		zap.String("endpoint", cfg.Endpoints[0]),
	)
	if _, err = io.Copy(f, rd); err != nil {
		return err
	}
	if err = fileutil.Fsync(f); err != nil {
		return err
	}
	if err = f.Close(); err != nil {
		return err
	}
	s.lg.Info(
		"fetched snapshot",
		zap.String("endpoint", cfg.Endpoints[0]),
		zap.Duration("took", time.Since(now)),
	)

	if err = os.Rename(partpath, dbPath); err != nil {
		return fmt.Errorf("could not rename %s to %s (%v)", partpath, dbPath, err)
	}
	s.lg.Info("saved", zap.String("path", dbPath))
	return nil
}

// Status is the snapshot file status.
type Status struct {
	Hash      uint32 `json:"hash"`
	Revision  int64  `json:"revision"`
	TotalKey  int    `json:"totalKey"`
	TotalSize int64  `json:"totalSize"`
}

// Status returns the snapshot file information.
func (s *v3Manager) Status(dbPath string) (ds Status, err error) {
	if _, err = os.Stat(dbPath); err != nil {
		return ds, err
	}

	db, err := bolt.Open(dbPath, 0400, &bolt.Options{ReadOnly: true})
	if err != nil {
		return ds, err
	}
	defer db.Close()

	h := crc32.New(crc32.MakeTable(crc32.Castagnoli))

	if err = db.View(func(tx *bolt.Tx) error {
		ds.TotalSize = tx.Size()
		c := tx.Cursor()
		for next, _ := c.First(); next != nil; next, _ = c.Next() {
			b := tx.Bucket(next)
			if b == nil {
				return fmt.Errorf("cannot get hash of bucket %s", string(next))
			}
			h.Write(next)
			iskeyb := (string(next) == "key")
			if err := b.ForEach(func(k, v []byte) error {
				h.Write(k)
				h.Write(v)
				if iskeyb {
					rev := bytesToRev(k)
					ds.Revision = rev.main
				}
				ds.TotalKey++
				return nil
			}); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return ds, err
	}

	ds.Hash = h.Sum32()
	return ds, nil
}
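// The sketch below is an editorial illustration, not part of the original
// file: it inspects a snapshot file before a restore. The path is a
// hypothetical placeholder.

// exampleStatus prints the hash, revision, key count, and size of a snapshot.
func exampleStatus() {
	sp := NewV3(zap.NewExample())
	st, err := sp.Status("backup.db")
	if err != nil {
		fmt.Println("status failed:", err)
		return
	}
	fmt.Printf("hash=%08x revision=%d keys=%d size=%d bytes\n",
		st.Hash, st.Revision, st.TotalKey, st.TotalSize)
}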
// RestoreConfig configures a snapshot restore operation.
type RestoreConfig struct {
	// SnapshotPath is the path of the snapshot file to restore from.
	SnapshotPath string

	// Name is the human-readable name of this member.
	Name string

	// OutputDataDir is the target data directory to save restored data.
	// OutputDataDir should not conflict with an existing etcd data directory.
	// If OutputDataDir already exists, Restore returns an error to prevent
	// unintended data directory overwrites.
	// If empty, defaults to "[Name].etcd".
	OutputDataDir string
	// OutputWALDir is the target WAL data directory.
	// If empty, defaults to "[OutputDataDir]/member/wal".
	OutputWALDir string

	// PeerURLs is a list of the member's peer URLs to advertise to the rest of the cluster.
	PeerURLs []string

	// InitialCluster is the initial cluster configuration for the restore bootstrap.
	InitialCluster string
	// InitialClusterToken is the initial cluster token for the etcd cluster during the restore bootstrap.
	InitialClusterToken string

	// SkipHashCheck is "true" to ignore the snapshot integrity hash value
	// (required if the snapshot was copied from a data directory instead of
	// fetched through the snapshot API).
	SkipHashCheck bool
}

// Restore restores a new etcd data directory from a given snapshot file.
func (s *v3Manager) Restore(cfg RestoreConfig) error {
	pURLs, err := types.NewURLs(cfg.PeerURLs)
	if err != nil {
		return err
	}
	var ics types.URLsMap
	ics, err = types.NewURLsMap(cfg.InitialCluster)
	if err != nil {
		return err
	}

	srv := etcdserver.ServerConfig{
		Name:                cfg.Name,
		PeerURLs:            pURLs,
		InitialPeerURLsMap:  ics,
		InitialClusterToken: cfg.InitialClusterToken,
	}
	if err = srv.VerifyBootstrap(); err != nil {
		return err
	}

	s.cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, ics)
	if err != nil {
		return err
	}

	dataDir := cfg.OutputDataDir
	if dataDir == "" {
		dataDir = cfg.Name + ".etcd"
	}
	if fileutil.Exist(dataDir) {
		return fmt.Errorf("data-dir %q exists", dataDir)
	}

	walDir := cfg.OutputWALDir
	if walDir == "" {
		walDir = filepath.Join(dataDir, "member", "wal")
	} else if fileutil.Exist(walDir) {
		return fmt.Errorf("wal-dir %q exists", walDir)
	}

	s.name = cfg.Name
	s.dbPath = cfg.SnapshotPath
	s.walDir = walDir
	s.snapDir = filepath.Join(dataDir, "member", "snap")
	s.skipHashCheck = cfg.SkipHashCheck

	s.lg.Info(
		"restoring snapshot",
		zap.String("path", s.dbPath),
		zap.String("wal-dir", s.walDir),
		zap.String("data-dir", dataDir),
		zap.String("snap-dir", s.snapDir),
	)
	if err = s.saveDB(); err != nil {
		return err
	}
	if err = s.saveWALAndSnap(); err != nil {
		return err
	}
	s.lg.Info(
		"restored snapshot",
		zap.String("path", s.dbPath),
		zap.String("wal-dir", s.walDir),
		zap.String("data-dir", dataDir),
		zap.String("snap-dir", s.snapDir),
	)

	return nil
}
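// The sketch below is an editorial illustration, not part of the original
// file: it restores a single-member cluster from a snapshot. Every value is
// a hypothetical placeholder; a multi-member restore runs the same call once
// per member with that member's name and URLs.

// exampleRestore materializes a new data directory from "backup.db".
func exampleRestore() {
	sp := NewV3(zap.NewExample())
	err := sp.Restore(RestoreConfig{
		SnapshotPath:        "backup.db",
		Name:                "m1",
		OutputDataDir:       "m1.etcd",
		PeerURLs:            []string{"http://127.0.0.1:2380"},
		InitialCluster:      "m1=http://127.0.0.1:2380",
		InitialClusterToken: "etcd-cluster-1",
		// SkipHashCheck should be true only for snapshots copied straight
		// out of a data directory, which lack the trailing sha256.
		SkipHashCheck: false,
	})
	if err != nil {
		fmt.Println("restore failed:", err)
	}
}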
// saveDB copies the database snapshot to the snapshot directory.
func (s *v3Manager) saveDB() error {
	f, ferr := os.OpenFile(s.dbPath, os.O_RDONLY, 0600)
	if ferr != nil {
		return ferr
	}
	defer f.Close()

	// get the snapshot integrity hash
	if _, err := f.Seek(-sha256.Size, io.SeekEnd); err != nil {
		return err
	}
	sha := make([]byte, sha256.Size)
	if _, err := f.Read(sha); err != nil {
		return err
	}
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return err
	}

	if err := fileutil.CreateDirAll(s.snapDir); err != nil {
		return err
	}

	dbpath := filepath.Join(s.snapDir, "db")
	db, dberr := os.OpenFile(dbpath, os.O_RDWR|os.O_CREATE, 0600)
	if dberr != nil {
		return dberr
	}
	if _, err := io.Copy(db, f); err != nil {
		return err
	}

	// Truncate away the integrity hash, if any. A bolt database is sized in
	// whole pages (a multiple of 512 bytes), so a file whose size sits 32
	// bytes past a 512-byte boundary carries an appended sha256.
	off, serr := db.Seek(0, io.SeekEnd)
	if serr != nil {
		return serr
	}
	hasHash := (off % 512) == sha256.Size
	if hasHash {
		if err := db.Truncate(off - sha256.Size); err != nil {
			return err
		}
	}

	if !hasHash && !s.skipHashCheck {
		return fmt.Errorf("snapshot missing hash but --skip-hash-check=false")
	}

	if hasHash && !s.skipHashCheck {
		// check for a match
		if _, err := db.Seek(0, io.SeekStart); err != nil {
			return err
		}
		h := sha256.New()
		if _, err := io.Copy(h, db); err != nil {
			return err
		}
		dbsha := h.Sum(nil)
		if !reflect.DeepEqual(sha, dbsha) {
			return fmt.Errorf("expected sha256 %v, got %v", sha, dbsha)
		}
	}

	// db hash is OK, can now modify DB so it can be part of a new cluster
	db.Close()

	commit := len(s.cl.Members())

	// update consistentIndex so applies go through on etcdserver despite
	// having a new raft instance
	be := backend.NewDefaultBackend(dbpath)

	// a lessor that never times out leases
	lessor := lease.NewLessor(be, math.MaxInt64)

	mvs := mvcc.NewStore(be, lessor, (*initIndex)(&commit))
	txn := mvs.Write()
	btx := be.BatchTx()
	del := func(k, v []byte) error {
		txn.DeleteRange(k, nil)
		return nil
	}

	// delete stored members from the old cluster since we are using new members
	btx.UnsafeForEach([]byte("members"), del)

	// TODO: add back new members when we start to deprecate the old snap file.
	btx.UnsafeForEach([]byte("members_removed"), del)

	// trigger a write-out of the new consistent index
	txn.End()

	mvs.Commit()
	mvs.Close()
	be.Close()

	return nil
}
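// The sketch below is an editorial illustration, not part of the original
// file: it re-creates the trailing-hash verification above as a standalone
// check, assuming the file ends with a sha256 of everything before it (the
// layout produced by the v3 snapshot API). The function name is hypothetical.

// exampleCheckSnapshotHash reports whether the snapshot's trailing sha256
// matches the rest of the file.
func exampleCheckSnapshotHash(path string) (bool, error) {
	f, err := os.Open(path)
	if err != nil {
		return false, err
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		return false, err
	}
	body := fi.Size() - sha256.Size
	if body < 0 {
		return false, fmt.Errorf("%s too small to carry a sha256", path)
	}

	// Hash everything up to the trailing sha256, then compare.
	h := sha256.New()
	if _, err := io.Copy(h, io.LimitReader(f, body)); err != nil {
		return false, err
	}
	want := make([]byte, sha256.Size)
	if _, err := io.ReadFull(f, want); err != nil {
		return false, err
	}
	return reflect.DeepEqual(want, h.Sum(nil)), nil
}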
// saveWALAndSnap creates a WAL for the initial cluster and persists a raft
// snapshot that seeds the restored member.
func (s *v3Manager) saveWALAndSnap() error {
	if err := fileutil.CreateDirAll(s.walDir); err != nil {
		return err
	}

	// add members again to persist them to the store we create.
	st := store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix)
	s.cl.SetStore(st)
	for _, m := range s.cl.Members() {
		s.cl.AddMember(m)
	}

	m := s.cl.MemberByName(s.name)
	md := &etcdserverpb.Metadata{NodeID: uint64(m.ID), ClusterID: uint64(s.cl.ID())}
	metadata, merr := md.Marshal()
	if merr != nil {
		return merr
	}
	w, walerr := wal.Create(s.walDir, metadata)
	if walerr != nil {
		return walerr
	}
	defer w.Close()

	peers := make([]raft.Peer, len(s.cl.MemberIDs()))
	for i, id := range s.cl.MemberIDs() {
		ctx, err := json.Marshal(s.cl.Member(id))
		if err != nil {
			return err
		}
		peers[i] = raft.Peer{ID: uint64(id), Context: ctx}
	}

	ents := make([]raftpb.Entry, len(peers))
	nodeIDs := make([]uint64, len(peers))
	for i, p := range peers {
		nodeIDs[i] = p.ID
		cc := raftpb.ConfChange{
			Type:    raftpb.ConfChangeAddNode,
			NodeID:  p.ID,
			Context: p.Context,
		}
		d, err := cc.Marshal()
		if err != nil {
			return err
		}
		ents[i] = raftpb.Entry{
			Type:  raftpb.EntryConfChange,
			Term:  1,
			Index: uint64(i + 1),
			Data:  d,
		}
	}

	commit, term := uint64(len(ents)), uint64(1)
	if err := w.Save(raftpb.HardState{
		Term:   term,
		Vote:   peers[0].ID,
		Commit: commit,
	}, ents); err != nil {
		return err
	}

	b, berr := st.Save()
	if berr != nil {
		return berr
	}
	raftSnap := raftpb.Snapshot{
		Data: b,
		Metadata: raftpb.SnapshotMetadata{
			Index: commit,
			Term:  term,
			ConfState: raftpb.ConfState{
				Nodes: nodeIDs,
			},
		},
	}
	sn := snap.New(s.snapDir)
	if err := sn.SaveSnap(raftSnap); err != nil {
		return err
	}

	return w.SaveSnapshot(walpb.Snapshot{Index: commit, Term: term})
}
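// The sketch below is an editorial illustration, not part of the original
// file: it reopens the WAL written by saveWALAndSnap and reads the bootstrap
// conf-change entries back, e.g. as a sanity check after a restore. The
// directory argument is a hypothetical placeholder, and opening at the zero
// snapshot assumes the WAL was just created as above.

// exampleReadBackWAL replays the restored WAL and prints its hard state.
func exampleReadBackWAL(walDir string) error {
	w, err := wal.Open(walDir, walpb.Snapshot{})
	if err != nil {
		return err
	}
	defer w.Close()

	_, hardState, ents, err := w.ReadAll()
	if err != nil {
		return err
	}
	fmt.Printf("term=%d commit=%d conf-change entries=%d\n",
		hardState.Term, hardState.Commit, len(ents))
	return nil
}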