github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/state/raft/storage.go

package raft

import (
	"context"
	"fmt"

	"github.com/coreos/etcd/raft"
	"github.com/coreos/etcd/raft/raftpb"
	"github.com/docker/go-metrics"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/manager/encryption"
	"github.com/docker/swarmkit/manager/state/raft/membership"
	"github.com/docker/swarmkit/manager/state/raft/storage"
	"github.com/docker/swarmkit/manager/state/store"
	"github.com/pkg/errors"
)

var (
	// Snapshot create latency timer.
	snapshotLatencyTimer metrics.Timer
)

func init() {
	ns := metrics.NewNamespace("swarm", "raft", nil)
	snapshotLatencyTimer = ns.NewTimer("snapshot_latency",
		"Raft snapshot create latency.")
	metrics.Register(ns)
}

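// readFromDisk loads the most recent raft snapshot and WAL contents from the
// encrypted raft logs on disk, preferring a pending DEK over the current one
// when a key rotation is in flight.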
func (n *Node) readFromDisk(ctx context.Context) (*raftpb.Snapshot, storage.WALData, error) {
	keys := n.keyRotator.GetKeys()

	n.raftLogger = &storage.EncryptedRaftLogger{
		StateDir:      n.opts.StateDir,
		EncryptionKey: keys.CurrentDEK,
		FIPS:          n.opts.FIPS,
	}
	if keys.PendingDEK != nil {
		n.raftLogger.EncryptionKey = keys.PendingDEK
	}

	snap, walData, err := n.raftLogger.BootstrapFromDisk(ctx)

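	// A pending DEK means a key rotation may have been interrupted. If the logs
	// decrypted with the pending key, the rotation had actually completed, so
	// mark it as such; if they cannot be decrypted, they are still encrypted
	// with the current key, so retry with that.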
	if keys.PendingDEK != nil {
		switch errors.Cause(err).(type) {
		case nil:
			if err = n.keyRotator.UpdateKeys(EncryptionKeys{CurrentDEK: keys.PendingDEK}); err != nil {
				err = errors.Wrap(err, "previous key rotation was successful, but unable to mark rotation as complete")
			}
		case encryption.ErrCannotDecrypt:
			snap, walData, err = n.raftLogger.BootstrapFromDisk(ctx, keys.CurrentDEK)
		}
	}

	if err != nil {
		return nil, storage.WALData{}, err
	}
	return snap, walData, nil
}

// loadAndStart bootstraps a node's raft store from the raft logs and snapshots on disk.
func (n *Node) loadAndStart(ctx context.Context, forceNewCluster bool) error {
	snapshot, waldata, err := n.readFromDisk(ctx)
	if err != nil {
		return err
	}

	// Read logs to fully catch up store
	var raftNode api.RaftMember
	if err := raftNode.Unmarshal(waldata.Metadata); err != nil {
		return errors.Wrap(err, "failed to unmarshal WAL metadata")
	}
	n.Config.ID = raftNode.RaftID

	if snapshot != nil {
		snapCluster, err := n.clusterSnapshot(snapshot.Data)
		if err != nil {
			return err
		}
		var bootstrapMembers []*api.RaftMember
		if forceNewCluster {
			for _, m := range snapCluster.Members {
				if m.RaftID != n.Config.ID {
					n.cluster.RemoveMember(m.RaftID)
					continue
				}
				bootstrapMembers = append(bootstrapMembers, m)
			}
		} else {
			bootstrapMembers = snapCluster.Members
		}
		n.bootstrapMembers = bootstrapMembers
		for _, removedMember := range snapCluster.Removed {
			n.cluster.RemoveMember(removedMember)
		}
	}

	ents, st := waldata.Entries, waldata.HardState

	// All members that are no longer part of the cluster must be added to
	// the removed list right away, so that we don't try to connect to them
	// before processing the configuration change entries, which could make
	// us get stuck.
	for _, ent := range ents {
		if ent.Index <= st.Commit && ent.Type == raftpb.EntryConfChange {
			var cc raftpb.ConfChange
			if err := cc.Unmarshal(ent.Data); err != nil {
				return errors.Wrap(err, "failed to unmarshal config change")
			}
			if cc.Type == raftpb.ConfChangeRemoveNode {
				n.cluster.RemoveMember(cc.NodeID)
			}
		}
	}

	if forceNewCluster {
		// discard the previously uncommitted entries
		for i, ent := range ents {
			if ent.Index > st.Commit {
				log.G(ctx).Infof("discarding %d uncommitted WAL entries", len(ents)-i)
				ents = ents[:i]
				break
			}
		}

		// force append the configuration change entries
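		// createConfigChangeEnts synthesizes ConfChange entries that remove every
		// member except this node, so the forced single-node cluster does not try
		// to reach the old peers.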
		toAppEnts := createConfigChangeEnts(getIDs(snapshot, ents), n.Config.ID, st.Term, st.Commit)

		// All members that are being removed as part of the
		// force-new-cluster process must be added to the
		// removed list right away, so that we don't try to
		// connect to them before processing the configuration
		// change entries, which could make us get stuck.
		for _, ccEnt := range toAppEnts {
			if ccEnt.Type == raftpb.EntryConfChange {
				var cc raftpb.ConfChange
				if err := cc.Unmarshal(ccEnt.Data); err != nil {
					return errors.Wrap(err, "error unmarshalling force-new-cluster config change")
				}
				if cc.Type == raftpb.ConfChangeRemoveNode {
					n.cluster.RemoveMember(cc.NodeID)
				}
			}
		}
		ents = append(ents, toAppEnts...)

		// force commit newly appended entries
		err := n.raftLogger.SaveEntries(st, toAppEnts)
		if err != nil {
			log.G(ctx).WithError(err).Fatal("failed to save WAL while forcing new cluster")
		}
		if len(toAppEnts) != 0 {
			st.Commit = toAppEnts[len(toAppEnts)-1].Index
		}
	}

	if snapshot != nil {
		if err := n.raftStore.ApplySnapshot(*snapshot); err != nil {
			return err
		}
	}
	if err := n.raftStore.SetHardState(st); err != nil {
		return err
	}
	return n.raftStore.Append(ents)
}

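// newRaftLogs marshals this node's RaftMember metadata, bootstraps a fresh
// encrypted WAL and snapshot directory with it, and returns the raft.Peer
// describing this node.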
func (n *Node) newRaftLogs(nodeID string) (raft.Peer, error) {
	raftNode := &api.RaftMember{
		RaftID: n.Config.ID,
		NodeID: nodeID,
		Addr:   n.opts.Addr,
	}
	metadata, err := raftNode.Marshal()
	if err != nil {
		return raft.Peer{}, errors.Wrap(err, "error marshalling raft node")
	}
	if err := n.raftLogger.BootstrapNew(metadata); err != nil {
		return raft.Peer{}, err
	}
	n.cluster.AddMember(&membership.Member{RaftMember: raftNode})
	return raft.Peer{ID: n.Config.ID, Context: metadata}, nil
}

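// triggerSnapshot serializes the current membership and memory store contents
// into a raft snapshot on a background goroutine, saves it to disk, and then
// compacts the raft log, keeping LogEntriesForSlowFollowers entries behind the
// snapshot index for lagging peers.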
func (n *Node) triggerSnapshot(ctx context.Context, raftConfig api.RaftConfig) {
	snapshot := api.Snapshot{Version: api.Snapshot_V0}
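	// Record the current membership (active and removed) so a node restoring
	// from this snapshot can rebuild its peer list.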
	for _, member := range n.cluster.Members() {
		snapshot.Membership.Members = append(snapshot.Membership.Members,
			&api.RaftMember{
				NodeID: member.NodeID,
				RaftID: member.RaftID,
				Addr:   member.Addr,
			})
	}
	snapshot.Membership.Removed = n.cluster.Removed()

	viewStarted := make(chan struct{})
	n.asyncTasks.Add(1)
	n.snapshotInProgress = make(chan raftpb.SnapshotMetadata, 1) // buffered in case Shutdown is called during the snapshot
	go func(appliedIndex uint64, snapshotMeta raftpb.SnapshotMetadata) {
		// Start the latency timer now; the deferred call records the elapsed
		// time when this goroutine exits.
		defer metrics.StartTimer(snapshotLatencyTimer)()

		defer func() {
			n.asyncTasks.Done()
			n.snapshotInProgress <- snapshotMeta
		}()
		var err error
		n.memoryStore.View(func(tx store.ReadTx) {
			close(viewStarted)

			var storeSnapshot *api.StoreSnapshot
			storeSnapshot, err = n.memoryStore.Save(tx)
			snapshot.Store = *storeSnapshot
		})
		if err != nil {
			log.G(ctx).WithError(err).Error("failed to read snapshot from store")
			return
		}

		d, err := snapshot.Marshal()
		if err != nil {
			log.G(ctx).WithError(err).Error("failed to marshal snapshot")
			return
		}
		snap, err := n.raftStore.CreateSnapshot(appliedIndex, &n.confState, d)
		if err == nil {
			if err := n.raftLogger.SaveSnapshot(snap); err != nil {
				log.G(ctx).WithError(err).Error("failed to save snapshot")
				return
			}
			snapshotMeta = snap.Metadata

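			// Compact entries that predate the snapshot, but keep
			// LogEntriesForSlowFollowers entries so lagging followers can catch
			// up from the log instead of needing a full snapshot transfer.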
			if appliedIndex > raftConfig.LogEntriesForSlowFollowers {
				err := n.raftStore.Compact(appliedIndex - raftConfig.LogEntriesForSlowFollowers)
				if err != nil && err != raft.ErrCompacted {
					log.G(ctx).WithError(err).Error("failed to compact snapshot")
				}
			}
		} else if err != raft.ErrSnapOutOfDate {
			log.G(ctx).WithError(err).Error("failed to create snapshot")
		}
	}(n.appliedIndex, n.snapshotMeta)

	// Wait for the goroutine to establish a read transaction, to make
	// sure it sees the state as of this moment.
	<-viewStarted
}

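// clusterSnapshot unmarshals a snapshot payload, restores the memory store
// from it, and returns the membership information it carried.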
func (n *Node) clusterSnapshot(data []byte) (api.ClusterSnapshot, error) {
	var snapshot api.Snapshot
	if err := snapshot.Unmarshal(data); err != nil {
		return snapshot.Membership, err
	}
	if snapshot.Version != api.Snapshot_V0 {
		return snapshot.Membership, fmt.Errorf("unrecognized snapshot version %d", snapshot.Version)
	}

	if err := n.memoryStore.Restore(&snapshot.Store); err != nil {
		return snapshot.Membership, err
	}

	return snapshot.Membership, nil
}