github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/dgraph/cmd/zero/raft.go (about) 1 /* 2 * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package zero 18 19 import ( 20 "fmt" 21 "log" 22 "math" 23 "sort" 24 "strings" 25 "sync" 26 "time" 27 28 otrace "go.opencensus.io/trace" 29 30 "github.com/dgraph-io/badger/y" 31 "github.com/dgraph-io/dgraph/conn" 32 "github.com/dgraph-io/dgraph/protos/pb" 33 "github.com/dgraph-io/dgraph/x" 34 farm "github.com/dgryski/go-farm" 35 "github.com/golang/glog" 36 "github.com/google/uuid" 37 "github.com/pkg/errors" 38 "go.etcd.io/etcd/raft" 39 "go.etcd.io/etcd/raft/raftpb" 40 "golang.org/x/net/context" 41 ) 42 43 type node struct { 44 *conn.Node 45 server *Server 46 ctx context.Context 47 closer *y.Closer // to stop Run. 48 49 // The last timestamp when this Zero was able to reach quorum. 50 mu sync.RWMutex 51 lastQuorum time.Time 52 } 53 54 func (n *node) AmLeader() bool { 55 if n.Raft() == nil { 56 return false 57 } 58 r := n.Raft() 59 if r.Status().Lead != r.Status().ID { 60 return false 61 } 62 63 // This node must be the leader, but must also be an active member of the cluster, and not 64 // hidden behind a partition. Basically, if this node was the leader and goes behind a 65 // partition, it would still think that it is indeed the leader for the duration mentioned 66 // below. 67 n.mu.RLock() 68 defer n.mu.RUnlock() 69 return time.Since(n.lastQuorum) <= 5*time.Second 70 } 71 72 func (n *node) uniqueKey() string { 73 return fmt.Sprintf("z%x-%d", n.Id, n.Rand.Uint64()) 74 } 75 76 var errInternalRetry = errors.New("Retry Raft proposal internally") 77 78 // proposeAndWait makes a proposal to the quorum for Group Zero and waits for it to be accepted by 79 // the group before returning. It is safe to call concurrently. 80 func (n *node) proposeAndWait(ctx context.Context, proposal *pb.ZeroProposal) error { 81 switch { 82 case n.Raft() == nil: 83 return errors.Errorf("Raft isn't initialized yet.") 84 case ctx.Err() != nil: 85 return ctx.Err() 86 case !n.AmLeader(): 87 // Do this check upfront. Don't do this inside propose for reasons explained below. 88 return errors.Errorf("Not Zero leader. Aborting proposal: %+v", proposal) 89 } 90 91 // We could consider adding a wrapper around the user proposal, so we can access any key-values. 92 // Something like this: 93 // https://github.com/golang/go/commit/5d39260079b5170e6b4263adb4022cc4b54153c4 94 span := otrace.FromContext(ctx) 95 // Overwrite ctx, so we no longer enforce the timeouts or cancels from ctx. 96 ctx = otrace.NewContext(context.Background(), span) 97 98 stop := x.SpanTimer(span, "n.proposeAndWait") 99 defer stop() 100 101 // propose runs in a loop. So, we should not do any checks inside, including n.AmLeader. This is 102 // to avoid the scenario where the first proposal times out and the second one gets returned 103 // due to node no longer being the leader. In this scenario, the first proposal can still get 104 // accepted by Raft, causing a txn violation later for us, because we assumed that the proposal 105 // did not go through. 106 propose := func(timeout time.Duration) error { 107 cctx, cancel := context.WithTimeout(ctx, timeout) 108 defer cancel() 109 110 errCh := make(chan error, 1) 111 pctx := &conn.ProposalCtx{ 112 ErrCh: errCh, 113 // Don't use the original context, because that's not what we're passing to Raft. 114 Ctx: cctx, 115 } 116 key := n.uniqueKey() 117 x.AssertTruef(n.Proposals.Store(key, pctx), "Found existing proposal with key: [%v]", key) 118 defer n.Proposals.Delete(key) 119 proposal.Key = key 120 span.Annotatef(nil, "Proposing with key: %s. Timeout: %v", key, timeout) 121 122 data, err := proposal.Marshal() 123 if err != nil { 124 return err 125 } 126 // Propose the change. 127 if err := n.Raft().Propose(cctx, data); err != nil { 128 span.Annotatef(nil, "Error while proposing via Raft: %v", err) 129 return errors.Wrapf(err, "While proposing") 130 } 131 132 // Wait for proposal to be applied or timeout. 133 select { 134 case err := <-errCh: 135 // We arrived here by a call to n.props.Done(). 136 return err 137 case <-cctx.Done(): 138 span.Annotatef(nil, "Internal context timeout %s. Will retry...", timeout) 139 return errInternalRetry 140 } 141 } 142 143 // Some proposals can be stuck if leader change happens. For e.g. MsgProp message from follower 144 // to leader can be dropped/end up appearing with empty Data in CommittedEntries. 145 // Having a timeout here prevents the mutation being stuck forever in case they don't have a 146 // timeout. We should always try with a timeout and optionally retry. 147 err := errInternalRetry 148 timeout := 4 * time.Second 149 for err == errInternalRetry { 150 err = propose(timeout) 151 timeout *= 2 // Exponential backoff 152 if timeout > time.Minute { 153 timeout = 32 * time.Second 154 } 155 } 156 return err 157 } 158 159 var ( 160 errInvalidProposal = errors.New("Invalid group proposal") 161 errTabletAlreadyServed = errors.New("Tablet is already being served") 162 ) 163 164 func newGroup() *pb.Group { 165 return &pb.Group{ 166 Members: make(map[uint64]*pb.Member), 167 Tablets: make(map[string]*pb.Tablet), 168 } 169 } 170 171 func (n *node) handleMemberProposal(member *pb.Member) error { 172 n.server.AssertLock() 173 state := n.server.state 174 175 m := n.server.member(member.Addr) 176 // Ensures that different nodes don't have same address. 177 if m != nil && (m.Id != member.Id || m.GroupId != member.GroupId) { 178 return errors.Errorf("Found another member %d with same address: %v", m.Id, m.Addr) 179 } 180 if member.GroupId == 0 { 181 state.Zeros[member.Id] = member 182 if member.Leader { 183 // Unset leader flag for other nodes, there can be only one 184 // leader at a time. 185 for _, m := range state.Zeros { 186 if m.Id != member.Id { 187 m.Leader = false 188 } 189 } 190 } 191 return nil 192 } 193 group := state.Groups[member.GroupId] 194 if group == nil { 195 group = newGroup() 196 state.Groups[member.GroupId] = group 197 } 198 m, has := group.Members[member.Id] 199 if member.AmDead { 200 if has { 201 delete(group.Members, member.Id) 202 state.Removed = append(state.Removed, m) 203 } 204 // else already removed. 205 if len(group.Members) == 0 { 206 glog.V(3).Infof("Deleting group Id %d (no members) ...", member.GroupId) 207 delete(state.Groups, member.GroupId) 208 } 209 return nil 210 } 211 if !has && len(group.Members) >= n.server.NumReplicas { 212 // We shouldn't allow more members than the number of replicas. 213 return errors.Errorf("Group reached replication level. Can't add another member: %+v", member) 214 } 215 216 // Create a connection to this server. 217 go conn.GetPools().Connect(member.Addr) 218 219 group.Members[member.Id] = member 220 // Increment nextGroup when we have enough replicas 221 if member.GroupId == n.server.nextGroup && 222 len(group.Members) >= n.server.NumReplicas { 223 n.server.nextGroup++ 224 } 225 if member.Leader { 226 // Unset leader flag for other nodes, there can be only one 227 // leader at a time. 228 for _, m := range group.Members { 229 if m.Id != member.Id { 230 m.Leader = false 231 } 232 } 233 } 234 // On replay of logs on restart we need to set nextGroup. 235 if n.server.nextGroup <= member.GroupId { 236 n.server.nextGroup = member.GroupId + 1 237 } 238 return nil 239 } 240 241 func (n *node) handleTabletProposal(tablet *pb.Tablet) error { 242 n.server.AssertLock() 243 state := n.server.state 244 defer func() { 245 // Regenerate group checksums. These checksums are solely based on which tablets are being 246 // served by the group. If the tablets that a group is serving changes, and the Alpha does 247 // not know about these changes, then the read request must fail. 248 for _, g := range state.GetGroups() { 249 preds := make([]string, 0, len(g.GetTablets())) 250 for pred := range g.GetTablets() { 251 preds = append(preds, pred) 252 } 253 sort.Strings(preds) 254 g.Checksum = farm.Fingerprint64([]byte(strings.Join(preds, ""))) 255 } 256 if n.AmLeader() { 257 // It is important to push something to Oracle updates channel, so the subscribers would 258 // get the latest checksum that we calculated above. Otherwise, if all the queries are 259 // best effort queries which don't create any transaction, then the OracleDelta never 260 // gets sent to Alphas, causing their group checksum to mismatch and never converge. 261 n.server.orc.updates <- &pb.OracleDelta{} 262 } 263 }() 264 265 if tablet.GroupId == 0 { 266 return errors.Errorf("Tablet group id is zero: %+v", tablet) 267 } 268 group := state.Groups[tablet.GroupId] 269 if tablet.Remove { 270 glog.Infof("Removing tablet for attr: [%v], gid: [%v]\n", tablet.Predicate, tablet.GroupId) 271 if group != nil { 272 delete(group.Tablets, tablet.Predicate) 273 } 274 return nil 275 } 276 if group == nil { 277 group = newGroup() 278 state.Groups[tablet.GroupId] = group 279 } 280 281 // There's a edge case that we're handling. 282 // Two servers ask to serve the same tablet, then we need to ensure that 283 // only the first one succeeds. 284 if prev := n.server.servingTablet(tablet.Predicate); prev != nil { 285 if tablet.Force { 286 originalGroup := state.Groups[prev.GroupId] 287 delete(originalGroup.Tablets, tablet.Predicate) 288 } else { 289 if prev.GroupId != tablet.GroupId { 290 glog.Infof( 291 "Tablet for attr: [%s], gid: [%d] already served by group: [%d]\n", 292 prev.Predicate, tablet.GroupId, prev.GroupId) 293 return errTabletAlreadyServed 294 } 295 } 296 } 297 tablet.Force = false 298 group.Tablets[tablet.Predicate] = tablet 299 return nil 300 } 301 302 func (n *node) applyProposal(e raftpb.Entry) (string, error) { 303 var p pb.ZeroProposal 304 // Raft commits empty entry on becoming a leader. 305 if len(e.Data) == 0 { 306 return p.Key, nil 307 } 308 if err := p.Unmarshal(e.Data); err != nil { 309 return p.Key, err 310 } 311 if len(p.Key) == 0 { 312 return p.Key, errInvalidProposal 313 } 314 span := otrace.FromContext(n.Proposals.Ctx(p.Key)) 315 316 n.server.Lock() 317 defer n.server.Unlock() 318 319 state := n.server.state 320 state.Counter = e.Index 321 if len(p.Cid) > 0 { 322 if len(state.Cid) > 0 { 323 return p.Key, errInvalidProposal 324 } 325 state.Cid = p.Cid 326 } 327 if p.MaxRaftId > 0 { 328 if p.MaxRaftId <= state.MaxRaftId { 329 return p.Key, errInvalidProposal 330 } 331 state.MaxRaftId = p.MaxRaftId 332 } 333 if p.SnapshotTs != nil { 334 for gid, ts := range p.SnapshotTs { 335 if group, ok := state.Groups[gid]; ok { 336 group.SnapshotTs = x.Max(group.SnapshotTs, ts) 337 } 338 } 339 purgeTs := uint64(math.MaxUint64) 340 for _, group := range state.Groups { 341 purgeTs = x.Min(purgeTs, group.SnapshotTs) 342 } 343 if purgeTs < math.MaxUint64 { 344 n.server.orc.purgeBelow(purgeTs) 345 } 346 } 347 if p.Member != nil { 348 if err := n.handleMemberProposal(p.Member); err != nil { 349 span.Annotatef(nil, "While applying membership proposal: %+v", err) 350 glog.Errorf("While applying membership proposal: %+v", err) 351 return p.Key, err 352 } 353 } 354 if p.Tablet != nil { 355 if err := n.handleTabletProposal(p.Tablet); err != nil { 356 span.Annotatef(nil, "While applying tablet proposal: %+v", err) 357 glog.Errorf("While applying tablet proposal: %+v", err) 358 return p.Key, err 359 } 360 } 361 if p.License != nil { 362 // Check that the number of nodes in the cluster should be less than MaxNodes, otherwise 363 // reject the proposal. 364 numNodes := len(state.GetZeros()) 365 for _, group := range state.GetGroups() { 366 numNodes += len(group.GetMembers()) 367 } 368 if uint64(numNodes) > p.GetLicense().GetMaxNodes() { 369 return p.Key, errInvalidProposal 370 } 371 state.License = p.License 372 // Check expiry and set enabled accordingly. 373 expiry := time.Unix(state.License.ExpiryTs, 0).UTC() 374 state.License.Enabled = time.Now().UTC().Before(expiry) 375 } 376 377 if p.MaxLeaseId > state.MaxLeaseId { 378 state.MaxLeaseId = p.MaxLeaseId 379 } else if p.MaxTxnTs > state.MaxTxnTs { 380 state.MaxTxnTs = p.MaxTxnTs 381 } else if p.MaxLeaseId != 0 || p.MaxTxnTs != 0 { 382 // Could happen after restart when some entries were there in WAL and did not get 383 // snapshotted. 384 glog.Infof("Could not apply proposal, ignoring: p.MaxLeaseId=%v, p.MaxTxnTs=%v maxLeaseId=%d"+ 385 " maxTxnTs=%d\n", p.MaxLeaseId, p.MaxTxnTs, state.MaxLeaseId, state.MaxTxnTs) 386 } 387 if p.Txn != nil { 388 n.server.orc.updateCommitStatus(e.Index, p.Txn) 389 } 390 391 return p.Key, nil 392 } 393 394 func (n *node) applyConfChange(e raftpb.Entry) { 395 var cc raftpb.ConfChange 396 if err := cc.Unmarshal(e.Data); err != nil { 397 glog.Errorf("While unmarshalling confchange: %+v", err) 398 } 399 400 if cc.Type == raftpb.ConfChangeRemoveNode { 401 if cc.NodeID == n.Id { 402 glog.Fatalf("I [id:%#x group:0] have been removed. Goodbye!", n.Id) 403 } 404 n.DeletePeer(cc.NodeID) 405 n.server.removeZero(cc.NodeID) 406 407 } else if len(cc.Context) > 0 { 408 var rc pb.RaftContext 409 x.Check(rc.Unmarshal(cc.Context)) 410 go n.Connect(rc.Id, rc.Addr) 411 412 m := &pb.Member{Id: rc.Id, Addr: rc.Addr, GroupId: 0} 413 for _, member := range n.server.membershipState().Removed { 414 // It is not recommended to reuse RAFT ids. 415 if member.GroupId == 0 && m.Id == member.Id { 416 err := errors.Errorf("REUSE_RAFTID: Reusing removed id: %d.\n", m.Id) 417 n.DoneConfChange(cc.ID, err) 418 // Cancel configuration change. 419 cc.NodeID = raft.None 420 n.Raft().ApplyConfChange(cc) 421 return 422 } 423 } 424 425 n.server.storeZero(m) 426 } 427 428 cs := n.Raft().ApplyConfChange(cc) 429 n.SetConfState(cs) 430 n.DoneConfChange(cc.ID, nil) 431 432 // The following doesn't really trigger leader change. It's just capturing a leader change 433 // event. The naming is poor. TODO: Fix naming, and see if we can simplify this leader change 434 // logic. 435 n.triggerLeaderChange() 436 } 437 438 func (n *node) triggerLeaderChange() { 439 n.server.triggerLeaderChange() 440 // We update leader information on each node without proposal. This 441 // function is called on all nodes on leader change. 442 n.server.updateZeroLeader() 443 } 444 445 func (n *node) initAndStartNode() error { 446 _, restart, err := n.PastLife() 447 x.Check(err) 448 449 if restart { 450 glog.Infoln("Restarting node for dgraphzero") 451 sp, err := n.Store.Snapshot() 452 x.Checkf(err, "Unable to get existing snapshot") 453 if !raft.IsEmptySnap(sp) { 454 // It is important that we pick up the conf state here. 455 n.SetConfState(&sp.Metadata.ConfState) 456 457 var state pb.MembershipState 458 x.Check(state.Unmarshal(sp.Data)) 459 n.server.SetMembershipState(&state) 460 for _, id := range sp.Metadata.ConfState.Nodes { 461 n.Connect(id, state.Zeros[id].Addr) 462 } 463 } 464 465 n.SetRaft(raft.RestartNode(n.Cfg)) 466 467 } else if len(opts.peer) > 0 { 468 p := conn.GetPools().Connect(opts.peer) 469 if p == nil { 470 return errors.Errorf("Unhealthy connection to %v", opts.peer) 471 } 472 473 gconn := p.Get() 474 c := pb.NewRaftClient(gconn) 475 timeout := 8 * time.Second 476 for { 477 ctx, cancel := context.WithTimeout(n.ctx, timeout) 478 defer cancel() 479 // JoinCluster can block indefinitely, raft ignores conf change proposal 480 // if it has pending configuration. 481 _, err := c.JoinCluster(ctx, n.RaftContext) 482 if err == nil { 483 break 484 } 485 if x.ShouldCrash(err) { 486 log.Fatalf("Error while joining cluster: %v", err) 487 } 488 glog.Errorf("Error while joining cluster: %v\n", err) 489 timeout *= 2 490 if timeout > 32*time.Second { 491 timeout = 32 * time.Second 492 } 493 time.Sleep(timeout) // This is useful because JoinCluster can exit immediately. 494 } 495 glog.Infof("[%#x] Starting node\n", n.Id) 496 n.SetRaft(raft.StartNode(n.Cfg, nil)) 497 498 } else { 499 data, err := n.RaftContext.Marshal() 500 x.Check(err) 501 peers := []raft.Peer{{ID: n.Id, Context: data}} 502 n.SetRaft(raft.StartNode(n.Cfg, peers)) 503 504 go func() { 505 // This is a new cluster. So, propose a new ID for the cluster. 506 for { 507 id := uuid.New().String() 508 err := n.proposeAndWait(context.Background(), &pb.ZeroProposal{Cid: id}) 509 if err == nil { 510 glog.Infof("CID set for cluster: %v", id) 511 break 512 } 513 if err == errInvalidProposal { 514 glog.Errorf("invalid proposal error while proposing cluster id") 515 return 516 } 517 glog.Errorf("While proposing CID: %v. Retrying...", err) 518 time.Sleep(3 * time.Second) 519 } 520 521 if err := n.proposeTrialLicense(); err != nil { 522 glog.Errorf("while proposing trial license to cluster: %v", err) 523 } 524 }() 525 } 526 527 go n.Run() 528 go n.BatchAndSendMessages() 529 return nil 530 } 531 532 func (n *node) updateZeroMembershipPeriodically(closer *y.Closer) { 533 defer closer.Done() 534 ticker := time.NewTicker(10 * time.Second) 535 defer ticker.Stop() 536 537 for { 538 select { 539 case <-ticker.C: 540 n.server.updateZeroLeader() 541 case <-closer.HasBeenClosed(): 542 return 543 } 544 } 545 } 546 547 var startOption = otrace.WithSampler(otrace.ProbabilitySampler(0.01)) 548 549 func (n *node) checkQuorum(closer *y.Closer) { 550 defer closer.Done() 551 ticker := time.NewTicker(time.Second) 552 defer ticker.Stop() 553 554 quorum := func() { 555 // Make this timeout 1.5x the timeout on RunReadIndexLoop. 556 ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) 557 defer cancel() 558 559 ctx, span := otrace.StartSpan(ctx, "Zero.checkQuorum", startOption) 560 defer span.End() 561 span.Annotatef(nil, "Node id: %d", n.Id) 562 563 if state, err := n.server.latestMembershipState(ctx); err == nil { 564 n.mu.Lock() 565 n.lastQuorum = time.Now() 566 n.mu.Unlock() 567 // Also do some connection cleanup. 568 conn.GetPools().RemoveInvalid(state) 569 span.Annotate(nil, "Updated lastQuorum") 570 571 } else if glog.V(1) { 572 span.Annotatef(nil, "Got error: %v", err) 573 glog.Warningf("Zero node: %#x unable to reach quorum. Error: %v", n.Id, err) 574 } 575 } 576 577 for { 578 select { 579 case <-ticker.C: 580 quorum() 581 case <-closer.HasBeenClosed(): 582 return 583 } 584 } 585 } 586 587 func (n *node) snapshotPeriodically(closer *y.Closer) { 588 defer closer.Done() 589 ticker := time.NewTicker(10 * time.Second) 590 defer ticker.Stop() 591 592 for { 593 select { 594 case <-ticker.C: 595 n.trySnapshot(1000) 596 597 case <-closer.HasBeenClosed(): 598 return 599 } 600 } 601 } 602 603 func (n *node) trySnapshot(skip uint64) { 604 existing, err := n.Store.Snapshot() 605 x.Checkf(err, "Unable to get existing snapshot") 606 si := existing.Metadata.Index 607 idx := n.server.SyncedUntil() 608 if idx <= si+skip { 609 return 610 } 611 612 data, err := n.server.MarshalMembershipState() 613 x.Check(err) 614 615 err = n.Store.CreateSnapshot(idx, n.ConfState(), data) 616 x.Checkf(err, "While creating snapshot") 617 glog.Infof("Writing snapshot at index: %d, applied mark: %d\n", idx, n.Applied.DoneUntil()) 618 } 619 620 func (n *node) Run() { 621 var leader bool 622 ticker := time.NewTicker(100 * time.Millisecond) 623 defer ticker.Stop() 624 625 // snapshot can cause select loop to block while deleting entries, so run 626 // it in goroutine 627 readStateCh := make(chan raft.ReadState, 100) 628 closer := y.NewCloser(5) 629 defer func() { 630 closer.SignalAndWait() 631 n.closer.Done() 632 glog.Infof("Zero Node.Run finished.") 633 }() 634 635 go n.snapshotPeriodically(closer) 636 go n.updateEnterpriseState(closer) 637 go n.updateZeroMembershipPeriodically(closer) 638 go n.checkQuorum(closer) 639 go n.RunReadIndexLoop(closer, readStateCh) 640 // We only stop runReadIndexLoop after the for loop below has finished interacting with it. 641 // That way we know sending to readStateCh will not deadlock. 642 643 var timer x.Timer 644 for { 645 select { 646 case <-n.closer.HasBeenClosed(): 647 n.Raft().Stop() 648 return 649 case <-ticker.C: 650 n.Raft().Tick() 651 case rd := <-n.Raft().Ready(): 652 timer.Start() 653 _, span := otrace.StartSpan(n.ctx, "Zero.RunLoop", 654 otrace.WithSampler(otrace.ProbabilitySampler(0.001))) 655 for _, rs := range rd.ReadStates { 656 // No need to use select-case-default on pushing to readStateCh. It is typically 657 // empty. 658 readStateCh <- rs 659 } 660 span.Annotatef(nil, "Pushed %d readstates", len(rd.ReadStates)) 661 662 if rd.SoftState != nil { 663 if rd.RaftState == raft.StateLeader && !leader { 664 glog.Infoln("I've become the leader, updating leases.") 665 n.server.updateLeases() 666 } 667 leader = rd.RaftState == raft.StateLeader 668 // Oracle stream would close the stream once it steps down as leader 669 // predicate move would cancel any in progress move on stepping down. 670 n.triggerLeaderChange() 671 } 672 if leader { 673 // Leader can send messages in parallel with writing to disk. 674 for _, msg := range rd.Messages { 675 n.Send(msg) 676 } 677 } 678 n.SaveToStorage(rd.HardState, rd.Entries, rd.Snapshot) 679 timer.Record("disk") 680 if rd.MustSync { 681 if err := n.Store.Sync(); err != nil { 682 glog.Errorf("Error while calling Store.Sync: %v", err) 683 } 684 timer.Record("sync") 685 } 686 span.Annotatef(nil, "Saved to storage") 687 688 if !raft.IsEmptySnap(rd.Snapshot) { 689 var state pb.MembershipState 690 x.Check(state.Unmarshal(rd.Snapshot.Data)) 691 n.server.SetMembershipState(&state) 692 } 693 694 for _, entry := range rd.CommittedEntries { 695 n.Applied.Begin(entry.Index) 696 if entry.Type == raftpb.EntryConfChange { 697 n.applyConfChange(entry) 698 glog.Infof("Done applying conf change at %#x", n.Id) 699 700 } else if entry.Type == raftpb.EntryNormal { 701 key, err := n.applyProposal(entry) 702 if err != nil { 703 glog.Errorf("While applying proposal: %v\n", err) 704 } 705 n.Proposals.Done(key, err) 706 707 } else { 708 glog.Infof("Unhandled entry: %+v\n", entry) 709 } 710 n.Applied.Done(entry.Index) 711 } 712 span.Annotatef(nil, "Applied %d CommittedEntries", len(rd.CommittedEntries)) 713 714 if !leader { 715 // Followers should send messages later. 716 for _, msg := range rd.Messages { 717 n.Send(msg) 718 } 719 } 720 span.Annotate(nil, "Sent messages") 721 timer.Record("proposals") 722 723 n.Raft().Advance() 724 span.Annotate(nil, "Advanced Raft") 725 timer.Record("advance") 726 727 span.End() 728 if timer.Total() > 200*time.Millisecond { 729 glog.Warningf( 730 "Raft.Ready took too long to process: %s."+ 731 " Num entries: %d. MustSync: %v", 732 timer.String(), len(rd.Entries), rd.MustSync) 733 } 734 } 735 } 736 }