github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/nomad/leader_test.go

     1  package nomad
     2  
     3  import (
     5  	"fmt"
     6  	"strconv"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/hashicorp/consul/sdk/testutil/retry"
    11  	"github.com/hashicorp/go-hclog"
    12  	memdb "github.com/hashicorp/go-memdb"
    13  	"github.com/hashicorp/go-version"
    14  	"github.com/hashicorp/nomad/helper"
    15  	"github.com/hashicorp/nomad/nomad/mock"
    16  	"github.com/hashicorp/nomad/nomad/state"
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  	"github.com/hashicorp/nomad/testutil"
    19  	"github.com/hashicorp/raft"
    20  	"github.com/hashicorp/serf/serf"
    21  	"github.com/stretchr/testify/assert"
    22  	"github.com/stretchr/testify/require"
    23  )
    24  
    25  func TestLeader_LeftServer(t *testing.T) {
    26  	s1, cleanupS1 := TestServer(t, func(c *Config) {
    27  		c.BootstrapExpect = 3
    28  	})
    29  	defer cleanupS1()
    30  
    31  	s2, cleanupS2 := TestServer(t, func(c *Config) {
    32  		c.BootstrapExpect = 3
    33  	})
    34  	defer cleanupS2()
    35  
    36  	s3, cleanupS3 := TestServer(t, func(c *Config) {
    37  		c.BootstrapExpect = 3
    38  	})
    39  	defer cleanupS3()
    40  	servers := []*Server{s1, s2, s3}
    41  	TestJoin(t, s1, s2, s3)
    42  
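         	// Wait for each server to see all three raft peers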
    43  	for _, s := range servers {
    44  		testutil.WaitForResult(func() (bool, error) {
    45  			peers, _ := s.numPeers()
    46  			return peers == 3, nil
    47  		}, func(err error) {
    48  			t.Fatalf("should have 3 peers")
    49  		})
    50  	}
    51  
     52  	// Kill a non-leader server
    53  	var peer *Server
    54  	for _, s := range servers {
    55  		if !s.IsLeader() {
    56  			peer = s
    57  			break
    58  		}
    59  	}
    60  	if peer == nil {
    61  		t.Fatalf("Should have a non-leader")
    62  	}
    63  	peer.Shutdown()
    64  	name := fmt.Sprintf("%s.%s", peer.config.NodeName, peer.config.Region)
    65  
    66  	testutil.WaitForResult(func() (bool, error) {
    67  		for _, s := range servers {
    68  			if s == peer {
    69  				continue
    70  			}
    71  
    72  			// Force remove the non-leader (transition to left state)
    73  			if err := s.RemoveFailedNode(name); err != nil {
    74  				return false, err
    75  			}
    76  
    77  			peers, _ := s.numPeers()
     78  			return peers == 2, fmt.Errorf("%v", peers)
    79  		}
    80  
    81  		return true, nil
    82  	}, func(err error) {
    83  		t.Fatalf("err: %s", err)
    84  	})
    85  }
    86  
    87  func TestLeader_LeftLeader(t *testing.T) {
    88  	s1, cleanupS1 := TestServer(t, func(c *Config) {
    89  		c.BootstrapExpect = 3
    90  	})
    91  	defer cleanupS1()
    92  
    93  	s2, cleanupS2 := TestServer(t, func(c *Config) {
    94  		c.BootstrapExpect = 3
    95  	})
    96  	defer cleanupS2()
    97  
    98  	s3, cleanupS3 := TestServer(t, func(c *Config) {
    99  		c.BootstrapExpect = 3
   100  	})
   101  	defer cleanupS3()
   102  	servers := []*Server{s1, s2, s3}
   103  	TestJoin(t, s1, s2, s3)
   104  
   105  	for _, s := range servers {
   106  		testutil.WaitForResult(func() (bool, error) {
   107  			peers, _ := s.numPeers()
   108  			return peers == 3, nil
   109  		}, func(err error) {
   110  			t.Fatalf("should have 3 peers")
   111  		})
   112  	}
   113  
   114  	// Kill the leader!
   115  	var leader *Server
   116  	for _, s := range servers {
   117  		if s.IsLeader() {
   118  			leader = s
   119  			break
   120  		}
   121  	}
   122  	if leader == nil {
   123  		t.Fatalf("Should have a leader")
   124  	}
   125  	leader.Leave()
   126  	leader.Shutdown()
   127  
   128  	for _, s := range servers {
   129  		if s == leader {
   130  			continue
   131  		}
   132  		testutil.WaitForResult(func() (bool, error) {
   133  			peers, _ := s.numPeers()
    134  			return peers == 2, fmt.Errorf("%v", peers)
   135  		}, func(err error) {
   136  			t.Fatalf("should have 2 peers: %v", err)
   137  		})
   138  	}
   139  }
   140  
   141  func TestLeader_MultiBootstrap(t *testing.T) {
   142  	s1, cleanupS1 := TestServer(t, nil)
   143  	defer cleanupS1()
   144  
   145  	s2, cleanupS2 := TestServer(t, nil)
   146  	defer cleanupS2()
   147  	servers := []*Server{s1, s2}
   148  	TestJoin(t, s1, s2)
   149  
   150  	for _, s := range servers {
   151  		testutil.WaitForResult(func() (bool, error) {
   152  			peers := s.Members()
   153  			return len(peers) == 2, nil
   154  		}, func(err error) {
   155  			t.Fatalf("should have 2 peers")
   156  		})
   157  	}
   158  
   159  	// Ensure we don't have multiple raft peers
   160  	for _, s := range servers {
   161  		peers, err := s.numPeers()
   162  		if err != nil {
   163  			t.Fatalf("failed: %v", err)
   164  		}
   165  		if peers != 1 {
   166  			t.Fatalf("should only have 1 raft peer! %v", peers)
   167  		}
   168  	}
   169  }
   170  
   171  func TestLeader_PlanQueue_Reset(t *testing.T) {
   172  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   173  		c.BootstrapExpect = 3
   174  	})
   175  	defer cleanupS1()
   176  
   177  	s2, cleanupS2 := TestServer(t, func(c *Config) {
   178  		c.BootstrapExpect = 3
   179  	})
   180  	defer cleanupS2()
   181  
   182  	s3, cleanupS3 := TestServer(t, func(c *Config) {
   183  		c.BootstrapExpect = 3
   184  	})
   185  	defer cleanupS3()
   186  	servers := []*Server{s1, s2, s3}
   187  	TestJoin(t, s1, s2, s3)
   188  
   189  	leader := waitForStableLeadership(t, servers)
   190  
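         	// Establishing leadership should have enabled the leader's plan queue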
   191  	if !leader.planQueue.Enabled() {
   192  		t.Fatalf("should enable plan queue")
   193  	}
   194  
   195  	for _, s := range servers {
   196  		if !s.IsLeader() && s.planQueue.Enabled() {
   197  			t.Fatalf("plan queue should not be enabled")
   198  		}
   199  	}
   200  
   201  	// Kill the leader
   202  	leader.Shutdown()
   203  	time.Sleep(100 * time.Millisecond)
   204  
   205  	// Wait for a new leader
   206  	leader = nil
   207  	testutil.WaitForResult(func() (bool, error) {
   208  		for _, s := range servers {
   209  			if s.IsLeader() {
   210  				leader = s
   211  				return true, nil
   212  			}
   213  		}
   214  		return false, nil
   215  	}, func(err error) {
   216  		t.Fatalf("should have leader")
   217  	})
   218  
    219  	// Check that the new leader has the plan queue enabled
   220  	testutil.WaitForResult(func() (bool, error) {
   221  		return leader.planQueue.Enabled(), nil
   222  	}, func(err error) {
   223  		t.Fatalf("should enable plan queue")
   224  	})
   225  }
   226  
   227  func TestLeader_EvalBroker_Reset(t *testing.T) {
   228  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   229  		c.NumSchedulers = 0
   230  	})
   231  	defer cleanupS1()
   232  
   233  	s2, cleanupS2 := TestServer(t, func(c *Config) {
   234  		c.NumSchedulers = 0
   235  		c.BootstrapExpect = 3
   236  	})
   237  	defer cleanupS2()
   238  
   239  	s3, cleanupS3 := TestServer(t, func(c *Config) {
   240  		c.NumSchedulers = 0
   241  		c.BootstrapExpect = 3
   242  	})
   243  	defer cleanupS3()
   244  	servers := []*Server{s1, s2, s3}
   245  	TestJoin(t, s1, s2, s3)
   246  
   247  	leader := waitForStableLeadership(t, servers)
   248  
   249  	// Inject a pending eval
   250  	req := structs.EvalUpdateRequest{
   251  		Evals: []*structs.Evaluation{mock.Eval()},
   252  	}
   253  	_, _, err := leader.raftApply(structs.EvalUpdateRequestType, req)
   254  	if err != nil {
   255  		t.Fatalf("err: %v", err)
   256  	}
   257  
   258  	// Kill the leader
   259  	leader.Shutdown()
   260  	time.Sleep(100 * time.Millisecond)
   261  
   262  	// Wait for a new leader
   263  	leader = nil
   264  	testutil.WaitForResult(func() (bool, error) {
   265  		for _, s := range servers {
   266  			if s.IsLeader() {
   267  				leader = s
   268  				return true, nil
   269  			}
   270  		}
   271  		return false, nil
   272  	}, func(err error) {
   273  		t.Fatalf("should have leader")
   274  	})
   275  
   276  	// Check that the new leader has a pending evaluation
   277  	testutil.WaitForResult(func() (bool, error) {
   278  		stats := leader.evalBroker.Stats()
   279  		return stats.TotalReady == 1, nil
   280  	}, func(err error) {
   281  		t.Fatalf("should have pending evaluation")
   282  	})
   283  }
   284  
   285  func TestLeader_PeriodicDispatcher_Restore_Adds(t *testing.T) {
   286  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   287  		c.NumSchedulers = 0
   288  	})
   289  	defer cleanupS1()
   290  
   291  	s2, cleanupS2 := TestServer(t, func(c *Config) {
   292  		c.NumSchedulers = 0
   293  		c.BootstrapExpect = 3
   294  	})
   295  	defer cleanupS2()
   296  
   297  	s3, cleanupS3 := TestServer(t, func(c *Config) {
   298  		c.NumSchedulers = 0
   299  		c.BootstrapExpect = 3
   300  	})
   301  	defer cleanupS3()
   302  	servers := []*Server{s1, s2, s3}
   303  	TestJoin(t, s1, s2, s3)
   304  
   305  	leader := waitForStableLeadership(t, servers)
   306  
   307  	// Inject a periodic job, a parameterized periodic job and a non-periodic job
   308  	periodic := mock.PeriodicJob()
   309  	nonPeriodic := mock.Job()
   310  	parameterizedPeriodic := mock.PeriodicJob()
   311  	parameterizedPeriodic.ParameterizedJob = &structs.ParameterizedJobConfig{}
   312  	for _, job := range []*structs.Job{nonPeriodic, periodic, parameterizedPeriodic} {
   313  		req := structs.JobRegisterRequest{
   314  			Job: job,
   315  			WriteRequest: structs.WriteRequest{
   316  				Namespace: job.Namespace,
   317  			},
   318  		}
   319  		_, _, err := leader.raftApply(structs.JobRegisterRequestType, req)
   320  		if err != nil {
   321  			t.Fatalf("err: %v", err)
   322  		}
   323  	}
   324  
   325  	// Kill the leader
   326  	leader.Shutdown()
   327  	time.Sleep(100 * time.Millisecond)
   328  
   329  	// Wait for a new leader
   330  	leader = nil
   331  	testutil.WaitForResult(func() (bool, error) {
   332  		for _, s := range servers {
   333  			if s.IsLeader() {
   334  				leader = s
   335  				return true, nil
   336  			}
   337  		}
   338  		return false, nil
   339  	}, func(err error) {
   340  		t.Fatalf("should have leader")
   341  	})
   342  
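         	// Build namespaced IDs to look up the jobs in the periodic dispatcher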
   343  	tuplePeriodic := structs.NamespacedID{
   344  		ID:        periodic.ID,
   345  		Namespace: periodic.Namespace,
   346  	}
   347  	tupleNonPeriodic := structs.NamespacedID{
   348  		ID:        nonPeriodic.ID,
   349  		Namespace: nonPeriodic.Namespace,
   350  	}
   351  	tupleParameterized := structs.NamespacedID{
   352  		ID:        parameterizedPeriodic.ID,
   353  		Namespace: parameterizedPeriodic.Namespace,
   354  	}
   355  
   356  	// Check that the new leader is tracking the periodic job only
   357  	testutil.WaitForResult(func() (bool, error) {
   358  		leader.periodicDispatcher.l.Lock()
   359  		defer leader.periodicDispatcher.l.Unlock()
   360  		if _, tracked := leader.periodicDispatcher.tracked[tuplePeriodic]; !tracked {
   361  			return false, fmt.Errorf("periodic job not tracked")
   362  		}
   363  		if _, tracked := leader.periodicDispatcher.tracked[tupleNonPeriodic]; tracked {
   364  			return false, fmt.Errorf("non periodic job tracked")
   365  		}
   366  		if _, tracked := leader.periodicDispatcher.tracked[tupleParameterized]; tracked {
   367  			return false, fmt.Errorf("parameterized periodic job tracked")
   368  		}
   369  		return true, nil
   370  	}, func(err error) {
    371  		t.Fatalf("%s", err)
   372  	})
   373  }
   374  
   375  func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) {
   376  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   377  		c.NumSchedulers = 0
   378  	})
   379  	defer cleanupS1()
   380  	testutil.WaitForLeader(t, s1.RPC)
   381  
   382  	// Inject a periodic job that will be triggered soon.
   383  	launch := time.Now().Add(1 * time.Second)
   384  	job := testPeriodicJob(launch)
   385  	req := structs.JobRegisterRequest{
   386  		Job: job,
   387  		WriteRequest: structs.WriteRequest{
   388  			Namespace: job.Namespace,
   389  		},
   390  	}
   391  	_, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
   392  	if err != nil {
   393  		t.Fatalf("err: %v", err)
   394  	}
   395  
   396  	// Flush the periodic dispatcher, ensuring that no evals will be created.
   397  	s1.periodicDispatcher.SetEnabled(false)
   398  
   399  	// Get the current time to ensure the launch time is after this once we
   400  	// restore.
   401  	now := time.Now()
   402  
   403  	// Sleep till after the job should have been launched.
   404  	time.Sleep(3 * time.Second)
   405  
   406  	// Restore the periodic dispatcher.
   407  	s1.periodicDispatcher.SetEnabled(true)
   408  	s1.restorePeriodicDispatcher()
   409  
   410  	// Ensure the job is tracked.
   411  	tuple := structs.NamespacedID{
   412  		ID:        job.ID,
   413  		Namespace: job.Namespace,
   414  	}
   415  	if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked {
   416  		t.Fatalf("periodic job not restored")
   417  	}
   418  
   419  	// Check that an eval was made.
   420  	ws := memdb.NewWatchSet()
   421  	last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID)
   422  	if err != nil || last == nil {
   423  		t.Fatalf("failed to get periodic launch time: %v", err)
   424  	}
   425  
   426  	if last.Launch.Before(now) {
   427  		t.Fatalf("restorePeriodicDispatcher did not force launch: last %v; want after %v", last.Launch, now)
   428  	}
   429  }
   430  
   431  func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) {
   432  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   433  		c.NumSchedulers = 0
   434  	})
   435  	defer cleanupS1()
   436  	testutil.WaitForLeader(t, s1.RPC)
   437  
   438  	// Inject a periodic job that triggered once in the past, should trigger now
   439  	// and once in the future.
   440  	now := time.Now()
   441  	past := now.Add(-1 * time.Second)
   442  	future := now.Add(10 * time.Second)
   443  	job := testPeriodicJob(past, now, future)
   444  	req := structs.JobRegisterRequest{
   445  		Job: job,
   446  		WriteRequest: structs.WriteRequest{
   447  			Namespace: job.Namespace,
   448  		},
   449  	}
   450  	_, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
   451  	if err != nil {
   452  		t.Fatalf("err: %v", err)
   453  	}
   454  
   455  	// Create an eval for the past launch.
   456  	s1.periodicDispatcher.createEval(job, past)
   457  
   458  	// Flush the periodic dispatcher, ensuring that no evals will be created.
   459  	s1.periodicDispatcher.SetEnabled(false)
   460  
   461  	// Sleep till after the job should have been launched.
   462  	time.Sleep(3 * time.Second)
   463  
   464  	// Restore the periodic dispatcher.
   465  	s1.periodicDispatcher.SetEnabled(true)
   466  	s1.restorePeriodicDispatcher()
   467  
   468  	// Ensure the job is tracked.
   469  	tuple := structs.NamespacedID{
   470  		ID:        job.ID,
   471  		Namespace: job.Namespace,
   472  	}
   473  	if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked {
   474  		t.Fatalf("periodic job not restored")
   475  	}
   476  
   477  	// Check that an eval was made.
   478  	ws := memdb.NewWatchSet()
   479  	last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID)
   480  	if err != nil || last == nil {
   481  		t.Fatalf("failed to get periodic launch time: %v", err)
   482  	}
   483  	if last.Launch == past {
   484  		t.Fatalf("restorePeriodicDispatcher did not force launch")
   485  	}
   486  }
   487  
   488  func TestLeader_PeriodicDispatch(t *testing.T) {
   489  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   490  		c.NumSchedulers = 0
   491  		c.EvalGCInterval = 5 * time.Millisecond
   492  	})
   493  	defer cleanupS1()
   494  
   495  	// Wait for a periodic dispatch
   496  	testutil.WaitForResult(func() (bool, error) {
   497  		stats := s1.evalBroker.Stats()
   498  		bySched, ok := stats.ByScheduler[structs.JobTypeCore]
   499  		if !ok {
   500  			return false, nil
   501  		}
   502  		return bySched.Ready > 0, nil
   503  	}, func(err error) {
    504  		t.Fatalf("should have a pending core eval")
   505  	})
   506  }
   507  
   508  func TestLeader_ReapFailedEval(t *testing.T) {
   509  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   510  		c.NumSchedulers = 0
   511  		c.EvalDeliveryLimit = 1
   512  	})
   513  	defer cleanupS1()
   514  	testutil.WaitForLeader(t, s1.RPC)
   515  
    516  	// Create and enqueue an evaluation
   517  	eval := mock.Eval()
   518  	s1.evalBroker.Enqueue(eval)
   519  
   520  	// Dequeue and Nack
   521  	out, token, err := s1.evalBroker.Dequeue(defaultSched, time.Second)
   522  	if err != nil {
   523  		t.Fatalf("err: %v", err)
   524  	}
   525  	s1.evalBroker.Nack(out.ID, token)
   526  
   527  	// Wait for an updated and followup evaluation
   528  	state := s1.fsm.State()
   529  	testutil.WaitForResult(func() (bool, error) {
   530  		ws := memdb.NewWatchSet()
   531  		out, err := state.EvalByID(ws, eval.ID)
   532  		if err != nil {
   533  			return false, err
   534  		}
   535  		if out == nil {
   536  			return false, fmt.Errorf("expect original evaluation to exist")
   537  		}
   538  		if out.Status != structs.EvalStatusFailed {
   539  			return false, fmt.Errorf("got status %v; want %v", out.Status, structs.EvalStatusFailed)
   540  		}
   541  		if out.NextEval == "" {
   542  			return false, fmt.Errorf("got empty NextEval")
   543  		}
   544  		// See if there is a followup
   545  		evals, err := state.EvalsByJob(ws, eval.Namespace, eval.JobID)
   546  		if err != nil {
   547  			return false, err
   548  		}
   549  
   550  		if l := len(evals); l != 2 {
   551  			return false, fmt.Errorf("got %d evals, want 2", l)
   552  		}
   553  
   554  		for _, e := range evals {
   555  			if e.ID == eval.ID {
   556  				continue
   557  			}
   558  
   559  			if e.Status != structs.EvalStatusPending {
   560  				return false, fmt.Errorf("follow up eval has status %v; want %v",
   561  					e.Status, structs.EvalStatusPending)
   562  			}
   563  
   564  			if e.ID != out.NextEval {
   565  				return false, fmt.Errorf("follow up eval id is %v; orig eval NextEval %v",
   566  					e.ID, out.NextEval)
   567  			}
   568  
   569  			if e.Wait < s1.config.EvalFailedFollowupBaselineDelay ||
   570  				e.Wait > s1.config.EvalFailedFollowupBaselineDelay+s1.config.EvalFailedFollowupDelayRange {
   571  				return false, fmt.Errorf("bad wait: %v", e.Wait)
   572  			}
   573  
   574  			if e.TriggeredBy != structs.EvalTriggerFailedFollowUp {
   575  				return false, fmt.Errorf("follow up eval TriggeredBy %v; want %v",
   576  					e.TriggeredBy, structs.EvalTriggerFailedFollowUp)
   577  			}
   578  		}
   579  
   580  		return true, nil
   581  	}, func(err error) {
   582  		t.Fatalf("err: %v", err)
   583  	})
   584  }
   585  
   586  func TestLeader_ReapDuplicateEval(t *testing.T) {
   587  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   588  		c.NumSchedulers = 0
   589  	})
   590  	defer cleanupS1()
   591  	testutil.WaitForLeader(t, s1.RPC)
   592  
   593  	// Create a duplicate blocked eval
   594  	eval := mock.Eval()
   595  	eval.CreateIndex = 100
   596  	eval2 := mock.Eval()
   597  	eval2.JobID = eval.JobID
   598  	eval2.CreateIndex = 102
   599  	s1.blockedEvals.Block(eval)
   600  	s1.blockedEvals.Block(eval2)
   601  
    602  	// Wait for the evaluation to be marked as cancelled
   603  	state := s1.fsm.State()
   604  	testutil.WaitForResult(func() (bool, error) {
   605  		ws := memdb.NewWatchSet()
   606  		out, err := state.EvalByID(ws, eval.ID)
   607  		if err != nil {
   608  			return false, err
   609  		}
   610  		return out != nil && out.Status == structs.EvalStatusCancelled, nil
   611  	}, func(err error) {
   612  		t.Fatalf("err: %v", err)
   613  	})
   614  }
   615  
   616  func TestLeader_revokeVaultAccessorsOnRestore(t *testing.T) {
   617  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   618  		c.NumSchedulers = 0
   619  	})
   620  	defer cleanupS1()
   621  	testutil.WaitForLeader(t, s1.RPC)
   622  
   623  	// Insert a vault accessor that should be revoked
   624  	fsmState := s1.fsm.State()
   625  	va := mock.VaultAccessor()
   626  	if err := fsmState.UpsertVaultAccessor(100, []*structs.VaultAccessor{va}); err != nil {
   627  		t.Fatalf("bad: %v", err)
   628  	}
   629  
   630  	// Swap the Vault client
   631  	tvc := &TestVaultClient{}
   632  	s1.vault = tvc
   633  
   634  	// Do a restore
   635  	if err := s1.revokeVaultAccessorsOnRestore(); err != nil {
   636  		t.Fatalf("Failed to restore: %v", err)
   637  	}
   638  
    639  	if len(tvc.RevokedTokens) != 1 || tvc.RevokedTokens[0].Accessor != va.Accessor {
   640  		t.Fatalf("Bad revoked accessors: %v", tvc.RevokedTokens)
   641  	}
   642  }
   643  
   644  func TestLeader_revokeSITokenAccessorsOnRestore(t *testing.T) {
   645  	t.Parallel()
   646  	r := require.New(t)
   647  
   648  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   649  		c.NumSchedulers = 0
   650  	})
   651  	defer cleanupS1()
   652  	testutil.WaitForLeader(t, s1.RPC)
   653  
   654  	// replace consul ACLs api with a mock for tracking calls
   655  	var consulACLsAPI mockConsulACLsAPI
   656  	s1.consulACLs = &consulACLsAPI
   657  
   658  	// Insert a SI token accessor that should be revoked
   659  	fsmState := s1.fsm.State()
   660  	accessor := mock.SITokenAccessor()
   661  	err := fsmState.UpsertSITokenAccessors(100, []*structs.SITokenAccessor{accessor})
   662  	r.NoError(err)
   663  
   664  	// Do a restore
   665  	err = s1.revokeSITokenAccessorsOnRestore()
   666  	r.NoError(err)
   667  
   668  	// Check the accessor was revoked
   669  	exp := []revokeRequest{{
   670  		accessorID: accessor.AccessorID,
   671  		committed:  true,
   672  	}}
   673  	r.ElementsMatch(exp, consulACLsAPI.revokeRequests)
   674  }
   675  
   676  func TestLeader_ClusterID(t *testing.T) {
   677  	t.Parallel()
   678  
   679  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   680  		c.NumSchedulers = 0
   681  		c.Build = minClusterIDVersion.String()
   682  	})
   683  	defer cleanupS1()
   684  	testutil.WaitForLeader(t, s1.RPC)
   685  
   686  	clusterID, err := s1.ClusterID()
   687  
   688  	require.NoError(t, err)
   689  	require.True(t, helper.IsUUID(clusterID))
   690  }
   691  
   692  func TestLeader_ClusterID_upgradePath(t *testing.T) {
   693  	t.Parallel()
   694  
   695  	before := version.Must(version.NewVersion("0.10.1")).String()
   696  	after := minClusterIDVersion.String()
   697  
   698  	type server struct {
   699  		s       *Server
   700  		cleanup func()
   701  	}
   702  
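         	// outdated and upgraded build test servers running the pre- and post-cluster-ID builds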
   703  	outdated := func() server {
   704  		s, cleanup := TestServer(t, func(c *Config) {
   705  			c.NumSchedulers = 0
   706  			c.Build = before
   707  			c.BootstrapExpect = 3
   708  			c.Logger.SetLevel(hclog.Trace)
   709  		})
   710  		return server{s: s, cleanup: cleanup}
   711  	}
   712  
   713  	upgraded := func() server {
   714  		s, cleanup := TestServer(t, func(c *Config) {
   715  			c.NumSchedulers = 0
   716  			c.Build = after
   717  			c.BootstrapExpect = 0
   718  			c.Logger.SetLevel(hclog.Trace)
   719  		})
   720  		return server{s: s, cleanup: cleanup}
   721  	}
   722  
   723  	servers := []server{outdated(), outdated(), outdated()}
   724  	// fallback shutdown attempt in case testing fails
   725  	defer servers[0].cleanup()
   726  	defer servers[1].cleanup()
   727  	defer servers[2].cleanup()
   728  
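         	// upgrade replaces the i'th outdated server with an upgraded one and shuts the old one down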
   729  	upgrade := func(i int) {
   730  		previous := servers[i]
   731  
   732  		servers[i] = upgraded()
   733  		TestJoin(t, servers[i].s, servers[(i+1)%3].s, servers[(i+2)%3].s)
   734  		testutil.WaitForLeader(t, servers[i].s.RPC)
   735  
   736  		require.NoError(t, previous.s.Leave())
   737  		require.NoError(t, previous.s.Shutdown())
   738  	}
   739  
   740  	// Join the servers before doing anything
   741  	TestJoin(t, servers[0].s, servers[1].s, servers[2].s)
   742  
   743  	// Wait for servers to settle
   744  	for i := 0; i < len(servers); i++ {
   745  		testutil.WaitForLeader(t, servers[i].s.RPC)
   746  	}
   747  
    748  	// noIDYet checks that the cluster ID is not available yet
   749  	noIDYet := func() {
   750  		for _, s := range servers {
   751  			retry.Run(t, func(r *retry.R) {
   752  				if _, err := s.s.ClusterID(); err == nil {
   753  					r.Error("expected error")
   754  				}
   755  			})
   756  		}
   757  	}
   758  
   759  	// Replace first old server with new server
   760  	upgrade(0)
   761  	defer servers[0].cleanup()
   762  	noIDYet() // ClusterID should not work yet, servers: [new, old, old]
   763  
   764  	// Replace second old server with new server
   765  	upgrade(1)
   766  	defer servers[1].cleanup()
   767  	noIDYet() // ClusterID should not work yet, servers: [new, new, old]
   768  
   769  	// Replace third / final old server with new server
   770  	upgrade(2)
   771  	defer servers[2].cleanup()
   772  
   773  	// Wait for old servers to really be gone
   774  	for _, s := range servers {
   775  		testutil.WaitForResult(func() (bool, error) {
   776  			peers, _ := s.s.numPeers()
   777  			return peers == 3, nil
   778  		}, func(_ error) {
   779  			t.Fatalf("should have 3 peers")
   780  		})
   781  	}
   782  
   783  	// Now we can tickle the leader into making a cluster ID
   784  	leaderID := ""
   785  	for _, s := range servers {
   786  		if s.s.IsLeader() {
   787  			id, err := s.s.ClusterID()
   788  			require.NoError(t, err)
   789  			leaderID = id
   790  			break
   791  		}
   792  	}
   793  	require.True(t, helper.IsUUID(leaderID))
   794  
    795  	// Now that every participating server has been upgraded, each one should
    796  	// be able to get the cluster ID, which has been plumbed all the way through.
   797  	agreeClusterID(t, []*Server{servers[0].s, servers[1].s, servers[2].s})
   798  }
   799  
   800  func TestLeader_ClusterID_noUpgrade(t *testing.T) {
   801  	t.Parallel()
   802  
   803  	type server struct {
   804  		s       *Server
   805  		cleanup func()
   806  	}
   807  
   808  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   809  		c.Logger.SetLevel(hclog.Trace)
   810  		c.NumSchedulers = 0
   811  		c.Build = minClusterIDVersion.String()
   812  		c.BootstrapExpect = 3
   813  	})
   814  	defer cleanupS1()
   815  	s2, cleanupS2 := TestServer(t, func(c *Config) {
   816  		c.Logger.SetLevel(hclog.Trace)
   817  		c.NumSchedulers = 0
   818  		c.Build = minClusterIDVersion.String()
   819  		c.BootstrapExpect = 3
   820  	})
   821  	defer cleanupS2()
   822  	s3, cleanupS3 := TestServer(t, func(c *Config) {
   823  		c.Logger.SetLevel(hclog.Trace)
   824  		c.NumSchedulers = 0
   825  		c.Build = minClusterIDVersion.String()
   826  		c.BootstrapExpect = 3
   827  	})
   828  	defer cleanupS3()
   829  
   830  	servers := []*Server{s1, s2, s3}
   831  
   832  	// Join the servers before doing anything
   833  	TestJoin(t, servers[0], servers[1], servers[2])
   834  
   835  	// Wait for servers to settle
   836  	for i := 0; i < len(servers); i++ {
   837  		testutil.WaitForLeader(t, servers[i].RPC)
   838  	}
   839  
    840  	// Each server started at the minimum version; check that there is a single
    841  	// cluster ID they all agree on.
   842  	agreeClusterID(t, []*Server{servers[0], servers[1], servers[2]})
   843  }
   844  
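         // agreeClusterID asserts that every server eventually reports the same cluster ID.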
   845  func agreeClusterID(t *testing.T, servers []*Server) {
   846  	retries := &retry.Timer{Timeout: 60 * time.Second, Wait: 1 * time.Second}
   847  	ids := make([]string, 3)
   848  	for i, s := range servers {
   849  		retry.RunWith(retries, t, func(r *retry.R) {
   850  			id, err := s.ClusterID()
   851  			if err != nil {
   852  				r.Error(err.Error())
   853  				return
   854  			}
   855  			if !helper.IsUUID(id) {
   856  				r.Error("not a UUID")
   857  				return
   858  			}
   859  			ids[i] = id
   860  		})
   861  	}
   862  	require.True(t, ids[0] == ids[1] && ids[1] == ids[2], "ids[0] %s, ids[1] %s, ids[2] %s", ids[0], ids[1], ids[2])
   863  }
   864  
   865  func TestLeader_ReplicateACLPolicies(t *testing.T) {
   866  	t.Parallel()
   867  
   868  	s1, root, cleanupS1 := TestACLServer(t, func(c *Config) {
   869  		c.Region = "region1"
   870  		c.AuthoritativeRegion = "region1"
   871  		c.ACLEnabled = true
   872  	})
   873  	defer cleanupS1()
   874  	s2, _, cleanupS2 := TestACLServer(t, func(c *Config) {
   875  		c.Region = "region2"
   876  		c.AuthoritativeRegion = "region1"
   877  		c.ACLEnabled = true
   878  		c.ReplicationBackoff = 20 * time.Millisecond
   879  		c.ReplicationToken = root.SecretID
   880  	})
   881  	defer cleanupS2()
   882  	TestJoin(t, s1, s2)
   883  	testutil.WaitForLeader(t, s1.RPC)
   884  	testutil.WaitForLeader(t, s2.RPC)
   885  
   886  	// Write a policy to the authoritative region
   887  	p1 := mock.ACLPolicy()
   888  	if err := s1.State().UpsertACLPolicies(100, []*structs.ACLPolicy{p1}); err != nil {
   889  		t.Fatalf("bad: %v", err)
   890  	}
   891  
   892  	// Wait for the policy to replicate
   893  	testutil.WaitForResult(func() (bool, error) {
   894  		state := s2.State()
   895  		out, err := state.ACLPolicyByName(nil, p1.Name)
   896  		return out != nil, err
   897  	}, func(err error) {
   898  		t.Fatalf("should replicate policy")
   899  	})
   900  }
   901  
   902  func TestLeader_DiffACLPolicies(t *testing.T) {
   903  	t.Parallel()
   904  
   905  	state := state.TestStateStore(t)
   906  
   907  	// Populate the local state
   908  	p1 := mock.ACLPolicy()
   909  	p2 := mock.ACLPolicy()
   910  	p3 := mock.ACLPolicy()
   911  	assert.Nil(t, state.UpsertACLPolicies(100, []*structs.ACLPolicy{p1, p2, p3}))
   912  
   913  	// Simulate a remote list
   914  	p2Stub := p2.Stub()
   915  	p2Stub.ModifyIndex = 50 // Ignored, same index
   916  	p3Stub := p3.Stub()
   917  	p3Stub.ModifyIndex = 100 // Updated, higher index
   918  	p3Stub.Hash = []byte{0, 1, 2, 3}
   919  	p4 := mock.ACLPolicy()
   920  	remoteList := []*structs.ACLPolicyListStub{
   921  		p2Stub,
   922  		p3Stub,
   923  		p4.Stub(),
   924  	}
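         	// Diff the local policies against the simulated remote listing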
   925  	delete, update := diffACLPolicies(state, 50, remoteList)
   926  
   927  	// P1 does not exist on the remote side, should delete
   928  	assert.Equal(t, []string{p1.Name}, delete)
   929  
   930  	// P2 is un-modified - ignore. P3 modified, P4 new.
   931  	assert.Equal(t, []string{p3.Name, p4.Name}, update)
   932  }
   933  
   934  func TestLeader_ReplicateACLTokens(t *testing.T) {
   935  	t.Parallel()
   936  
   937  	s1, root, cleanupS1 := TestACLServer(t, func(c *Config) {
   938  		c.Region = "region1"
   939  		c.AuthoritativeRegion = "region1"
   940  		c.ACLEnabled = true
   941  	})
   942  	defer cleanupS1()
   943  	s2, _, cleanupS2 := TestACLServer(t, func(c *Config) {
   944  		c.Region = "region2"
   945  		c.AuthoritativeRegion = "region1"
   946  		c.ACLEnabled = true
   947  		c.ReplicationBackoff = 20 * time.Millisecond
   948  		c.ReplicationToken = root.SecretID
   949  	})
   950  	defer cleanupS2()
   951  	TestJoin(t, s1, s2)
   952  	testutil.WaitForLeader(t, s1.RPC)
   953  	testutil.WaitForLeader(t, s2.RPC)
   954  
   955  	// Write a token to the authoritative region
   956  	p1 := mock.ACLToken()
   957  	p1.Global = true
   958  	if err := s1.State().UpsertACLTokens(100, []*structs.ACLToken{p1}); err != nil {
   959  		t.Fatalf("bad: %v", err)
   960  	}
   961  
   962  	// Wait for the token to replicate
   963  	testutil.WaitForResult(func() (bool, error) {
   964  		state := s2.State()
   965  		out, err := state.ACLTokenByAccessorID(nil, p1.AccessorID)
   966  		return out != nil, err
   967  	}, func(err error) {
   968  		t.Fatalf("should replicate token")
   969  	})
   970  }
   971  
   972  func TestLeader_DiffACLTokens(t *testing.T) {
   973  	t.Parallel()
   974  
   975  	state := state.TestStateStore(t)
   976  
   977  	// Populate the local state
   978  	p0 := mock.ACLToken()
   979  	p1 := mock.ACLToken()
   980  	p1.Global = true
   981  	p2 := mock.ACLToken()
   982  	p2.Global = true
   983  	p3 := mock.ACLToken()
   984  	p3.Global = true
   985  	assert.Nil(t, state.UpsertACLTokens(100, []*structs.ACLToken{p0, p1, p2, p3}))
   986  
   987  	// Simulate a remote list
   988  	p2Stub := p2.Stub()
   989  	p2Stub.ModifyIndex = 50 // Ignored, same index
   990  	p3Stub := p3.Stub()
   991  	p3Stub.ModifyIndex = 100 // Updated, higher index
   992  	p3Stub.Hash = []byte{0, 1, 2, 3}
   993  	p4 := mock.ACLToken()
   994  	p4.Global = true
   995  	remoteList := []*structs.ACLTokenListStub{
   996  		p2Stub,
   997  		p3Stub,
   998  		p4.Stub(),
   999  	}
  1000  	delete, update := diffACLTokens(state, 50, remoteList)
  1001  
  1002  	// P0 is local and should be ignored
  1003  	// P1 does not exist on the remote side, should delete
  1004  	assert.Equal(t, []string{p1.AccessorID}, delete)
  1005  
  1006  	// P2 is un-modified - ignore. P3 modified, P4 new.
  1007  	assert.Equal(t, []string{p3.AccessorID, p4.AccessorID}, update)
  1008  }
  1009  
  1010  func TestLeader_UpgradeRaftVersion(t *testing.T) {
  1011  	t.Parallel()
  1012  
  1013  	s1, cleanupS1 := TestServer(t, func(c *Config) {
  1014  		c.Datacenter = "dc1"
  1015  		c.RaftConfig.ProtocolVersion = 2
  1016  	})
  1017  	defer cleanupS1()
  1018  
  1019  	s2, cleanupS2 := TestServer(t, func(c *Config) {
  1020  		c.BootstrapExpect = 3
  1021  		c.RaftConfig.ProtocolVersion = 1
  1022  	})
  1023  	defer cleanupS2()
  1024  
  1025  	s3, cleanupS3 := TestServer(t, func(c *Config) {
  1026  		c.BootstrapExpect = 3
  1027  		c.RaftConfig.ProtocolVersion = 2
  1028  	})
  1029  	defer cleanupS3()
  1030  
  1031  	servers := []*Server{s1, s2, s3}
  1032  
  1033  	// Try to join
  1034  	TestJoin(t, s1, s2, s3)
  1035  
  1036  	for _, s := range servers {
  1037  		testutil.WaitForResult(func() (bool, error) {
  1038  			peers, _ := s.numPeers()
  1039  			return peers == 3, nil
  1040  		}, func(err error) {
  1041  			t.Fatalf("should have 3 peers")
  1042  		})
  1043  	}
  1044  
  1045  	// Kill the v1 server
  1046  	if err := s2.Leave(); err != nil {
  1047  		t.Fatal(err)
  1048  	}
  1049  
  1050  	for _, s := range []*Server{s1, s3} {
  1051  		minVer, err := s.autopilot.MinRaftProtocol()
  1052  		if err != nil {
  1053  			t.Fatal(err)
  1054  		}
  1055  		if got, want := minVer, 2; got != want {
  1056  			t.Fatalf("got min raft version %d want %d", got, want)
  1057  		}
  1058  	}
  1059  
  1060  	// Replace the dead server with one running raft protocol v3
  1061  	s4, cleanupS4 := TestServer(t, func(c *Config) {
  1062  		c.BootstrapExpect = 3
  1063  		c.Datacenter = "dc1"
  1064  		c.RaftConfig.ProtocolVersion = 3
  1065  	})
  1066  	defer cleanupS4()
  1067  	TestJoin(t, s1, s4)
  1068  	servers[1] = s4
  1069  
  1070  	// Make sure we're back to 3 total peers with the new one added via ID
  1071  	for _, s := range servers {
  1072  		testutil.WaitForResult(func() (bool, error) {
  1073  			addrs := 0
  1074  			ids := 0
  1075  			future := s.raft.GetConfiguration()
  1076  			if err := future.Error(); err != nil {
  1077  				return false, err
  1078  			}
  1079  			for _, server := range future.Configuration().Servers {
  1080  				if string(server.ID) == string(server.Address) {
  1081  					addrs++
  1082  				} else {
  1083  					ids++
  1084  				}
  1085  			}
  1086  			if got, want := addrs, 2; got != want {
  1087  				return false, fmt.Errorf("got %d server addresses want %d", got, want)
  1088  			}
  1089  			if got, want := ids, 1; got != want {
  1090  				return false, fmt.Errorf("got %d server ids want %d", got, want)
  1091  			}
  1092  
  1093  			return true, nil
  1094  		}, func(err error) {
  1095  			t.Fatal(err)
  1096  		})
  1097  	}
  1098  }
  1099  
  1100  func TestLeader_Reelection(t *testing.T) {
  1101  	raftProtocols := []int{1, 2, 3}
  1102  	for _, p := range raftProtocols {
   1103  		t.Run("Leader Election - Protocol version "+strconv.Itoa(p), func(t *testing.T) {
  1104  			leaderElectionTest(t, raft.ProtocolVersion(p))
  1105  		})
  1106  	}
  1107  
  1108  }
  1109  
  1110  func leaderElectionTest(t *testing.T, raftProtocol raft.ProtocolVersion) {
  1111  	s1, cleanupS1 := TestServer(t, func(c *Config) {
  1112  		c.BootstrapExpect = 3
  1113  		c.RaftConfig.ProtocolVersion = raftProtocol
  1114  	})
  1115  	defer cleanupS1()
  1116  
  1117  	s2, cleanupS2 := TestServer(t, func(c *Config) {
  1118  		c.BootstrapExpect = 3
  1119  		c.RaftConfig.ProtocolVersion = raftProtocol
  1120  	})
  1121  	defer cleanupS2()
  1122  
  1123  	s3, cleanupS3 := TestServer(t, func(c *Config) {
  1124  		c.BootstrapExpect = 3
  1125  		c.RaftConfig.ProtocolVersion = raftProtocol
  1126  	})
   1127  	defer cleanupS3()
  1128  
  1129  	servers := []*Server{s1, s2, s3}
  1130  
  1131  	// Try to join
  1132  	TestJoin(t, s1, s2, s3)
  1133  	testutil.WaitForLeader(t, s1.RPC)
  1134  
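         	// Verify every raft server is a voter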
  1135  	testutil.WaitForResult(func() (bool, error) {
  1136  		future := s1.raft.GetConfiguration()
  1137  		if err := future.Error(); err != nil {
  1138  			return false, err
  1139  		}
  1140  
  1141  		for _, server := range future.Configuration().Servers {
  1142  			if server.Suffrage == raft.Nonvoter {
  1143  				return false, fmt.Errorf("non-voter %v", server)
  1144  			}
  1145  		}
  1146  
  1147  		return true, nil
  1148  	}, func(err error) {
  1149  		t.Fatal(err)
  1150  	})
  1151  
  1152  	var leader, nonLeader *Server
  1153  	for _, s := range servers {
  1154  		if s.IsLeader() {
  1155  			leader = s
  1156  		} else {
  1157  			nonLeader = s
  1158  		}
  1159  	}
  1160  
  1161  	// Shutdown the leader
  1162  	leader.Shutdown()
   1163  	// Wait for a new leader to be elected
  1164  	testutil.WaitForLeader(t, nonLeader.RPC)
  1165  }
  1166  
  1167  func TestLeader_RollRaftServer(t *testing.T) {
  1168  	t.Parallel()
  1169  
  1170  	s1, cleanupS1 := TestServer(t, func(c *Config) {
  1171  		c.RaftConfig.ProtocolVersion = 2
  1172  	})
  1173  	defer cleanupS1()
  1174  
  1175  	s2, cleanupS2 := TestServer(t, func(c *Config) {
  1176  		c.BootstrapExpect = 3
  1177  		c.RaftConfig.ProtocolVersion = 2
  1178  	})
  1179  	defer cleanupS2()
  1180  
  1181  	s3, cleanupS3 := TestServer(t, func(c *Config) {
  1182  		c.BootstrapExpect = 3
  1183  		c.RaftConfig.ProtocolVersion = 2
  1184  	})
  1185  	defer cleanupS3()
  1186  
  1187  	servers := []*Server{s1, s2, s3}
  1188  
  1189  	// Try to join
  1190  	TestJoin(t, s1, s2, s3)
  1191  
  1192  	for _, s := range servers {
  1193  		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
  1194  	}
  1195  
  1196  	// Kill the first v2 server
  1197  	s1.Shutdown()
  1198  
  1199  	for _, s := range []*Server{s1, s3} {
  1200  		retry.Run(t, func(r *retry.R) {
  1201  			minVer, err := s.autopilot.MinRaftProtocol()
  1202  			if err != nil {
  1203  				r.Fatal(err)
  1204  			}
  1205  			if got, want := minVer, 2; got != want {
  1206  				r.Fatalf("got min raft version %d want %d", got, want)
  1207  			}
  1208  		})
  1209  	}
  1210  
  1211  	// Replace the dead server with one running raft protocol v3
  1212  	s4, cleanupS4 := TestServer(t, func(c *Config) {
  1213  		c.BootstrapExpect = 3
  1214  		c.RaftConfig.ProtocolVersion = 3
  1215  	})
  1216  	defer cleanupS4()
  1217  	TestJoin(t, s4, s2)
  1218  	servers[0] = s4
  1219  
  1220  	// Kill the second v2 server
  1221  	s2.Shutdown()
  1222  
  1223  	for _, s := range []*Server{s3, s4} {
  1224  		retry.Run(t, func(r *retry.R) {
  1225  			minVer, err := s.autopilot.MinRaftProtocol()
  1226  			if err != nil {
  1227  				r.Fatal(err)
  1228  			}
  1229  			if got, want := minVer, 2; got != want {
  1230  				r.Fatalf("got min raft version %d want %d", got, want)
  1231  			}
  1232  		})
  1233  	}
  1234  	// Replace another dead server with one running raft protocol v3
  1235  	s5, cleanupS5 := TestServer(t, func(c *Config) {
  1236  		c.BootstrapExpect = 3
  1237  		c.RaftConfig.ProtocolVersion = 3
  1238  	})
  1239  	defer cleanupS5()
  1240  	TestJoin(t, s5, s4)
  1241  	servers[1] = s5
  1242  
  1243  	// Kill the last v2 server, now minRaftProtocol should be 3
  1244  	s3.Shutdown()
  1245  
  1246  	for _, s := range []*Server{s4, s5} {
  1247  		retry.Run(t, func(r *retry.R) {
  1248  			minVer, err := s.autopilot.MinRaftProtocol()
  1249  			if err != nil {
  1250  				r.Fatal(err)
  1251  			}
  1252  			if got, want := minVer, 3; got != want {
  1253  				r.Fatalf("got min raft version %d want %d", got, want)
  1254  			}
  1255  		})
  1256  	}
  1257  
  1258  	// Replace the last dead server with one running raft protocol v3
  1259  	s6, cleanupS6 := TestServer(t, func(c *Config) {
  1260  		c.BootstrapExpect = 3
  1261  		c.RaftConfig.ProtocolVersion = 3
  1262  	})
  1263  	defer cleanupS6()
  1264  	TestJoin(t, s6, s4)
  1265  	servers[2] = s6
  1266  
  1267  	// Make sure all the dead servers are removed and we're back to 3 total peers
  1268  	for _, s := range servers {
  1269  		retry.Run(t, func(r *retry.R) {
  1270  			addrs := 0
  1271  			ids := 0
  1272  			future := s.raft.GetConfiguration()
  1273  			if err := future.Error(); err != nil {
  1274  				r.Fatal(err)
  1275  			}
  1276  			for _, server := range future.Configuration().Servers {
  1277  				if string(server.ID) == string(server.Address) {
  1278  					addrs++
  1279  				} else {
  1280  					ids++
  1281  				}
  1282  			}
  1283  			if got, want := addrs, 0; got != want {
  1284  				r.Fatalf("got %d server addresses want %d", got, want)
  1285  			}
  1286  			if got, want := ids, 3; got != want {
  1287  				r.Fatalf("got %d server ids want %d", got, want)
  1288  			}
  1289  		})
  1290  	}
  1291  }
  1292  
  1293  func TestLeader_RevokeLeadership_MultipleTimes(t *testing.T) {
  1294  	s1, cleanupS1 := TestServer(t, nil)
  1295  	defer cleanupS1()
  1296  	testutil.WaitForLeader(t, s1.RPC)
  1297  
  1298  	testutil.WaitForResult(func() (bool, error) {
  1299  		return s1.evalBroker.Enabled(), nil
  1300  	}, func(err error) {
  1301  		t.Fatalf("should have finished establish leader loop")
  1302  	})
  1303  
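         	// Revoking leadership repeatedly should be idempotent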
  1304  	require.Nil(t, s1.revokeLeadership())
  1305  	require.Nil(t, s1.revokeLeadership())
  1306  	require.Nil(t, s1.revokeLeadership())
  1307  }
  1308  
  1309  func TestLeader_TransitionsUpdateConsistencyRead(t *testing.T) {
  1310  	s1, cleanupS1 := TestServer(t, nil)
  1311  	defer cleanupS1()
  1312  	testutil.WaitForLeader(t, s1.RPC)
  1313  
  1314  	testutil.WaitForResult(func() (bool, error) {
  1315  		return s1.isReadyForConsistentReads(), nil
  1316  	}, func(err error) {
  1317  		require.Fail(t, "should have finished establish leader loop")
  1318  	})
  1319  
  1320  	require.Nil(t, s1.revokeLeadership())
  1321  	require.False(t, s1.isReadyForConsistentReads())
  1322  
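         	// Re-establishing leadership should re-enable consistent reads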
  1323  	ch := make(chan struct{})
  1324  	require.Nil(t, s1.establishLeadership(ch))
  1325  	require.True(t, s1.isReadyForConsistentReads())
  1326  }
  1327  
  1328  // Test doing an inplace upgrade on a server from raft protocol 2 to 3
  1329  // This verifies that removing the server and adding it back with a uuid works
  1330  // even if the server's address stays the same.
  1331  func TestServer_ReconcileMember(t *testing.T) {
  1332  	t.Parallel()
  1333  
   1334  	// Create a two server cluster; s3 is added after leadership is established
  1335  	s1, cleanupS1 := TestServer(t, func(c *Config) {
  1336  		c.BootstrapExpect = 2
  1337  		c.RaftConfig.ProtocolVersion = 3
  1338  	})
  1339  	defer cleanupS1()
  1340  
  1341  	s2, cleanupS2 := TestServer(t, func(c *Config) {
  1342  		c.BootstrapExpect = 2
  1343  		c.RaftConfig.ProtocolVersion = 3
  1344  	})
  1345  	defer cleanupS2()
  1346  
  1347  	TestJoin(t, s1, s2)
  1348  	testutil.WaitForLeader(t, s1.RPC)
  1349  
   1350  	// The test relies on s3 not being the leader, so add it only after
   1351  	// leadership has been established to reduce the chance it becomes leader
  1352  	s3, cleanupS3 := TestServer(t, func(c *Config) {
  1353  		c.BootstrapExpect = 0
  1354  		c.RaftConfig.ProtocolVersion = 2
  1355  	})
  1356  	defer cleanupS3()
  1357  
  1358  	TestJoin(t, s1, s3)
  1359  
  1360  	// Create a memberlist object for s3, with raft protocol upgraded to 3
  1361  	upgradedS3Member := serf.Member{
  1362  		Name:   s3.config.NodeName,
  1363  		Addr:   s3.config.RPCAddr.IP,
  1364  		Status: serf.StatusAlive,
  1365  		Tags:   make(map[string]string),
  1366  	}
  1367  	upgradedS3Member.Tags["role"] = "nomad"
  1368  	upgradedS3Member.Tags["id"] = s3.config.NodeID
  1369  	upgradedS3Member.Tags["region"] = s3.config.Region
  1370  	upgradedS3Member.Tags["dc"] = s3.config.Datacenter
  1371  	upgradedS3Member.Tags["rpc_addr"] = "127.0.0.1"
  1372  	upgradedS3Member.Tags["port"] = strconv.Itoa(s3.config.RPCAddr.Port)
  1373  	upgradedS3Member.Tags["build"] = "0.8.0"
  1374  	upgradedS3Member.Tags["vsn"] = "2"
  1375  	upgradedS3Member.Tags["mvn"] = "1"
  1376  	upgradedS3Member.Tags["raft_vsn"] = "3"
  1377  
  1378  	findLeader := func(t *testing.T) *Server {
  1379  		t.Helper()
  1380  		for _, s := range []*Server{s1, s2, s3} {
  1381  			if s.IsLeader() {
  1382  				t.Logf("found leader: %v %v", s.config.NodeID, s.config.RPCAddr)
  1383  				return s
  1384  			}
  1385  		}
  1386  
  1387  		t.Fatalf("no leader found")
  1388  		return nil
  1389  	}
  1390  
  1391  	// Find the leader so that we can call reconcile member on it
  1392  	leader := findLeader(t)
  1393  	if err := leader.reconcileMember(upgradedS3Member); err != nil {
  1394  		t.Fatalf("failed to reconcile member: %v", err)
  1395  	}
  1396  
  1397  	// This should remove s3 from the config and potentially cause a leader election
  1398  	testutil.WaitForLeader(t, s1.RPC)
  1399  
  1400  	// Figure out the new leader and call reconcile again, this should add s3 with the new ID format
  1401  	leader = findLeader(t)
  1402  	if err := leader.reconcileMember(upgradedS3Member); err != nil {
  1403  		t.Fatalf("failed to reconcile member: %v", err)
  1404  	}
  1405  
  1406  	testutil.WaitForLeader(t, s1.RPC)
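         	// Inspect the raft configuration and count address-based vs ID-based entries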
  1407  	future := s2.raft.GetConfiguration()
  1408  	if err := future.Error(); err != nil {
  1409  		t.Fatal(err)
  1410  	}
  1411  	addrs := 0
  1412  	ids := 0
  1413  	for _, server := range future.Configuration().Servers {
  1414  		if string(server.ID) == string(server.Address) {
  1415  			addrs++
  1416  		} else {
  1417  			ids++
  1418  		}
  1419  	}
  1420  	// After this, all three servers should have IDs in raft
  1421  	if got, want := addrs, 0; got != want {
  1422  		t.Fatalf("got %d server addresses want %d", got, want)
  1423  	}
  1424  	if got, want := ids, 3; got != want {
  1425  		t.Fatalf("got %d server ids want %d: %#v", got, want, future.Configuration().Servers)
  1426  	}
  1427  }
  1428  
  1429  // waitForStableLeadership waits until a leader is elected and all servers
   1430  // are promoted to voting members, and returns the leader.
  1431  func waitForStableLeadership(t *testing.T, servers []*Server) *Server {
  1432  	nPeers := len(servers)
  1433  
  1434  	// wait for all servers to discover each other
  1435  	for _, s := range servers {
  1436  		testutil.WaitForResult(func() (bool, error) {
  1437  			peers, _ := s.numPeers()
   1438  			return peers == nPeers, fmt.Errorf("should find %d peers but found %d", nPeers, peers)
  1439  		}, func(err error) {
  1440  			require.NoError(t, err)
  1441  		})
  1442  	}
  1443  
  1444  	// wait for leader
  1445  	var leader *Server
  1446  	testutil.WaitForResult(func() (bool, error) {
  1447  		for _, s := range servers {
  1448  			if s.IsLeader() {
  1449  				leader = s
  1450  				return true, nil
  1451  			}
  1452  		}
  1453  
  1454  		return false, fmt.Errorf("no leader found")
  1455  	}, func(err error) {
  1456  		require.NoError(t, err)
  1457  	})
  1458  
   1459  	// wait for all servers to be marked as voters
  1460  	testutil.WaitForResult(func() (bool, error) {
  1461  		future := leader.raft.GetConfiguration()
  1462  		if err := future.Error(); err != nil {
   1463  			return false, fmt.Errorf("failed to get raft config: %v", err)
  1464  		}
  1465  		ss := future.Configuration().Servers
  1466  		if len(ss) != len(servers) {
  1467  			return false, fmt.Errorf("raft doesn't contain all servers.  Expected %d but found %d", len(servers), len(ss))
  1468  		}
  1469  
  1470  		for _, s := range ss {
  1471  			if s.Suffrage != raft.Voter {
  1472  				return false, fmt.Errorf("configuration has non voting server: %v", s)
  1473  			}
  1474  		}
  1475  
  1476  		return true, nil
  1477  	}, func(err error) {
  1478  		require.NoError(t, err)
  1479  	})
  1480  
  1481  	return leader
  1482  }