github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/nomad/leader_test.go

package nomad

import (
	"fmt"
	"testing"
	"time"

	"github.com/hashicorp/consul/testutil/retry"
	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/stretchr/testify/assert"
)

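// TestLeader_LeftServer verifies that when a non-leader server is shut down
// and force-removed, the remaining servers converge on two raft peers.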
func TestLeader_LeftServer(t *testing.T) {
	s1 := TestServer(t, nil)
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	// Kill any server
	var peer *Server
	for _, s := range servers {
		if !s.IsLeader() {
			peer = s
			break
		}
	}
	if peer == nil {
		t.Fatalf("Should have a non-leader")
	}
	peer.Shutdown()
	name := fmt.Sprintf("%s.%s", peer.config.NodeName, peer.config.Region)

	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s == peer {
				continue
			}

			// Force remove the non-leader (transition to left state)
			if err := s.RemoveFailedNode(name); err != nil {
				return false, err
			}

			peers, _ := s.numPeers()
			return peers == 2, fmt.Errorf("%v", peers)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %s", err)
	})
}

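// TestLeader_LeftLeader verifies that when the leader gracefully leaves the
// cluster, the remaining servers drop to two raft peers.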
func TestLeader_LeftLeader(t *testing.T) {
	s1 := TestServer(t, nil)
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	// Kill the leader!
	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}
	leader.Leave()
	leader.Shutdown()

	for _, s := range servers {
		if s == leader {
			continue
		}
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 2, fmt.Errorf("%v", peers)
		}, func(err error) {
			t.Fatalf("should have 2 peers: %v", err)
		})
	}
}

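// TestLeader_MultiBootstrap verifies that joining two servers that each
// bootstrapped their own cluster does not merge their raft peer sets.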
func TestLeader_MultiBootstrap(t *testing.T) {
	s1 := TestServer(t, nil)
	defer s1.Shutdown()

	s2 := TestServer(t, nil)
	defer s2.Shutdown()
	servers := []*Server{s1, s2}
	TestJoin(t, s1, s2)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers := s.Members()
			return len(peers) == 2, nil
		}, func(err error) {
			t.Fatalf("should have 2 peers")
		})
	}

	// Ensure we don't have multiple raft peers
	for _, s := range servers {
		peers, _ := s.numPeers()
		if peers != 1 {
			t.Fatalf("should only have 1 raft peer!")
		}
	}
}

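// TestLeader_PlanQueue_Reset verifies that the plan queue is enabled only on
// the leader and is re-enabled on the new leader after a failover.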
func TestLeader_PlanQueue_Reset(t *testing.T) {
	s1 := TestServer(t, nil)
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	if !leader.planQueue.Enabled() {
		t.Fatalf("should enable plan queue")
	}

	for _, s := range servers {
		if !s.IsLeader() && s.planQueue.Enabled() {
			t.Fatalf("plan queue should not be enabled")
		}
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return true, nil
			}
		}
		return false, nil
	}, func(err error) {
		t.Fatalf("should have leader")
	})

	// Check that the new leader has enabled its plan queue
	testutil.WaitForResult(func() (bool, error) {
		return leader.planQueue.Enabled(), nil
	}, func(err error) {
		t.Fatalf("should enable plan queue")
	})
}

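// TestLeader_EvalBroker_Reset verifies that a pending evaluation is restored
// into the eval broker of the new leader after a failover.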
func TestLeader_EvalBroker_Reset(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)
	testutil.WaitForLeader(t, s1.RPC)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	// Inject a pending eval
	req := structs.EvalUpdateRequest{
		Evals: []*structs.Evaluation{mock.Eval()},
	}
	_, _, err := leader.raftApply(structs.EvalUpdateRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return true, nil
			}
		}
		return false, nil
	}, func(err error) {
		t.Fatalf("should have leader")
	})

	// Check that the new leader has a pending evaluation
	testutil.WaitForResult(func() (bool, error) {
		stats := leader.evalBroker.Stats()
		return stats.TotalReady == 1, nil
	}, func(err error) {
		t.Fatalf("should have pending evaluation")
	})
}

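// TestLeader_PeriodicDispatcher_Restore_Adds verifies that after a failover
// the new leader tracks periodic jobs but not parameterized or non-periodic
// jobs.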
func TestLeader_PeriodicDispatcher_Restore_Adds(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)
	testutil.WaitForLeader(t, s1.RPC)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	// Inject a periodic job, a parameterized periodic job and a non-periodic job
	periodic := mock.PeriodicJob()
	nonPeriodic := mock.Job()
	parameterizedPeriodic := mock.PeriodicJob()
	parameterizedPeriodic.ParameterizedJob = &structs.ParameterizedJobConfig{}
	for _, job := range []*structs.Job{nonPeriodic, periodic, parameterizedPeriodic} {
		req := structs.JobRegisterRequest{
			Job: job,
			WriteRequest: structs.WriteRequest{
				Namespace: job.Namespace,
			},
		}
		_, _, err := leader.raftApply(structs.JobRegisterRequestType, req)
		if err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return true, nil
			}
		}
		return false, nil
	}, func(err error) {
		t.Fatalf("should have leader")
	})

	tuplePeriodic := structs.NamespacedID{
		ID:        periodic.ID,
		Namespace: periodic.Namespace,
	}
	tupleNonPeriodic := structs.NamespacedID{
		ID:        nonPeriodic.ID,
		Namespace: nonPeriodic.Namespace,
	}
	tupleParameterized := structs.NamespacedID{
		ID:        parameterizedPeriodic.ID,
		Namespace: parameterizedPeriodic.Namespace,
	}

	// Check that the new leader is tracking the periodic job only
	testutil.WaitForResult(func() (bool, error) {
		if _, tracked := leader.periodicDispatcher.tracked[tuplePeriodic]; !tracked {
			return false, fmt.Errorf("periodic job not tracked")
		}
		if _, tracked := leader.periodicDispatcher.tracked[tupleNonPeriodic]; tracked {
			return false, fmt.Errorf("non periodic job tracked")
		}
		if _, tracked := leader.periodicDispatcher.tracked[tupleParameterized]; tracked {
			return false, fmt.Errorf("parameterized periodic job tracked")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("%v", err)
	})
}

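// TestLeader_PeriodicDispatcher_Restore_NoEvals verifies that restoring the
// periodic dispatcher force-launches a periodic job whose launch time passed
// while the dispatcher was disabled and no eval had been created.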
func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Inject a periodic job that will be triggered soon.
	launch := time.Now().Add(1 * time.Second)
	job := testPeriodicJob(launch)
	req := structs.JobRegisterRequest{
		Job: job,
		WriteRequest: structs.WriteRequest{
			Namespace: job.Namespace,
		},
	}
	_, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Disable the periodic dispatcher, ensuring that no evals will be created
	// while we sleep past the launch time.
	s1.periodicDispatcher.SetEnabled(false)

	// Get the current time to ensure the launch time is after this once we
	// restore.
	now := time.Now()

	// Sleep till after the job should have been launched.
	time.Sleep(3 * time.Second)

	// Restore the periodic dispatcher.
	s1.periodicDispatcher.SetEnabled(true)
	s1.restorePeriodicDispatcher()

	// Ensure the job is tracked.
	tuple := structs.NamespacedID{
		ID:        job.ID,
		Namespace: job.Namespace,
	}
	if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked {
		t.Fatalf("periodic job not restored")
	}

	// Check that an eval was made for the missed launch.
	ws := memdb.NewWatchSet()
	last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID)
	if err != nil || last == nil {
		t.Fatalf("failed to get periodic launch time: %v", err)
	}

	if last.Launch.Before(now) {
		t.Fatalf("restorePeriodicDispatcher did not force launch: last %v; want after %v", last.Launch, now)
	}
}

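// TestLeader_PeriodicDispatcher_Restore_Evals verifies that restoring the
// periodic dispatcher catches up on launches that were missed after the last
// recorded launch.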
func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Inject a periodic job that triggered once in the past, should trigger now
	// and once in the future.
	now := time.Now()
	past := now.Add(-1 * time.Second)
	future := now.Add(10 * time.Second)
	job := testPeriodicJob(past, now, future)
	req := structs.JobRegisterRequest{
		Job: job,
		WriteRequest: structs.WriteRequest{
			Namespace: job.Namespace,
		},
	}
	_, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Create an eval for the past launch.
	s1.periodicDispatcher.createEval(job, past)

	// Disable the periodic dispatcher, ensuring that no evals will be created
	// while we sleep past the next launch time.
	s1.periodicDispatcher.SetEnabled(false)

	// Sleep till after the job should have been launched.
	time.Sleep(3 * time.Second)

	// Restore the periodic dispatcher.
	s1.periodicDispatcher.SetEnabled(true)
	s1.restorePeriodicDispatcher()

	// Ensure the job is tracked.
	tuple := structs.NamespacedID{
		ID:        job.ID,
		Namespace: job.Namespace,
	}
	if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked {
		t.Fatalf("periodic job not restored")
	}

	// Check that an eval was made for the missed launch.
	ws := memdb.NewWatchSet()
	last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID)
	if err != nil || last == nil {
		t.Fatalf("failed to get periodic launch time: %v", err)
	}
	if last.Launch.Equal(past) {
		t.Fatalf("restorePeriodicDispatcher did not force launch")
	}
}

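// TestLeader_PeriodicDispatch verifies that the leader periodically enqueues
// core scheduler evaluations.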
func TestLeader_PeriodicDispatch(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EvalGCInterval = 5 * time.Millisecond
	})
	defer s1.Shutdown()

	// Wait for a periodic dispatch
	testutil.WaitForResult(func() (bool, error) {
		stats := s1.evalBroker.Stats()
		bySched, ok := stats.ByScheduler[structs.JobTypeCore]
		if !ok {
			return false, nil
		}
		return bySched.Ready > 0, nil
	}, func(err error) {
		t.Fatalf("should have a pending core job eval")
	})
}

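// TestLeader_ReapFailedEval verifies that an eval that exceeds its delivery
// limit is marked failed and a delayed follow-up eval is created.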
func TestLeader_ReapFailedEval(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EvalDeliveryLimit = 1
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Insert an eval into the broker
	eval := mock.Eval()
	s1.evalBroker.Enqueue(eval)

	// Dequeue and Nack
	out, token, err := s1.evalBroker.Dequeue(defaultSched, time.Second)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	s1.evalBroker.Nack(out.ID, token)

	// Wait for an updated and followup evaluation
	state := s1.fsm.State()
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		out, err := state.EvalByID(ws, eval.ID)
		if err != nil {
			return false, err
		}
		if out == nil {
			return false, fmt.Errorf("expect original evaluation to exist")
		}
		if out.Status != structs.EvalStatusFailed {
			return false, fmt.Errorf("got status %v; want %v", out.Status, structs.EvalStatusFailed)
		}

		// See if there is a followup
		evals, err := state.EvalsByJob(ws, eval.Namespace, eval.JobID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 2 {
			return false, fmt.Errorf("got %d evals, want 2", l)
		}

		for _, e := range evals {
			if e.ID == eval.ID {
				continue
			}

			if e.Status != structs.EvalStatusPending {
				return false, fmt.Errorf("follow up eval has status %v; want %v",
					e.Status, structs.EvalStatusPending)
			}

			if e.Wait < s1.config.EvalFailedFollowupBaselineDelay ||
				e.Wait > s1.config.EvalFailedFollowupBaselineDelay+s1.config.EvalFailedFollowupDelayRange {
				return false, fmt.Errorf("bad wait: %v", e.Wait)
			}

			if e.TriggeredBy != structs.EvalTriggerFailedFollowUp {
				return false, fmt.Errorf("follow up eval TriggeredBy %v; want %v",
					e.TriggeredBy, structs.EvalTriggerFailedFollowUp)
			}
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

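// TestLeader_ReapDuplicateEval verifies that duplicate blocked evals for the
// same job are cancelled.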
func TestLeader_ReapDuplicateEval(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Create a duplicate blocked eval
	eval := mock.Eval()
	eval2 := mock.Eval()
	eval2.JobID = eval.JobID
	s1.blockedEvals.Block(eval)
	s1.blockedEvals.Block(eval2)

	// Wait for the duplicate evaluation to be marked as cancelled
	state := s1.fsm.State()
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		out, err := state.EvalByID(ws, eval2.ID)
		if err != nil {
			return false, err
		}
		return out != nil && out.Status == structs.EvalStatusCancelled, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

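// TestLeader_RestoreVaultAccessors verifies that Vault accessors pending
// revocation are revoked when the leader restores its state.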
func TestLeader_RestoreVaultAccessors(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Insert a vault accessor that should be revoked
	state := s1.fsm.State()
	va := mock.VaultAccessor()
	if err := state.UpsertVaultAccessor(100, []*structs.VaultAccessor{va}); err != nil {
		t.Fatalf("bad: %v", err)
	}

	// Swap the Vault client
	tvc := &TestVaultClient{}
	s1.vault = tvc

	// Do a restore
	if err := s1.restoreRevokingAccessors(); err != nil {
		t.Fatalf("Failed to restore: %v", err)
	}

	if len(tvc.RevokedTokens) != 1 || tvc.RevokedTokens[0].Accessor != va.Accessor {
		t.Fatalf("Bad revoked accessors: %v", tvc.RevokedTokens)
	}
}

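// TestLeader_ReplicateACLPolicies verifies that ACL policies written in the
// authoritative region are replicated to other regions.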
func TestLeader_ReplicateACLPolicies(t *testing.T) {
	t.Parallel()
	s1, root := TestACLServer(t, func(c *Config) {
		c.Region = "region1"
		c.AuthoritativeRegion = "region1"
		c.ACLEnabled = true
	})
	defer s1.Shutdown()
	s2, _ := TestACLServer(t, func(c *Config) {
		c.Region = "region2"
		c.AuthoritativeRegion = "region1"
		c.ACLEnabled = true
		c.ReplicationBackoff = 20 * time.Millisecond
		c.ReplicationToken = root.SecretID
	})
	defer s2.Shutdown()
	TestJoin(t, s1, s2)
	testutil.WaitForLeader(t, s1.RPC)
	testutil.WaitForLeader(t, s2.RPC)

	// Write a policy to the authoritative region
	p1 := mock.ACLPolicy()
	if err := s1.State().UpsertACLPolicies(100, []*structs.ACLPolicy{p1}); err != nil {
		t.Fatalf("bad: %v", err)
	}

	// Wait for the policy to replicate
	testutil.WaitForResult(func() (bool, error) {
		state := s2.State()
		out, err := state.ACLPolicyByName(nil, p1.Name)
		return out != nil, err
	}, func(err error) {
		t.Fatalf("should replicate policy")
	})
}

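// TestLeader_DiffACLPolicies verifies that diffACLPolicies reports which
// local policies to delete and which remote policies to fetch and update.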
func TestLeader_DiffACLPolicies(t *testing.T) {
	t.Parallel()

	state := state.TestStateStore(t)

	// Populate the local state
	p1 := mock.ACLPolicy()
	p2 := mock.ACLPolicy()
	p3 := mock.ACLPolicy()
	assert.Nil(t, state.UpsertACLPolicies(100, []*structs.ACLPolicy{p1, p2, p3}))

	// Simulate a remote list
	p2Stub := p2.Stub()
	p2Stub.ModifyIndex = 50 // Ignored, same index
	p3Stub := p3.Stub()
	p3Stub.ModifyIndex = 100 // Updated, higher index
	p3Stub.Hash = []byte{0, 1, 2, 3}
	p4 := mock.ACLPolicy()
	remoteList := []*structs.ACLPolicyListStub{
		p2Stub,
		p3Stub,
		p4.Stub(),
	}
	delete, update := diffACLPolicies(state, 50, remoteList)

	// P1 does not exist on the remote side, should delete
	assert.Equal(t, []string{p1.Name}, delete)

	// P2 is un-modified - ignore. P3 modified, P4 new.
	assert.Equal(t, []string{p3.Name, p4.Name}, update)
}

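// TestLeader_ReplicateACLTokens verifies that global ACL tokens written in
// the authoritative region are replicated to other regions.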
func TestLeader_ReplicateACLTokens(t *testing.T) {
	t.Parallel()
	s1, root := TestACLServer(t, func(c *Config) {
		c.Region = "region1"
		c.AuthoritativeRegion = "region1"
		c.ACLEnabled = true
	})
	defer s1.Shutdown()
	s2, _ := TestACLServer(t, func(c *Config) {
		c.Region = "region2"
		c.AuthoritativeRegion = "region1"
		c.ACLEnabled = true
		c.ReplicationBackoff = 20 * time.Millisecond
		c.ReplicationToken = root.SecretID
	})
	defer s2.Shutdown()
	TestJoin(t, s1, s2)
	testutil.WaitForLeader(t, s1.RPC)
	testutil.WaitForLeader(t, s2.RPC)

	// Write a token to the authoritative region
	p1 := mock.ACLToken()
	p1.Global = true
	if err := s1.State().UpsertACLTokens(100, []*structs.ACLToken{p1}); err != nil {
		t.Fatalf("bad: %v", err)
	}

	// Wait for the token to replicate
	testutil.WaitForResult(func() (bool, error) {
		state := s2.State()
		out, err := state.ACLTokenByAccessorID(nil, p1.AccessorID)
		return out != nil, err
	}, func(err error) {
		t.Fatalf("should replicate token")
	})
}

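// TestLeader_DiffACLTokens verifies that diffACLTokens ignores local tokens
// and reports which global tokens to delete and which to update.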
func TestLeader_DiffACLTokens(t *testing.T) {
	t.Parallel()

	state := state.TestStateStore(t)

	// Populate the local state
	p0 := mock.ACLToken()
	p1 := mock.ACLToken()
	p1.Global = true
	p2 := mock.ACLToken()
	p2.Global = true
	p3 := mock.ACLToken()
	p3.Global = true
	assert.Nil(t, state.UpsertACLTokens(100, []*structs.ACLToken{p0, p1, p2, p3}))

	// Simulate a remote list
	p2Stub := p2.Stub()
	p2Stub.ModifyIndex = 50 // Ignored, same index
	p3Stub := p3.Stub()
	p3Stub.ModifyIndex = 100 // Updated, higher index
	p3Stub.Hash = []byte{0, 1, 2, 3}
	p4 := mock.ACLToken()
	p4.Global = true
	remoteList := []*structs.ACLTokenListStub{
		p2Stub,
		p3Stub,
		p4.Stub(),
	}
	delete, update := diffACLTokens(state, 50, remoteList)

	// P0 is local and should be ignored
	// P1 does not exist on the remote side, should delete
	assert.Equal(t, []string{p1.AccessorID}, delete)

	// P2 is un-modified - ignore. P3 modified, P4 new.
	assert.Equal(t, []string{p3.AccessorID, p4.AccessorID}, update)
}

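// TestLeader_UpgradeRaftVersion verifies that a server running raft protocol
// v3 can replace a departed v1 server and is added to the peer set by ID.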
func TestLeader_UpgradeRaftVersion(t *testing.T) {
	t.Parallel()
	s1 := TestServer(t, func(c *Config) {
		c.Datacenter = "dc1"
		c.RaftConfig.ProtocolVersion = 2
	})
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 1
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 2
	})
	defer s3.Shutdown()

	servers := []*Server{s1, s2, s3}

	// Try to join
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	// Gracefully remove the v1 server
	if err := s2.Leave(); err != nil {
		t.Fatal(err)
	}

	for _, s := range []*Server{s1, s3} {
		minVer, err := s.autopilot.MinRaftProtocol()
		if err != nil {
			t.Fatal(err)
		}
		if got, want := minVer, 2; got != want {
			t.Fatalf("got min raft version %d want %d", got, want)
		}
	}

	// Replace the dead server with one running raft protocol v3
	s4 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.Datacenter = "dc1"
		c.RaftConfig.ProtocolVersion = 3
	})
	defer s4.Shutdown()
	TestJoin(t, s1, s4)
	servers[1] = s4

	// Make sure we're back to 3 total peers with the new one added via ID
	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			addrs := 0
			ids := 0
			future := s.raft.GetConfiguration()
			if err := future.Error(); err != nil {
				return false, err
			}
			for _, server := range future.Configuration().Servers {
				if string(server.ID) == string(server.Address) {
					addrs++
				} else {
					ids++
				}
			}
			if got, want := addrs, 2; got != want {
				return false, fmt.Errorf("got %d server addresses want %d", got, want)
			}
			if got, want := ids, 1; got != want {
				return false, fmt.Errorf("got %d server ids want %d", got, want)
			}

			return true, nil
		}, func(err error) {
			t.Fatal(err)
		})
	}
}

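// TestLeader_RollRaftServer verifies that a dead raft protocol v1 server is
// removed and replaced by a v3 server joined by ID.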
func TestLeader_RollRaftServer(t *testing.T) {
	t.Parallel()
	s1 := TestServer(t, func(c *Config) {
		c.RaftConfig.ProtocolVersion = 2
	})
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 1
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 2
	})
	defer s3.Shutdown()

	servers := []*Server{s1, s2, s3}

	// Try to join
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}

	// Kill the v1 server
	s2.Shutdown()

	for _, s := range []*Server{s1, s3} {
		retry.Run(t, func(r *retry.R) {
			minVer, err := s.autopilot.MinRaftProtocol()
			if err != nil {
				r.Fatal(err)
			}
			if got, want := minVer, 2; got != want {
				r.Fatalf("got min raft version %d want %d", got, want)
			}
		})
	}

	// Replace the dead server with one running raft protocol v3
	s4 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 3
	})
	defer s4.Shutdown()
	TestJoin(t, s4, s1)
	servers[1] = s4

	// Make sure the dead server is removed and we're back to 3 total peers
	for _, s := range servers {
		retry.Run(t, func(r *retry.R) {
			addrs := 0
			ids := 0
			future := s.raft.GetConfiguration()
			if err := future.Error(); err != nil {
				r.Fatal(err)
			}
			for _, server := range future.Configuration().Servers {
				if string(server.ID) == string(server.Address) {
					addrs++
				} else {
					ids++
				}
			}
			if got, want := addrs, 2; got != want {
				r.Fatalf("got %d server addresses want %d", got, want)
			}
			if got, want := ids, 1; got != want {
				r.Fatalf("got %d server ids want %d", got, want)
			}
		})
	}
}