vitess.io/vitess@v0.16.2/go/vt/topo/test/election.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package test
    18  
    19  import (
    20  	"context"
    21  	"testing"
    22  	"time"
    23  
    24  	"vitess.io/vitess/go/vt/topo"
    25  )
    26  
    27  func waitForLeaderID(t *testing.T, mp topo.LeaderParticipation, expected string) {
    28  	deadline := time.Now().Add(5 * time.Second)
    29  	for {
    30  		Leader, err := mp.GetCurrentLeaderID(context.Background())
    31  		if err != nil {
    32  			t.Fatalf("GetCurrentLeaderID failed: %v", err)
    33  		}
    34  
    35  		if Leader == expected {
    36  			return
    37  		}
    38  
    39  		if time.Now().After(deadline) {
    40  			t.Fatalf("GetCurrentLeaderID timed out with %v, expected %v", Leader, expected)
    41  		}
    42  
    43  		time.Sleep(10 * time.Millisecond)
    44  	}
    45  }
    46  
    47  // checkElection runs the tests on the LeaderParticipation part of the
    48  // topo.Conn API.
    49  func checkElection(t *testing.T, ts *topo.Server) {
    50  	conn, err := ts.ConnForCell(context.Background(), topo.GlobalCell)
    51  	if err != nil {
    52  		t.Fatalf("ConnForCell(global) failed: %v", err)
    53  	}
    54  	name := "testmp"
    55  
    56  	// create a new LeaderParticipation
    57  	id1 := "id1"
    58  	mp1, err := conn.NewLeaderParticipation(name, id1)
    59  	if err != nil {
    60  		t.Fatalf("cannot create mp1: %v", err)
    61  	}
    62  
    63  	// no primary yet, check name
    64  	waitForLeaderID(t, mp1, "")
    65  
    66  	// wait for id1 to be the primary
    67  	ctx1, err := mp1.WaitForLeadership()
    68  	if err != nil {
    69  		t.Fatalf("mp1 cannot become Leader: %v", err)
    70  	}
    71  
    72  	// A lot of implementations use a toplevel directory for their elections.
    73  	// Make sure it is marked as 'Ephemeral'.
    74  	entries, err := conn.ListDir(context.Background(), "/", true /*full*/)
    75  	if err != nil {
    76  		t.Fatalf("ListDir(/) failed: %v", err)
    77  	}
    78  	for _, e := range entries {
    79  		if e.Name != topo.CellsPath {
    80  			if !e.Ephemeral {
    81  				t.Errorf("toplevel directory that is not ephemeral: %v", e)
    82  			}
    83  		}
    84  	}
    85  
    86  	// get the current primary name, better be id1
    87  	waitForLeaderID(t, mp1, id1)
    88  
    89  	// create a second LeaderParticipation on same name
    90  	id2 := "id2"
    91  	mp2, err := conn.NewLeaderParticipation(name, id2)
    92  	if err != nil {
    93  		t.Fatalf("cannot create mp2: %v", err)
    94  	}
    95  
    96  	// wait until mp2 gets to be the primary in the background
    97  	mp2IsLeader := make(chan error)
    98  	var mp2Context context.Context
    99  	go func() {
   100  		var err error
   101  		mp2Context, err = mp2.WaitForLeadership()
   102  		mp2IsLeader <- err
   103  	}()
   104  
   105  	// ask mp2 for primary name, should get id1
   106  	waitForLeaderID(t, mp2, id1)
   107  
   108  	// stop mp1
   109  	mp1.Stop()
   110  
   111  	// this should have closed ctx1 as soon as possible,
   112  	// so 5s should be enough in tests. This will be used during lameduck
   113  	// when the server exits, so we can't wait too long anyway.
   114  	select {
   115  	case <-ctx1.Done():
   116  	case <-time.After(5 * time.Second):
   117  		t.Fatalf("shutting down mp1 didn't close ctx1 in time")
   118  	}
   119  
   120  	// now mp2 should be primary
   121  	err = <-mp2IsLeader
   122  	if err != nil {
   123  		t.Fatalf("mp2 awoke with error: %v", err)
   124  	}
   125  
   126  	// ask mp2 for primary name, should get id2
   127  	waitForLeaderID(t, mp2, id2)
   128  
   129  	// stop mp2, we're done
   130  	mp2.Stop()
   131  
   132  	// mp2Context should then close.
   133  	select {
   134  	case <-mp2Context.Done():
   135  	case <-time.After(5 * time.Second):
   136  		t.Fatalf("shutting down mp2 didn't close mp2Context in time")
   137  	}
   138  
   139  	// At this point, we should be able to call WaitForLeadership
   140  	// again, and it should return topo.ErrInterrupted.  Testing
   141  	// this here as this is what the vtctld workflow manager loop
   142  	// does, for instance. There is a go routine that runs
   143  	// WaitForLeadership and needs to exit cleanly at the end.
   144  	_, err = mp2.WaitForLeadership()
   145  	if !topo.IsErrType(err, topo.Interrupted) {
   146  		t.Errorf("wrong error returned by WaitForLeadership, got %v expected %v", err, topo.NewError(topo.Interrupted, ""))
   147  	}
   148  }
   149  
   150  // checkWaitForNewLeader runs the WaitForLeadership test on the LeaderParticipation
   151  func checkWaitForNewLeader(t *testing.T, ts *topo.Server) {
   152  	conn, err := ts.ConnForCell(context.Background(), topo.GlobalCell)
   153  	if err != nil {
   154  		t.Fatalf("ConnForCell(global) failed: %v", err)
   155  	}
   156  	name := "testmp"
   157  
   158  	// create a new LeaderParticipation
   159  	id1 := "id1"
   160  	mp1, err := conn.NewLeaderParticipation(name, id1)
   161  	if err != nil {
   162  		t.Fatalf("cannot create mp1: %v", err)
   163  	}
   164  
   165  	// no primary yet, check name
   166  	waitForLeaderID(t, mp1, "")
   167  
   168  	// wait for id1 to be the primary
   169  	_, err = mp1.WaitForLeadership()
   170  	if err != nil {
   171  		t.Fatalf("mp1 cannot become Leader: %v", err)
   172  	}
   173  
   174  	// A lot of implementations use a toplevel directory for their elections.
   175  	// Make sure it is marked as 'Ephemeral'.
   176  	entries, err := conn.ListDir(context.Background(), "/", true /*full*/)
   177  	if err != nil {
   178  		t.Fatalf("ListDir(/) failed: %v", err)
   179  	}
   180  	for _, e := range entries {
   181  		if e.Name != topo.CellsPath {
   182  			if !e.Ephemeral {
   183  				t.Errorf("toplevel directory that is not ephemeral: %v", e)
   184  			}
   185  		}
   186  	}
   187  
   188  	// get the current primary name, better be id1
   189  	waitForLeaderID(t, mp1, id1)
   190  
   191  	// create a second LeaderParticipation on same name
   192  	id2 := "id2"
   193  	mp2, err := conn.NewLeaderParticipation(name, id2)
   194  	if err != nil {
   195  		t.Fatalf("cannot create mp2: %v", err)
   196  	}
   197  
   198  	leaders, err := mp2.WaitForNewLeader(context.Background())
   199  	if topo.IsErrType(err, topo.NoImplementation) {
   200  		t.Logf("%T does not support WaitForNewLeader()", mp2)
   201  		return
   202  	}
   203  	if err != nil {
   204  		t.Fatalf("cannot wait for leadership: %v", err)
   205  		return
   206  	}
   207  
   208  	// ask mp2 for primary name, should get id1
   209  	waitForLeaderID(t, mp2, id1)
   210  
   211  	// stop mp1
   212  	mp1.Stop()
   213  
   214  	leader := <-leaders
   215  
   216  	if leader != id1 {
   217  		t.Fatalf("wrong node elected: %v", leader)
   218  	}
   219  }