vitess.io/vitess@v0.16.2/go/vt/topo/zk2topo/election.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package zk2topo
    18  
    19  import (
    20  	"context"
    21  	"path"
    22  	"sort"
    23  
    24  	"github.com/z-division/go-zookeeper/zk"
    25  
    26  	"vitess.io/vitess/go/vt/vterrors"
    27  
    28  	"vitess.io/vitess/go/vt/log"
    29  	"vitess.io/vitess/go/vt/topo"
    30  )
    31  
    32  // This file contains the primary election code for zk2topo.Server.
    33  
    34  // NewLeaderParticipation is part of the topo.Server interface.
    35  // We use the full path: <root path>/election/<name>
    36  func (zs *Server) NewLeaderParticipation(name, id string) (topo.LeaderParticipation, error) {
    37  	ctx := context.TODO()
    38  
    39  	zkPath := path.Join(zs.root, electionsPath, name)
    40  
    41  	// Create the toplevel directory, OK if it exists already.
    42  	// We will create the parent directory as well, but not more.
    43  	if _, err := CreateRecursive(ctx, zs.conn, zkPath, nil, 0, zk.WorldACL(PermDirectory), 1); err != nil && err != zk.ErrNodeExists {
    44  		return nil, convertError(err, zkPath)
    45  	}
    46  
    47  	result := &zkLeaderParticipation{
    48  		zs:   zs,
    49  		name: name,
    50  		id:   []byte(id),
    51  		done: make(chan struct{}),
    52  	}
    53  	result.stopCtx, result.stopCtxCancel = context.WithCancel(context.Background())
    54  	return result, nil
    55  }
    56  
    57  // zkLeaderParticipation implements topo.LeaderParticipation.
    58  //
    59  // We use a directory with files created as sequence and ephemeral,
    60  // see https://zookeeper.apache.org/doc/trunk/recipes.html#sc_leaderElection
    61  // From the toplevel election directory, we'll have one sub-directory
    62  // per name, with the sequence files in there. Each sequence file also contains
    63  // the id.
    64  type zkLeaderParticipation struct {
    65  	// zs is our parent zk topo Server
    66  	zs *Server
    67  
    68  	// name is the name of this LeaderParticipation
    69  	name string
    70  
    71  	// id is the process's current id.
    72  	id []byte
    73  
    74  	// stopCtx is a context that is closed when Stop is called.
    75  	stopCtx context.Context
    76  
    77  	// stopCtxCancel is the cancel function to call to cancel stopCtx.
    78  	stopCtxCancel context.CancelFunc
    79  
    80  	// done is a channel closed when the stop operation is done.
    81  	done chan struct{}
    82  }
    83  
    84  // WaitForLeadership is part of the topo.LeaderParticipation interface.
    85  func (mp *zkLeaderParticipation) WaitForLeadership() (context.Context, error) {
    86  	// If Stop was already called, mp.done is closed, so we are interrupted.
    87  	select {
    88  	case <-mp.done:
    89  		return nil, topo.NewError(topo.Interrupted, "Leadership")
    90  	default:
    91  	}
    92  
    93  	ctx := context.TODO()
    94  	zkPath := path.Join(mp.zs.root, electionsPath, mp.name)
    95  
    96  	// Fast path if Stop was already called.
    97  	select {
    98  	case <-mp.stopCtx.Done():
    99  		close(mp.done)
   100  		return nil, topo.NewError(topo.Interrupted, "Leadership")
   101  	default:
   102  	}
   103  
   104  	// Create the current proposal.
   105  	proposal, err := mp.zs.conn.Create(ctx, zkPath+"/", mp.id, zk.FlagSequence|zk.FlagEphemeral, zk.WorldACL(PermFile))
   106  	if err != nil {
   107  		return nil, vterrors.Wrapf(err, "cannot create proposal file in %v", zkPath)
   108  	}
   109  
   110  	// Wait until we are it, or we are interrupted. Using a
   111  	// small-ish time out so it gets exercised faster (as opposed
   112  	// to crashing after a day of use).
   113  	err = obtainQueueLock(mp.stopCtx, mp.zs.conn, proposal)
   114  	switch err {
   115  	case nil:
   116  		break
   117  	case context.Canceled:
   118  		close(mp.done)
   119  		return nil, topo.NewError(topo.Interrupted, "Leadership")
   120  	default:
   121  		// something else went wrong
   122  		return nil, err
   123  	}
   124  
   125  	// we got the lock, create our background context
   126  	ctx, cancel := context.WithCancel(context.Background())
   127  	go mp.watchLeadership(ctx, mp.zs.conn, proposal, cancel)
   128  	return ctx, nil
   129  }
   130  
   131  // watchLeadership is the background go routine we run while we are the primary.
   132  // We will do two things:
   133  //   - watch for changes to the proposal file. If anything happens there,
   134  //     it most likely means we lost the ZK session, so we want to stop
   135  //     being the primary.
   136  //   - wait for mp.stop.
   137  func (mp *zkLeaderParticipation) watchLeadership(ctx context.Context, conn *ZkConn, proposal string, cancel context.CancelFunc) {
   138  	// any interruption of this routine means we're not primary any more.
   139  	defer cancel()
   140  
   141  	// get to work watching our own proposal
   142  	_, stats, events, err := conn.GetW(ctx, proposal)
   143  	if err != nil {
   144  		log.Warningf("Cannot watch proposal while being Leader, stopping: %v", err)
   145  		return
   146  	}
   147  
   148  	select {
   149  	case <-mp.stopCtx.Done():
   150  		// we were asked to stop, we're done. Remove our node.
   151  		log.Infof("Canceling leadership '%v' upon Stop.", mp.name)
   152  
   153  		if err := conn.Delete(ctx, proposal, stats.Version); err != nil {
   154  			log.Warningf("Error deleting our proposal %v: %v", proposal, err)
   155  		}
   156  		close(mp.done)
   157  
   158  	case e := <-events:
   159  		// something happened to our proposal, that can only be bad.
   160  		log.Warningf("Watch on proposal triggered, canceling leadership '%v': %v", mp.name, e)
   161  	}
   162  }
   163  
   164  // Stop is part of the topo.LeaderParticipation interface
   165  func (mp *zkLeaderParticipation) Stop() {
   166  	mp.stopCtxCancel()
   167  	<-mp.done
   168  }
   169  
   170  // GetCurrentLeaderID is part of the topo.LeaderParticipation interface.
   171  // We just read the smallest (first) node content, that is the id.
   172  func (mp *zkLeaderParticipation) GetCurrentLeaderID(ctx context.Context) (string, error) {
   173  	zkPath := path.Join(mp.zs.root, electionsPath, mp.name)
   174  
   175  	for {
   176  		children, _, err := mp.zs.conn.Children(ctx, zkPath)
   177  		if err != nil {
   178  			return "", convertError(err, zkPath)
   179  		}
   180  		if len(children) == 0 {
   181  			// no current primary
   182  			return "", nil
   183  		}
   184  		sort.Strings(children)
   185  
   186  		childPath := path.Join(zkPath, children[0])
   187  		data, _, err := mp.zs.conn.Get(ctx, childPath)
   188  		if err != nil {
   189  			if err == zk.ErrNoNode {
   190  				// primary terminated in front of our own eyes,
   191  				// try again
   192  				continue
   193  			}
   194  			return "", convertError(err, zkPath)
   195  		}
   196  
   197  		return string(data), nil
   198  	}
   199  }
   200  
   201  // WaitForNewLeader is part of the topo.LeaderParticipation interface
   202  func (mp *zkLeaderParticipation) WaitForNewLeader(context.Context) (<-chan string, error) {
   203  	// This isn't implemented yet, but likely can be implemented in the same way
   204  	// as how WatchRecursive could be implemented as well.
   205  	return nil, topo.NewError(topo.NoImplementation, "wait for leader not supported in ZK2 topo")
   206  }