vitess.io/vitess@v0.16.2/go/vt/topo/zk2topo/election.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package zk2topo 18 19 import ( 20 "context" 21 "path" 22 "sort" 23 24 "github.com/z-division/go-zookeeper/zk" 25 26 "vitess.io/vitess/go/vt/vterrors" 27 28 "vitess.io/vitess/go/vt/log" 29 "vitess.io/vitess/go/vt/topo" 30 ) 31 32 // This file contains the primary election code for zk2topo.Server. 33 34 // NewLeaderParticipation is part of the topo.Server interface. 35 // We use the full path: <root path>/election/<name> 36 func (zs *Server) NewLeaderParticipation(name, id string) (topo.LeaderParticipation, error) { 37 ctx := context.TODO() 38 39 zkPath := path.Join(zs.root, electionsPath, name) 40 41 // Create the toplevel directory, OK if it exists already. 42 // We will create the parent directory as well, but not more. 43 if _, err := CreateRecursive(ctx, zs.conn, zkPath, nil, 0, zk.WorldACL(PermDirectory), 1); err != nil && err != zk.ErrNodeExists { 44 return nil, convertError(err, zkPath) 45 } 46 47 result := &zkLeaderParticipation{ 48 zs: zs, 49 name: name, 50 id: []byte(id), 51 done: make(chan struct{}), 52 } 53 result.stopCtx, result.stopCtxCancel = context.WithCancel(context.Background()) 54 return result, nil 55 } 56 57 // zkLeaderParticipation implements topo.LeaderParticipation. 58 // 59 // We use a directory with files created as sequence and ephemeral, 60 // see https://zookeeper.apache.org/doc/trunk/recipes.html#sc_leaderElection 61 // From the toplevel election directory, we'll have one sub-directory 62 // per name, with the sequence files in there. Each sequence file also contains 63 // the id. 64 type zkLeaderParticipation struct { 65 // zs is our parent zk topo Server 66 zs *Server 67 68 // name is the name of this LeaderParticipation 69 name string 70 71 // id is the process's current id. 72 id []byte 73 74 // stopCtx is a context that is closed when Stop is called. 75 stopCtx context.Context 76 77 // stopCtxCancel is the cancel function to call to cancel stopCtx. 78 stopCtxCancel context.CancelFunc 79 80 // done is a channel closed when the stop operation is done. 81 done chan struct{} 82 } 83 84 // WaitForLeadership is part of the topo.LeaderParticipation interface. 85 func (mp *zkLeaderParticipation) WaitForLeadership() (context.Context, error) { 86 // If Stop was already called, mp.done is closed, so we are interrupted. 87 select { 88 case <-mp.done: 89 return nil, topo.NewError(topo.Interrupted, "Leadership") 90 default: 91 } 92 93 ctx := context.TODO() 94 zkPath := path.Join(mp.zs.root, electionsPath, mp.name) 95 96 // Fast path if Stop was already called. 97 select { 98 case <-mp.stopCtx.Done(): 99 close(mp.done) 100 return nil, topo.NewError(topo.Interrupted, "Leadership") 101 default: 102 } 103 104 // Create the current proposal. 105 proposal, err := mp.zs.conn.Create(ctx, zkPath+"/", mp.id, zk.FlagSequence|zk.FlagEphemeral, zk.WorldACL(PermFile)) 106 if err != nil { 107 return nil, vterrors.Wrapf(err, "cannot create proposal file in %v", zkPath) 108 } 109 110 // Wait until we are it, or we are interrupted. Using a 111 // small-ish time out so it gets exercised faster (as opposed 112 // to crashing after a day of use). 113 err = obtainQueueLock(mp.stopCtx, mp.zs.conn, proposal) 114 switch err { 115 case nil: 116 break 117 case context.Canceled: 118 close(mp.done) 119 return nil, topo.NewError(topo.Interrupted, "Leadership") 120 default: 121 // something else went wrong 122 return nil, err 123 } 124 125 // we got the lock, create our background context 126 ctx, cancel := context.WithCancel(context.Background()) 127 go mp.watchLeadership(ctx, mp.zs.conn, proposal, cancel) 128 return ctx, nil 129 } 130 131 // watchLeadership is the background go routine we run while we are the primary. 132 // We will do two things: 133 // - watch for changes to the proposal file. If anything happens there, 134 // it most likely means we lost the ZK session, so we want to stop 135 // being the primary. 136 // - wait for mp.stop. 137 func (mp *zkLeaderParticipation) watchLeadership(ctx context.Context, conn *ZkConn, proposal string, cancel context.CancelFunc) { 138 // any interruption of this routine means we're not primary any more. 139 defer cancel() 140 141 // get to work watching our own proposal 142 _, stats, events, err := conn.GetW(ctx, proposal) 143 if err != nil { 144 log.Warningf("Cannot watch proposal while being Leader, stopping: %v", err) 145 return 146 } 147 148 select { 149 case <-mp.stopCtx.Done(): 150 // we were asked to stop, we're done. Remove our node. 151 log.Infof("Canceling leadership '%v' upon Stop.", mp.name) 152 153 if err := conn.Delete(ctx, proposal, stats.Version); err != nil { 154 log.Warningf("Error deleting our proposal %v: %v", proposal, err) 155 } 156 close(mp.done) 157 158 case e := <-events: 159 // something happened to our proposal, that can only be bad. 160 log.Warningf("Watch on proposal triggered, canceling leadership '%v': %v", mp.name, e) 161 } 162 } 163 164 // Stop is part of the topo.LeaderParticipation interface 165 func (mp *zkLeaderParticipation) Stop() { 166 mp.stopCtxCancel() 167 <-mp.done 168 } 169 170 // GetCurrentLeaderID is part of the topo.LeaderParticipation interface. 171 // We just read the smallest (first) node content, that is the id. 172 func (mp *zkLeaderParticipation) GetCurrentLeaderID(ctx context.Context) (string, error) { 173 zkPath := path.Join(mp.zs.root, electionsPath, mp.name) 174 175 for { 176 children, _, err := mp.zs.conn.Children(ctx, zkPath) 177 if err != nil { 178 return "", convertError(err, zkPath) 179 } 180 if len(children) == 0 { 181 // no current primary 182 return "", nil 183 } 184 sort.Strings(children) 185 186 childPath := path.Join(zkPath, children[0]) 187 data, _, err := mp.zs.conn.Get(ctx, childPath) 188 if err != nil { 189 if err == zk.ErrNoNode { 190 // primary terminated in front of our own eyes, 191 // try again 192 continue 193 } 194 return "", convertError(err, zkPath) 195 } 196 197 return string(data), nil 198 } 199 } 200 201 // WaitForNewLeader is part of the topo.LeaderParticipation interface 202 func (mp *zkLeaderParticipation) WaitForNewLeader(context.Context) (<-chan string, error) { 203 // This isn't implemented yet, but likely can be implemented in the same way 204 // as how WatchRecursive could be implemented as well. 205 return nil, topo.NewError(topo.NoImplementation, "wait for leader not supported in ZK2 topo") 206 }