vitess.io/vitess@v0.16.2/go/vt/topo/etcd2topo/lock.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package etcd2topo 18 19 import ( 20 "context" 21 "fmt" 22 "path" 23 24 "github.com/spf13/pflag" 25 26 "go.etcd.io/etcd/api/v3/mvccpb" 27 clientv3 "go.etcd.io/etcd/client/v3" 28 29 "vitess.io/vitess/go/vt/log" 30 "vitess.io/vitess/go/vt/proto/vtrpc" 31 "vitess.io/vitess/go/vt/servenv" 32 "vitess.io/vitess/go/vt/topo" 33 "vitess.io/vitess/go/vt/vterrors" 34 ) 35 36 var ( 37 leaseTTL = 30 38 ) 39 40 func init() { 41 for _, cmd := range topo.FlagBinaries { 42 servenv.OnParseFor(cmd, registerEtcd2TopoLockFlags) 43 } 44 } 45 46 func registerEtcd2TopoLockFlags(fs *pflag.FlagSet) { 47 fs.IntVar(&leaseTTL, "topo_etcd_lease_ttl", leaseTTL, "Lease TTL for locks and leader election. The client will use KeepAlive to keep the lease going.") 48 } 49 50 // newUniqueEphemeralKV creates a new file in the provided directory. 51 // It is linked to the Lease. 52 // Errors returned are converted to topo errors. 53 func (s *Server) newUniqueEphemeralKV(ctx context.Context, cli *clientv3.Client, leaseID clientv3.LeaseID, nodePath string, contents string) (string, int64, error) { 54 // Use the lease ID as the file name, so it's guaranteed unique. 55 newKey := fmt.Sprintf("%v/%v", nodePath, leaseID) 56 57 // Only create a new file if it doesn't exist already 58 // (version = 0), to avoid two processes using the 59 // same file name. Since we use the lease ID, this should never happen. 60 txnresp, err := cli.Txn(ctx). 61 If(clientv3.Compare(clientv3.Version(newKey), "=", 0)). 62 Then(clientv3.OpPut(newKey, contents, clientv3.WithLease(leaseID))). 63 Commit() 64 if err != nil { 65 if err == context.Canceled || err == context.DeadlineExceeded { 66 // Our context was canceled as we were sending 67 // a creation request. We don't know if it 68 // succeeded or not. In any case, let's try to 69 // delete the node, so we don't leave an orphan 70 // node behind for *leaseTTL time. 71 72 if _, err := cli.Delete(context.Background(), newKey); err != nil { 73 log.Errorf("cli.Delete(context.Background(), newKey) failed :%v", err) 74 } 75 } 76 return "", 0, convertError(err, newKey) 77 } 78 if !txnresp.Succeeded { 79 // The key already exists, that should not happen. 80 return "", 0, ErrBadResponse 81 } 82 // The key was created. 83 return newKey, txnresp.Header.Revision, nil 84 } 85 86 // waitOnLastRev waits on all revisions of the files in the provided 87 // directory that have revisions smaller than the provided revision. 88 // It returns true only if there is no more other older files. 89 func (s *Server) waitOnLastRev(ctx context.Context, cli *clientv3.Client, nodePath string, revision int64) (bool, error) { 90 // Get the keys that are blocking us, if any. 91 opts := append(clientv3.WithLastRev(), clientv3.WithMaxModRev(revision-1)) 92 lastKey, err := cli.Get(ctx, nodePath+"/", opts...) 93 if err != nil { 94 return false, convertError(err, nodePath) 95 } 96 if len(lastKey.Kvs) == 0 { 97 // No older key, we're done waiting. 98 return true, nil 99 } 100 101 // Wait for release on blocking key. Cancel the watch when we 102 // exit this function. 103 key := string(lastKey.Kvs[0].Key) 104 ctx, cancel := context.WithCancel(ctx) 105 defer cancel() 106 wc := cli.Watch(ctx, key, clientv3.WithRev(revision)) 107 if wc == nil { 108 return false, vterrors.Errorf(vtrpc.Code_INTERNAL, "Watch failed") 109 } 110 111 select { 112 case <-ctx.Done(): 113 return false, convertError(ctx.Err(), nodePath) 114 case wresp := <-wc: 115 for _, ev := range wresp.Events { 116 if ev.Type == mvccpb.DELETE { 117 // There might still be older keys, 118 // but not this one. 119 return false, nil 120 } 121 } 122 } 123 124 // The Watch stopped, we're not sure if there are more items. 125 return false, nil 126 } 127 128 // etcdLockDescriptor implements topo.LockDescriptor. 129 type etcdLockDescriptor struct { 130 s *Server 131 leaseID clientv3.LeaseID 132 } 133 134 // TryLock is part of the topo.Conn interface. 135 func (s *Server) TryLock(ctx context.Context, dirPath, contents string) (topo.LockDescriptor, error) { 136 // We list all the entries under dirPath 137 entries, err := s.ListDir(ctx, dirPath, true) 138 if err != nil { 139 // We need to return the right error codes, like 140 // topo.ErrNoNode and topo.ErrInterrupted, and the 141 // easiest way to do this is to return convertError(err). 142 // It may lose some of the context, if this is an issue, 143 // maybe logging the error would work here. 144 return nil, convertError(err, dirPath) 145 } 146 147 // If there is a folder '/locks' with some entries in it then we can assume that someone else already has a lock. 148 // Throw error in this case 149 for _, e := range entries { 150 if e.Name == locksPath && e.Type == topo.TypeDirectory && e.Ephemeral { 151 return nil, topo.NewError(topo.NodeExists, fmt.Sprintf("lock already exists at path %s", dirPath)) 152 } 153 } 154 155 // everything is good let's acquire the lock. 156 return s.lock(ctx, dirPath, contents) 157 } 158 159 // Lock is part of the topo.Conn interface. 160 func (s *Server) Lock(ctx context.Context, dirPath, contents string) (topo.LockDescriptor, error) { 161 // We list the directory first to make sure it exists. 162 if _, err := s.ListDir(ctx, dirPath, false /*full*/); err != nil { 163 // We need to return the right error codes, like 164 // topo.ErrNoNode and topo.ErrInterrupted, and the 165 // easiest way to do this is to return convertError(err). 166 // It may lose some of the context, if this is an issue, 167 // maybe logging the error would work here. 168 return nil, convertError(err, dirPath) 169 } 170 171 return s.lock(ctx, dirPath, contents) 172 } 173 174 // lock is used by both Lock() and primary election. 175 func (s *Server) lock(ctx context.Context, nodePath, contents string) (topo.LockDescriptor, error) { 176 nodePath = path.Join(s.root, nodePath, locksPath) 177 178 // Get a lease, set its KeepAlive. 179 lease, err := s.cli.Grant(ctx, int64(leaseTTL)) 180 if err != nil { 181 return nil, convertError(err, nodePath) 182 } 183 leaseKA, err := s.cli.KeepAlive(ctx, lease.ID) 184 if err != nil { 185 return nil, convertError(err, nodePath) 186 } 187 go func() { 188 // Drain the lease keepAlive channel, we're not 189 // interested in its contents. 190 for range leaseKA { 191 } 192 }() 193 194 // Create an ephemeral node in the locks directory. 195 key, revision, err := s.newUniqueEphemeralKV(ctx, s.cli, lease.ID, nodePath, contents) 196 if err != nil { 197 return nil, err 198 } 199 200 // Wait until all older nodes in the locks directory are gone. 201 for { 202 done, err := s.waitOnLastRev(ctx, s.cli, nodePath, revision) 203 if err != nil { 204 // We had an error waiting on the last node. 205 // Revoke our lease, this will delete the file. 206 if _, rerr := s.cli.Revoke(context.Background(), lease.ID); rerr != nil { 207 log.Warningf("Revoke(%d) failed, may have left %v behind: %v", lease.ID, key, rerr) 208 } 209 return nil, err 210 } 211 if done { 212 // No more older nodes, we're it! 213 return &etcdLockDescriptor{ 214 s: s, 215 leaseID: lease.ID, 216 }, nil 217 } 218 } 219 } 220 221 // Check is part of the topo.LockDescriptor interface. 222 // We use KeepAliveOnce to make sure the lease is still active and well. 223 func (ld *etcdLockDescriptor) Check(ctx context.Context) error { 224 _, err := ld.s.cli.KeepAliveOnce(ctx, ld.leaseID) 225 if err != nil { 226 return convertError(err, "lease") 227 } 228 return nil 229 } 230 231 // Unlock is part of the topo.LockDescriptor interface. 232 func (ld *etcdLockDescriptor) Unlock(ctx context.Context) error { 233 _, err := ld.s.cli.Revoke(ctx, ld.leaseID) 234 if err != nil { 235 return convertError(err, "lease") 236 } 237 return nil 238 }