github.com/webmeshproj/webmesh-cni@v0.0.27/internal/ipam/lock.go (about) 1 /* 2 Copyright 2023 Avi Zimmerman <avi.zimmerman@gmail.com>. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package ipam 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "sync" 24 "sync/atomic" 25 "time" 26 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 coordinationv1client "k8s.io/client-go/kubernetes/typed/coordination/v1" 29 corev1client "k8s.io/client-go/kubernetes/typed/core/v1" 30 "k8s.io/client-go/rest" 31 "k8s.io/client-go/tools/leaderelection/resourcelock" 32 "sigs.k8s.io/controller-runtime/pkg/log" 33 34 "github.com/webmeshproj/webmesh-cni/internal/types" 35 ) 36 37 // Locker is the interface for taking a distributed lock during IPv4 allocations. 38 type Locker interface { 39 // Acquire attempts to acquire the lock. If a lock is already acquired, the 40 // lock count is incremented. When the lock is released, the lock count is 41 // decremented. When the lock count reaches 0, the lock is released. 42 Acquire(ctx context.Context) error 43 // Release releases the lock. This decrements the lock count. When the lock 44 // count reaches 0, the lock is released. 45 Release(ctx context.Context) 46 } 47 48 // LockConfig is the configuration for a lock. 49 type LockConfig struct { 50 ID string 51 Namespace string 52 LockDuration time.Duration 53 LockAcquireTimeout time.Duration 54 } 55 56 // NewLock creates a new IPAM lock. 57 func NewLock(cfg *rest.Config, config LockConfig) (Locker, error) { 58 corev1client, err := corev1client.NewForConfig(cfg) 59 if err != nil { 60 return nil, fmt.Errorf("create corev1 client: %w", err) 61 } 62 coordinationClient, err := coordinationv1client.NewForConfig(cfg) 63 if err != nil { 64 return nil, fmt.Errorf("create coordinationv1 client: %w", err) 65 } 66 rlock, err := resourcelock.New( 67 "leases", 68 config.Namespace, 69 types.IPAMLockID, 70 corev1client, 71 coordinationClient, 72 resourcelock.ResourceLockConfig{ 73 Identity: config.ID, 74 }, 75 ) 76 if err != nil { 77 return nil, fmt.Errorf("create resource lock interface: %w", err) 78 } 79 ipamlock := &ipamLock{ 80 rlock: rlock, 81 config: config, 82 } 83 return ipamlock, nil 84 } 85 86 type ipamLock struct { 87 rlock resourcelock.Interface 88 config LockConfig 89 lockCount atomic.Int32 90 mu sync.Mutex 91 } 92 93 // Acquire attempts to acquire the lock. 94 func (l *ipamLock) Acquire(ctx context.Context) error { 95 l.mu.Lock() 96 defer l.mu.Unlock() 97 log := log.FromContext(ctx).WithName("ipam-lock") 98 if l.lockCount.Load() > 0 { 99 log.V(1).Info("Lock already held, attempting to renew and increment lock count") 100 // Try to update the lock with a renew time. 101 lock, _, err := l.rlock.Get(ctx) 102 if err == nil { 103 lock.RenewTime = metav1.NewTime(time.Now().UTC()) 104 err = l.rlock.Update(ctx, *lock) 105 if err == nil { 106 l.lockCount.Add(1) 107 return nil 108 } 109 log.Error(err, "Failed to renew IPAM lock") 110 l.lockCount.Store(0) 111 return fmt.Errorf("failed to renew IPAM lock: %w", err) 112 } 113 log.Error(err, "Failed to get IPAM lock") 114 l.lockCount.Store(0) 115 return fmt.Errorf("failed to acquire IPAM lock: %w", err) 116 } 117 ctx, cancel := context.WithTimeout(ctx, l.config.LockAcquireTimeout) 118 defer cancel() 119 for { 120 // Check if the lock has already been created. 121 lock, _, err := l.rlock.Get(ctx) 122 if err == nil { 123 // Check if there is a holder for the lock. 124 if lock.HolderIdentity != "" { 125 // Check if the lock expired. 126 if !lock.RenewTime.IsZero() || !lock.AcquireTime.IsZero() { 127 var lockExpiry time.Time 128 if !lock.RenewTime.IsZero() { 129 lockExpiry = lock.RenewTime.Add(time.Duration(lock.LeaseDurationSeconds) * time.Second) 130 } else { 131 lockExpiry = lock.AcquireTime.Add(time.Duration(lock.LeaseDurationSeconds) * time.Second) 132 } 133 if lockExpiry.After(time.Now().UTC()) { 134 log.V(1).Info("Lock currently held, retrying...", "holder", lock.HolderIdentity) 135 goto Retry 136 } 137 // The lock has expired, try to acquire it. 138 } 139 } 140 // Try to update the lock. 141 lock.LeaseDurationSeconds = int(l.config.LockDuration.Seconds()) 142 lock.HolderIdentity = l.config.ID 143 lock.AcquireTime = metav1.NewTime(time.Now().UTC()) 144 lock.RenewTime = metav1.NewTime(time.Now().UTC()) 145 err = l.rlock.Update(ctx, *lock) 146 if err == nil { 147 // We acquired the lock. 148 l.lockCount.Add(1) 149 return nil 150 } 151 log.Error(err, "Failed to acquire IPAM lock, retrying...") 152 goto Retry 153 } 154 // Try to create the lock. 155 err = l.rlock.Create(ctx, resourcelock.LeaderElectionRecord{ 156 HolderIdentity: l.config.ID, 157 LeaseDurationSeconds: int(l.config.LockDuration.Seconds()), 158 }) 159 if err == nil { 160 // We acquired the lock. 161 l.lockCount.Add(1) 162 return nil 163 } 164 log.Error(err, "Failed to acquire IPAM lock, retrying...") 165 Retry: 166 select { 167 case <-ctx.Done(): 168 return fmt.Errorf("failed to acquire IPAM lock: %w", ctx.Err()) 169 default: 170 time.Sleep(time.Second) 171 } 172 } 173 } 174 175 func (l *ipamLock) Release(ctx context.Context) { 176 l.mu.Lock() 177 defer l.mu.Unlock() 178 log := log.FromContext(ctx).WithName("ipam-lock") 179 lockCount := l.lockCount.Load() 180 if lockCount <= 0 { 181 log.Error(errors.New("release unacquired lock"), "Lock count is already 0, cannot release lock") 182 return 183 } 184 lockCount-- 185 l.lockCount.Store(lockCount) 186 if lockCount > 0 { 187 log.V(1).Info("Lock still held, not releasing") 188 return 189 } 190 log.V(1).Info("Releasing IPAM lock") 191 err := l.rlock.Update(ctx, resourcelock.LeaderElectionRecord{ 192 HolderIdentity: "", 193 LeaseDurationSeconds: int(l.config.LockDuration.Seconds()), 194 }) 195 if err != nil { 196 log.Error(err, "Failed to release IPAM lock") 197 } 198 }