istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/leaderelection/leaderelection.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package leaderelection 16 17 import ( 18 "context" 19 "fmt" 20 "os" 21 "strings" 22 "sync" 23 "time" 24 25 "go.uber.org/atomic" 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 "k8s.io/client-go/kubernetes" 28 29 "istio.io/istio/pilot/pkg/features" 30 "istio.io/istio/pilot/pkg/leaderelection/k8sleaderelection" 31 "istio.io/istio/pilot/pkg/leaderelection/k8sleaderelection/k8sresourcelock" 32 "istio.io/istio/pkg/kube" 33 "istio.io/istio/pkg/log" 34 "istio.io/istio/pkg/revisions" 35 ) 36 37 // Various locks used throughout the code 38 const ( 39 NamespaceController = "istio-namespace-controller-election" 40 ServiceExportController = "istio-serviceexport-controller-election" 41 // This holds the legacy name to not conflict with older control plane deployments which are just 42 // doing the ingress syncing. 43 IngressController = "istio-leader" 44 // GatewayStatusController controls the status of gateway.networking.k8s.io objects. For the v1alpha1 45 // this was formally "istio-gateway-leader"; because they are a different API group we need a different 46 // election to ensure we do not only handle one or the other. 47 GatewayStatusController = "istio-gateway-status-leader" 48 StatusController = "istio-status-leader" 49 AnalyzeController = "istio-analyze-leader" 50 // GatewayDeploymentController controls translating Kubernetes Gateway objects into various derived 51 // resources (Service, Deployment, etc). 52 // Unlike other types which use ConfigMaps, we use a Lease here. This is because: 53 // * Others use configmap for backwards compatibility 54 // * This type is per-revision, so it is higher cost. Leases are cheaper 55 // * Other types use "prioritized leader election", which isn't implemented for Lease 56 GatewayDeploymentController = "istio-gateway-deployment" 57 NodeUntaintController = "istio-node-untaint" 58 ) 59 60 // Leader election key prefix for remote istiod managed clusters 61 const remoteIstiodPrefix = "^" 62 63 type LeaderElection struct { 64 namespace string 65 name string 66 runFns []func(stop <-chan struct{}) 67 client kubernetes.Interface 68 ttl time.Duration 69 70 // enabled sets whether leader election is enabled. Setting enabled=false 71 // before calling Run() bypasses leader election and assumes that we are 72 // always leader, avoiding unnecessary lease updates on single-node 73 // clusters. 74 enabled bool 75 76 // Criteria to determine leader priority. 77 revision string 78 perRevision bool 79 remote bool 80 defaultWatcher revisions.DefaultWatcher 81 82 // Records which "cycle" the election is on. This is incremented each time an election is won and then lost 83 // This is mostly just for testing 84 cycle *atomic.Int32 85 electionID string 86 87 // Store as field for testing 88 le *k8sleaderelection.LeaderElector 89 mu sync.RWMutex 90 } 91 92 // Run will start leader election, calling all runFns when we become the leader. 93 // If leader election is disabled, it skips straight to the runFns. 94 func (l *LeaderElection) Run(stop <-chan struct{}) { 95 if !l.enabled { 96 log.Infof("bypassing leader election: %v", l.electionID) 97 for _, f := range l.runFns { 98 go f(stop) 99 } 100 <-stop 101 return 102 } 103 if l.defaultWatcher != nil { 104 go l.defaultWatcher.Run(stop) 105 } 106 for { 107 le, err := l.create() 108 if err != nil { 109 // This should never happen; errors are only from invalid input and the input is not user modifiable 110 panic("LeaderElection creation failed: " + err.Error()) 111 } 112 l.mu.Lock() 113 l.le = le 114 l.cycle.Inc() 115 l.mu.Unlock() 116 ctx, cancel := context.WithCancel(context.Background()) 117 go func() { 118 <-stop 119 cancel() 120 }() 121 le.Run(ctx) 122 select { 123 case <-stop: 124 // We were told to stop explicitly. Exit now 125 return 126 default: 127 cancel() 128 // Otherwise, we may have lost our lock. This can happen when the default revision changes and steals 129 // the lock from us. 130 log.Infof("Leader election cycle %v lost. Trying again", l.cycle.Load()) 131 } 132 } 133 } 134 135 func (l *LeaderElection) create() (*k8sleaderelection.LeaderElector, error) { 136 callbacks := k8sleaderelection.LeaderCallbacks{ 137 OnStartedLeading: func(ctx context.Context) { 138 log.Infof("leader election lock obtained: %v", l.electionID) 139 for _, f := range l.runFns { 140 go f(ctx.Done()) 141 } 142 }, 143 OnStoppedLeading: func() { 144 log.Infof("leader election lock lost: %v", l.electionID) 145 }, 146 } 147 148 key := l.revision 149 if l.remote { 150 key = remoteIstiodPrefix + key 151 } 152 var lock k8sresourcelock.Interface = &k8sresourcelock.ConfigMapLock{ 153 ConfigMapMeta: metav1.ObjectMeta{Namespace: l.namespace, Name: l.electionID}, 154 Client: l.client.CoreV1(), 155 LockConfig: k8sresourcelock.ResourceLockConfig{ 156 Identity: l.name, 157 Key: key, 158 }, 159 } 160 if l.perRevision { 161 lock = &k8sresourcelock.LeaseLock{ 162 LeaseMeta: metav1.ObjectMeta{Namespace: l.namespace, Name: l.electionID}, 163 Client: l.client.CoordinationV1(), 164 // Note: Key is NOT used. This is not implemented in the library for Lease nor needed, since this is already per-revision. 165 // See below, where we disable KeyComparison 166 LockConfig: k8sresourcelock.ResourceLockConfig{ 167 Identity: l.name, 168 }, 169 } 170 } 171 172 config := k8sleaderelection.LeaderElectionConfig{ 173 Lock: lock, 174 LeaseDuration: l.ttl, 175 RenewDeadline: l.ttl / 2, 176 RetryPeriod: l.ttl / 4, 177 Callbacks: callbacks, 178 // When Pilot exits, the lease will be dropped. This is more likely to lead to a case where 179 // to instances are both considered the leaders. As such, if this is intended to be use for mission-critical 180 // usages (rather than avoiding duplication of work), this may need to be re-evaluated. 181 ReleaseOnCancel: true, 182 } 183 if !l.perRevision { 184 // Function to use to decide whether this leader should steal the existing lock. 185 // This is disable when perRevision is used, as this enables the Lease. Lease doesn't have a holderKey field to place our key 186 // as holderKey is an Istio specific fork. 187 // While its possible to make it work with Lease as well (via an annotation to store it), we don't ever need prioritized 188 // for these per-revision ones anyways, since the prioritization is about preferring one revision over others. 189 config.KeyComparison = func(leaderKey string) bool { 190 return LocationPrioritizedComparison(leaderKey, l) 191 } 192 } 193 194 return k8sleaderelection.NewLeaderElector(config) 195 } 196 197 func LocationPrioritizedComparison(currentLeaderRevision string, l *LeaderElection) bool { 198 var currentLeaderRemote bool 199 if currentLeaderRemote = strings.HasPrefix(currentLeaderRevision, remoteIstiodPrefix); currentLeaderRemote { 200 currentLeaderRevision = strings.TrimPrefix(currentLeaderRevision, remoteIstiodPrefix) 201 } 202 defaultRevision := l.defaultWatcher.GetDefault() 203 if l.revision != currentLeaderRevision && defaultRevision != "" && defaultRevision == l.revision { 204 // Always steal the lock if the new one is the default revision and the current one is not 205 return true 206 } 207 // Otherwise steal the lock if the new one and the current one are the same revision, but new one is local and current is remote 208 return l.revision == currentLeaderRevision && !l.remote && currentLeaderRemote 209 } 210 211 // AddRunFunction registers a function to run when we are the leader. These will be run asynchronously. 212 // To avoid running when not a leader, functions should respect the stop channel. 213 func (l *LeaderElection) AddRunFunction(f func(stop <-chan struct{})) *LeaderElection { 214 l.runFns = append(l.runFns, f) 215 return l 216 } 217 218 // NewLeaderElection creates a leader election instance with the provided ID. This follows standard Kubernetes 219 // elections, with one difference: the "default" revision will steal the lock from other revisions. 220 func NewLeaderElection(namespace, name, electionID, revision string, client kube.Client) *LeaderElection { 221 return newLeaderElection(namespace, name, electionID, revision, false, false, client) 222 } 223 224 // NewPerRevisionLeaderElection creates a *per revision* leader election. This means there will be one leader for each revision. 225 func NewPerRevisionLeaderElection(namespace, name, electionID, revision string, client kube.Client) *LeaderElection { 226 return newLeaderElection(namespace, name, electionID, revision, true, false, client) 227 } 228 229 func NewLeaderElectionMulticluster(namespace, name, electionID, revision string, remote bool, client kube.Client) *LeaderElection { 230 return newLeaderElection(namespace, name, electionID, revision, false, remote, client) 231 } 232 233 func newLeaderElection(namespace, name, electionID, revision string, perRevision bool, remote bool, client kube.Client) *LeaderElection { 234 var watcher revisions.DefaultWatcher 235 if features.EnableLeaderElection { 236 watcher = revisions.NewDefaultWatcher(client, revision) 237 } 238 if name == "" { 239 hn, _ := os.Hostname() 240 name = fmt.Sprintf("unknown-%s", hn) 241 } 242 if perRevision && revision != "" { 243 electionID += "-" + revision 244 } 245 return &LeaderElection{ 246 namespace: namespace, 247 name: name, 248 client: client.Kube(), 249 electionID: electionID, 250 revision: revision, 251 perRevision: perRevision, 252 enabled: features.EnableLeaderElection, 253 remote: remote, 254 defaultWatcher: watcher, 255 // Default to a 30s ttl. Overridable for tests 256 ttl: time.Second * 30, 257 cycle: atomic.NewInt32(0), 258 mu: sync.RWMutex{}, 259 } 260 } 261 262 func (l *LeaderElection) isLeader() bool { 263 l.mu.RLock() 264 defer l.mu.RUnlock() 265 if !l.enabled { 266 return true 267 } 268 if l.le == nil { 269 return false 270 } 271 return l.le.IsLeader() 272 }