istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/leaderelection/leaderelection.go

// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package leaderelection

import (
	"context"
	"fmt"
	"os"
	"strings"
	"sync"
	"time"

	"go.uber.org/atomic"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"

	"istio.io/istio/pilot/pkg/features"
	"istio.io/istio/pilot/pkg/leaderelection/k8sleaderelection"
	"istio.io/istio/pilot/pkg/leaderelection/k8sleaderelection/k8sresourcelock"
	"istio.io/istio/pkg/kube"
	"istio.io/istio/pkg/log"
	"istio.io/istio/pkg/revisions"
)

// Various locks used throughout the code
const (
	NamespaceController     = "istio-namespace-controller-election"
	ServiceExportController = "istio-serviceexport-controller-election"
	// This holds the legacy name to not conflict with older control plane deployments which are just
	// doing the ingress syncing.
	IngressController = "istio-leader"
	// GatewayStatusController controls the status of gateway.networking.k8s.io objects. For the v1alpha1
	// version this was formerly "istio-gateway-leader"; because they are a different API group we need a
	// separate election to ensure we handle both rather than only one or the other.
	GatewayStatusController = "istio-gateway-status-leader"
	StatusController        = "istio-status-leader"
	AnalyzeController       = "istio-analyze-leader"
	// GatewayDeploymentController controls translating Kubernetes Gateway objects into various derived
	// resources (Service, Deployment, etc).
	// Unlike other types which use ConfigMaps, we use a Lease here. This is because:
	// * Others use configmap for backwards compatibility
	// * This type is per-revision, so it is higher cost. Leases are cheaper
	// * Other types use "prioritized leader election", which isn't implemented for Lease
	GatewayDeploymentController = "istio-gateway-deployment"
	NodeUntaintController       = "istio-node-untaint"
)

// Leader election key prefix for remote istiod managed clusters
const remoteIstiodPrefix = "^"

type LeaderElection struct {
	namespace string
	name      string
	runFns    []func(stop <-chan struct{})
	client    kubernetes.Interface
	ttl       time.Duration

	// enabled sets whether leader election is enabled. Setting enabled=false
	// before calling Run() bypasses leader election and assumes that we are
	// always leader, avoiding unnecessary lease updates on single-node
	// clusters.
	enabled bool

	// Criteria to determine leader priority.
	revision       string
	perRevision    bool
	remote         bool
	defaultWatcher revisions.DefaultWatcher

	// Records which "cycle" the election is on. This is incremented each time an election is won and then lost.
	// This is mostly just for testing
	cycle      *atomic.Int32
	electionID string

	// Store as field for testing
	le *k8sleaderelection.LeaderElector
	mu sync.RWMutex
}

// Run will start leader election, calling all runFns when we become the leader.
// If leader election is disabled, it skips straight to the runFns.
func (l *LeaderElection) Run(stop <-chan struct{}) {
	if !l.enabled {
		log.Infof("bypassing leader election: %v", l.electionID)
		for _, f := range l.runFns {
			go f(stop)
		}
		<-stop
		return
	}
	if l.defaultWatcher != nil {
		go l.defaultWatcher.Run(stop)
	}
	for {
		le, err := l.create()
		if err != nil {
			// This should never happen; errors are only from invalid input and the input is not user modifiable
			panic("LeaderElection creation failed: " + err.Error())
		}
		l.mu.Lock()
		l.le = le
		l.cycle.Inc()
		l.mu.Unlock()
		ctx, cancel := context.WithCancel(context.Background())
		go func() {
			<-stop
			cancel()
		}()
		le.Run(ctx)
		select {
		case <-stop:
			// We were told to stop explicitly. Exit now
			return
		default:
			cancel()
			// Otherwise, we may have lost our lock. This can happen when the default revision changes and steals
			// the lock from us.
			log.Infof("Leader election cycle %v lost. Trying again", l.cycle.Load())
		}
	}
}
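
// Illustrative usage sketch from a caller's perspective. The namespace, pod name, and revision
// below are assumed example values, and kubeClient/stop are assumed to already exist as a
// kube.Client and a stop channel; Run blocks until stop is closed.
//
//	leaderelection.NewLeaderElection("istio-system", "istiod-abc123", leaderelection.NamespaceController, "default", kubeClient).
//		AddRunFunction(func(stop <-chan struct{}) {
//			// Work that should only run while we hold the lock; it should return when stop closes.
//		}).
//		Run(stop)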

func (l *LeaderElection) create() (*k8sleaderelection.LeaderElector, error) {
	callbacks := k8sleaderelection.LeaderCallbacks{
		OnStartedLeading: func(ctx context.Context) {
			log.Infof("leader election lock obtained: %v", l.electionID)
			for _, f := range l.runFns {
				go f(ctx.Done())
			}
		},
		OnStoppedLeading: func() {
			log.Infof("leader election lock lost: %v", l.electionID)
		},
	}

	key := l.revision
	if l.remote {
		key = remoteIstiodPrefix + key
	}
	var lock k8sresourcelock.Interface = &k8sresourcelock.ConfigMapLock{
		ConfigMapMeta: metav1.ObjectMeta{Namespace: l.namespace, Name: l.electionID},
		Client:        l.client.CoreV1(),
		LockConfig: k8sresourcelock.ResourceLockConfig{
			Identity: l.name,
			Key:      key,
		},
	}
	if l.perRevision {
		lock = &k8sresourcelock.LeaseLock{
			LeaseMeta: metav1.ObjectMeta{Namespace: l.namespace, Name: l.electionID},
			Client:    l.client.CoordinationV1(),
			// Note: Key is NOT used. This is not implemented in the library for Lease nor needed, since this is already per-revision.
			// See below, where we disable KeyComparison
			LockConfig: k8sresourcelock.ResourceLockConfig{
				Identity: l.name,
			},
		}
	}

	config := k8sleaderelection.LeaderElectionConfig{
		Lock:          lock,
		LeaseDuration: l.ttl,
		RenewDeadline: l.ttl / 2,
		RetryPeriod:   l.ttl / 4,
		Callbacks:     callbacks,
		// When Pilot exits, the lease will be dropped. This is more likely to lead to a case where two
		// instances are both considered the leader. As such, if this is intended to be used for mission-critical
		// use cases (rather than avoiding duplication of work), this may need to be re-evaluated.
		ReleaseOnCancel: true,
	}
	if !l.perRevision {
		// Function to use to decide whether this leader should steal the existing lock.
		// This is disabled when perRevision is used, as that enables the Lease. Lease doesn't have a holderKey field to place our key
		// in, as holderKey is an Istio-specific fork.
		// While it's possible to make it work with Lease as well (via an annotation to store it), we don't ever need prioritized
		// election for these per-revision ones anyway, since the prioritization is about preferring one revision over others.
		config.KeyComparison = func(leaderKey string) bool {
			return LocationPrioritizedComparison(leaderKey, l)
		}
	}

	return k8sleaderelection.NewLeaderElector(config)
}
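
// For reference, with the default 30s ttl set in newLeaderElection below, the config above works
// out to LeaseDuration=30s, RenewDeadline=15s (ttl/2), and RetryPeriod=7.5s (ttl/4).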

func LocationPrioritizedComparison(currentLeaderRevision string, l *LeaderElection) bool {
	var currentLeaderRemote bool
	if currentLeaderRemote = strings.HasPrefix(currentLeaderRevision, remoteIstiodPrefix); currentLeaderRemote {
		currentLeaderRevision = strings.TrimPrefix(currentLeaderRevision, remoteIstiodPrefix)
	}
	defaultRevision := l.defaultWatcher.GetDefault()
	if l.revision != currentLeaderRevision && defaultRevision != "" && defaultRevision == l.revision {
		// Always steal the lock if the new one is the default revision and the current one is not
		return true
	}
	// Otherwise, steal the lock if the new leader and the current one are the same revision, but the new one is local and the current one is remote
	return l.revision == currentLeaderRevision && !l.remote && currentLeaderRemote
}
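
// Worked examples for LocationPrioritizedComparison, using hypothetical revision names (the leader
// key is the revision, prefixed with remoteIstiodPrefix when the leader is a remote istiod):
//   - leader key "canary", we are revision "default" and "default" is the cluster default revision:
//     returns true (the default revision steals the lock).
//   - leader key "^default" (remote holder), we are a local istiod of revision "default":
//     returns true (a local istiod is preferred over a remote one for the same revision).
//   - leader key "default", we are revision "canary" and not the default revision:
//     returns false (the existing leader keeps the lock).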

// AddRunFunction registers a function to run when we are the leader. These will be run asynchronously.
// To avoid running when not a leader, functions should respect the stop channel.
func (l *LeaderElection) AddRunFunction(f func(stop <-chan struct{})) *LeaderElection {
	l.runFns = append(l.runFns, f)
	return l
}

// NewLeaderElection creates a leader election instance with the provided ID. This follows standard Kubernetes
// elections, with one difference: the "default" revision will steal the lock from other revisions.
func NewLeaderElection(namespace, name, electionID, revision string, client kube.Client) *LeaderElection {
	return newLeaderElection(namespace, name, electionID, revision, false, false, client)
}

// NewPerRevisionLeaderElection creates a *per revision* leader election. This means there will be one leader for each revision.
func NewPerRevisionLeaderElection(namespace, name, electionID, revision string, client kube.Client) *LeaderElection {
	return newLeaderElection(namespace, name, electionID, revision, true, false, client)
}

func NewLeaderElectionMulticluster(namespace, name, electionID, revision string, remote bool, client kube.Client) *LeaderElection {
	return newLeaderElection(namespace, name, electionID, revision, false, remote, client)
}
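
// Note on the constructors above: NewLeaderElection and NewLeaderElectionMulticluster use the
// ConfigMap lock with prioritized (KeyComparison-based) election, while NewPerRevisionLeaderElection
// sets perRevision, which switches to a Lease lock, disables KeyComparison, and appends the revision
// (when set) to the election ID so that each revision elects its own leader.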

func newLeaderElection(namespace, name, electionID, revision string, perRevision bool, remote bool, client kube.Client) *LeaderElection {
	var watcher revisions.DefaultWatcher
	if features.EnableLeaderElection {
		watcher = revisions.NewDefaultWatcher(client, revision)
	}
	if name == "" {
		hn, _ := os.Hostname()
		name = fmt.Sprintf("unknown-%s", hn)
	}
	if perRevision && revision != "" {
		electionID += "-" + revision
	}
	return &LeaderElection{
		namespace:      namespace,
		name:           name,
		client:         client.Kube(),
		electionID:     electionID,
		revision:       revision,
		perRevision:    perRevision,
		enabled:        features.EnableLeaderElection,
		remote:         remote,
		defaultWatcher: watcher,
		// Default to a 30s ttl. Overridable for tests
		ttl:   time.Second * 30,
		cycle: atomic.NewInt32(0),
		mu:    sync.RWMutex{},
	}
}
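
// As a worked example with hypothetical arguments: newLeaderElection("istio-system", "",
// GatewayDeploymentController, "canary", true, false, client) falls back to the name
// "unknown-<hostname>" and yields the election ID "istio-gateway-deployment-canary".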

func (l *LeaderElection) isLeader() bool {
	l.mu.RLock()
	defer l.mu.RUnlock()
	if !l.enabled {
		return true
	}
	if l.le == nil {
		return false
	}
	return l.le.IsLeader()
}