github.com/cilium/cilium@v1.16.2/pkg/service/reconciler.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package service

import (
	"context"
	"fmt"
	"net/netip"

	"github.com/cilium/hive/cell"
	"github.com/cilium/hive/job"
	"github.com/cilium/statedb"
	"k8s.io/apimachinery/pkg/util/sets"

	"github.com/cilium/cilium/pkg/backoff"
	"github.com/cilium/cilium/pkg/datapath/tables"
	"github.com/cilium/cilium/pkg/inctimer"
	"github.com/cilium/cilium/pkg/time"
)

// registerServiceReconciler registers a background job that keeps the
// NodePort frontends in sync with the set of node addresses designated
// for NodePort use.
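//
// A minimal sketch of how this is typically wired into a hive module
// (the exact module layout is assumed, not taken from this file):
//
//	cell.Module(
//		"service-reconciler",
//		"Syncs NodePort frontends with node addresses",
//		cell.Invoke(registerServiceReconciler),
//	)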
func registerServiceReconciler(p serviceReconcilerParams) {
	sr := serviceReconciler(p)
	g := p.Jobs.NewGroup(p.Health)
	g.Add(job.OneShot("ServiceReconciler", sr.reconcileLoop))
	p.Lifecycle.Append(g)
}

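// syncNodePort captures the part of the ServiceManager API used by the
// reconciler. Narrowing the dependency to a single method keeps the
// reconciler easy to test; a trivial fake suffices (a sketch, not part
// of this package):
//
//	type fakeSyncNodePort struct{ synced sets.Set[netip.Addr] }
//
//	func (f *fakeSyncNodePort) SyncNodePortFrontends(addrs sets.Set[netip.Addr]) error {
//		f.synced = addrs
//		return nil
//	}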
type syncNodePort interface {
	SyncNodePortFrontends(sets.Set[netip.Addr]) error
}

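// serviceReconcilerParams lists the dependencies of the reconciler,
// injected by hive via cell.In.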
type serviceReconcilerParams struct {
	cell.In

	Lifecycle      cell.Lifecycle
	Jobs           job.Registry
	Health         cell.Health
	DB             *statedb.DB
	NodeAddresses  statedb.Table[tables.NodeAddress]
	ServiceManager syncNodePort
}

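// serviceReconciler is a named copy of the parameter struct, allowing the
// reconcile loop to be defined as a method on it.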
type serviceReconciler serviceReconcilerParams

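// reconcileLoop runs until the context is cancelled. On each iteration it
// snapshots the node address table, derives the set of NodePort frontend
// addresses, and pushes it to the ServiceManager whenever it differs from
// the last successfully synced set, retrying failures with exponential
// backoff.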
func (sr serviceReconciler) reconcileLoop(ctx context.Context, health cell.Health) error {
	var (
		// retry is non-nil while a failed sync is waiting to be retried.
		retry        <-chan time.Time
		retryAttempt int
		// addrs holds the last successfully synced set of NodePort addresses.
		addrs        sets.Set[netip.Addr]
	)

	// Reuse a single timer across retries rather than allocating a new one
	// with time.After on every attempt.
	retryTimer, retryTimerStop := inctimer.New()
	defer retryTimerStop()

	// Use exponential backoff for retries. Keep the minimum wait small for
	// fast tests, but back off with an aggressive factor.
	backoff := backoff.Exponential{
		Min:    10 * time.Millisecond,
		Max:    30 * time.Second,
		Factor: 8,
	}
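	// With these settings successive retries wait roughly 10ms, 80ms,
	// 640ms and ~5.1s before hitting the 30s cap (assuming the usual
	// Min*Factor^attempt formula of pkg/backoff).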

	// Perform a sync periodically. This resolves the rare races where
	// k8s.ParseService uses an old set of frontend addresses. This will
	// eventually be fixed by moving the NodePort frontend expansion further
	// down the stack, ideally into the datapath.
	const periodicSyncInterval = 15 * time.Minute
	periodicSyncTicker := time.NewTicker(periodicSyncInterval)
	defer periodicSyncTicker.Stop()

	for {
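		// Take a snapshot of the node address table. The returned watch
		// channel is closed by statedb when the table next changes.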
		iter, watch := sr.NodeAddresses.AllWatch(sr.DB.ReadTxn())

		// Collect all NodePort addresses
		newAddrs := sets.New(statedb.Collect(
			statedb.Map(
				statedb.Filter(
					iter,
					func(addr tables.NodeAddress) bool { return addr.NodePort },
				),
				func(addr tables.NodeAddress) netip.Addr { return addr.Addr },
			),
		)...)

		// Refresh the frontends if the set of NodePort addresses changed.
		if !addrs.Equal(newAddrs) {
			err := sr.ServiceManager.SyncNodePortFrontends(newAddrs)
			if err != nil {
				// Schedule a retry with exponential backoff. addrs is left
				// unchanged so the next attempt re-syncs the full set.
				duration := backoff.Duration(retryAttempt)
				retry = retryTimer.After(duration)
				retryAttempt++
				log.WithError(err).Warnf("Could not synchronize new frontend addresses, retrying in %s", duration)
				health.Degraded("Failed to sync NodePort frontends", err)
			} else {
				addrs = newAddrs
				retryAttempt = 0
				retry = nil
				health.OK(fmt.Sprintf("%d NodePort frontend addresses", len(addrs)))
			}
		}

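		// Wait until the node addresses change, a retry is due, or the
		// periodic sync interval elapses.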
		select {
		case <-ctx.Done():
			return nil
		case <-watch:
		case <-retry:
		case <-periodicSyncTicker.C:
		}
	}
}