github.com/cilium/cilium@v1.16.2/pkg/datapath/l2responder/l2responder.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package l2responder
     5  
     6  import (
     7  	"context"
     8  	"errors"
     9  	"fmt"
    10  	"net/netip"
    11  
    12  	"github.com/cilium/hive/cell"
    13  	"github.com/cilium/hive/job"
    14  	"github.com/cilium/statedb"
    15  	"github.com/sirupsen/logrus"
    16  	"github.com/vishvananda/netlink"
    17  
    18  	"github.com/cilium/cilium/pkg/datapath/garp"
    19  	"github.com/cilium/cilium/pkg/datapath/tables"
    20  	"github.com/cilium/cilium/pkg/ebpf"
    21  	"github.com/cilium/cilium/pkg/maps/l2respondermap"
    22  	"github.com/cilium/cilium/pkg/time"
    23  	"github.com/cilium/cilium/pkg/types"
    24  )
    25  
    26  // Cell provides the L2 Responder Reconciler. This component takes the desired state, calculated by
    27  // the L2 announcer component from the StateDB table and reconciles it with the L2 responder map.
    28  // The L2 Responder Reconciler watches for incremental changes in the table and applies these
    29  // incremental changes immediately and it periodically perform full reconciliation as redundancy.
    30  var Cell = cell.Module(
    31  	"l2-responder",
    32  	"L2 Responder Reconciler",
    33  
    34  	// Provide and register the Table[*L2AnnounceEntry] containing the
    35  	// desired state.
    36  	cell.Provide(
    37  		tables.NewL2AnnounceTable,
    38  		statedb.RWTable[*tables.L2AnnounceEntry].ToTable,
    39  	),
    40  	cell.Invoke(statedb.RegisterTable[*tables.L2AnnounceEntry]),
    41  
    42  	cell.Invoke(NewL2ResponderReconciler),
    43  	cell.Provide(newNeighborNetlink),
    44  )
    45  
    46  type params struct {
    47  	cell.In
    48  
    49  	Lifecycle           cell.Lifecycle
    50  	Logger              logrus.FieldLogger
    51  	L2AnnouncementTable statedb.RWTable[*tables.L2AnnounceEntry]
    52  	StateDB             *statedb.DB
    53  	L2ResponderMap      l2respondermap.Map
    54  	NetLink             linkByNamer
    55  	JobGroup            job.Group
    56  	Health              cell.Health
    57  }
    58  
    59  type linkByNamer interface {
    60  	LinkByName(name string) (netlink.Link, error)
    61  }
    62  
    63  func newNeighborNetlink() linkByNamer {
    64  	return &netlink.Handle{}
    65  }
    66  
    67  type l2ResponderReconciler struct {
    68  	params params
    69  }
    70  
    71  func NewL2ResponderReconciler(params params) *l2ResponderReconciler {
    72  	reconciler := l2ResponderReconciler{
    73  		params: params,
    74  	}
    75  
    76  	params.JobGroup.Add(job.OneShot("l2-responder-reconciler", reconciler.run))
    77  
    78  	return &reconciler
    79  }
    80  
    81  func (p *l2ResponderReconciler) run(ctx context.Context, health cell.Health) error {
    82  	log := p.params.Logger
    83  
    84  	// This timer triggers full reconciliation once in a while, in case partial reconciliation
    85  	// got out of sync or the map was changed underneath us.
    86  	ticker := time.NewTicker(5 * time.Minute)
    87  
    88  	tbl := p.params.L2AnnouncementTable
    89  	txn := p.params.StateDB.WriteTxn(tbl)
    90  	changes, err := tbl.Changes(txn)
    91  	if err != nil {
    92  		txn.Abort()
    93  		return fmt.Errorf("delete tracker: %w", err)
    94  	}
    95  	txn.Commit()
    96  
    97  	defer changes.Close()
    98  
    99  	// At startup, do an initial full reconciliation
   100  	err = p.fullReconciliation(p.params.StateDB.ReadTxn())
   101  	if err != nil {
   102  		log.WithError(err).Error("Error(s) while reconciling l2 responder map")
   103  	}
   104  
   105  	for ctx.Err() == nil {
   106  		p.cycle(ctx, changes, ticker.C)
   107  	}
   108  
   109  	return nil
   110  }
   111  
   112  func (p *l2ResponderReconciler) cycle(
   113  	ctx context.Context,
   114  	changes statedb.ChangeIterator[*tables.L2AnnounceEntry],
   115  	fullReconciliation <-chan time.Time,
   116  ) {
   117  	arMap := p.params.L2ResponderMap
   118  	log := p.params.Logger
   119  
   120  	lr := cachingLinkResolver{nl: p.params.NetLink}
   121  
   122  	process := func(e *tables.L2AnnounceEntry, deleted bool) error {
   123  		// Ignore IPv6 addresses, L2 is IPv4 only
   124  		if e.IP.Is6() {
   125  			return nil
   126  		}
   127  
   128  		idx, err := lr.LinkIndex(e.NetworkInterface)
   129  		if err != nil {
   130  			return fmt.Errorf("link index: %w", err)
   131  		}
   132  
   133  		if deleted {
   134  			err = arMap.Delete(e.IP, uint32(idx))
   135  			if err != nil {
   136  				return fmt.Errorf("delete %s@%d: %w", e.IP, idx, err)
   137  			}
   138  
   139  			return nil
   140  		}
   141  
   142  		err = garpOnNewEntry(arMap, e.IP, idx)
   143  		if err != nil {
   144  			return err
   145  		}
   146  
   147  		err = arMap.Create(e.IP, uint32(idx))
   148  		if err != nil {
   149  			return fmt.Errorf("create %s@%d: %w", e.IP, idx, err)
   150  		}
   151  
   152  		return nil
   153  	}
   154  
   155  	// Partial reconciliation
   156  	for change, _, ok := changes.Next(); ok; change, _, ok = changes.Next() {
   157  		err := process(change.Object, change.Deleted)
   158  		if err != nil {
   159  			log.WithError(err).Error("error during partial reconciliation")
   160  			break
   161  		}
   162  	}
   163  
   164  	txn := p.params.StateDB.ReadTxn()
   165  
   166  	select {
   167  	case <-ctx.Done():
   168  		// Shutdown
   169  		return
   170  
   171  	case <-changes.Watch(txn):
   172  		// There are pending changes in the table, return from the cycle
   173  
   174  	case <-fullReconciliation:
   175  		// Full reconciliation timer fired, perform full reconciliation
   176  
   177  		// The existing `iter` is the result of a `All` query, so this will return all
   178  		// entries in the table for full reconciliation.
   179  		err := p.fullReconciliation(txn)
   180  		if err != nil {
   181  			log.WithError(err).Error("Error(s) while full reconciling l2 responder map")
   182  		}
   183  	}
   184  }
   185  
   186  func (p *l2ResponderReconciler) fullReconciliation(txn statedb.ReadTxn) (err error) {
   187  	var errs error
   188  
   189  	log := p.params.Logger
   190  	tbl := p.params.L2AnnouncementTable
   191  	arMap := p.params.L2ResponderMap
   192  	lr := cachingLinkResolver{nl: p.params.NetLink}
   193  
   194  	log.Debug("l2 announcer table full reconciliation")
   195  
   196  	// Prepare index for desired entries based on map key
   197  	type desiredEntry struct {
   198  		satisfied bool
   199  		entry     *tables.L2AnnounceEntry
   200  	}
   201  	desiredMap := make(map[l2respondermap.L2ResponderKey]desiredEntry)
   202  
   203  	statedb.ProcessEach(tbl.All(txn), func(e *tables.L2AnnounceEntry, _ uint64) error {
   204  		// Ignore IPv6 addresses, L2 is IPv4 only
   205  		if e.IP.Is6() {
   206  			return nil
   207  		}
   208  
   209  		idx, err := lr.LinkIndex(e.NetworkInterface)
   210  		if err != nil {
   211  			errs = errors.Join(errs, err)
   212  			return nil
   213  		}
   214  
   215  		desiredMap[l2respondermap.L2ResponderKey{
   216  			IP:      types.IPv4(e.IP.As4()),
   217  			IfIndex: uint32(idx),
   218  		}] = desiredEntry{
   219  			entry: e,
   220  		}
   221  
   222  		return nil
   223  	})
   224  
   225  	// Loop over all map values, use the desired entries index to see which we want to delete.
   226  	var toDelete []*l2respondermap.L2ResponderKey
   227  	arMap.IterateWithCallback(func(key *l2respondermap.L2ResponderKey, _ *l2respondermap.L2ResponderStats) {
   228  		e, found := desiredMap[*key]
   229  		if !found {
   230  			toDelete = append(toDelete, key)
   231  			return
   232  		}
   233  		e.satisfied = true
   234  	})
   235  
   236  	// Delete all unwanted map values
   237  	for _, del := range toDelete {
   238  		if err := arMap.Delete(netip.AddrFrom4(del.IP), del.IfIndex); err != nil {
   239  			errs = errors.Join(errs, fmt.Errorf("delete %s@%d: %w", del.IP, del.IfIndex, err))
   240  		}
   241  	}
   242  
   243  	// Add map values that do not yet exist
   244  	for key, entry := range desiredMap {
   245  		if entry.satisfied {
   246  			continue
   247  		}
   248  
   249  		err = garpOnNewEntry(arMap, netip.AddrFrom4(key.IP), int(key.IfIndex))
   250  		if err != nil {
   251  			errs = errors.Join(errs, err)
   252  		}
   253  
   254  		if err := arMap.Create(netip.AddrFrom4(key.IP), key.IfIndex); err != nil {
   255  			errs = errors.Join(errs, fmt.Errorf("create %s@%d: %w", key.IP, key.IfIndex, err))
   256  		}
   257  	}
   258  
   259  	return errs
   260  }
   261  
   262  // If the given IP and network interface index does not yet exist in the l2 responder map,
   263  // a failover might have taken place. Therefor we should send out a gARP reply to let
   264  // the local network know the IP has moved to minimize downtime due to ARP caching.
   265  func garpOnNewEntry(arMap l2respondermap.Map, ip netip.Addr, ifIndex int) error {
   266  	_, err := arMap.Lookup(ip, uint32(ifIndex))
   267  	if !errors.Is(err, ebpf.ErrKeyNotExist) {
   268  		return nil
   269  	}
   270  
   271  	err = garp.SendOnInterfaceIdx(ifIndex, ip)
   272  	if err != nil {
   273  		return fmt.Errorf("garp %s@%d: %w", ip, ifIndex, err)
   274  	}
   275  
   276  	return nil
   277  }
   278  
   279  type cachingLinkResolver struct {
   280  	nl    linkByNamer
   281  	cache map[string]int
   282  }
   283  
   284  // LinkIndex returns the link index for a given netdev name, from its cache or netlink
   285  func (clr *cachingLinkResolver) LinkIndex(name string) (int, error) {
   286  	if clr.cache == nil {
   287  		clr.cache = make(map[string]int)
   288  	}
   289  
   290  	idx, found := clr.cache[name]
   291  	if found {
   292  		return idx, nil
   293  	}
   294  
   295  	link, err := clr.nl.LinkByName(name)
   296  	if err != nil {
   297  		return 0, err
   298  	}
   299  
   300  	idx = link.Attrs().Index
   301  	clr.cache[name] = idx
   302  
   303  	return idx, nil
   304  }