github.com/cilium/cilium@v1.16.2/pkg/datapath/iptables/ipset/ipset.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package ipset
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"context"
    10  	"fmt"
    11  	"net/netip"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  
    16  	"github.com/cilium/hive/cell"
    17  	"github.com/cilium/hive/job"
    18  	"github.com/cilium/statedb"
    19  	"github.com/cilium/statedb/reconciler"
    20  	"github.com/sirupsen/logrus"
    21  	"k8s.io/apimachinery/pkg/util/sets"
    22  
    23  	"github.com/cilium/cilium/pkg/datapath/tables"
    24  	"github.com/cilium/cilium/pkg/lock"
    25  )
    26  
// Names of the Cilium-managed ipsets holding node addresses, one per address
// family.
const (
	// CiliumNodeIPSetV4 is the name of the IPv4 node ipset.
	CiliumNodeIPSetV4 = "cilium_node_set_v4"
	// CiliumNodeIPSetV6 is the name of the IPv6 node ipset.
	CiliumNodeIPSetV6 = "cilium_node_set_v6"
)
    31  
// Family is the address family of an ipset. Its string value is what gets
// passed to the "family" option when the set is created.
type Family string

// Supported ipset address families: INetFamily is used for the IPv4 node set
// and INet6Family for the IPv6 one.
const (
	INetFamily  Family = "inet"
	INet6Family Family = "inet6"
)
    38  
// AddrSet is a set of IP addresses, used to report the members of an ipset.
type AddrSet = sets.Set[netip.Addr]
    40  
// Manager handles the kernel IP sets configuration
type Manager interface {
	// NewInitializer registers a new initializer and returns it. The manager
	// implementation in this file waits for every initializer to call InitDone
	// before enabling prune operations, and panics if an initializer is
	// requested after the manager has started.
	NewInitializer() Initializer

	// AddToIPSet adds the addresses to the ipset with the given name and
	// family.
	AddToIPSet(name string, family Family, addrs ...netip.Addr)

	// RemoveFromIPSet removes the addresses from the ipset with the given name.
	RemoveFromIPSet(name string, addrs ...netip.Addr)
}
    47  
// Initializer is the handle returned by Manager.NewInitializer. Its holder
// signals that it has finished its initial ipset updates by calling InitDone.
type Initializer interface {
	// InitDone marks this initializer as completed.
	InitDone()
}
    51  
// initializer is the Initializer implementation handed out by the manager.
type initializer struct {
	// once guarantees that wg.Done is invoked a single time, no matter how
	// often InitDone is called; wg is the wait group shared with the manager.
	once sync.Once
	wg   *lock.StoppableWaitGroup
}
    56  
    57  func (i *initializer) InitDone() {
    58  	// further calls to InitDone will be a no-op
    59  	i.once.Do(i.wg.Done)
    60  }
    61  
// manager is the Manager implementation. It records the desired ipset members
// in a StateDB table; entries are inserted with a pending status, presumably
// to be applied to the kernel by the reconciler.
type manager struct {
	// logger is used for diagnostics. enabled mirrors the NodeIPSetNeeded
	// configuration: when false, AddToIPSet and RemoveFromIPSet are no-ops.
	logger  logrus.FieldLogger
	enabled bool

	// db and table hold the desired ipset entries.
	db    *statedb.DB
	table statedb.RWTable[*tables.IPSetEntry]

	// ipset runs the ipset commands issued directly by the manager.
	ipset *ipset

	// reconciler and ops are used at the end of the initialization to enable
	// and trigger pruning.
	reconciler reconciler.Reconciler[*tables.IPSetEntry]
	ops        *ops

	// started is set by init when the manager is enabled; from then on no new
	// initializer may be taken. startedWG tracks the outstanding initializers.
	started   atomic.Bool
	startedWG *lock.StoppableWaitGroup
}
    77  
    78  func (m *manager) NewInitializer() Initializer {
    79  	if m.started.Load() {
    80  		panic("an initializer to the ipset manager cannot be taken after the manager started")
    81  	}
    82  	m.startedWG.Add()
    83  	return &initializer{wg: m.startedWG}
    84  }
    85  
    86  // AddToIPSet adds the addresses to the ipset with given name and family.
    87  // It creates the ipset if it doesn't already exist and doesn't error out
    88  // if either the ipset or the IP already exist.
    89  func (m *manager) AddToIPSet(name string, family Family, addrs ...netip.Addr) {
    90  	if !m.enabled {
    91  		return
    92  	}
    93  
    94  	txn := m.db.WriteTxn(m.table)
    95  	defer txn.Abort()
    96  
    97  	for _, addr := range addrs {
    98  		key := tables.IPSetEntryKey{
    99  			Name: name,
   100  			Addr: addr,
   101  		}
   102  		if _, _, found := m.table.Get(txn, tables.IPSetEntryIndex.Query(key)); found {
   103  			continue
   104  		}
   105  		_, _, _ = m.table.Insert(txn, &tables.IPSetEntry{
   106  			Name:   name,
   107  			Family: string(family),
   108  			Addr:   addr,
   109  			Status: reconciler.StatusPending(),
   110  		})
   111  	}
   112  
   113  	txn.Commit()
   114  }
   115  
   116  // RemoveFromIPSet removes the addresses from the specified ipset.
   117  func (m *manager) RemoveFromIPSet(name string, addrs ...netip.Addr) {
   118  	if !m.enabled {
   119  		return
   120  	}
   121  
   122  	txn := m.db.WriteTxn(m.table)
   123  	defer txn.Abort()
   124  
   125  	for _, addr := range addrs {
   126  		key := tables.IPSetEntryKey{
   127  			Name: name,
   128  			Addr: addr,
   129  		}
   130  		obj, _, found := m.table.Get(txn, tables.IPSetEntryIndex.Query(key))
   131  		if !found {
   132  			continue
   133  		}
   134  		m.table.Delete(txn, obj)
   135  	}
   136  
   137  	txn.Commit()
   138  }
   139  
   140  func newIPSetManager(
   141  	logger logrus.FieldLogger,
   142  	lc cell.Lifecycle,
   143  	jg job.Group,
   144  	health cell.Health,
   145  	db *statedb.DB,
   146  	table statedb.RWTable[*tables.IPSetEntry],
   147  	cfg config,
   148  	ipset *ipset,
   149  	reconciler reconciler.Reconciler[*tables.IPSetEntry],
   150  	ops *ops,
   151  ) Manager {
   152  	mgr := &manager{
   153  		logger:     logger,
   154  		enabled:    cfg.NodeIPSetNeeded,
   155  		db:         db,
   156  		table:      table,
   157  		ipset:      ipset,
   158  		reconciler: reconciler,
   159  		ops:        ops,
   160  		startedWG:  lock.NewStoppableWaitGroup(),
   161  	}
   162  
   163  	lc.Append(cell.Hook{
   164  		OnStart: func(ctx cell.HookContext) error {
   165  			if !cfg.NodeIPSetNeeded {
   166  				return nil
   167  			}
   168  
   169  			// When NodeIPSetNeeded is set, node ipsets must be created even if empty,
   170  			// to avoid failures when referencing them in iptables masquerading rules.
   171  			if err := ipset.create(ctx, CiliumNodeIPSetV4, string(INetFamily)); err != nil {
   172  				return fmt.Errorf("error while creating ipset %s", CiliumNodeIPSetV4)
   173  			}
   174  			if err := ipset.create(ctx, CiliumNodeIPSetV6, string(INet6Family)); err != nil {
   175  				return fmt.Errorf("error while creating ipset %s", CiliumNodeIPSetV6)
   176  			}
   177  			return nil
   178  		},
   179  	})
   180  
   181  	jg.Add(job.OneShot("ipset-init-finalizer", mgr.init))
   182  
   183  	return mgr
   184  }
   185  
   186  func (m *manager) init(ctx context.Context, _ cell.Health) error {
   187  	if !m.enabled {
   188  		// If node ipsets are not needed, clear the Cilium managed ones to remove possible stale entries.
   189  		for _, ciliumNodeIPSet := range []string{CiliumNodeIPSetV4, CiliumNodeIPSetV6} {
   190  			if err := m.ipset.remove(ctx, ciliumNodeIPSet); err != nil {
   191  				m.logger.Info("Unable to remove stale ipset. This is usually due to a stale iptables rule referring to it. "+
   192  					"The set will not be removed. This is harmless and it will be removed at the next Cilium restart, when the stale iptables rule has been removed.",
   193  					"ipset", ciliumNodeIPSet,
   194  					"error", err)
   195  			}
   196  		}
   197  		return nil
   198  	}
   199  
   200  	// no further initializers after manager started
   201  	m.started.Store(true)
   202  	m.startedWG.Stop()
   203  
   204  	// wait for all existing initializers to complete before finalizing manager
   205  	// initialization and allowing prune operations in the ipset reconciler
   206  	select {
   207  	case <-ctx.Done():
   208  		return nil
   209  	case <-m.startedWG.WaitChannel():
   210  	}
   211  
   212  	m.ops.enablePrune()
   213  	m.reconciler.Prune()
   214  
   215  	return nil
   216  }
   217  
// ipset drives the "ipset" command through the embedded executable, which
// can be replaced by a mock in tests.
type ipset struct {
	executable

	// log receives a debug message for every command started through run.
	log logrus.FieldLogger
}
   223  
   224  func (i *ipset) create(ctx context.Context, name string, family string) error {
   225  	if _, err := i.run(ctx, "create", name, "iphash", "family", family, "-exist"); err != nil {
   226  		return fmt.Errorf("failed to create ipset %s: %w", name, err)
   227  	}
   228  	return nil
   229  }
   230  
   231  func (i *ipset) remove(ctx context.Context, name string) error {
   232  	if _, err := i.run(ctx, "list", name); err != nil {
   233  		// ipset does not exist, nothing to remove
   234  		return nil
   235  	}
   236  	if _, err := i.run(ctx, "destroy", name); err != nil {
   237  		return fmt.Errorf("failed to remove ipset %s: %w", name, err)
   238  	}
   239  	return nil
   240  }
   241  
   242  func (i *ipset) list(ctx context.Context, name string) (AddrSet, error) {
   243  	out, err := i.run(ctx, "list", name)
   244  	if err != nil {
   245  		return AddrSet{}, fmt.Errorf("failed to list ipset %s: %w", name, err)
   246  	}
   247  
   248  	addrs := AddrSet{}
   249  	scanner := bufio.NewScanner(bytes.NewReader(out))
   250  	for scanner.Scan() {
   251  		line := scanner.Text()
   252  		addr, err := netip.ParseAddr(line)
   253  		if err != nil {
   254  			continue
   255  		}
   256  		addrs = addrs.Insert(addr)
   257  	}
   258  	if err := scanner.Err(); err != nil {
   259  		return AddrSet{}, fmt.Errorf("failed to scan ipset %s: %w", name, err)
   260  	}
   261  	return addrs, nil
   262  }
   263  
   264  func (i *ipset) addBatch(ctx context.Context, batch map[string][]netip.Addr) error {
   265  	b := strings.Builder{}
   266  	for name, addrs := range batch {
   267  		for _, addr := range addrs {
   268  			fmt.Fprintf(&b, "add %s %s -exist\n", name, addr)
   269  		}
   270  	}
   271  	_, err := i.exec(ctx, "ipset", b.String(), "restore")
   272  	return err
   273  }
   274  
   275  func (i *ipset) delBatch(ctx context.Context, batch map[string][]netip.Addr) error {
   276  	b := strings.Builder{}
   277  	for name, addrs := range batch {
   278  		for _, addr := range addrs {
   279  			fmt.Fprintf(&b, "del %s %s -exist\n", name, addr)
   280  		}
   281  	}
   282  	_, err := i.exec(ctx, "ipset", b.String(), "restore")
   283  	return err
   284  }
   285  
   286  func (i *ipset) run(ctx context.Context, args ...string) ([]byte, error) {
   287  	i.log.Debugf("Running command %s", i.fullCommand(args...))
   288  	return i.exec(ctx, "ipset", "", args...)
   289  }
   290  
   291  func (i *ipset) fullCommand(args ...string) string {
   292  	return strings.Join(append([]string{"ipset"}, args...), " ")
   293  }
   294  
// executable abstracts the execution of an external command. It is useful to
// ease the creation of a mock ipset command for testing purposes.
type executable interface {
	// exec runs the command name with the arguments arg, passing stdin as its
	// standard input, and returns the output of the command.
	exec(ctx context.Context, name string, stdin string, arg ...string) ([]byte, error)
}
   299  
   300  type funcExecutable func(ctx context.Context, name string, stdin string, arg ...string) ([]byte, error)
   301  
   302  func (f funcExecutable) exec(ctx context.Context, name string, stdin string, arg ...string) ([]byte, error) {
   303  	return f(ctx, name, stdin, arg...)
   304  }