github.com/cilium/cilium@v1.16.2/pkg/maps/ctmap/per_cluster_ctmap.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package ctmap
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"reflect"
    10  	"strconv"
    11  
    12  	"github.com/cilium/ebpf"
    13  	"k8s.io/apimachinery/pkg/util/sets"
    14  
    15  	"github.com/cilium/cilium/pkg/bpf"
    16  	cmtypes "github.com/cilium/cilium/pkg/clustermesh/types"
    17  	"github.com/cilium/cilium/pkg/lock"
    18  )
    19  
const (
	// perClusterCTOuterMapPrefix is the name prefix shared by all the outer
	// per-cluster CT maps. The name of the map type is appended to it to
	// obtain the default outer map name.
	perClusterCTOuterMapPrefix = "cilium_per_cluster_ct_"
)
    23  
// ClusterOuterMapName returns the name of the outer per-cluster CT map
// for the given type. It is a variable, rather than a plain function, so
// that it can be overridden for testing purposes (see
// ClusterOuterMapNameTestOverride).
var ClusterOuterMapName = clusterOuterMapName
    27  
    28  func clusterOuterMapName(typ mapType) string {
    29  	return perClusterCTOuterMapPrefix + typ.name()
    30  }
    31  
    32  func ClusterOuterMapNameTestOverride(prefix string) {
    33  	ClusterOuterMapName = func(typ mapType) string {
    34  		return prefix + "_" + clusterOuterMapName(typ)
    35  	}
    36  }
    37  
    38  // ClusterInnerMapName returns the name of the inner per-cluster NAT map
    39  // for the given IP family and cluster ID.
    40  func ClusterInnerMapName(typ mapType, clusterID uint32) string {
    41  	return ClusterOuterMapName(typ) + "_" + strconv.FormatUint(uint64(clusterID), 10)
    42  }
    43  
// Compile-time assertion that *perClusterCTMaps implements PerClusterCTMapper.
var _ PerClusterCTMapper = (*perClusterCTMaps)(nil)
    45  
// PerClusterCTMapper is an interface to manage the per-cluster CT maps.
type PerClusterCTMapper interface {
	// OpenOrCreate enforces the presence of the outer per-cluster CT maps.
	OpenOrCreate() error
	// Close closes the outer per-cluster CT maps handlers.
	Close() error

	// CreateClusterCTMaps enforces the presence of the inner maps for
	// the given cluster ID. It must be called after OpenOrCreate()
	// has returned successfully.
	CreateClusterCTMaps(clusterID uint32) error
	// DeleteClusterCTMaps deletes the inner maps for the given cluster ID.
	// It must be called after OpenOrCreate() has returned successfully.
	DeleteClusterCTMaps(clusterID uint32) error

	// GetAllClusterCTMaps returns the per-cluster maps for each known cluster ID.
	// The returned maps need to be opened by the caller.
	GetAllClusterCTMaps() []*Map
}
    65  
    66  // GetClusterCTMaps returns the per-cluster maps for the given cluster ID. The
    67  // returned maps need to be opened by the caller, and are not guaranteed to exist.
    68  func GetClusterCTMaps(clusterID uint32, ipv4, ipv6 bool) ([]*Map, error) {
    69  	maps := NewPerClusterCTMaps(ipv4, ipv6)
    70  	return maps.getClusterCTMaps(clusterID)
    71  }
    72  
    73  // CleanupPerClusterCTMaps deletes the per-cluster CT maps, including the inner ones.
    74  func CleanupPerClusterCTMaps(ipv4, ipv6 bool) error {
    75  	maps := NewPerClusterCTMaps(ipv4, ipv6)
    76  	return maps.cleanup()
    77  }
    78  
// perClusterCTMaps is the "real" set of per-cluster CT maps. It implements
// PerClusterCTMapper.
type perClusterCTMaps struct {
	// Embedded lock serializing the operations on the outer maps and on
	// clusterIDs.
	lock.RWMutex

	// Outer maps, one for each CT map type. An entry is nil when the
	// corresponding IP family was not enabled in NewPerClusterCTMaps.
	tcp4 *PerClusterCTMap
	any4 *PerClusterCTMap
	tcp6 *PerClusterCTMap
	any6 *PerClusterCTMap

	// clusterIDs tracks the inner CT maps that have been created,
	// to optimize the GetAllClusterCTMaps implementation.
	clusterIDs sets.Set[uint32]
}
    92  
// PerClusterCTMap is a special conntrack map created when we
// enable cluster-aware addressing. As the name says, it is
// per-cluster and tracks the connections from/to specific
// remote clusters. It is implemented as an array-of-maps
// indexed by ClusterID.
//
// Why can't we use global CT maps? That's because we currently
// don't have a good way of extending the CT map's key without
// breaking users' connections. Thus, instead of extending the
// existing CT map key with the ClusterID, we chose to create one
// CT map per cluster. When we have a good way of extending global
// CT maps in the future, we should retire this entire file.
type PerClusterCTMap struct {
	// Map is the outer (array-of-maps) BPF map.
	*bpf.Map
	// m is the CT map type served by this outer map. It determines the
	// names of the outer and inner maps, and the parameters of the inner ones.
	m mapType
}
   109  
   110  type PerClusterCTMapKey struct {
   111  	ClusterID uint32
   112  }
   113  
   114  func (k *PerClusterCTMapKey) String() string  { return strconv.FormatUint(uint64(k.ClusterID), 10) }
   115  func (k *PerClusterCTMapKey) New() bpf.MapKey { return &PerClusterCTMapKey{} }
   116  
   117  type PerClusterCTMapVal struct {
   118  	Fd uint32
   119  }
   120  
   121  func (v *PerClusterCTMapVal) String() string    { return fmt.Sprintf("fd=%d", v.Fd) }
   122  func (v *PerClusterCTMapVal) New() bpf.MapValue { return &PerClusterCTMapVal{} }
   123  
   124  // NewPerClusterCTMaps returns a new instance of the per-cluster CT maps manager.
   125  func NewPerClusterCTMaps(ipv4, ipv6 bool) *perClusterCTMaps {
   126  	gm := perClusterCTMaps{clusterIDs: sets.New[uint32]()}
   127  
   128  	if ipv4 {
   129  		gm.tcp4 = newPerClusterCTMap(mapTypeIPv4TCPGlobal)
   130  		gm.any4 = newPerClusterCTMap(mapTypeIPv4AnyGlobal)
   131  	}
   132  
   133  	if ipv6 {
   134  		gm.tcp6 = newPerClusterCTMap(mapTypeIPv6TCPGlobal)
   135  		gm.any6 = newPerClusterCTMap(mapTypeIPv6AnyGlobal)
   136  	}
   137  
   138  	return &gm
   139  }
   140  
   141  func (gm *perClusterCTMaps) OpenOrCreate() (err error) {
   142  	gm.Lock()
   143  	defer gm.Unlock()
   144  
   145  	return gm.foreach(
   146  		func(om *PerClusterCTMap) error { return om.OpenOrCreate() },
   147  	)
   148  }
   149  
   150  func (gm *perClusterCTMaps) Close() (err error) {
   151  	gm.Lock()
   152  	defer gm.Unlock()
   153  
   154  	return gm.foreach(
   155  		func(om *PerClusterCTMap) error { return om.Close() },
   156  	)
   157  }
   158  
   159  func (gm *perClusterCTMaps) CreateClusterCTMaps(clusterID uint32) error {
   160  	if err := cmtypes.ValidateClusterID(clusterID); err != nil {
   161  		return err
   162  	}
   163  
   164  	gm.Lock()
   165  	defer gm.Unlock()
   166  
   167  	// We don't rollback the insertion of the current ClusterID in case the maps
   168  	// creation fails (as we also don't rollback the maps insertion itself).
   169  	// Indeed, this is only used as an optimization when retrieving all maps
   170  	// (for the GC process), and non-existing maps will be automatically skipped.
   171  	gm.clusterIDs.Insert(clusterID)
   172  
   173  	return gm.foreach(
   174  		func(om *PerClusterCTMap) error { return om.createClusterCTMap(clusterID) },
   175  	)
   176  }
   177  
   178  func (gm *perClusterCTMaps) DeleteClusterCTMaps(clusterID uint32) error {
   179  	if err := cmtypes.ValidateClusterID(clusterID); err != nil {
   180  		return err
   181  	}
   182  
   183  	gm.Lock()
   184  	defer gm.Unlock()
   185  
   186  	// We don't rollback the deletion of the current ClusterID in case the maps
   187  	// removal fails (as we also don't rollback the maps removal itself).
   188  	// Indeed, this is only used as an optimization when retrieving all maps
   189  	// (for the GC process), and the maps are expected to be deleted at this point.
   190  	gm.clusterIDs.Delete(clusterID)
   191  
   192  	return gm.foreach(
   193  		func(om *PerClusterCTMap) error { return om.deleteClusterCTMap(clusterID) },
   194  	)
   195  }
   196  
   197  func (gm *perClusterCTMaps) GetAllClusterCTMaps() []*Map {
   198  	gm.Lock()
   199  	defer gm.Unlock()
   200  
   201  	var maps []*Map
   202  	for clusterID := range gm.clusterIDs {
   203  		gm.foreach(func(om *PerClusterCTMap) error {
   204  			maps = append(maps, om.newInnerMap(clusterID))
   205  			return nil
   206  		})
   207  	}
   208  	return maps
   209  }
   210  
   211  func (gm *perClusterCTMaps) getClusterCTMaps(clusterID uint32) ([]*Map, error) {
   212  	if err := cmtypes.ValidateClusterID(clusterID); err != nil {
   213  		return nil, err
   214  	}
   215  
   216  	gm.Lock()
   217  	defer gm.Unlock()
   218  
   219  	var maps []*Map
   220  	gm.foreach(func(om *PerClusterCTMap) error {
   221  		maps = append(maps, om.newInnerMap(clusterID))
   222  		return nil
   223  	})
   224  
   225  	return maps, nil
   226  }
   227  
   228  func (gm *perClusterCTMaps) cleanup() error {
   229  	gm.Lock()
   230  	defer gm.Unlock()
   231  
   232  	return gm.foreach(func(om *PerClusterCTMap) error {
   233  		return om.cleanup()
   234  	})
   235  }
   236  
   237  func (gm *perClusterCTMaps) foreach(fn func(om *PerClusterCTMap) error) error {
   238  	var errs []error
   239  
   240  	// Attempt to perform the given operation on all maps, and collect all
   241  	// errors that are encountered. We do not implement a rollback mechanism
   242  	// in case of failures to keep the overall logic simple, as it is likely
   243  	// that the consumer of the different methods will nonetheless retry again
   244  	// the same operation on error. Hence, the rollback would only introduce
   245  	// additional churn, and it might not be even possible in certain cases
   246  	// (e.g., for deletion operations, to restore the previous state).
   247  	for _, om := range []*PerClusterCTMap{gm.tcp4, gm.any4, gm.tcp6, gm.any6} {
   248  		if om != nil {
   249  			if err := fn(om); err != nil {
   250  				errs = append(errs, fmt.Errorf("%s: %w", om.m.name(), err))
   251  			}
   252  		}
   253  	}
   254  
   255  	return errors.Join(errs...)
   256  }
   257  
   258  func newPerClusterCTMap(m mapType) *PerClusterCTMap {
   259  	keySize := reflect.Indirect(reflect.ValueOf(m.key())).Type().Size()
   260  	inner := &ebpf.MapSpec{
   261  		Type:       ebpf.LRUHash,
   262  		KeySize:    uint32(keySize),
   263  		ValueSize:  uint32(SizeofCtEntry),
   264  		MaxEntries: uint32(m.maxEntries()),
   265  	}
   266  
   267  	om := bpf.NewMapWithInnerSpec(
   268  		ClusterOuterMapName(m),
   269  		ebpf.ArrayOfMaps,
   270  		&PerClusterCTMapKey{},
   271  		&PerClusterCTMapVal{},
   272  		int(cmtypes.ClusterIDMax+1),
   273  		0,
   274  		inner,
   275  	)
   276  
   277  	return &PerClusterCTMap{
   278  		Map: om,
   279  		m:   m,
   280  	}
   281  }
   282  
   283  func (om *PerClusterCTMap) newInnerMap(clusterID uint32) *Map {
   284  	name := ClusterInnerMapName(om.m, clusterID)
   285  	im := newMap(name, om.m)
   286  	im.clusterID = clusterID
   287  	return im
   288  }
   289  
   290  func (om *PerClusterCTMap) createClusterCTMap(clusterID uint32) error {
   291  	im := om.newInnerMap(clusterID)
   292  	if err := im.OpenOrCreate(); err != nil {
   293  		return fmt.Errorf("create inner map: %w", err)
   294  	}
   295  
   296  	// Close the file descriptor, but won't unpin because we don't want to
   297  	// lookup outer map (lookup of map-in-map is slow because it involves
   298  	// RCU synchronization) and want to open inner map from bpffs.
   299  	defer im.Close()
   300  
   301  	if err := om.Update(
   302  		&PerClusterCTMapKey{clusterID},
   303  		&PerClusterCTMapVal{uint32(im.FD())},
   304  	); err != nil {
   305  		return fmt.Errorf("update outer CT map: %w", err)
   306  	}
   307  
   308  	return nil
   309  }
   310  
   311  func (om *PerClusterCTMap) deleteClusterCTMap(clusterID uint32) error {
   312  	im := om.newInnerMap(clusterID)
   313  	if err := im.Unpin(); err != nil {
   314  		return fmt.Errorf("delete inner map: %w", err)
   315  	}
   316  
   317  	// Detach inner map from outer map. At this point, no
   318  	// one should have the reference of the inner map after
   319  	// this call.
   320  	if _, err := om.SilentDelete(&PerClusterCTMapKey{clusterID}); err != nil {
   321  		return fmt.Errorf("update outer map: %w", err)
   322  	}
   323  
   324  	return nil
   325  }
   326  
   327  func (om *PerClusterCTMap) cleanup() error {
   328  	var errs []error
   329  
   330  	for id := uint32(1); id <= cmtypes.ClusterIDMax; id++ {
   331  		im := om.newInnerMap(id)
   332  		if err := im.Unpin(); err != nil {
   333  			errs = append(errs, fmt.Errorf("delete inner map for cluster ID %v: %w", id, err))
   334  		}
   335  	}
   336  
   337  	if err := om.Unpin(); err != nil {
   338  		errs = append(errs, fmt.Errorf("delete outer map: %w", err))
   339  	}
   340  
   341  	return errors.Join(errs...)
   342  }