github.com/cilium/cilium@v1.16.2/pkg/maps/ctmap/per_cluster_ctmap.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package ctmap 5 6 import ( 7 "errors" 8 "fmt" 9 "reflect" 10 "strconv" 11 12 "github.com/cilium/ebpf" 13 "k8s.io/apimachinery/pkg/util/sets" 14 15 "github.com/cilium/cilium/pkg/bpf" 16 cmtypes "github.com/cilium/cilium/pkg/clustermesh/types" 17 "github.com/cilium/cilium/pkg/lock" 18 ) 19 20 const ( 21 perClusterCTOuterMapPrefix = "cilium_per_cluster_ct_" 22 ) 23 24 // ClusterOuterMapName returns the name of the outer per-cluster CT map 25 // for the given type. It can be overwritten for testing purposes. 26 var ClusterOuterMapName = clusterOuterMapName 27 28 func clusterOuterMapName(typ mapType) string { 29 return perClusterCTOuterMapPrefix + typ.name() 30 } 31 32 func ClusterOuterMapNameTestOverride(prefix string) { 33 ClusterOuterMapName = func(typ mapType) string { 34 return prefix + "_" + clusterOuterMapName(typ) 35 } 36 } 37 38 // ClusterInnerMapName returns the name of the inner per-cluster NAT map 39 // for the given IP family and cluster ID. 40 func ClusterInnerMapName(typ mapType, clusterID uint32) string { 41 return ClusterOuterMapName(typ) + "_" + strconv.FormatUint(uint64(clusterID), 10) 42 } 43 44 var _ PerClusterCTMapper = (*perClusterCTMaps)(nil) 45 46 // An interface to manage the per-cluster CT maps. 47 type PerClusterCTMapper interface { 48 // Create enforces the presence of the outer per-cluster CT maps. 49 OpenOrCreate() error 50 // Close closes the outer per-cluster CT maps handlers. 51 Close() error 52 53 // CreateClusterNATMaps enforces the presence of the inner maps for 54 // the given cluster ID. It must be called after that OpenOrCreate() 55 // has returned successfully. 56 CreateClusterCTMaps(clusterID uint32) error 57 // DeleteClusterNATMaps deletes the inner maps for the given cluster ID. 58 // It must be called after that OpenOrCreate() has returned successfully. 59 DeleteClusterCTMaps(clusterID uint32) error 60 61 // GetClusterCTMaps returns the per-cluster maps for each known cluster ID. 62 // The returned maps need to be opened by the caller. 63 GetAllClusterCTMaps() []*Map 64 } 65 66 // GetClusterCTMaps returns the per-cluster maps for the given cluster ID. The 67 // returned maps need to be opened by the caller, and are not guaranteed to exist. 68 func GetClusterCTMaps(clusterID uint32, ipv4, ipv6 bool) ([]*Map, error) { 69 maps := NewPerClusterCTMaps(ipv4, ipv6) 70 return maps.getClusterCTMaps(clusterID) 71 } 72 73 // CleanupPerClusterCTMaps deletes the per-cluster CT maps, including the inner ones. 74 func CleanupPerClusterCTMaps(ipv4, ipv6 bool) error { 75 maps := NewPerClusterCTMaps(ipv4, ipv6) 76 return maps.cleanup() 77 } 78 79 // A "real" set of per-cluster CT maps. It implements PerClusterCTMapper. 80 type perClusterCTMaps struct { 81 lock.RWMutex 82 83 tcp4 *PerClusterCTMap 84 any4 *PerClusterCTMap 85 tcp6 *PerClusterCTMap 86 any6 *PerClusterCTMap 87 88 // clusterIDs tracks the inner CT maps that have been created, 89 // to optimize the GetAllClusterCTMaps implementation. 90 clusterIDs sets.Set[uint32] 91 } 92 93 // PerClusterCTMap is a special conntrack map created when we 94 // enable cluster-aware addressing. As the name says, it is 95 // per-cluster and tracks the connection from/to specific 96 // remote clusters. It is implemented as an array-of-maps which 97 // its index is a ClusterID. 98 // 99 // Why can't we use global CT maps? That's because we currently 100 // don't have a good way of extending CT map's key without breaking 101 // user's connection. Thus, instead of extending existing CT map 102 // key with ClusterID, we chose to create CT map per-cluster. When 103 // we have a good way of extending global CT maps in the future, we 104 // should retire this entire file. 105 type PerClusterCTMap struct { 106 *bpf.Map 107 m mapType 108 } 109 110 type PerClusterCTMapKey struct { 111 ClusterID uint32 112 } 113 114 func (k *PerClusterCTMapKey) String() string { return strconv.FormatUint(uint64(k.ClusterID), 10) } 115 func (k *PerClusterCTMapKey) New() bpf.MapKey { return &PerClusterCTMapKey{} } 116 117 type PerClusterCTMapVal struct { 118 Fd uint32 119 } 120 121 func (v *PerClusterCTMapVal) String() string { return fmt.Sprintf("fd=%d", v.Fd) } 122 func (v *PerClusterCTMapVal) New() bpf.MapValue { return &PerClusterCTMapVal{} } 123 124 // NewPerClusterCTMaps returns a new instance of the per-cluster CT maps manager. 125 func NewPerClusterCTMaps(ipv4, ipv6 bool) *perClusterCTMaps { 126 gm := perClusterCTMaps{clusterIDs: sets.New[uint32]()} 127 128 if ipv4 { 129 gm.tcp4 = newPerClusterCTMap(mapTypeIPv4TCPGlobal) 130 gm.any4 = newPerClusterCTMap(mapTypeIPv4AnyGlobal) 131 } 132 133 if ipv6 { 134 gm.tcp6 = newPerClusterCTMap(mapTypeIPv6TCPGlobal) 135 gm.any6 = newPerClusterCTMap(mapTypeIPv6AnyGlobal) 136 } 137 138 return &gm 139 } 140 141 func (gm *perClusterCTMaps) OpenOrCreate() (err error) { 142 gm.Lock() 143 defer gm.Unlock() 144 145 return gm.foreach( 146 func(om *PerClusterCTMap) error { return om.OpenOrCreate() }, 147 ) 148 } 149 150 func (gm *perClusterCTMaps) Close() (err error) { 151 gm.Lock() 152 defer gm.Unlock() 153 154 return gm.foreach( 155 func(om *PerClusterCTMap) error { return om.Close() }, 156 ) 157 } 158 159 func (gm *perClusterCTMaps) CreateClusterCTMaps(clusterID uint32) error { 160 if err := cmtypes.ValidateClusterID(clusterID); err != nil { 161 return err 162 } 163 164 gm.Lock() 165 defer gm.Unlock() 166 167 // We don't rollback the insertion of the current ClusterID in case the maps 168 // creation fails (as we also don't rollback the maps insertion itself). 169 // Indeed, this is only used as an optimization when retrieving all maps 170 // (for the GC process), and non-existing maps will be automatically skipped. 171 gm.clusterIDs.Insert(clusterID) 172 173 return gm.foreach( 174 func(om *PerClusterCTMap) error { return om.createClusterCTMap(clusterID) }, 175 ) 176 } 177 178 func (gm *perClusterCTMaps) DeleteClusterCTMaps(clusterID uint32) error { 179 if err := cmtypes.ValidateClusterID(clusterID); err != nil { 180 return err 181 } 182 183 gm.Lock() 184 defer gm.Unlock() 185 186 // We don't rollback the deletion of the current ClusterID in case the maps 187 // removal fails (as we also don't rollback the maps removal itself). 188 // Indeed, this is only used as an optimization when retrieving all maps 189 // (for the GC process), and the maps are expected to be deleted at this point. 190 gm.clusterIDs.Delete(clusterID) 191 192 return gm.foreach( 193 func(om *PerClusterCTMap) error { return om.deleteClusterCTMap(clusterID) }, 194 ) 195 } 196 197 func (gm *perClusterCTMaps) GetAllClusterCTMaps() []*Map { 198 gm.Lock() 199 defer gm.Unlock() 200 201 var maps []*Map 202 for clusterID := range gm.clusterIDs { 203 gm.foreach(func(om *PerClusterCTMap) error { 204 maps = append(maps, om.newInnerMap(clusterID)) 205 return nil 206 }) 207 } 208 return maps 209 } 210 211 func (gm *perClusterCTMaps) getClusterCTMaps(clusterID uint32) ([]*Map, error) { 212 if err := cmtypes.ValidateClusterID(clusterID); err != nil { 213 return nil, err 214 } 215 216 gm.Lock() 217 defer gm.Unlock() 218 219 var maps []*Map 220 gm.foreach(func(om *PerClusterCTMap) error { 221 maps = append(maps, om.newInnerMap(clusterID)) 222 return nil 223 }) 224 225 return maps, nil 226 } 227 228 func (gm *perClusterCTMaps) cleanup() error { 229 gm.Lock() 230 defer gm.Unlock() 231 232 return gm.foreach(func(om *PerClusterCTMap) error { 233 return om.cleanup() 234 }) 235 } 236 237 func (gm *perClusterCTMaps) foreach(fn func(om *PerClusterCTMap) error) error { 238 var errs []error 239 240 // Attempt to perform the given operation on all maps, and collect all 241 // errors that are encountered. We do not implement a rollback mechanism 242 // in case of failures to keep the overall logic simple, as it is likely 243 // that the consumer of the different methods will nonetheless retry again 244 // the same operation on error. Hence, the rollback would only introduce 245 // additional churn, and it might not be even possible in certain cases 246 // (e.g., for deletion operations, to restore the previous state). 247 for _, om := range []*PerClusterCTMap{gm.tcp4, gm.any4, gm.tcp6, gm.any6} { 248 if om != nil { 249 if err := fn(om); err != nil { 250 errs = append(errs, fmt.Errorf("%s: %w", om.m.name(), err)) 251 } 252 } 253 } 254 255 return errors.Join(errs...) 256 } 257 258 func newPerClusterCTMap(m mapType) *PerClusterCTMap { 259 keySize := reflect.Indirect(reflect.ValueOf(m.key())).Type().Size() 260 inner := &ebpf.MapSpec{ 261 Type: ebpf.LRUHash, 262 KeySize: uint32(keySize), 263 ValueSize: uint32(SizeofCtEntry), 264 MaxEntries: uint32(m.maxEntries()), 265 } 266 267 om := bpf.NewMapWithInnerSpec( 268 ClusterOuterMapName(m), 269 ebpf.ArrayOfMaps, 270 &PerClusterCTMapKey{}, 271 &PerClusterCTMapVal{}, 272 int(cmtypes.ClusterIDMax+1), 273 0, 274 inner, 275 ) 276 277 return &PerClusterCTMap{ 278 Map: om, 279 m: m, 280 } 281 } 282 283 func (om *PerClusterCTMap) newInnerMap(clusterID uint32) *Map { 284 name := ClusterInnerMapName(om.m, clusterID) 285 im := newMap(name, om.m) 286 im.clusterID = clusterID 287 return im 288 } 289 290 func (om *PerClusterCTMap) createClusterCTMap(clusterID uint32) error { 291 im := om.newInnerMap(clusterID) 292 if err := im.OpenOrCreate(); err != nil { 293 return fmt.Errorf("create inner map: %w", err) 294 } 295 296 // Close the file descriptor, but won't unpin because we don't want to 297 // lookup outer map (lookup of map-in-map is slow because it involves 298 // RCU synchronization) and want to open inner map from bpffs. 299 defer im.Close() 300 301 if err := om.Update( 302 &PerClusterCTMapKey{clusterID}, 303 &PerClusterCTMapVal{uint32(im.FD())}, 304 ); err != nil { 305 return fmt.Errorf("update outer CT map: %w", err) 306 } 307 308 return nil 309 } 310 311 func (om *PerClusterCTMap) deleteClusterCTMap(clusterID uint32) error { 312 im := om.newInnerMap(clusterID) 313 if err := im.Unpin(); err != nil { 314 return fmt.Errorf("delete inner map: %w", err) 315 } 316 317 // Detach inner map from outer map. At this point, no 318 // one should have the reference of the inner map after 319 // this call. 320 if _, err := om.SilentDelete(&PerClusterCTMapKey{clusterID}); err != nil { 321 return fmt.Errorf("update outer map: %w", err) 322 } 323 324 return nil 325 } 326 327 func (om *PerClusterCTMap) cleanup() error { 328 var errs []error 329 330 for id := uint32(1); id <= cmtypes.ClusterIDMax; id++ { 331 im := om.newInnerMap(id) 332 if err := im.Unpin(); err != nil { 333 errs = append(errs, fmt.Errorf("delete inner map for cluster ID %v: %w", id, err)) 334 } 335 } 336 337 if err := om.Unpin(); err != nil { 338 errs = append(errs, fmt.Errorf("delete outer map: %w", err)) 339 } 340 341 return errors.Join(errs...) 342 }