github.com/zhyoulun/cilium@v1.6.12/pkg/maps/ipcache/ipcache.go (about) 1 // Copyright 2016-2019 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package ipcache 16 17 import ( 18 "fmt" 19 "net" 20 "sync" 21 "unsafe" 22 23 "github.com/cilium/cilium/common/types" 24 "github.com/cilium/cilium/pkg/bpf" 25 "github.com/cilium/cilium/pkg/logging" 26 "github.com/cilium/cilium/pkg/logging/logfields" 27 28 "golang.org/x/sys/unix" 29 ) 30 31 var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "map-ipcache") 32 33 const ( 34 // MaxEntries is the maximum number of keys that can be present in the 35 // RemoteEndpointMap. 36 MaxEntries = 512000 37 38 // Name is the canonical name for the IPCache map on the filesystem. 39 Name = "cilium_ipcache" 40 41 // maxPrefixLengths is an approximation of how many different CIDR 42 // prefix lengths may be supported by the BPF datapath without causing 43 // BPF code generation to exceed the verifier instruction limit. 44 // It applies to Linux versions that lack support for LPM, ie < v4.11. 45 // 46 // This is based upon the defines in bpf/lxc_config.h, which in turn 47 // are derived by building the bpf/ directory and running the script 48 // test/bpf/verifier-test.sh, then adjusting the number of unique 49 // prefix lengths until the script passes. 50 maxPrefixLengths6 = 4 51 maxPrefixLengths4 = 18 52 ) 53 54 // Key implements the bpf.MapKey interface. 55 // 56 // Must be in sync with struct ipcache_key in <bpf/lib/maps.h> 57 // +k8s:deepcopy-gen=true 58 // +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapKey 59 type Key struct { 60 Prefixlen uint32 `align:"lpm_key"` 61 Pad1 uint16 `align:"pad1"` 62 Pad2 uint8 `align:"pad2"` 63 Family uint8 `align:"family"` 64 // represents both IPv6 and IPv4 (in the lowest four bytes) 65 IP types.IPv6 `align:"$union0"` 66 } 67 68 // GetKeyPtr returns the unsafe pointer to the BPF key 69 func (k *Key) GetKeyPtr() unsafe.Pointer { return unsafe.Pointer(k) } 70 71 // NewValue returns a new empty instance of the structure representing the BPF 72 // map value 73 func (k Key) NewValue() bpf.MapValue { return &RemoteEndpointInfo{} } 74 75 func getStaticPrefixBits() uint32 { 76 staticMatchSize := unsafe.Sizeof(Key{}) 77 staticMatchSize -= unsafe.Sizeof(Key{}.Prefixlen) 78 staticMatchSize -= unsafe.Sizeof(Key{}.IP) 79 return uint32(staticMatchSize) * 8 80 } 81 82 func (k Key) String() string { 83 prefixLen := k.Prefixlen - getStaticPrefixBits() 84 switch k.Family { 85 case bpf.EndpointKeyIPv4: 86 ipStr := net.IP(k.IP[:net.IPv4len]).String() 87 return fmt.Sprintf("%s/%d", ipStr, prefixLen) 88 case bpf.EndpointKeyIPv6: 89 ipStr := k.IP.String() 90 return fmt.Sprintf("%s/%d", ipStr, prefixLen) 91 } 92 return fmt.Sprintf("<unknown>") 93 } 94 95 // getPrefixLen determines the length that should be set inside the Key so that 96 // the lookup prefix is correct in the BPF map key. The specified 'prefixBits' 97 // indicates the number of bits in the IP that must match to match the entry in 98 // the BPF ipcache. 99 func getPrefixLen(prefixBits int) uint32 { 100 return getStaticPrefixBits() + uint32(prefixBits) 101 } 102 103 // NewKey returns an Key based on the provided IP address and mask. The address 104 // family is automatically detected 105 func NewKey(ip net.IP, mask net.IPMask) Key { 106 result := Key{} 107 108 ones, _ := mask.Size() 109 if ip4 := ip.To4(); ip4 != nil { 110 if mask == nil { 111 ones = net.IPv4len * 8 112 } 113 result.Prefixlen = getPrefixLen(ones) 114 result.Family = bpf.EndpointKeyIPv4 115 copy(result.IP[:], ip4) 116 } else { 117 if mask == nil { 118 ones = net.IPv6len * 8 119 } 120 result.Prefixlen = getPrefixLen(ones) 121 result.Family = bpf.EndpointKeyIPv6 122 copy(result.IP[:], ip) 123 } 124 125 return result 126 } 127 128 // RemoteEndpointInfo implements the bpf.MapValue interface. It contains the 129 // security identity of a remote endpoint. 130 // +k8s:deepcopy-gen=true 131 // +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapValue 132 type RemoteEndpointInfo struct { 133 SecurityIdentity uint32 `align:"sec_label"` 134 TunnelEndpoint types.IPv4 `align:"tunnel_endpoint"` 135 Key uint8 `align:"key"` 136 } 137 138 func (v *RemoteEndpointInfo) String() string { 139 return fmt.Sprintf("%d %d %s", v.SecurityIdentity, v.Key, v.TunnelEndpoint) 140 } 141 142 // GetValuePtr returns the unsafe pointer to the BPF value. 143 func (v *RemoteEndpointInfo) GetValuePtr() unsafe.Pointer { return unsafe.Pointer(v) } 144 145 // Map represents an IPCache BPF map. 146 type Map struct { 147 bpf.Map 148 149 // detectDeleteSupport is used to initialize 'supportsDelete' the first 150 // time that a delete is issued from the datapath. 151 detectDeleteSupport sync.Once 152 153 // deleteSupport is set to 'true' initially, then is updated to set 154 // whether the underlying kernel supports delete operations on the map 155 // the first time that supportsDelete() is called. 156 deleteSupport bool 157 } 158 159 // NewMap instantiates a Map. 160 func NewMap(name string) *Map { 161 return &Map{ 162 Map: *bpf.NewMap( 163 name, 164 bpf.BPF_MAP_TYPE_LPM_TRIE, 165 &Key{}, 166 int(unsafe.Sizeof(Key{})), 167 &RemoteEndpointInfo{}, 168 int(unsafe.Sizeof(RemoteEndpointInfo{})), 169 MaxEntries, 170 bpf.BPF_F_NO_PREALLOC, 0, 171 bpf.ConvertKeyValue, 172 ).WithCache(), 173 deleteSupport: true, 174 } 175 } 176 177 // delete removes a key from the ipcache BPF map, and returns whether the 178 // kernel supports the delete operation (true) or not (false), and any error 179 // that may have occurred while attempting to delete the entry. 180 // 181 // If "overwrite" is true, then if delete is not supported the entry's value 182 // will be overwritten with zeroes to signify that it's an invalid entry. 183 func (m *Map) delete(k bpf.MapKey, overwrite bool) (bool, error) { 184 // Older kernels do not support deletion of LPM map entries so zero out 185 // the entry instead of attempting a deletion 186 err, errno := m.DeleteWithErrno(k) 187 if errno == unix.ENOSYS { 188 if overwrite { 189 return false, m.Update(k, &RemoteEndpointInfo{}) 190 } 191 return false, nil 192 } 193 194 return true, err 195 } 196 197 // Delete removes a key from the ipcache BPF map 198 func (m *Map) Delete(k bpf.MapKey) error { 199 _, err := m.delete(k, true) 200 return err 201 } 202 203 // GetMaxPrefixLengths determines how many unique prefix lengths are supported 204 // simultaneously based on the underlying BPF map type in use. 205 func (m *Map) GetMaxPrefixLengths(ipv6 bool) (count int) { 206 if IPCache.MapType == bpf.BPF_MAP_TYPE_LPM_TRIE { 207 if ipv6 { 208 return net.IPv6len*8 + 1 209 } else { 210 return net.IPv4len*8 + 1 211 } 212 } 213 if ipv6 { 214 return maxPrefixLengths6 215 } 216 return maxPrefixLengths4 217 } 218 219 func (m *Map) supportsDelete() bool { 220 m.detectDeleteSupport.Do(func() { 221 // Entry is invalid because IPCache needs a family specified. 222 invalidEntry := &Key{} 223 m.deleteSupport, _ = m.delete(invalidEntry, false) 224 log.Debugf("Detected IPCache delete operation support: %t", m.deleteSupport) 225 226 // In addition to delete support, ability to dump the map is 227 // also required in order to run the garbage collector which 228 // will iterate over the map and delete entries. 229 if m.deleteSupport { 230 err := m.Dump(map[string][]string{}) 231 m.deleteSupport = err == nil 232 log.Debugf("Detected IPCache dump operation support: %t", m.deleteSupport) 233 } 234 235 if !m.deleteSupport { 236 log.Infof("Periodic IPCache map swap will occur due to lack of kernel support for LPM delete operation. Upgrade to Linux 4.15 or higher to avoid this.") 237 } 238 }) 239 return m.deleteSupport 240 } 241 242 // SupportsDelete determines whether the underlying kernel map type supports 243 // the delete operation. 244 func SupportsDelete() bool { 245 return IPCache.supportsDelete() 246 } 247 248 // BackedByLPM returns true if the IPCache is backed by a proper LPM 249 // implementation (provided by Linux kernels 4.11 or later), false otherwise. 250 func BackedByLPM() bool { 251 return IPCache.MapType == bpf.BPF_MAP_TYPE_LPM_TRIE 252 } 253 254 var ( 255 // IPCache is a mapping of all endpoint IPs in the cluster which this 256 // Cilium agent is a part of to their corresponding security identities. 257 // It is a singleton; there is only one such map per agent. 258 IPCache = NewMap(Name) 259 ) 260 261 // Reopen attempts to close and re-open the IPCache map at the standard path 262 // on the filesystem. 263 func Reopen() error { 264 return IPCache.Map.Reopen() 265 }