github.com/zhyoulun/cilium@v1.6.12/pkg/maps/ipcache/ipcache.go (about)

     1  // Copyright 2016-2019 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package ipcache
    16  
    17  import (
    18  	"fmt"
    19  	"net"
    20  	"sync"
    21  	"unsafe"
    22  
    23  	"github.com/cilium/cilium/common/types"
    24  	"github.com/cilium/cilium/pkg/bpf"
    25  	"github.com/cilium/cilium/pkg/logging"
    26  	"github.com/cilium/cilium/pkg/logging/logfields"
    27  
    28  	"golang.org/x/sys/unix"
    29  )
    30  
// log is the package-wide logger; every entry it emits carries the
// "map-ipcache" value in the logfields.LogSubsys field.
var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "map-ipcache")
    32  
    33  const (
    34  	// MaxEntries is the maximum number of keys that can be present in the
    35  	// RemoteEndpointMap.
    36  	MaxEntries = 512000
    37  
    38  	// Name is the canonical name for the IPCache map on the filesystem.
    39  	Name = "cilium_ipcache"
    40  
    41  	// maxPrefixLengths is an approximation of how many different CIDR
    42  	// prefix lengths may be supported by the BPF datapath without causing
    43  	// BPF code generation to exceed the verifier instruction limit.
    44  	// It applies to Linux versions that lack support for LPM, ie < v4.11.
    45  	//
    46  	// This is based upon the defines in bpf/lxc_config.h, which in turn
    47  	// are derived by building the bpf/ directory and running the script
    48  	// test/bpf/verifier-test.sh, then adjusting the number of unique
    49  	// prefix lengths until the script passes.
    50  	maxPrefixLengths6 = 4
    51  	maxPrefixLengths4 = 18
    52  )
    53  
// Key implements the bpf.MapKey interface.
//
// The field order, sizes and padding define the in-kernel key layout, so they
// must not be changed independently of the datapath.
//
// Must be in sync with struct ipcache_key in <bpf/lib/maps.h>
// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapKey
type Key struct {
	// Prefixlen is the number of key bits to match: the statically matched
	// bits (see getStaticPrefixBits) plus the address prefix length.
	Prefixlen uint32 `align:"lpm_key"`
	// Pad1 and Pad2 are never written in this file; they keep Family and
	// IP at their expected offsets.
	Pad1      uint16 `align:"pad1"`
	Pad2      uint8  `align:"pad2"`
	// Family is bpf.EndpointKeyIPv4 or bpf.EndpointKeyIPv6 for keys built
	// by NewKey.
	Family    uint8  `align:"family"`
	// represents both IPv6 and IPv4 (in the lowest four bytes)
	IP types.IPv6 `align:"$union0"`
}
    67  
    68  // GetKeyPtr returns the unsafe pointer to the BPF key
    69  func (k *Key) GetKeyPtr() unsafe.Pointer { return unsafe.Pointer(k) }
    70  
    71  // NewValue returns a new empty instance of the structure representing the BPF
    72  // map value
    73  func (k Key) NewValue() bpf.MapValue { return &RemoteEndpointInfo{} }
    74  
    75  func getStaticPrefixBits() uint32 {
    76  	staticMatchSize := unsafe.Sizeof(Key{})
    77  	staticMatchSize -= unsafe.Sizeof(Key{}.Prefixlen)
    78  	staticMatchSize -= unsafe.Sizeof(Key{}.IP)
    79  	return uint32(staticMatchSize) * 8
    80  }
    81  
    82  func (k Key) String() string {
    83  	prefixLen := k.Prefixlen - getStaticPrefixBits()
    84  	switch k.Family {
    85  	case bpf.EndpointKeyIPv4:
    86  		ipStr := net.IP(k.IP[:net.IPv4len]).String()
    87  		return fmt.Sprintf("%s/%d", ipStr, prefixLen)
    88  	case bpf.EndpointKeyIPv6:
    89  		ipStr := k.IP.String()
    90  		return fmt.Sprintf("%s/%d", ipStr, prefixLen)
    91  	}
    92  	return fmt.Sprintf("<unknown>")
    93  }
    94  
    95  // getPrefixLen determines the length that should be set inside the Key so that
    96  // the lookup prefix is correct in the BPF map key. The specified 'prefixBits'
    97  // indicates the number of bits in the IP that must match to match the entry in
    98  // the BPF ipcache.
    99  func getPrefixLen(prefixBits int) uint32 {
   100  	return getStaticPrefixBits() + uint32(prefixBits)
   101  }
   102  
   103  // NewKey returns an Key based on the provided IP address and mask. The address
   104  // family is automatically detected
   105  func NewKey(ip net.IP, mask net.IPMask) Key {
   106  	result := Key{}
   107  
   108  	ones, _ := mask.Size()
   109  	if ip4 := ip.To4(); ip4 != nil {
   110  		if mask == nil {
   111  			ones = net.IPv4len * 8
   112  		}
   113  		result.Prefixlen = getPrefixLen(ones)
   114  		result.Family = bpf.EndpointKeyIPv4
   115  		copy(result.IP[:], ip4)
   116  	} else {
   117  		if mask == nil {
   118  			ones = net.IPv6len * 8
   119  		}
   120  		result.Prefixlen = getPrefixLen(ones)
   121  		result.Family = bpf.EndpointKeyIPv6
   122  		copy(result.IP[:], ip)
   123  	}
   124  
   125  	return result
   126  }
   127  
// RemoteEndpointInfo implements the bpf.MapValue interface. It contains the
// security identity of a remote endpoint.
//
// The struct is passed to the kernel as-is (see GetValuePtr and the size
// handed to bpf.NewMap in NewMap), so its layout is part of the map format.
// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=github.com/cilium/cilium/pkg/bpf.MapValue
type RemoteEndpointInfo struct {
	// SecurityIdentity is the security identity of the remote endpoint.
	SecurityIdentity uint32     `align:"sec_label"`
	// NOTE(review): presumably the IPv4 address of the tunnel endpoint
	// through which the remote endpoint is reached; confirm against the
	// datapath.
	TunnelEndpoint   types.IPv4 `align:"tunnel_endpoint"`
	// NOTE(review): the meaning of Key is not visible in this file
	// (presumably an encryption key identifier); confirm against the
	// datapath.
	Key              uint8      `align:"key"`
}
   137  
   138  func (v *RemoteEndpointInfo) String() string {
   139  	return fmt.Sprintf("%d %d %s", v.SecurityIdentity, v.Key, v.TunnelEndpoint)
   140  }
   141  
   142  // GetValuePtr returns the unsafe pointer to the BPF value.
   143  func (v *RemoteEndpointInfo) GetValuePtr() unsafe.Pointer { return unsafe.Pointer(v) }
   144  
// Map represents an IPCache BPF map. It embeds bpf.Map and adds lazily
// detected knowledge of whether the kernel supports deleting entries.
type Map struct {
	bpf.Map

	// detectDeleteSupport guards the one-time probe performed by
	// supportsDelete(), which initializes 'deleteSupport' the first time
	// it is called.
	detectDeleteSupport sync.Once

	// deleteSupport is set to 'true' initially (see NewMap), then is
	// updated to reflect whether the underlying kernel supports both the
	// delete and dump operations on the map the first time that
	// supportsDelete() is called.
	deleteSupport bool
}
   158  
   159  // NewMap instantiates a Map.
   160  func NewMap(name string) *Map {
   161  	return &Map{
   162  		Map: *bpf.NewMap(
   163  			name,
   164  			bpf.BPF_MAP_TYPE_LPM_TRIE,
   165  			&Key{},
   166  			int(unsafe.Sizeof(Key{})),
   167  			&RemoteEndpointInfo{},
   168  			int(unsafe.Sizeof(RemoteEndpointInfo{})),
   169  			MaxEntries,
   170  			bpf.BPF_F_NO_PREALLOC, 0,
   171  			bpf.ConvertKeyValue,
   172  		).WithCache(),
   173  		deleteSupport: true,
   174  	}
   175  }
   176  
   177  // delete removes a key from the ipcache BPF map, and returns whether the
   178  // kernel supports the delete operation (true) or not (false), and any error
   179  // that may have occurred while attempting to delete the entry.
   180  //
   181  // If "overwrite" is true, then if delete is not supported the entry's value
   182  // will be overwritten with zeroes to signify that it's an invalid entry.
   183  func (m *Map) delete(k bpf.MapKey, overwrite bool) (bool, error) {
   184  	// Older kernels do not support deletion of LPM map entries so zero out
   185  	// the entry instead of attempting a deletion
   186  	err, errno := m.DeleteWithErrno(k)
   187  	if errno == unix.ENOSYS {
   188  		if overwrite {
   189  			return false, m.Update(k, &RemoteEndpointInfo{})
   190  		}
   191  		return false, nil
   192  	}
   193  
   194  	return true, err
   195  }
   196  
   197  // Delete removes a key from the ipcache BPF map
   198  func (m *Map) Delete(k bpf.MapKey) error {
   199  	_, err := m.delete(k, true)
   200  	return err
   201  }
   202  
   203  // GetMaxPrefixLengths determines how many unique prefix lengths are supported
   204  // simultaneously based on the underlying BPF map type in use.
   205  func (m *Map) GetMaxPrefixLengths(ipv6 bool) (count int) {
   206  	if IPCache.MapType == bpf.BPF_MAP_TYPE_LPM_TRIE {
   207  		if ipv6 {
   208  			return net.IPv6len*8 + 1
   209  		} else {
   210  			return net.IPv4len*8 + 1
   211  		}
   212  	}
   213  	if ipv6 {
   214  		return maxPrefixLengths6
   215  	}
   216  	return maxPrefixLengths4
   217  }
   218  
   219  func (m *Map) supportsDelete() bool {
   220  	m.detectDeleteSupport.Do(func() {
   221  		// Entry is invalid because IPCache needs a family specified.
   222  		invalidEntry := &Key{}
   223  		m.deleteSupport, _ = m.delete(invalidEntry, false)
   224  		log.Debugf("Detected IPCache delete operation support: %t", m.deleteSupport)
   225  
   226  		// In addition to delete support, ability to dump the map is
   227  		// also required in order to run the garbage collector which
   228  		// will iterate over the map and delete entries.
   229  		if m.deleteSupport {
   230  			err := m.Dump(map[string][]string{})
   231  			m.deleteSupport = err == nil
   232  			log.Debugf("Detected IPCache dump operation support: %t", m.deleteSupport)
   233  		}
   234  
   235  		if !m.deleteSupport {
   236  			log.Infof("Periodic IPCache map swap will occur due to lack of kernel support for LPM delete operation. Upgrade to Linux 4.15 or higher to avoid this.")
   237  		}
   238  	})
   239  	return m.deleteSupport
   240  }
   241  
   242  // SupportsDelete determines whether the underlying kernel map type supports
   243  // the delete operation.
   244  func SupportsDelete() bool {
   245  	return IPCache.supportsDelete()
   246  }
   247  
   248  // BackedByLPM returns true if the IPCache is backed by a proper LPM
   249  // implementation (provided by Linux kernels 4.11 or later), false otherwise.
   250  func BackedByLPM() bool {
   251  	return IPCache.MapType == bpf.BPF_MAP_TYPE_LPM_TRIE
   252  }
   253  
   254  var (
   255  	// IPCache is a mapping of all endpoint IPs in the cluster which this
   256  	// Cilium agent is a part of to their corresponding security identities.
   257  	// It is a singleton; there is only one such map per agent.
   258  	IPCache = NewMap(Name)
   259  )
   260  
   261  // Reopen attempts to close and re-open the IPCache map at the standard path
   262  // on the filesystem.
   263  func Reopen() error {
   264  	return IPCache.Map.Reopen()
   265  }