github.com/fafucoder/cilium@v1.6.11/pkg/endpoint/connector/ipvlan.go (about)

     1  // Copyright 2018 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package connector
    16  
    17  import (
    18  	"fmt"
    19  	"math"
    20  	"runtime"
    21  	"unsafe"
    22  
    23  	"github.com/cilium/cilium/api/v1/models"
    24  	"github.com/cilium/cilium/pkg/datapath/link"
    25  	"github.com/cilium/cilium/pkg/logging/logfields"
    26  	"github.com/cilium/cilium/pkg/option"
    27  
    28  	"github.com/containernetworking/plugins/pkg/ns"
    29  
    30  	"github.com/vishvananda/netlink"
    31  
    32  	"golang.org/x/sys/unix"
    33  )
    34  
    35  // TODO: We cannot include bpf package here due to CGO_ENABLED=0,
    36  // but we should refactor common bits into a pure golang package.
    37  
// bpfAttrProg is the argument block that loadEntryProg passes to the bpf(2)
// syscall when loading a program. Field order and sizes are significant: the
// kernel reads this memory directly.
//
// NOTE(review): Insns, License and LogBuf carry raw addresses as uintptr.
// The kernel side presumably declares these as 64-bit fields, in which case
// this layout only matches on 64-bit platforms — confirm before building for
// a 32-bit target.
type bpfAttrProg struct {
	ProgType    uint32   // program type requested from the kernel
	InsnCnt     uint32   // number of 8-byte instructions at Insns
	Insns       uintptr  // address of the first instruction
	License     uintptr  // address of the NUL-terminated license string
	LogLevel    uint32   // verifier log level; left zero by loadEntryProg
	LogSize     uint32   // size of the buffer at LogBuf; left zero by loadEntryProg
	LogBuf      uintptr  // address of the verifier log buffer; left zero by loadEntryProg
	KernVersion uint32   // left zero by loadEntryProg
	Flags       uint32   // left zero by loadEntryProg
	Name        [16]byte // program name; left zeroed by loadEntryProg
}
    50  
    51  func loadEntryProg(mapFd int) (int, error) {
    52  	tmp := (*[4]byte)(unsafe.Pointer(&mapFd))
    53  	insns := []byte{
    54  		0x18, 0x12, 0x00, 0x00, tmp[0], tmp[1], tmp[2], tmp[3],
    55  		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    56  		0xb7, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    57  		0x85, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
    58  		0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    59  		0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    60  	}
    61  	license := []byte{'A', 'S', 'L', '2', '\x00'}
    62  	bpfAttr := bpfAttrProg{
    63  		ProgType: 3,
    64  		InsnCnt:  uint32(len(insns) / 8),
    65  		Insns:    uintptr(unsafe.Pointer(&insns[0])),
    66  		License:  uintptr(unsafe.Pointer(&license[0])),
    67  	}
    68  	fd, _, errno := unix.Syscall(unix.SYS_BPF, 5, /* BPF_PROG_LOAD */
    69  		uintptr(unsafe.Pointer(&bpfAttr)),
    70  		unsafe.Sizeof(bpfAttr))
    71  	runtime.KeepAlive(&insns)
    72  	runtime.KeepAlive(&license)
    73  	runtime.KeepAlive(&bpfAttr)
    74  	if errno != 0 {
    75  		return 0, errno
    76  	}
    77  	return int(fd), nil
    78  }
    79  
// bpfAttrMap is the argument block that createTailCallMap passes to the
// bpf(2) syscall for BPF_MAP_CREATE. The kernel reads this memory directly,
// so field order and sizes must not change.
type bpfAttrMap struct {
	MapType    uint32 // kind of map to create
	SizeKey    uint32 // key size in bytes
	SizeValue  uint32 // value size in bytes
	MaxEntries uint32 // capacity of the map
	Flags      uint32 // creation flags; createTailCallMap passes 0
}
    87  
// bpfMapInfo is the buffer the kernel fills in when createTailCallMap issues
// BPF_OBJ_GET_INFO_BY_FD on a map descriptor. Only MapID is consumed in this
// file; the remaining fields exist to keep the layout the kernel writes to.
type bpfMapInfo struct {
	MapType    uint32 // map type as reported by the kernel
	MapID      uint32 // map ID; returned by createTailCallMap
	SizeKey    uint32 // key size in bytes
	SizeValue  uint32 // value size in bytes
	MaxEntries uint32 // capacity of the map
	Flags      uint32 // map flags
}
    96  
// bpfAttrObjInfo is the argument block for the BPF_OBJ_GET_INFO_BY_FD command
// of the bpf(2) syscall, as issued by createTailCallMap.
type bpfAttrObjInfo struct {
	Fd      uint32 // descriptor of the object being queried
	InfoLen uint32 // size in bytes of the buffer at Info
	Info    uint64 // address of the buffer the kernel writes the info into
}
   102  
   103  func createTailCallMap() (int, int, error) {
   104  	bpfAttr := bpfAttrMap{
   105  		MapType:    3,
   106  		SizeKey:    4,
   107  		SizeValue:  4,
   108  		MaxEntries: 1,
   109  		Flags:      0,
   110  	}
   111  	fd, _, errno := unix.Syscall(unix.SYS_BPF, 0, /* BPF_MAP_CREATE */
   112  		uintptr(unsafe.Pointer(&bpfAttr)),
   113  		unsafe.Sizeof(bpfAttr))
   114  	runtime.KeepAlive(&bpfAttr)
   115  	if int(fd) < 0 || errno != 0 {
   116  		return 0, 0, errno
   117  	}
   118  
   119  	info := bpfMapInfo{}
   120  	bpfAttrInfo := bpfAttrObjInfo{
   121  		Fd:      uint32(fd),
   122  		InfoLen: uint32(unsafe.Sizeof(info)),
   123  		Info:    uint64(uintptr(unsafe.Pointer(&info))),
   124  	}
   125  	bpfAttr2 := struct {
   126  		info bpfAttrObjInfo
   127  	}{
   128  		info: bpfAttrInfo,
   129  	}
   130  	ret, _, errno := unix.Syscall(unix.SYS_BPF, 15, /* BPF_OBJ_GET_INFO_BY_FD */
   131  		uintptr(unsafe.Pointer(&bpfAttr2)),
   132  		unsafe.Sizeof(bpfAttr2))
   133  	runtime.KeepAlive(&info)
   134  	runtime.KeepAlive(&bpfAttr2)
   135  	if ret != 0 || errno != 0 {
   136  		unix.Close(int(fd))
   137  		return 0, 0, errno
   138  	}
   139  
   140  	return int(fd), int(info.MapID), nil
   141  }
   142  
   143  // SetupIpvlanInRemoteNs creates a tail call map, renames the netdevice inside
   144  // the target netns and attaches a BPF program to it on egress path which
   145  // then jumps into the tail call map index 0.
   146  //
   147  // NB: Do not close the returned mapFd before it has been pinned. Otherwise,
   148  // the map will be destroyed.
   149  func SetupIpvlanInRemoteNs(netNs ns.NetNS, srcIfName, dstIfName string) (int, int, error) {
   150  	rl := unix.Rlimit{
   151  		Cur: math.MaxUint64,
   152  		Max: math.MaxUint64,
   153  	}
   154  
   155  	err := unix.Setrlimit(unix.RLIMIT_MEMLOCK, &rl)
   156  	if err != nil {
   157  		return 0, 0, fmt.Errorf("Unable to increase rlimit: %s", err)
   158  	}
   159  
   160  	mapFd, mapId, err := createTailCallMap()
   161  	if err != nil {
   162  		return 0, 0, fmt.Errorf("failed to create root BPF map for %q: %s", dstIfName, err)
   163  	}
   164  
   165  	err = netNs.Do(func(_ ns.NetNS) error {
   166  		var err error
   167  
   168  		if srcIfName != dstIfName {
   169  			err = link.Rename(srcIfName, dstIfName)
   170  			if err != nil {
   171  				return fmt.Errorf("failed to rename ipvlan from %q to %q: %s", srcIfName, dstIfName, err)
   172  			}
   173  		}
   174  
   175  		ipvlan, err := netlink.LinkByName(dstIfName)
   176  		if err != nil {
   177  			return fmt.Errorf("failed to lookup ipvlan device %q: %s", dstIfName, err)
   178  		}
   179  
   180  		qdiscAttrs := netlink.QdiscAttrs{
   181  			LinkIndex: ipvlan.Attrs().Index,
   182  			Handle:    netlink.MakeHandle(0xffff, 0),
   183  			Parent:    netlink.HANDLE_CLSACT,
   184  		}
   185  		qdisc := &netlink.GenericQdisc{
   186  			QdiscAttrs: qdiscAttrs,
   187  			QdiscType:  "clsact",
   188  		}
   189  		if err = netlink.QdiscAdd(qdisc); err != nil {
   190  			return fmt.Errorf("failed to create clsact qdisc on %q: %s", dstIfName, err)
   191  		}
   192  
   193  		progFd, err := loadEntryProg(mapFd)
   194  		if err != nil {
   195  			return fmt.Errorf("failed to load root BPF prog for %q: %s", dstIfName, err)
   196  		}
   197  
   198  		filterAttrs := netlink.FilterAttrs{
   199  			LinkIndex: ipvlan.Attrs().Index,
   200  			Parent:    netlink.HANDLE_MIN_EGRESS,
   201  			Handle:    netlink.MakeHandle(0, 1),
   202  			Protocol:  3,
   203  			Priority:  1,
   204  		}
   205  		filter := &netlink.BpfFilter{
   206  			FilterAttrs:  filterAttrs,
   207  			Fd:           progFd,
   208  			Name:         "polEntry",
   209  			DirectAction: true,
   210  		}
   211  		if err = netlink.FilterAdd(filter); err != nil {
   212  			unix.Close(progFd)
   213  			return fmt.Errorf("failed to create cls_bpf filter on %q: %s", dstIfName, err)
   214  		}
   215  
   216  		return nil
   217  	})
   218  	if err != nil {
   219  		unix.Close(mapFd)
   220  		return 0, 0, err
   221  	}
   222  	return mapFd, mapId, nil
   223  }
   224  
   225  // CreateIpvlanSlave creates an ipvlan slave in L3 based on the master device.
   226  func CreateIpvlanSlave(id string, mtu, masterDev int, mode string, ep *models.EndpointChangeRequest) (*netlink.IPVlan, *netlink.Link, string, error) {
   227  	if id == "" {
   228  		return nil, nil, "", fmt.Errorf("invalid: empty ID")
   229  	}
   230  
   231  	tmpIfName := Endpoint2TempIfName(id)
   232  	ipvlan, link, err := createIpvlanSlave(tmpIfName, mtu, masterDev, mode, ep)
   233  
   234  	return ipvlan, link, tmpIfName, err
   235  }
   236  
   237  func createIpvlanSlave(lxcIfName string, mtu, masterDev int, mode string, ep *models.EndpointChangeRequest) (*netlink.IPVlan, *netlink.Link, error) {
   238  	var (
   239  		link       netlink.Link
   240  		err        error
   241  		ipvlanMode netlink.IPVlanMode
   242  	)
   243  
   244  	if masterDev == 0 {
   245  		return nil, nil, fmt.Errorf("invalid: master device ifindex")
   246  	}
   247  
   248  	switch mode {
   249  	case option.OperationModeL3:
   250  		ipvlanMode = netlink.IPVLAN_MODE_L3
   251  	case option.OperationModeL3S:
   252  		ipvlanMode = netlink.IPVLAN_MODE_L3S
   253  	default:
   254  		return nil, nil, fmt.Errorf("invalid or unsupported ipvlan operation mode: %s", mode)
   255  	}
   256  
   257  	ipvlan := &netlink.IPVlan{
   258  		LinkAttrs: netlink.LinkAttrs{
   259  			Name:        lxcIfName,
   260  			ParentIndex: masterDev,
   261  		},
   262  		Mode: ipvlanMode,
   263  	}
   264  
   265  	if err = netlink.LinkAdd(ipvlan); err != nil {
   266  		return nil, nil, fmt.Errorf("unable to create ipvlan slave device: %s", err)
   267  	}
   268  
   269  	master, err := netlink.LinkByIndex(masterDev)
   270  	if err != nil {
   271  		return nil, nil, fmt.Errorf("unable to find master device: %s", err)
   272  	}
   273  
   274  	defer func() {
   275  		if err != nil {
   276  			if err = netlink.LinkDel(ipvlan); err != nil {
   277  				log.WithError(err).WithField(logfields.Ipvlan, ipvlan.Name).Warn("failed to clean up ipvlan")
   278  			}
   279  		}
   280  	}()
   281  
   282  	log.WithField(logfields.Ipvlan, []string{lxcIfName}).Debug("Created ipvlan slave in L3 mode")
   283  
   284  	err = DisableRpFilter(lxcIfName)
   285  	if err != nil {
   286  		return nil, nil, err
   287  	}
   288  
   289  	link, err = netlink.LinkByName(lxcIfName)
   290  	if err != nil {
   291  		return nil, nil, fmt.Errorf("unable to lookup ipvlan slave just created: %s", err)
   292  	}
   293  
   294  	if err = netlink.LinkSetMTU(link, mtu); err != nil {
   295  		return nil, nil, fmt.Errorf("unable to set MTU to %q: %s", lxcIfName, err)
   296  	}
   297  
   298  	ep.Mac = link.Attrs().HardwareAddr.String()
   299  	ep.HostMac = master.Attrs().HardwareAddr.String()
   300  	ep.InterfaceIndex = int64(link.Attrs().Index)
   301  	ep.InterfaceName = link.Attrs().Name
   302  
   303  	return ipvlan, &link, nil
   304  }
   305  
   306  // CreateAndSetupIpvlanSlave creates an ipvlan slave device for the given
   307  // master device, moves it to the given network namespace, and finally
   308  // initializes it (see SetupIpvlanInRemoteNs).
   309  func CreateAndSetupIpvlanSlave(id string, slaveIfName string, netNs ns.NetNS, mtu int, masterDev int, mode string, ep *models.EndpointChangeRequest) (int, error) {
   310  	var tmpIfName string
   311  
   312  	if id == "" {
   313  		tmpIfName = Endpoint2TempRandIfName()
   314  	} else {
   315  		tmpIfName = Endpoint2TempIfName(id)
   316  	}
   317  
   318  	_, link, err := createIpvlanSlave(tmpIfName, mtu, masterDev, mode, ep)
   319  	if err != nil {
   320  		return 0, fmt.Errorf("createIpvlanSlave has failed: %s", err)
   321  	}
   322  
   323  	if err = netlink.LinkSetNsFd(*link, int(netNs.Fd())); err != nil {
   324  		return 0, fmt.Errorf("unable to move ipvlan slave '%v' to netns: %s", link, err)
   325  	}
   326  
   327  	mapFD, mapID, err := SetupIpvlanInRemoteNs(netNs, tmpIfName, slaveIfName)
   328  	if err != nil {
   329  		return 0, fmt.Errorf("unable to setup ipvlan slave in remote netns: %s", err)
   330  	}
   331  
   332  	ep.DatapathMapID = int64(mapID)
   333  
   334  	return mapFD, nil
   335  }