github.com/noironetworks/cilium-net@v1.6.12/pkg/endpoint/connector/ipvlan.go (about) 1 // Copyright 2018 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package connector 16 17 import ( 18 "fmt" 19 "math" 20 "runtime" 21 "unsafe" 22 23 "github.com/cilium/cilium/api/v1/models" 24 "github.com/cilium/cilium/pkg/datapath/link" 25 "github.com/cilium/cilium/pkg/logging/logfields" 26 "github.com/cilium/cilium/pkg/option" 27 28 "github.com/containernetworking/plugins/pkg/ns" 29 30 "github.com/vishvananda/netlink" 31 32 "golang.org/x/sys/unix" 33 ) 34 35 // TODO: We cannot include bpf package here due to CGO_ENABLED=0, 36 // but we should refactor common bits into a pure golang package. 37 38 type bpfAttrProg struct { 39 ProgType uint32 40 InsnCnt uint32 41 Insns uintptr 42 License uintptr 43 LogLevel uint32 44 LogSize uint32 45 LogBuf uintptr 46 KernVersion uint32 47 Flags uint32 48 Name [16]byte 49 } 50 51 func loadEntryProg(mapFd int) (int, error) { 52 tmp := (*[4]byte)(unsafe.Pointer(&mapFd)) 53 insns := []byte{ 54 0x18, 0x12, 0x00, 0x00, tmp[0], tmp[1], tmp[2], tmp[3], 55 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 56 0xb7, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 57 0x85, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 58 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 59 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 60 } 61 license := []byte{'A', 'S', 'L', '2', '\x00'} 62 bpfAttr := bpfAttrProg{ 63 ProgType: 3, 64 InsnCnt: uint32(len(insns) / 8), 65 Insns: uintptr(unsafe.Pointer(&insns[0])), 66 License: uintptr(unsafe.Pointer(&license[0])), 67 } 68 fd, _, errno := unix.Syscall(unix.SYS_BPF, 5, /* BPF_PROG_LOAD */ 69 uintptr(unsafe.Pointer(&bpfAttr)), 70 unsafe.Sizeof(bpfAttr)) 71 runtime.KeepAlive(&insns) 72 runtime.KeepAlive(&license) 73 runtime.KeepAlive(&bpfAttr) 74 if errno != 0 { 75 return 0, errno 76 } 77 return int(fd), nil 78 } 79 80 type bpfAttrMap struct { 81 MapType uint32 82 SizeKey uint32 83 SizeValue uint32 84 MaxEntries uint32 85 Flags uint32 86 } 87 88 type bpfMapInfo struct { 89 MapType uint32 90 MapID uint32 91 SizeKey uint32 92 SizeValue uint32 93 MaxEntries uint32 94 Flags uint32 95 } 96 97 type bpfAttrObjInfo struct { 98 Fd uint32 99 InfoLen uint32 100 Info uint64 101 } 102 103 func createTailCallMap() (int, int, error) { 104 bpfAttr := bpfAttrMap{ 105 MapType: 3, 106 SizeKey: 4, 107 SizeValue: 4, 108 MaxEntries: 1, 109 Flags: 0, 110 } 111 fd, _, errno := unix.Syscall(unix.SYS_BPF, 0, /* BPF_MAP_CREATE */ 112 uintptr(unsafe.Pointer(&bpfAttr)), 113 unsafe.Sizeof(bpfAttr)) 114 runtime.KeepAlive(&bpfAttr) 115 if int(fd) < 0 || errno != 0 { 116 return 0, 0, errno 117 } 118 119 info := bpfMapInfo{} 120 bpfAttrInfo := bpfAttrObjInfo{ 121 Fd: uint32(fd), 122 InfoLen: uint32(unsafe.Sizeof(info)), 123 Info: uint64(uintptr(unsafe.Pointer(&info))), 124 } 125 bpfAttr2 := struct { 126 info bpfAttrObjInfo 127 }{ 128 info: bpfAttrInfo, 129 } 130 ret, _, errno := unix.Syscall(unix.SYS_BPF, 15, /* BPF_OBJ_GET_INFO_BY_FD */ 131 uintptr(unsafe.Pointer(&bpfAttr2)), 132 unsafe.Sizeof(bpfAttr2)) 133 runtime.KeepAlive(&info) 134 runtime.KeepAlive(&bpfAttr2) 135 if ret != 0 || errno != 0 { 136 unix.Close(int(fd)) 137 return 0, 0, errno 138 } 139 140 return int(fd), int(info.MapID), nil 141 } 142 143 // SetupIpvlanInRemoteNs creates a tail call map, renames the netdevice inside 144 // the target netns and attaches a BPF program to it on egress path which 145 // then jumps into the tail call map index 0. 146 // 147 // NB: Do not close the returned mapFd before it has been pinned. Otherwise, 148 // the map will be destroyed. 149 func SetupIpvlanInRemoteNs(netNs ns.NetNS, srcIfName, dstIfName string) (int, int, error) { 150 rl := unix.Rlimit{ 151 Cur: math.MaxUint64, 152 Max: math.MaxUint64, 153 } 154 155 err := unix.Setrlimit(unix.RLIMIT_MEMLOCK, &rl) 156 if err != nil { 157 return 0, 0, fmt.Errorf("Unable to increase rlimit: %s", err) 158 } 159 160 mapFd, mapId, err := createTailCallMap() 161 if err != nil { 162 return 0, 0, fmt.Errorf("failed to create root BPF map for %q: %s", dstIfName, err) 163 } 164 165 err = netNs.Do(func(_ ns.NetNS) error { 166 var err error 167 168 if srcIfName != dstIfName { 169 err = link.Rename(srcIfName, dstIfName) 170 if err != nil { 171 return fmt.Errorf("failed to rename ipvlan from %q to %q: %s", srcIfName, dstIfName, err) 172 } 173 } 174 175 ipvlan, err := netlink.LinkByName(dstIfName) 176 if err != nil { 177 return fmt.Errorf("failed to lookup ipvlan device %q: %s", dstIfName, err) 178 } 179 180 qdiscAttrs := netlink.QdiscAttrs{ 181 LinkIndex: ipvlan.Attrs().Index, 182 Handle: netlink.MakeHandle(0xffff, 0), 183 Parent: netlink.HANDLE_CLSACT, 184 } 185 qdisc := &netlink.GenericQdisc{ 186 QdiscAttrs: qdiscAttrs, 187 QdiscType: "clsact", 188 } 189 if err = netlink.QdiscAdd(qdisc); err != nil { 190 return fmt.Errorf("failed to create clsact qdisc on %q: %s", dstIfName, err) 191 } 192 193 progFd, err := loadEntryProg(mapFd) 194 if err != nil { 195 return fmt.Errorf("failed to load root BPF prog for %q: %s", dstIfName, err) 196 } 197 198 filterAttrs := netlink.FilterAttrs{ 199 LinkIndex: ipvlan.Attrs().Index, 200 Parent: netlink.HANDLE_MIN_EGRESS, 201 Handle: netlink.MakeHandle(0, 1), 202 Protocol: 3, 203 Priority: 1, 204 } 205 filter := &netlink.BpfFilter{ 206 FilterAttrs: filterAttrs, 207 Fd: progFd, 208 Name: "polEntry", 209 DirectAction: true, 210 } 211 if err = netlink.FilterAdd(filter); err != nil { 212 unix.Close(progFd) 213 return fmt.Errorf("failed to create cls_bpf filter on %q: %s", dstIfName, err) 214 } 215 216 return nil 217 }) 218 if err != nil { 219 unix.Close(mapFd) 220 return 0, 0, err 221 } 222 return mapFd, mapId, nil 223 } 224 225 // CreateIpvlanSlave creates an ipvlan slave in L3 based on the master device. 226 func CreateIpvlanSlave(id string, mtu, masterDev int, mode string, ep *models.EndpointChangeRequest) (*netlink.IPVlan, *netlink.Link, string, error) { 227 if id == "" { 228 return nil, nil, "", fmt.Errorf("invalid: empty ID") 229 } 230 231 tmpIfName := Endpoint2TempIfName(id) 232 ipvlan, link, err := createIpvlanSlave(tmpIfName, mtu, masterDev, mode, ep) 233 234 return ipvlan, link, tmpIfName, err 235 } 236 237 func createIpvlanSlave(lxcIfName string, mtu, masterDev int, mode string, ep *models.EndpointChangeRequest) (*netlink.IPVlan, *netlink.Link, error) { 238 var ( 239 link netlink.Link 240 err error 241 ipvlanMode netlink.IPVlanMode 242 ) 243 244 if masterDev == 0 { 245 return nil, nil, fmt.Errorf("invalid: master device ifindex") 246 } 247 248 switch mode { 249 case option.OperationModeL3: 250 ipvlanMode = netlink.IPVLAN_MODE_L3 251 case option.OperationModeL3S: 252 ipvlanMode = netlink.IPVLAN_MODE_L3S 253 default: 254 return nil, nil, fmt.Errorf("invalid or unsupported ipvlan operation mode: %s", mode) 255 } 256 257 ipvlan := &netlink.IPVlan{ 258 LinkAttrs: netlink.LinkAttrs{ 259 Name: lxcIfName, 260 ParentIndex: masterDev, 261 }, 262 Mode: ipvlanMode, 263 } 264 265 if err = netlink.LinkAdd(ipvlan); err != nil { 266 return nil, nil, fmt.Errorf("unable to create ipvlan slave device: %s", err) 267 } 268 269 master, err := netlink.LinkByIndex(masterDev) 270 if err != nil { 271 return nil, nil, fmt.Errorf("unable to find master device: %s", err) 272 } 273 274 defer func() { 275 if err != nil { 276 if err = netlink.LinkDel(ipvlan); err != nil { 277 log.WithError(err).WithField(logfields.Ipvlan, ipvlan.Name).Warn("failed to clean up ipvlan") 278 } 279 } 280 }() 281 282 log.WithField(logfields.Ipvlan, []string{lxcIfName}).Debug("Created ipvlan slave in L3 mode") 283 284 err = DisableRpFilter(lxcIfName) 285 if err != nil { 286 return nil, nil, err 287 } 288 289 link, err = netlink.LinkByName(lxcIfName) 290 if err != nil { 291 return nil, nil, fmt.Errorf("unable to lookup ipvlan slave just created: %s", err) 292 } 293 294 if err = netlink.LinkSetMTU(link, mtu); err != nil { 295 return nil, nil, fmt.Errorf("unable to set MTU to %q: %s", lxcIfName, err) 296 } 297 298 ep.Mac = link.Attrs().HardwareAddr.String() 299 ep.HostMac = master.Attrs().HardwareAddr.String() 300 ep.InterfaceIndex = int64(link.Attrs().Index) 301 ep.InterfaceName = link.Attrs().Name 302 303 return ipvlan, &link, nil 304 } 305 306 // CreateAndSetupIpvlanSlave creates an ipvlan slave device for the given 307 // master device, moves it to the given network namespace, and finally 308 // initializes it (see SetupIpvlanInRemoteNs). 309 func CreateAndSetupIpvlanSlave(id string, slaveIfName string, netNs ns.NetNS, mtu int, masterDev int, mode string, ep *models.EndpointChangeRequest) (int, error) { 310 var tmpIfName string 311 312 if id == "" { 313 tmpIfName = Endpoint2TempRandIfName() 314 } else { 315 tmpIfName = Endpoint2TempIfName(id) 316 } 317 318 _, link, err := createIpvlanSlave(tmpIfName, mtu, masterDev, mode, ep) 319 if err != nil { 320 return 0, fmt.Errorf("createIpvlanSlave has failed: %s", err) 321 } 322 323 if err = netlink.LinkSetNsFd(*link, int(netNs.Fd())); err != nil { 324 return 0, fmt.Errorf("unable to move ipvlan slave '%v' to netns: %s", link, err) 325 } 326 327 mapFD, mapID, err := SetupIpvlanInRemoteNs(netNs, tmpIfName, slaveIfName) 328 if err != nil { 329 return 0, fmt.Errorf("unable to setup ipvlan slave in remote netns: %s", err) 330 } 331 332 ep.DatapathMapID = int64(mapID) 333 334 return mapFD, nil 335 }