github.com/elfadel/cilium@v1.6.12/pkg/datapath/linux/ipsec/ipsec_linux.go (about) 1 // Copyright 2019 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // +build linux 16 17 package ipsec 18 19 import ( 20 "bufio" 21 "encoding/hex" 22 "fmt" 23 "io" 24 "io/ioutil" 25 "net" 26 "os" 27 "path/filepath" 28 "strconv" 29 "strings" 30 "time" 31 32 "github.com/cilium/cilium/pkg/datapath/linux/linux_defaults" 33 "github.com/cilium/cilium/pkg/datapath/linux/route" 34 "github.com/cilium/cilium/pkg/maps/encrypt" 35 "github.com/vishvananda/netlink" 36 37 "github.com/sirupsen/logrus" 38 ) 39 40 type IPSecDir string 41 42 const ( 43 IPSecDirIn IPSecDir = "IPSEC_IN" 44 IPSecDirOut IPSecDir = "IPSEC_OUT" 45 IPSecDirBoth IPSecDir = "IPSEC_BOTH" 46 IPSecDirOutNode IPSecDir = "IPSEC_OUT_NODE" 47 ) 48 49 type ipSecKey struct { 50 Spi uint8 51 ReqID int 52 Auth *netlink.XfrmStateAlgo 53 Crypt *netlink.XfrmStateAlgo 54 Aead *netlink.XfrmStateAlgo 55 } 56 57 // ipSecKeysGlobal is safe to read unlocked because the only writers are from 58 // daemon init time before any readers will be online. 59 var ipSecKeysGlobal = make(map[string]*ipSecKey) 60 61 func getIPSecKeys(ip net.IP) *ipSecKey { 62 key, scoped := ipSecKeysGlobal[ip.String()] 63 if scoped == false { 64 key, _ = ipSecKeysGlobal[""] 65 } 66 return key 67 } 68 69 func ipSecNewState() *netlink.XfrmState { 70 state := netlink.XfrmState{ 71 Mode: netlink.XFRM_MODE_TUNNEL, 72 Proto: netlink.XFRM_PROTO_ESP, 73 ESN: false, 74 } 75 return &state 76 } 77 78 func ipSecNewPolicy() *netlink.XfrmPolicy { 79 policy := netlink.XfrmPolicy{} 80 return &policy 81 } 82 83 func ipSecAttachPolicyTempl(policy *netlink.XfrmPolicy, keys *ipSecKey, srcIP, dstIP net.IP, spi bool) { 84 tmpl := netlink.XfrmPolicyTmpl{ 85 Proto: netlink.XFRM_PROTO_ESP, 86 Mode: netlink.XFRM_MODE_TUNNEL, 87 Reqid: keys.ReqID, 88 Dst: dstIP, 89 Src: srcIP, 90 } 91 92 if spi { 93 tmpl.Spi = int(keys.Spi) 94 } 95 96 policy.Tmpls = append(policy.Tmpls, tmpl) 97 } 98 99 func ipSecJoinState(state *netlink.XfrmState, keys *ipSecKey) { 100 if keys.Aead != nil { 101 state.Aead = keys.Aead 102 } else { 103 state.Crypt = keys.Crypt 104 state.Auth = keys.Auth 105 } 106 state.Spi = int(keys.Spi) 107 state.Reqid = keys.ReqID 108 } 109 110 func ipSecReplaceStateIn(remoteIP, localIP net.IP, setMark bool) (uint8, error) { 111 key := getIPSecKeys(localIP) 112 if key == nil { 113 return 0, fmt.Errorf("IPSec key missing") 114 } 115 state := ipSecNewState() 116 ipSecJoinState(state, key) 117 state.Src = localIP 118 state.Dst = remoteIP 119 state.Mark = &netlink.XfrmMark{ 120 Value: linux_defaults.RouteMarkDecrypt, 121 Mask: linux_defaults.IPsecMarkMaskIn, 122 } 123 if setMark { 124 state.OutputMark = linux_defaults.RouteMarkDecrypt 125 } 126 127 return key.Spi, netlink.XfrmStateAdd(state) 128 } 129 130 func ipSecReplaceStateOut(remoteIP, localIP net.IP, setMark bool) (uint8, error) { 131 key := getIPSecKeys(localIP) 132 if key == nil { 133 return 0, fmt.Errorf("IPSec key missing") 134 } 135 spiWide := uint32(key.Spi) 136 state := ipSecNewState() 137 ipSecJoinState(state, key) 138 state.Src = localIP 139 state.Dst = remoteIP 140 state.Mark = &netlink.XfrmMark{ 141 Value: ((spiWide << 12) | linux_defaults.RouteMarkEncrypt), 142 Mask: linux_defaults.IPsecMarkMask, 143 } 144 if setMark { 145 state.OutputMark = linux_defaults.RouteMarkEncrypt 146 } 147 return key.Spi, netlink.XfrmStateAdd(state) 148 } 149 150 func ipSecReplacePolicyIn(src, dst *net.IPNet) error { 151 if err := ipSecReplacePolicyInFwd(src, dst, netlink.XFRM_DIR_IN); err != nil { 152 if !os.IsExist(err) { 153 return err 154 } 155 } 156 return ipSecReplacePolicyInFwd(src, dst, netlink.XFRM_DIR_FWD) 157 } 158 159 func ipSecReplacePolicyInFwd(src, dst *net.IPNet, dir netlink.Dir) error { 160 key := getIPSecKeys(dst.IP) 161 if key == nil { 162 return fmt.Errorf("IPSec key missing") 163 } 164 165 policy := ipSecNewPolicy() 166 policy.Dir = dir 167 policy.Src = src 168 policy.Dst = dst 169 policy.Mark = &netlink.XfrmMark{ 170 Value: linux_defaults.RouteMarkDecrypt, 171 Mask: linux_defaults.IPsecMarkMaskIn, 172 } 173 ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP, false) 174 return netlink.XfrmPolicyUpdate(policy) 175 } 176 177 func ipSecReplacePolicyOut(src, dst, tmplSrc, tmplDst *net.IPNet, dir IPSecDir) error { 178 var spiWide uint32 179 180 key := getIPSecKeys(dst.IP) 181 if key == nil { 182 return fmt.Errorf("IPSec key missing") 183 } 184 spiWide = uint32(key.Spi) 185 186 policy := ipSecNewPolicy() 187 if dir == IPSecDirOutNode { 188 wildcardIP := net.ParseIP("0.0.0.0") 189 wildcardMask := net.IPv4Mask(0, 0, 0, 0) 190 policy.Src = &net.IPNet{IP: wildcardIP, Mask: wildcardMask} 191 } else { 192 policy.Src = src 193 } 194 policy.Dst = dst 195 policy.Dir = netlink.XFRM_DIR_OUT 196 policy.Mark = &netlink.XfrmMark{ 197 Value: ((spiWide << 12) | linux_defaults.RouteMarkEncrypt), 198 Mask: linux_defaults.IPsecMarkMask, 199 } 200 if tmplSrc != nil && tmplDst != nil { 201 ipSecAttachPolicyTempl(policy, key, tmplSrc.IP, tmplDst.IP, true) 202 } else { 203 ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP, true) 204 } 205 return netlink.XfrmPolicyUpdate(policy) 206 } 207 208 func ipsecDeleteXfrmSpi(spi uint8) { 209 var err error 210 scopedLog := log.WithFields(logrus.Fields{ 211 "spi": spi, 212 }) 213 214 xfrmStateList, err := netlink.XfrmStateList(0) 215 if err != nil { 216 scopedLog.WithError(err).Warning("deleting previous SPI, xfrm state list error") 217 return 218 } 219 for _, s := range xfrmStateList { 220 if s.Spi != int(spi) { 221 if err := netlink.XfrmStateDel(&s); err != nil { 222 scopedLog.WithError(err).Warning("deleting old xfrm state failed") 223 } 224 } 225 } 226 } 227 228 func ipsecDeleteXfrmState(ip net.IP) { 229 scopedLog := log.WithFields(logrus.Fields{ 230 "remote-ip": ip, 231 }) 232 233 xfrmStateList, err := netlink.XfrmStateList(0) 234 if err != nil { 235 scopedLog.WithError(err).Warning("deleting xfrm state, xfrm state list error") 236 return 237 } 238 for _, s := range xfrmStateList { 239 if ip.Equal(s.Dst) { 240 if err := netlink.XfrmStateDel(&s); err != nil { 241 scopedLog.WithError(err).Warning("deleting xfrm state failed") 242 } 243 } 244 } 245 } 246 247 func ipsecDeleteXfrmPolicy(ip net.IP) { 248 scopedLog := log.WithFields(logrus.Fields{ 249 "remote-ip": ip, 250 }) 251 252 xfrmPolicyList, err := netlink.XfrmPolicyList(0) 253 if err != nil { 254 scopedLog.WithError(err).Warning("deleting policy state, xfrm policy list error") 255 } 256 for _, p := range xfrmPolicyList { 257 if ip.Equal(p.Dst.IP) { 258 if err := netlink.XfrmPolicyDel(&p); err != nil { 259 scopedLog.WithError(err).Warning("deleting xfrm policy failed") 260 } 261 } 262 } 263 } 264 265 /* UpsertIPsecEndpoint updates the IPSec context for a new endpoint inserted in 266 * the ipcache. Currently we support a global crypt/auth keyset that will encrypt 267 * all traffic between endpoints. An IPSec context consists of two pieces a policy 268 * and a state, the security policy database (SPD) and security association 269 * database (SAD). These are implemented using the Linux kernels XFRM implementation. 270 * 271 * For all traffic that matches a policy, the policy tuple used is 272 * (sip/mask, dip/mask, dev) with an optional mark field used in the Cilium implementation 273 * to ensure only expected traffic is encrypted. The state hashtable is searched for 274 * a matching state associated with that flow. The Linux kernel will do a series of 275 * hash lookups to find the most specific state (xfrm_dst) possible. The hash keys searched are 276 * the following, (daddr, saddr, reqid, encap_family), (daddr, wildcard, reqid, encap), 277 * (mark, daddr, spi, proto, encap). Any "hits" in the hash table will subsequently 278 * have the SPI checked to ensure it also matches. Encap is ignored in our case here 279 * and can be used with UDP encap if wanted. 280 * 281 * The implications of the (inflexible!) hash key implementation is that in-order 282 * to have a policy/state match we _must_ insert a state for each daddr. For Cilium 283 * this translates to a state entry per node. We learn the nodes/endpoints by 284 * listening to ipcache events. Finally, because IPSec is unidirectional a state 285 * is needed for both ingress and egress. Denoted by the DIR on the xfrm cmd line 286 * in the policy lookup. In the Cilium case, where we have IPSec between all 287 * endpoints this results in two policy rules per node, one for ingress 288 * and one for egress. 289 * 290 * For a concrete example consider two cluster nodes using transparent mode e.g. 291 * without an IPSec tunnel IP. Cluster Node A has host_ip 10.156.0.1 with an 292 * endpoint assigned to IP 10.156.2.2 and cluster Node B has host_ip 10.182.0.1 293 * with an endpoint using IP 10.182.3.3. Then on Node A there will be a two policy 294 * entries and a set of State entries, 295 * 296 * Policy1(src=10.182.0.0/16,dst=10.156.0.1/16,dir=in,tmpl(spi=#spi,reqid=#reqid)) 297 * Policy2(src=10.156.0.0/16,dst=10.182.0.1/16,dir=out,tmpl(spi=#spi,reqid=#reqid)) 298 * State1(src=*,dst=10.182.0.1,spi=#spi,reqid=#reqid,...) 299 * State2(src=*,dst=10.156.0.1,spi=#spi,reqid=#reqid,...) 300 * 301 * setMark is used to set output-marks and use table 200 post-encryption 302 * This only applies to the subnet mode where sip/dip needs to be rewritten 303 * 304 * Design Note: For newer kernels a BPF xfrm interface would greatly simplify the 305 * state space. Basic idea would be to reference a state using any key generated 306 * from BPF program allowing for a single state per security ctx. 307 */ 308 func UpsertIPsecEndpoint(local, remote *net.IPNet, dir IPSecDir, setMark bool) (uint8, error) { 309 var spi uint8 310 var err error 311 312 /* TODO: state reference ID is (dip,spi) which can be duplicated in the current global 313 * mode. The duplication is on _all_ ingress states because dst_ip == host_ip in this 314 * case and only a single spi entry is in use. Currently no check is done to avoid 315 * attempting to add duplicate (dip,spi) states and we get 'file exist' error. These 316 * errors are expected at the moment but perhaps it would be better to avoid calling 317 * netlink API at all when we "know" an entry is a duplicate. To do this the xfer 318 * state would need to be cached in the ipcache. 319 */ 320 /* The two states plus policy below is sufficient for tunnel mode for 321 * transparent mode ciliumIP == nil case must also be handled. 322 */ 323 if !local.IP.Equal(remote.IP) { 324 if dir == IPSecDirIn || dir == IPSecDirBoth { 325 if spi, err = ipSecReplaceStateIn(local.IP, remote.IP, setMark); err != nil { 326 if !os.IsExist(err) { 327 return 0, fmt.Errorf("unable to replace local state: %s", err) 328 } 329 } 330 if err = ipSecReplacePolicyIn(remote, local); err != nil { 331 if !os.IsExist(err) { 332 return 0, fmt.Errorf("unable to replace policy in: %s", err) 333 } 334 } 335 } 336 337 if dir == IPSecDirOut || dir == IPSecDirOutNode || dir == IPSecDirBoth { 338 if spi, err = ipSecReplaceStateOut(remote.IP, local.IP, setMark); err != nil { 339 if !os.IsExist(err) { 340 return 0, fmt.Errorf("unable to replace remote state: %s", err) 341 } 342 } 343 344 if err = ipSecReplacePolicyOut(local, remote, nil, nil, dir); err != nil { 345 if !os.IsExist(err) { 346 return 0, fmt.Errorf("unable to replace policy out: %s", err) 347 } 348 } 349 } 350 } 351 return spi, nil 352 } 353 354 // UpsertIPsecEndpointPolicy adds a policy to the xfrm rules. Used to add a policy when the state 355 // rule is already available. 356 func UpsertIPsecEndpointPolicy(local, remote, localT, remoteT *net.IPNet, dir IPSecDir) error { 357 if err := ipSecReplacePolicyOut(local, remote, localT, remoteT, dir); err != nil { 358 if !os.IsExist(err) { 359 return fmt.Errorf("unable to replace templated policy out: %s", err) 360 } 361 } 362 return nil 363 } 364 365 // DeleteIPsecEndpoint deletes a endpoint associated with the remote IP address 366 func DeleteIPsecEndpoint(remote *net.IPNet) { 367 ipsecDeleteXfrmState(remote.IP) 368 ipsecDeleteXfrmPolicy(remote.IP) 369 } 370 371 func decodeIPSecKey(keyRaw string) (int, []byte, error) { 372 // As we have released the v1.4.0 docs telling the users to write the 373 // k8s secret with the prefix "0x" we have to remove it if it is present, 374 // so we can decode the secret. 375 if keyRaw == "\"\"" { 376 return 0, nil, nil 377 } 378 keyTrimmed := strings.TrimPrefix(keyRaw, "0x") 379 key, err := hex.DecodeString(keyTrimmed) 380 return len(keyTrimmed), key, err 381 } 382 383 // LoadIPSecKeysFile imports IPSec auth and crypt keys from a file. The format 384 // is to put a key per line as follows, (auth-algo auth-key enc-algo enc-key) 385 // Returns the authentication overhead in bytes, the key ID, and an error. 386 func LoadIPSecKeysFile(path string) (int, uint8, error) { 387 file, err := os.Open(path) 388 if err != nil { 389 return 0, 0, err 390 } 391 defer file.Close() 392 return loadIPSecKeys(file) 393 } 394 395 func loadIPSecKeys(r io.Reader) (int, uint8, error) { 396 var spi uint8 397 var keyLen int 398 scopedLog := log.WithFields(logrus.Fields{ 399 "spi": spi, 400 }) 401 402 if err := encrypt.MapCreate(); err != nil { 403 return 0, 0, fmt.Errorf("Encrypt map create failed: %v", err) 404 } 405 406 scanner := bufio.NewScanner(r) 407 scanner.Split(bufio.ScanLines) 408 for scanner.Scan() { 409 var oldSpi uint8 410 var authkey []byte 411 offset := 0 412 413 ipSecKey := &ipSecKey{ 414 ReqID: 1, 415 } 416 417 // Scanning IPsec keys formatted as follows, 418 // auth-algo auth-key enc-algo enc-key 419 s := strings.Split(scanner.Text(), " ") 420 if len(s) < 2 { 421 return 0, 0, fmt.Errorf("missing IPSec keys or invalid format") 422 } 423 424 spiI, err := strconv.Atoi(s[0]) 425 if err != nil { 426 // If no version info is provided assume using key format without 427 // versioning and assign SPI. 428 spiI = 1 429 offset = -1 430 } 431 if spiI > linux_defaults.IPsecMaxKeyVersion { 432 return 0, 0, fmt.Errorf("encryption Key space exhausted, id must be nonzero and less than %d. Attempted %q", linux_defaults.IPsecMaxKeyVersion, s[0]) 433 } 434 if spiI == 0 { 435 return 0, 0, fmt.Errorf("zero is not a valid key to disable encryption use `--enable-ipsec=false`, id must be nonzero and less than %d. Attempted %q", linux_defaults.IPsecMaxKeyVersion, s[0]) 436 } 437 spi = uint8(spiI) 438 439 keyLen, authkey, err = decodeIPSecKey(s[2+offset]) 440 if err != nil { 441 return 0, 0, fmt.Errorf("unable to decode authkey string %q", s[1+offset]) 442 } 443 authname := s[1+offset] 444 445 if strings.HasPrefix(authname, "rfc") { 446 icvLen, err := strconv.Atoi(s[3+offset]) 447 if err != nil { 448 return 0, 0, fmt.Errorf("ICVLen is invalid or missing") 449 } 450 451 if icvLen != 96 && icvLen != 128 && icvLen != 256 { 452 return 0, 0, fmt.Errorf("Unknown ICVLen accepts 96, 128, 256") 453 } 454 455 ipSecKey.Aead = &netlink.XfrmStateAlgo{ 456 Name: authname, 457 Key: authkey, 458 ICVLen: icvLen, 459 } 460 keyLen = icvLen / 8 461 } else { 462 _, enckey, err := decodeIPSecKey(s[4+offset]) 463 if err != nil { 464 return 0, 0, fmt.Errorf("unable to decode enckey string %q", s[3+offset]) 465 } 466 467 encname := s[3+offset] 468 469 ipSecKey.Auth = &netlink.XfrmStateAlgo{ 470 Name: authname, 471 Key: authkey, 472 } 473 ipSecKey.Crypt = &netlink.XfrmStateAlgo{ 474 Name: encname, 475 Key: enckey, 476 } 477 } 478 479 ipSecKey.Spi = spi 480 481 if len(s) == 6+offset { 482 if ipSecKeysGlobal[s[5+offset]] != nil { 483 oldSpi = ipSecKeysGlobal[s[5+offset]].Spi 484 } 485 ipSecKeysGlobal[s[5+offset]] = ipSecKey 486 } else { 487 if ipSecKeysGlobal[""] != nil { 488 oldSpi = ipSecKeysGlobal[""].Spi 489 } 490 ipSecKeysGlobal[""] = ipSecKey 491 } 492 493 // Detect a version change and call cleanup routine to remove old 494 // keys after a timeout period. We also want to ensure on restart 495 // we remove any stale keys for example when a restart changes keys. 496 // In the restart case oldSpi will be '0' and cause the delete logic 497 // to run. 498 if oldSpi != ipSecKey.Spi { 499 go func() { 500 time.Sleep(linux_defaults.IPsecKeyDeleteDelay) 501 scopedLog.Info("New encryption keys reclaiming SPI") 502 ipsecDeleteXfrmSpi(ipSecKey.Spi) 503 }() 504 } 505 } 506 if err := encrypt.MapUpdateContext(0, spi); err != nil { 507 scopedLog.WithError(err).Warn("cilium_encrypt_state map updated failed:") 508 return 0, 0, err 509 } 510 return keyLen, spi, nil 511 } 512 513 // EnableIPv6Forwarding sets proc file to enable IPv6 forwarding 514 func EnableIPv6Forwarding() error { 515 ip6ConfPath := "/proc/sys/net/ipv6/conf/" 516 device := "all" 517 forwarding := "forwarding" 518 forwardingOn := "1" 519 path := filepath.Join(ip6ConfPath, device, forwarding) 520 return ioutil.WriteFile(path, []byte(forwardingOn), 0644) 521 } 522 523 // DeleteIPsecEncryptRoute removes nodes in main routing table by walking 524 // routes and matching route protocol type. 525 func DeleteIPsecEncryptRoute() { 526 filter := &netlink.Route{ 527 Protocol: route.EncryptRouteProtocol, 528 } 529 530 for _, family := range []int{netlink.FAMILY_V4, netlink.FAMILY_V6} { 531 routes, err := netlink.RouteListFiltered(family, filter, netlink.RT_FILTER_PROTOCOL) 532 if err != nil { 533 log.WithError(err).Error("Unable to list direct routes") 534 return 535 } 536 537 for _, rt := range routes { 538 if err := netlink.RouteDel(&rt); err != nil { 539 log.WithError(err).Warningf("Unable to delete direct node route %s", rt.String()) 540 } 541 } 542 } 543 }