github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/libnetwork/drivers/overlay/encryption.go (about) 1 package overlay 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "encoding/hex" 7 "fmt" 8 "hash/fnv" 9 "net" 10 "sync" 11 "syscall" 12 13 "strconv" 14 15 "github.com/docker/libnetwork/drivers/overlay/overlayutils" 16 "github.com/docker/libnetwork/iptables" 17 "github.com/docker/libnetwork/ns" 18 "github.com/docker/libnetwork/types" 19 "github.com/hashicorp/go-multierror" 20 "github.com/sirupsen/logrus" 21 "github.com/vishvananda/netlink" 22 ) 23 24 /* 25 Encrypted overlay networks use IPsec in transport mode to encrypt and 26 authenticate the VXLAN UDP datagrams. This driver implements a bespoke control 27 plane which negotiates the security parameters for each peer-to-peer tunnel. 28 29 IPsec Terminology 30 31 - ESP: IPSec Encapsulating Security Payload 32 - SPI: Security Parameter Index 33 - ICV: Integrity Check Value 34 - SA: Security Association https://en.wikipedia.org/wiki/IPsec#Security_association 35 36 37 Developer documentation for Linux IPsec is rather sparse online. The following 38 slide deck provides a decent overview. 39 https://libreswan.org/wiki/images/e/e0/Netdev-0x12-ipsec-flow.pdf 40 41 The Linux IPsec stack is part of XFRM, the netlink packet transformation 42 interface. 43 https://man7.org/linux/man-pages/man8/ip-xfrm.8.html 44 */ 45 46 const ( 47 // Value used to mark outgoing packets which should have our IPsec 48 // processing applied. It is also used as a label to identify XFRM 49 // states (Security Associations) and policies (Security Policies) 50 // programmed by us so we know which ones we can clean up without 51 // disrupting other VPN connections on the system. 52 mark = 0xD0C4E3 53 54 pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8) 55 ) 56 57 const ( 58 forward = iota + 1 59 reverse 60 bidir 61 ) 62 63 // Mark value for matching packets which should have our IPsec security policy 64 // applied. 65 var spMark = netlink.XfrmMark{Value: mark, Mask: 0xffffffff} 66 67 type key struct { 68 value []byte 69 tag uint32 70 } 71 72 func (k *key) String() string { 73 if k != nil { 74 return fmt.Sprintf("(key: %s, tag: 0x%x)", hex.EncodeToString(k.value)[0:5], k.tag) 75 } 76 return "" 77 } 78 79 // Security Parameter Indices for the IPsec flows between local node and a 80 // remote peer, which identify the Security Associations (XFRM states) to be 81 // applied when encrypting and decrypting packets. 82 type spi struct { 83 forward int 84 reverse int 85 } 86 87 func (s *spi) String() string { 88 return fmt.Sprintf("SPI(FWD: 0x%x, REV: 0x%x)", uint32(s.forward), uint32(s.reverse)) 89 } 90 91 type encrMap struct { 92 nodes map[string][]*spi 93 sync.Mutex 94 } 95 96 func (e *encrMap) String() string { 97 e.Lock() 98 defer e.Unlock() 99 b := new(bytes.Buffer) 100 for k, v := range e.nodes { 101 b.WriteString("\n") 102 b.WriteString(k) 103 b.WriteString(":") 104 b.WriteString("[") 105 for _, s := range v { 106 b.WriteString(s.String()) 107 b.WriteString(",") 108 } 109 b.WriteString("]") 110 111 } 112 return b.String() 113 } 114 115 func (d *driver) checkEncryption(nid string, rIP net.IP, isLocal, add bool) error { 116 logrus.Debugf("checkEncryption(%.7s, %v, %t)", nid, rIP, isLocal) 117 118 n := d.network(nid) 119 if n == nil || !n.secure { 120 return nil 121 } 122 123 if len(d.keys) == 0 { 124 return types.ForbiddenErrorf("encryption key is not present") 125 } 126 127 lIP := net.ParseIP(d.bindAddress) 128 aIP := net.ParseIP(d.advertiseAddress) 129 nodes := map[string]net.IP{} 130 131 switch { 132 case isLocal: 133 if err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 134 if !aIP.Equal(pEntry.vtep) { 135 nodes[pEntry.vtep.String()] = pEntry.vtep 136 } 137 return false 138 }); err != nil { 139 logrus.Warnf("Failed to retrieve list of participating nodes in overlay network %.5s: %v", nid, err) 140 } 141 default: 142 if len(d.network(nid).endpoints) > 0 { 143 nodes[rIP.String()] = rIP 144 } 145 } 146 147 logrus.Debugf("List of nodes: %s", nodes) 148 149 if add { 150 for _, rIP := range nodes { 151 if err := setupEncryption(lIP, aIP, rIP, d.secMap, d.keys); err != nil { 152 logrus.Warnf("Failed to program network encryption between %s and %s: %v", lIP, rIP, err) 153 } 154 } 155 } else { 156 if len(nodes) == 0 { 157 if err := removeEncryption(lIP, rIP, d.secMap); err != nil { 158 logrus.Warnf("Failed to remove network encryption between %s and %s: %v", lIP, rIP, err) 159 } 160 } 161 } 162 163 return nil 164 } 165 166 // setupEncryption programs the encryption parameters for secure communication 167 // between the local node and a remote node. 168 func setupEncryption(localIP, advIP, remoteIP net.IP, em *encrMap, keys []*key) error { 169 logrus.Debugf("Programming encryption between %s and %s", localIP, remoteIP) 170 rIPs := remoteIP.String() 171 172 indices := make([]*spi, 0, len(keys)) 173 174 for i, k := range keys { 175 spis := &spi{buildSPI(advIP, remoteIP, k.tag), buildSPI(remoteIP, advIP, k.tag)} 176 dir := reverse 177 if i == 0 { 178 dir = bidir 179 } 180 fSA, rSA, err := programSA(localIP, remoteIP, spis, k, dir, true) 181 if err != nil { 182 logrus.Warn(err) 183 } 184 indices = append(indices, spis) 185 if i != 0 { 186 continue 187 } 188 err = programSP(fSA, rSA, true) 189 if err != nil { 190 logrus.Warn(err) 191 } 192 } 193 194 em.Lock() 195 em.nodes[rIPs] = indices 196 em.Unlock() 197 198 return nil 199 } 200 201 func removeEncryption(localIP, remoteIP net.IP, em *encrMap) error { 202 em.Lock() 203 indices, ok := em.nodes[remoteIP.String()] 204 em.Unlock() 205 if !ok { 206 return nil 207 } 208 for i, idxs := range indices { 209 dir := reverse 210 if i == 0 { 211 dir = bidir 212 } 213 fSA, rSA, err := programSA(localIP, remoteIP, idxs, nil, dir, false) 214 if err != nil { 215 logrus.Warn(err) 216 } 217 if i != 0 { 218 continue 219 } 220 err = programSP(fSA, rSA, false) 221 if err != nil { 222 logrus.Warn(err) 223 } 224 } 225 return nil 226 } 227 228 type matchVXLANFunc func(port, vni uint32) []string 229 230 // programVXLANRuleFunc returns a function which tries calling programWithMatch 231 // with the u32 match, falling back to the BPF match if installing u32 variant 232 // of the rules fails. 233 func programVXLANRuleFunc(programWithMatch func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error) func(vni uint32, add bool) error { 234 return func(vni uint32, add bool) error { 235 if add { 236 if err := programWithMatch(matchVXLANWithU32, vni, add); err != nil { 237 // That didn't work. Maybe the xt_u32 module isn't available? Try again with xt_bpf. 238 err2 := programWithMatch(matchVXLANWithBPF, vni, add) 239 if err2 != nil { 240 return multierror.Append(err, err2) 241 } 242 } 243 } else { 244 // Delete both flavours. 245 err := programWithMatch(matchVXLANWithU32, vni, add) 246 return multierror.Append(err, programWithMatch(matchVXLANWithBPF, vni, add)).ErrorOrNil() 247 } 248 return nil 249 } 250 } 251 252 var programMangle = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error { 253 var ( 254 m = strconv.FormatUint(mark, 10) 255 chain = "OUTPUT" 256 rule = append(matchVXLAN(overlayutils.VXLANUDPPort(), vni), "-j", "MARK", "--set-mark", m) 257 a = iptables.Append 258 action = "install" 259 ) 260 261 // TODO IPv6 support 262 iptable := iptables.GetIptable(iptables.IPv4) 263 264 if !add { 265 a = iptables.Delete 266 action = "remove" 267 } 268 269 if err := iptable.ProgramRule(iptables.Mangle, chain, a, rule); err != nil { 270 return fmt.Errorf("could not %s mangle rule: %w", action, err) 271 } 272 273 return nil 274 }) 275 276 var programInput = programVXLANRuleFunc(func(matchVXLAN matchVXLANFunc, vni uint32, add bool) error { 277 var ( 278 plainVxlan = matchVXLAN(overlayutils.VXLANUDPPort(), vni) 279 chain = "INPUT" 280 msg = "add" 281 ) 282 283 rule := func(policy, jump string) []string { 284 args := append([]string{"-m", "policy", "--dir", "in", "--pol", policy}, plainVxlan...) 285 return append(args, "-j", jump) 286 } 287 288 // TODO IPv6 support 289 iptable := iptables.GetIptable(iptables.IPv4) 290 291 if !add { 292 msg = "remove" 293 } 294 295 action := func(a iptables.Action) iptables.Action { 296 if !add { 297 return iptables.Delete 298 } 299 return a 300 } 301 302 // Accept incoming VXLAN datagrams for the VNI which were subjected to IPSec processing. 303 // Append to the bottom of the chain to give administrator-configured rules precedence. 304 if err := iptable.ProgramRule(iptables.Filter, chain, action(iptables.Append), rule("ipsec", "ACCEPT")); err != nil { 305 return fmt.Errorf("could not %s input accept rule: %w", msg, err) 306 } 307 308 // Drop incoming VXLAN datagrams for the VNI which were received in cleartext. 309 // Insert at the top of the chain so the packets are dropped even if an 310 // administrator-configured rule exists which would otherwise unconditionally 311 // accept incoming VXLAN traffic. 312 if err := iptable.ProgramRule(iptables.Filter, chain, action(iptables.Insert), rule("none", "DROP")); err != nil { 313 return fmt.Errorf("could not %s input drop rule: %w", msg, err) 314 } 315 316 return nil 317 }) 318 319 func programSA(localIP, remoteIP net.IP, spi *spi, k *key, dir int, add bool) (fSA *netlink.XfrmState, rSA *netlink.XfrmState, err error) { 320 var ( 321 action = "Removing" 322 xfrmProgram = ns.NlHandle().XfrmStateDel 323 ) 324 325 if add { 326 action = "Adding" 327 xfrmProgram = ns.NlHandle().XfrmStateAdd 328 } 329 330 if dir&reverse > 0 { 331 rSA = &netlink.XfrmState{ 332 Src: remoteIP, 333 Dst: localIP, 334 Proto: netlink.XFRM_PROTO_ESP, 335 Spi: spi.reverse, 336 Mode: netlink.XFRM_MODE_TRANSPORT, 337 Reqid: mark, 338 } 339 if add { 340 rSA.Aead = buildAeadAlgo(k, spi.reverse) 341 } 342 343 exists, err := saExists(rSA) 344 if err != nil { 345 exists = !add 346 } 347 348 if add != exists { 349 logrus.Debugf("%s: rSA{%s}", action, rSA) 350 if err := xfrmProgram(rSA); err != nil { 351 logrus.Warnf("Failed %s rSA{%s}: %v", action, rSA, err) 352 } 353 } 354 } 355 356 if dir&forward > 0 { 357 fSA = &netlink.XfrmState{ 358 Src: localIP, 359 Dst: remoteIP, 360 Proto: netlink.XFRM_PROTO_ESP, 361 Spi: spi.forward, 362 Mode: netlink.XFRM_MODE_TRANSPORT, 363 Reqid: mark, 364 } 365 if add { 366 fSA.Aead = buildAeadAlgo(k, spi.forward) 367 } 368 369 exists, err := saExists(fSA) 370 if err != nil { 371 exists = !add 372 } 373 374 if add != exists { 375 logrus.Debugf("%s fSA{%s}", action, fSA) 376 if err := xfrmProgram(fSA); err != nil { 377 logrus.Warnf("Failed %s fSA{%s}: %v.", action, fSA, err) 378 } 379 } 380 } 381 382 return 383 } 384 385 func programSP(fSA *netlink.XfrmState, rSA *netlink.XfrmState, add bool) error { 386 action := "Removing" 387 xfrmProgram := ns.NlHandle().XfrmPolicyDel 388 if add { 389 action = "Adding" 390 xfrmProgram = ns.NlHandle().XfrmPolicyAdd 391 } 392 393 // Create a congruent cidr 394 s := types.GetMinimalIP(fSA.Src) 395 d := types.GetMinimalIP(fSA.Dst) 396 fullMask := net.CIDRMask(8*len(s), 8*len(s)) 397 398 fPol := &netlink.XfrmPolicy{ 399 Src: &net.IPNet{IP: s, Mask: fullMask}, 400 Dst: &net.IPNet{IP: d, Mask: fullMask}, 401 Dir: netlink.XFRM_DIR_OUT, 402 Proto: 17, 403 DstPort: 4789, 404 Mark: &spMark, 405 Tmpls: []netlink.XfrmPolicyTmpl{ 406 { 407 Src: fSA.Src, 408 Dst: fSA.Dst, 409 Proto: netlink.XFRM_PROTO_ESP, 410 Mode: netlink.XFRM_MODE_TRANSPORT, 411 Spi: fSA.Spi, 412 Reqid: mark, 413 }, 414 }, 415 } 416 417 exists, err := spExists(fPol) 418 if err != nil { 419 exists = !add 420 } 421 422 if add != exists { 423 logrus.Debugf("%s fSP{%s}", action, fPol) 424 if err := xfrmProgram(fPol); err != nil { 425 logrus.Warnf("%s fSP{%s}: %v", action, fPol, err) 426 } 427 } 428 429 return nil 430 } 431 432 func saExists(sa *netlink.XfrmState) (bool, error) { 433 _, err := ns.NlHandle().XfrmStateGet(sa) 434 switch err { 435 case nil: 436 return true, nil 437 case syscall.ESRCH: 438 return false, nil 439 default: 440 err = fmt.Errorf("Error while checking for SA existence: %v", err) 441 logrus.Warn(err) 442 return false, err 443 } 444 } 445 446 func spExists(sp *netlink.XfrmPolicy) (bool, error) { 447 _, err := ns.NlHandle().XfrmPolicyGet(sp) 448 switch err { 449 case nil: 450 return true, nil 451 case syscall.ENOENT: 452 return false, nil 453 default: 454 err = fmt.Errorf("Error while checking for SP existence: %v", err) 455 logrus.Warn(err) 456 return false, err 457 } 458 } 459 460 func buildSPI(src, dst net.IP, st uint32) int { 461 b := make([]byte, 4) 462 binary.BigEndian.PutUint32(b, st) 463 h := fnv.New32a() 464 h.Write(src) 465 h.Write(b) 466 h.Write(dst) 467 return int(binary.BigEndian.Uint32(h.Sum(nil))) 468 } 469 470 func buildAeadAlgo(k *key, s int) *netlink.XfrmStateAlgo { 471 salt := make([]byte, 4) 472 binary.BigEndian.PutUint32(salt, uint32(s)) 473 return &netlink.XfrmStateAlgo{ 474 Name: "rfc4106(gcm(aes))", 475 Key: append(k.value, salt...), 476 ICVLen: 64, 477 } 478 } 479 480 func (d *driver) secMapWalk(f func(string, []*spi) ([]*spi, bool)) error { 481 d.secMap.Lock() 482 for node, indices := range d.secMap.nodes { 483 idxs, stop := f(node, indices) 484 if idxs != nil { 485 d.secMap.nodes[node] = idxs 486 } 487 if stop { 488 break 489 } 490 } 491 d.secMap.Unlock() 492 return nil 493 } 494 495 func (d *driver) setKeys(keys []*key) error { 496 // Remove any stale policy, state 497 clearEncryptionStates() 498 // Accept the encryption keys and clear any stale encryption map 499 d.Lock() 500 d.keys = keys 501 d.secMap = &encrMap{nodes: map[string][]*spi{}} 502 d.Unlock() 503 logrus.Debugf("Initial encryption keys: %v", keys) 504 return nil 505 } 506 507 // updateKeys allows to add a new key and/or change the primary key and/or prune an existing key 508 // The primary key is the key used in transmission and will go in first position in the list. 509 func (d *driver) updateKeys(newKey, primary, pruneKey *key) error { 510 logrus.Debugf("Updating Keys. New: %v, Primary: %v, Pruned: %v", newKey, primary, pruneKey) 511 512 logrus.Debugf("Current: %v", d.keys) 513 514 var ( 515 newIdx = -1 516 priIdx = -1 517 delIdx = -1 518 lIP = net.ParseIP(d.bindAddress) 519 aIP = net.ParseIP(d.advertiseAddress) 520 ) 521 522 d.Lock() 523 defer d.Unlock() 524 525 // add new 526 if newKey != nil { 527 d.keys = append(d.keys, newKey) 528 newIdx += len(d.keys) 529 } 530 for i, k := range d.keys { 531 if primary != nil && k.tag == primary.tag { 532 priIdx = i 533 } 534 if pruneKey != nil && k.tag == pruneKey.tag { 535 delIdx = i 536 } 537 } 538 539 if (newKey != nil && newIdx == -1) || 540 (primary != nil && priIdx == -1) || 541 (pruneKey != nil && delIdx == -1) { 542 return types.BadRequestErrorf("cannot find proper key indices while processing key update:"+ 543 "(newIdx,priIdx,delIdx):(%d, %d, %d)", newIdx, priIdx, delIdx) 544 } 545 546 if priIdx != -1 && priIdx == delIdx { 547 return types.BadRequestErrorf("attempting to both make a key (index %d) primary and delete it", priIdx) 548 } 549 550 d.secMapWalk(func(rIPs string, spis []*spi) ([]*spi, bool) { 551 rIP := net.ParseIP(rIPs) 552 return updateNodeKey(lIP, aIP, rIP, spis, d.keys, newIdx, priIdx, delIdx), false 553 }) 554 555 // swap primary 556 if priIdx != -1 { 557 d.keys[0], d.keys[priIdx] = d.keys[priIdx], d.keys[0] 558 } 559 // prune 560 if delIdx != -1 { 561 if delIdx == 0 { 562 delIdx = priIdx 563 } 564 d.keys = append(d.keys[:delIdx], d.keys[delIdx+1:]...) 565 } 566 567 logrus.Debugf("Updated: %v", d.keys) 568 569 return nil 570 } 571 572 /******************************************************** 573 * Steady state: rSA0, rSA1, rSA2, fSA1, fSP1 574 * Rotation --> -rSA0, +rSA3, +fSA2, +fSP2/-fSP1, -fSA1 575 * Steady state: rSA1, rSA2, rSA3, fSA2, fSP2 576 *********************************************************/ 577 578 // Spis and keys are sorted in such away the one in position 0 is the primary 579 func updateNodeKey(lIP, aIP, rIP net.IP, idxs []*spi, curKeys []*key, newIdx, priIdx, delIdx int) []*spi { 580 logrus.Debugf("Updating keys for node: %s (%d,%d,%d)", rIP, newIdx, priIdx, delIdx) 581 582 spis := idxs 583 logrus.Debugf("Current: %v", spis) 584 585 // add new 586 if newIdx != -1 { 587 spis = append(spis, &spi{ 588 forward: buildSPI(aIP, rIP, curKeys[newIdx].tag), 589 reverse: buildSPI(rIP, aIP, curKeys[newIdx].tag), 590 }) 591 } 592 593 if delIdx != -1 { 594 // -rSA0 595 programSA(lIP, rIP, spis[delIdx], nil, reverse, false) 596 } 597 598 if newIdx > -1 { 599 // +rSA2 600 programSA(lIP, rIP, spis[newIdx], curKeys[newIdx], reverse, true) 601 } 602 603 if priIdx > 0 { 604 // +fSA2 605 fSA2, _, _ := programSA(lIP, rIP, spis[priIdx], curKeys[priIdx], forward, true) 606 607 // +fSP2, -fSP1 608 s := types.GetMinimalIP(fSA2.Src) 609 d := types.GetMinimalIP(fSA2.Dst) 610 fullMask := net.CIDRMask(8*len(s), 8*len(s)) 611 612 fSP1 := &netlink.XfrmPolicy{ 613 Src: &net.IPNet{IP: s, Mask: fullMask}, 614 Dst: &net.IPNet{IP: d, Mask: fullMask}, 615 Dir: netlink.XFRM_DIR_OUT, 616 Proto: 17, 617 DstPort: 4789, 618 Mark: &spMark, 619 Tmpls: []netlink.XfrmPolicyTmpl{ 620 { 621 Src: fSA2.Src, 622 Dst: fSA2.Dst, 623 Proto: netlink.XFRM_PROTO_ESP, 624 Mode: netlink.XFRM_MODE_TRANSPORT, 625 Spi: fSA2.Spi, 626 Reqid: mark, 627 }, 628 }, 629 } 630 logrus.Debugf("Updating fSP{%s}", fSP1) 631 if err := ns.NlHandle().XfrmPolicyUpdate(fSP1); err != nil { 632 logrus.Warnf("Failed to update fSP{%s}: %v", fSP1, err) 633 } 634 635 // -fSA1 636 programSA(lIP, rIP, spis[0], nil, forward, false) 637 } 638 639 // swap 640 if priIdx > 0 { 641 swp := spis[0] 642 spis[0] = spis[priIdx] 643 spis[priIdx] = swp 644 } 645 // prune 646 if delIdx != -1 { 647 if delIdx == 0 { 648 delIdx = priIdx 649 } 650 spis = append(spis[:delIdx], spis[delIdx+1:]...) 651 } 652 653 logrus.Debugf("Updated: %v", spis) 654 655 return spis 656 } 657 658 func (n *network) maxMTU() int { 659 mtu := 1500 660 if n.mtu != 0 { 661 mtu = n.mtu 662 } 663 mtu -= vxlanEncap 664 if n.secure { 665 // In case of encryption account for the 666 // esp packet expansion and padding 667 mtu -= pktExpansion 668 mtu -= (mtu % 4) 669 } 670 return mtu 671 } 672 673 func clearEncryptionStates() { 674 nlh := ns.NlHandle() 675 spList, err := nlh.XfrmPolicyList(netlink.FAMILY_ALL) 676 if err != nil { 677 logrus.Warnf("Failed to retrieve SP list for cleanup: %v", err) 678 } 679 saList, err := nlh.XfrmStateList(netlink.FAMILY_ALL) 680 if err != nil { 681 logrus.Warnf("Failed to retrieve SA list for cleanup: %v", err) 682 } 683 for _, sp := range spList { 684 if sp.Mark != nil && sp.Mark.Value == spMark.Value { 685 if err := nlh.XfrmPolicyDel(&sp); err != nil { 686 logrus.Warnf("Failed to delete stale SP %s: %v", sp, err) 687 continue 688 } 689 logrus.Debugf("Removed stale SP: %s", sp) 690 } 691 } 692 for _, sa := range saList { 693 if sa.Reqid == mark { 694 if err := nlh.XfrmStateDel(&sa); err != nil { 695 logrus.Warnf("Failed to delete stale SA %s: %v", sa, err) 696 continue 697 } 698 logrus.Debugf("Removed stale SA: %s", sa) 699 } 700 } 701 }