github.com/elfadel/cilium@v1.6.12/pkg/datapath/linux/ipsec/ipsec_linux.go (about)

     1  // Copyright 2019 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // +build linux
    16  
    17  package ipsec
    18  
    19  import (
    20  	"bufio"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"io"
    24  	"io/ioutil"
    25  	"net"
    26  	"os"
    27  	"path/filepath"
    28  	"strconv"
    29  	"strings"
    30  	"time"
    31  
    32  	"github.com/cilium/cilium/pkg/datapath/linux/linux_defaults"
    33  	"github.com/cilium/cilium/pkg/datapath/linux/route"
    34  	"github.com/cilium/cilium/pkg/maps/encrypt"
    35  	"github.com/vishvananda/netlink"
    36  
    37  	"github.com/sirupsen/logrus"
    38  )
    39  
    40  type IPSecDir string
    41  
    42  const (
    43  	IPSecDirIn      IPSecDir = "IPSEC_IN"
    44  	IPSecDirOut     IPSecDir = "IPSEC_OUT"
    45  	IPSecDirBoth    IPSecDir = "IPSEC_BOTH"
    46  	IPSecDirOutNode IPSecDir = "IPSEC_OUT_NODE"
    47  )
    48  
    49  type ipSecKey struct {
    50  	Spi   uint8
    51  	ReqID int
    52  	Auth  *netlink.XfrmStateAlgo
    53  	Crypt *netlink.XfrmStateAlgo
    54  	Aead  *netlink.XfrmStateAlgo
    55  }
    56  
    57  // ipSecKeysGlobal is safe to read unlocked because the only writers are from
    58  // daemon init time before any readers will be online.
    59  var ipSecKeysGlobal = make(map[string]*ipSecKey)
    60  
    61  func getIPSecKeys(ip net.IP) *ipSecKey {
    62  	key, scoped := ipSecKeysGlobal[ip.String()]
    63  	if scoped == false {
    64  		key, _ = ipSecKeysGlobal[""]
    65  	}
    66  	return key
    67  }
    68  
    69  func ipSecNewState() *netlink.XfrmState {
    70  	state := netlink.XfrmState{
    71  		Mode:  netlink.XFRM_MODE_TUNNEL,
    72  		Proto: netlink.XFRM_PROTO_ESP,
    73  		ESN:   false,
    74  	}
    75  	return &state
    76  }
    77  
    78  func ipSecNewPolicy() *netlink.XfrmPolicy {
    79  	policy := netlink.XfrmPolicy{}
    80  	return &policy
    81  }
    82  
    83  func ipSecAttachPolicyTempl(policy *netlink.XfrmPolicy, keys *ipSecKey, srcIP, dstIP net.IP, spi bool) {
    84  	tmpl := netlink.XfrmPolicyTmpl{
    85  		Proto: netlink.XFRM_PROTO_ESP,
    86  		Mode:  netlink.XFRM_MODE_TUNNEL,
    87  		Reqid: keys.ReqID,
    88  		Dst:   dstIP,
    89  		Src:   srcIP,
    90  	}
    91  
    92  	if spi {
    93  		tmpl.Spi = int(keys.Spi)
    94  	}
    95  
    96  	policy.Tmpls = append(policy.Tmpls, tmpl)
    97  }
    98  
    99  func ipSecJoinState(state *netlink.XfrmState, keys *ipSecKey) {
   100  	if keys.Aead != nil {
   101  		state.Aead = keys.Aead
   102  	} else {
   103  		state.Crypt = keys.Crypt
   104  		state.Auth = keys.Auth
   105  	}
   106  	state.Spi = int(keys.Spi)
   107  	state.Reqid = keys.ReqID
   108  }
   109  
   110  func ipSecReplaceStateIn(remoteIP, localIP net.IP, setMark bool) (uint8, error) {
   111  	key := getIPSecKeys(localIP)
   112  	if key == nil {
   113  		return 0, fmt.Errorf("IPSec key missing")
   114  	}
   115  	state := ipSecNewState()
   116  	ipSecJoinState(state, key)
   117  	state.Src = localIP
   118  	state.Dst = remoteIP
   119  	state.Mark = &netlink.XfrmMark{
   120  		Value: linux_defaults.RouteMarkDecrypt,
   121  		Mask:  linux_defaults.IPsecMarkMaskIn,
   122  	}
   123  	if setMark {
   124  		state.OutputMark = linux_defaults.RouteMarkDecrypt
   125  	}
   126  
   127  	return key.Spi, netlink.XfrmStateAdd(state)
   128  }
   129  
   130  func ipSecReplaceStateOut(remoteIP, localIP net.IP, setMark bool) (uint8, error) {
   131  	key := getIPSecKeys(localIP)
   132  	if key == nil {
   133  		return 0, fmt.Errorf("IPSec key missing")
   134  	}
   135  	spiWide := uint32(key.Spi)
   136  	state := ipSecNewState()
   137  	ipSecJoinState(state, key)
   138  	state.Src = localIP
   139  	state.Dst = remoteIP
   140  	state.Mark = &netlink.XfrmMark{
   141  		Value: ((spiWide << 12) | linux_defaults.RouteMarkEncrypt),
   142  		Mask:  linux_defaults.IPsecMarkMask,
   143  	}
   144  	if setMark {
   145  		state.OutputMark = linux_defaults.RouteMarkEncrypt
   146  	}
   147  	return key.Spi, netlink.XfrmStateAdd(state)
   148  }
   149  
   150  func ipSecReplacePolicyIn(src, dst *net.IPNet) error {
   151  	if err := ipSecReplacePolicyInFwd(src, dst, netlink.XFRM_DIR_IN); err != nil {
   152  		if !os.IsExist(err) {
   153  			return err
   154  		}
   155  	}
   156  	return ipSecReplacePolicyInFwd(src, dst, netlink.XFRM_DIR_FWD)
   157  }
   158  
   159  func ipSecReplacePolicyInFwd(src, dst *net.IPNet, dir netlink.Dir) error {
   160  	key := getIPSecKeys(dst.IP)
   161  	if key == nil {
   162  		return fmt.Errorf("IPSec key missing")
   163  	}
   164  
   165  	policy := ipSecNewPolicy()
   166  	policy.Dir = dir
   167  	policy.Src = src
   168  	policy.Dst = dst
   169  	policy.Mark = &netlink.XfrmMark{
   170  		Value: linux_defaults.RouteMarkDecrypt,
   171  		Mask:  linux_defaults.IPsecMarkMaskIn,
   172  	}
   173  	ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP, false)
   174  	return netlink.XfrmPolicyUpdate(policy)
   175  }
   176  
   177  func ipSecReplacePolicyOut(src, dst, tmplSrc, tmplDst *net.IPNet, dir IPSecDir) error {
   178  	var spiWide uint32
   179  
   180  	key := getIPSecKeys(dst.IP)
   181  	if key == nil {
   182  		return fmt.Errorf("IPSec key missing")
   183  	}
   184  	spiWide = uint32(key.Spi)
   185  
   186  	policy := ipSecNewPolicy()
   187  	if dir == IPSecDirOutNode {
   188  		wildcardIP := net.ParseIP("0.0.0.0")
   189  		wildcardMask := net.IPv4Mask(0, 0, 0, 0)
   190  		policy.Src = &net.IPNet{IP: wildcardIP, Mask: wildcardMask}
   191  	} else {
   192  		policy.Src = src
   193  	}
   194  	policy.Dst = dst
   195  	policy.Dir = netlink.XFRM_DIR_OUT
   196  	policy.Mark = &netlink.XfrmMark{
   197  		Value: ((spiWide << 12) | linux_defaults.RouteMarkEncrypt),
   198  		Mask:  linux_defaults.IPsecMarkMask,
   199  	}
   200  	if tmplSrc != nil && tmplDst != nil {
   201  		ipSecAttachPolicyTempl(policy, key, tmplSrc.IP, tmplDst.IP, true)
   202  	} else {
   203  		ipSecAttachPolicyTempl(policy, key, src.IP, dst.IP, true)
   204  	}
   205  	return netlink.XfrmPolicyUpdate(policy)
   206  }
   207  
   208  func ipsecDeleteXfrmSpi(spi uint8) {
   209  	var err error
   210  	scopedLog := log.WithFields(logrus.Fields{
   211  		"spi": spi,
   212  	})
   213  
   214  	xfrmStateList, err := netlink.XfrmStateList(0)
   215  	if err != nil {
   216  		scopedLog.WithError(err).Warning("deleting previous SPI, xfrm state list error")
   217  		return
   218  	}
   219  	for _, s := range xfrmStateList {
   220  		if s.Spi != int(spi) {
   221  			if err := netlink.XfrmStateDel(&s); err != nil {
   222  				scopedLog.WithError(err).Warning("deleting old xfrm state failed")
   223  			}
   224  		}
   225  	}
   226  }
   227  
   228  func ipsecDeleteXfrmState(ip net.IP) {
   229  	scopedLog := log.WithFields(logrus.Fields{
   230  		"remote-ip": ip,
   231  	})
   232  
   233  	xfrmStateList, err := netlink.XfrmStateList(0)
   234  	if err != nil {
   235  		scopedLog.WithError(err).Warning("deleting xfrm state, xfrm state list error")
   236  		return
   237  	}
   238  	for _, s := range xfrmStateList {
   239  		if ip.Equal(s.Dst) {
   240  			if err := netlink.XfrmStateDel(&s); err != nil {
   241  				scopedLog.WithError(err).Warning("deleting xfrm state failed")
   242  			}
   243  		}
   244  	}
   245  }
   246  
   247  func ipsecDeleteXfrmPolicy(ip net.IP) {
   248  	scopedLog := log.WithFields(logrus.Fields{
   249  		"remote-ip": ip,
   250  	})
   251  
   252  	xfrmPolicyList, err := netlink.XfrmPolicyList(0)
   253  	if err != nil {
   254  		scopedLog.WithError(err).Warning("deleting policy state, xfrm policy list error")
   255  	}
   256  	for _, p := range xfrmPolicyList {
   257  		if ip.Equal(p.Dst.IP) {
   258  			if err := netlink.XfrmPolicyDel(&p); err != nil {
   259  				scopedLog.WithError(err).Warning("deleting xfrm policy failed")
   260  			}
   261  		}
   262  	}
   263  }
   264  
   265  /* UpsertIPsecEndpoint updates the IPSec context for a new endpoint inserted in
   266   * the ipcache. Currently we support a global crypt/auth keyset that will encrypt
   267   * all traffic between endpoints. An IPSec context consists of two pieces a policy
   268   * and a state, the security policy database (SPD) and security association
   269   * database (SAD). These are implemented using the Linux kernels XFRM implementation.
   270   *
   271   * For all traffic that matches a policy, the policy tuple used is
   272   * (sip/mask, dip/mask, dev) with an optional mark field used in the Cilium implementation
   273   * to ensure only expected traffic is encrypted. The state hashtable is searched for
   274   * a matching state associated with that flow. The Linux kernel will do a series of
   275   * hash lookups to find the most specific state (xfrm_dst) possible. The hash keys searched are
   276   * the following, (daddr, saddr, reqid, encap_family), (daddr, wildcard, reqid, encap),
   277   * (mark, daddr, spi, proto, encap). Any "hits" in the hash table will subsequently
   278   * have the SPI checked to ensure it also matches. Encap is ignored in our case here
   279   * and can be used with UDP encap if wanted.
   280   *
   281   * The implications of the (inflexible!) hash key implementation is that in-order
   282   * to have a policy/state match we _must_ insert a state for each daddr. For Cilium
   283   * this translates to a state entry per node. We learn the nodes/endpoints by
   284   * listening to ipcache events. Finally, because IPSec is unidirectional a state
   285   * is needed for both ingress and egress. Denoted by the DIR on the xfrm cmd line
   286   * in the policy lookup. In the Cilium case, where we have IPSec between all
   287   * endpoints this results in two policy rules per node, one for ingress
   288   * and one for egress.
   289   *
   290   * For a concrete example consider two cluster nodes using transparent mode e.g.
   291   * without an IPSec tunnel IP. Cluster Node A has host_ip 10.156.0.1 with an
   292   * endpoint assigned to IP 10.156.2.2 and cluster Node B has host_ip 10.182.0.1
   293   * with an endpoint using IP 10.182.3.3. Then on Node A there will be a two policy
   294   * entries and a set of State entries,
   295   *
   296   * Policy1(src=10.182.0.0/16,dst=10.156.0.1/16,dir=in,tmpl(spi=#spi,reqid=#reqid))
   297   * Policy2(src=10.156.0.0/16,dst=10.182.0.1/16,dir=out,tmpl(spi=#spi,reqid=#reqid))
   298   * State1(src=*,dst=10.182.0.1,spi=#spi,reqid=#reqid,...)
   299   * State2(src=*,dst=10.156.0.1,spi=#spi,reqid=#reqid,...)
   300   *
   301   * setMark is used to set output-marks and use table 200 post-encryption
   302   * This only applies to the subnet mode where sip/dip needs to be rewritten
   303   *
   304   * Design Note: For newer kernels a BPF xfrm interface would greatly simplify the
   305   * state space. Basic idea would be to reference a state using any key generated
   306   * from BPF program allowing for a single state per security ctx.
   307   */
   308  func UpsertIPsecEndpoint(local, remote *net.IPNet, dir IPSecDir, setMark bool) (uint8, error) {
   309  	var spi uint8
   310  	var err error
   311  
   312  	/* TODO: state reference ID is (dip,spi) which can be duplicated in the current global
   313  	 * mode. The duplication is on _all_ ingress states because dst_ip == host_ip in this
   314  	 * case and only a single spi entry is in use. Currently no check is done to avoid
   315  	 * attempting to add duplicate (dip,spi) states and we get 'file exist' error. These
   316  	 * errors are expected at the moment but perhaps it would be better to avoid calling
   317  	 * netlink API at all when we "know" an entry is a duplicate. To do this the xfer
   318  	 * state would need to be cached in the ipcache.
   319  	 */
   320  	/* The two states plus policy below is sufficient for tunnel mode for
   321  	 * transparent mode ciliumIP == nil case must also be handled.
   322  	 */
   323  	if !local.IP.Equal(remote.IP) {
   324  		if dir == IPSecDirIn || dir == IPSecDirBoth {
   325  			if spi, err = ipSecReplaceStateIn(local.IP, remote.IP, setMark); err != nil {
   326  				if !os.IsExist(err) {
   327  					return 0, fmt.Errorf("unable to replace local state: %s", err)
   328  				}
   329  			}
   330  			if err = ipSecReplacePolicyIn(remote, local); err != nil {
   331  				if !os.IsExist(err) {
   332  					return 0, fmt.Errorf("unable to replace policy in: %s", err)
   333  				}
   334  			}
   335  		}
   336  
   337  		if dir == IPSecDirOut || dir == IPSecDirOutNode || dir == IPSecDirBoth {
   338  			if spi, err = ipSecReplaceStateOut(remote.IP, local.IP, setMark); err != nil {
   339  				if !os.IsExist(err) {
   340  					return 0, fmt.Errorf("unable to replace remote state: %s", err)
   341  				}
   342  			}
   343  
   344  			if err = ipSecReplacePolicyOut(local, remote, nil, nil, dir); err != nil {
   345  				if !os.IsExist(err) {
   346  					return 0, fmt.Errorf("unable to replace policy out: %s", err)
   347  				}
   348  			}
   349  		}
   350  	}
   351  	return spi, nil
   352  }
   353  
   354  // UpsertIPsecEndpointPolicy adds a policy to the xfrm rules. Used to add a policy when the state
   355  // rule is already available.
   356  func UpsertIPsecEndpointPolicy(local, remote, localT, remoteT *net.IPNet, dir IPSecDir) error {
   357  	if err := ipSecReplacePolicyOut(local, remote, localT, remoteT, dir); err != nil {
   358  		if !os.IsExist(err) {
   359  			return fmt.Errorf("unable to replace templated policy out: %s", err)
   360  		}
   361  	}
   362  	return nil
   363  }
   364  
   365  // DeleteIPsecEndpoint deletes a endpoint associated with the remote IP address
   366  func DeleteIPsecEndpoint(remote *net.IPNet) {
   367  	ipsecDeleteXfrmState(remote.IP)
   368  	ipsecDeleteXfrmPolicy(remote.IP)
   369  }
   370  
   371  func decodeIPSecKey(keyRaw string) (int, []byte, error) {
   372  	// As we have released the v1.4.0 docs telling the users to write the
   373  	// k8s secret with the prefix "0x" we have to remove it if it is present,
   374  	// so we can decode the secret.
   375  	if keyRaw == "\"\"" {
   376  		return 0, nil, nil
   377  	}
   378  	keyTrimmed := strings.TrimPrefix(keyRaw, "0x")
   379  	key, err := hex.DecodeString(keyTrimmed)
   380  	return len(keyTrimmed), key, err
   381  }
   382  
   383  // LoadIPSecKeysFile imports IPSec auth and crypt keys from a file. The format
   384  // is to put a key per line as follows, (auth-algo auth-key enc-algo enc-key)
   385  // Returns the authentication overhead in bytes, the key ID, and an error.
   386  func LoadIPSecKeysFile(path string) (int, uint8, error) {
   387  	file, err := os.Open(path)
   388  	if err != nil {
   389  		return 0, 0, err
   390  	}
   391  	defer file.Close()
   392  	return loadIPSecKeys(file)
   393  }
   394  
   395  func loadIPSecKeys(r io.Reader) (int, uint8, error) {
   396  	var spi uint8
   397  	var keyLen int
   398  	scopedLog := log.WithFields(logrus.Fields{
   399  		"spi": spi,
   400  	})
   401  
   402  	if err := encrypt.MapCreate(); err != nil {
   403  		return 0, 0, fmt.Errorf("Encrypt map create failed: %v", err)
   404  	}
   405  
   406  	scanner := bufio.NewScanner(r)
   407  	scanner.Split(bufio.ScanLines)
   408  	for scanner.Scan() {
   409  		var oldSpi uint8
   410  		var authkey []byte
   411  		offset := 0
   412  
   413  		ipSecKey := &ipSecKey{
   414  			ReqID: 1,
   415  		}
   416  
   417  		// Scanning IPsec keys formatted as follows,
   418  		//    auth-algo auth-key enc-algo enc-key
   419  		s := strings.Split(scanner.Text(), " ")
   420  		if len(s) < 2 {
   421  			return 0, 0, fmt.Errorf("missing IPSec keys or invalid format")
   422  		}
   423  
   424  		spiI, err := strconv.Atoi(s[0])
   425  		if err != nil {
   426  			// If no version info is provided assume using key format without
   427  			// versioning and assign SPI.
   428  			spiI = 1
   429  			offset = -1
   430  		}
   431  		if spiI > linux_defaults.IPsecMaxKeyVersion {
   432  			return 0, 0, fmt.Errorf("encryption Key space exhausted, id must be nonzero and less than %d. Attempted %q", linux_defaults.IPsecMaxKeyVersion, s[0])
   433  		}
   434  		if spiI == 0 {
   435  			return 0, 0, fmt.Errorf("zero is not a valid key to disable encryption use `--enable-ipsec=false`, id must be nonzero and less than %d. Attempted %q", linux_defaults.IPsecMaxKeyVersion, s[0])
   436  		}
   437  		spi = uint8(spiI)
   438  
   439  		keyLen, authkey, err = decodeIPSecKey(s[2+offset])
   440  		if err != nil {
   441  			return 0, 0, fmt.Errorf("unable to decode authkey string %q", s[1+offset])
   442  		}
   443  		authname := s[1+offset]
   444  
   445  		if strings.HasPrefix(authname, "rfc") {
   446  			icvLen, err := strconv.Atoi(s[3+offset])
   447  			if err != nil {
   448  				return 0, 0, fmt.Errorf("ICVLen is invalid or missing")
   449  			}
   450  
   451  			if icvLen != 96 && icvLen != 128 && icvLen != 256 {
   452  				return 0, 0, fmt.Errorf("Unknown ICVLen accepts 96, 128, 256")
   453  			}
   454  
   455  			ipSecKey.Aead = &netlink.XfrmStateAlgo{
   456  				Name:   authname,
   457  				Key:    authkey,
   458  				ICVLen: icvLen,
   459  			}
   460  			keyLen = icvLen / 8
   461  		} else {
   462  			_, enckey, err := decodeIPSecKey(s[4+offset])
   463  			if err != nil {
   464  				return 0, 0, fmt.Errorf("unable to decode enckey string %q", s[3+offset])
   465  			}
   466  
   467  			encname := s[3+offset]
   468  
   469  			ipSecKey.Auth = &netlink.XfrmStateAlgo{
   470  				Name: authname,
   471  				Key:  authkey,
   472  			}
   473  			ipSecKey.Crypt = &netlink.XfrmStateAlgo{
   474  				Name: encname,
   475  				Key:  enckey,
   476  			}
   477  		}
   478  
   479  		ipSecKey.Spi = spi
   480  
   481  		if len(s) == 6+offset {
   482  			if ipSecKeysGlobal[s[5+offset]] != nil {
   483  				oldSpi = ipSecKeysGlobal[s[5+offset]].Spi
   484  			}
   485  			ipSecKeysGlobal[s[5+offset]] = ipSecKey
   486  		} else {
   487  			if ipSecKeysGlobal[""] != nil {
   488  				oldSpi = ipSecKeysGlobal[""].Spi
   489  			}
   490  			ipSecKeysGlobal[""] = ipSecKey
   491  		}
   492  
   493  		// Detect a version change and call cleanup routine to remove old
   494  		// keys after a timeout period. We also want to ensure on restart
   495  		// we remove any stale keys for example when a restart changes keys.
   496  		// In the restart case oldSpi will be '0' and cause the delete logic
   497  		// to run.
   498  		if oldSpi != ipSecKey.Spi {
   499  			go func() {
   500  				time.Sleep(linux_defaults.IPsecKeyDeleteDelay)
   501  				scopedLog.Info("New encryption keys reclaiming SPI")
   502  				ipsecDeleteXfrmSpi(ipSecKey.Spi)
   503  			}()
   504  		}
   505  	}
   506  	if err := encrypt.MapUpdateContext(0, spi); err != nil {
   507  		scopedLog.WithError(err).Warn("cilium_encrypt_state map updated failed:")
   508  		return 0, 0, err
   509  	}
   510  	return keyLen, spi, nil
   511  }
   512  
   513  // EnableIPv6Forwarding sets proc file to enable IPv6 forwarding
   514  func EnableIPv6Forwarding() error {
   515  	ip6ConfPath := "/proc/sys/net/ipv6/conf/"
   516  	device := "all"
   517  	forwarding := "forwarding"
   518  	forwardingOn := "1"
   519  	path := filepath.Join(ip6ConfPath, device, forwarding)
   520  	return ioutil.WriteFile(path, []byte(forwardingOn), 0644)
   521  }
   522  
   523  // DeleteIPsecEncryptRoute removes nodes in main routing table by walking
   524  // routes and matching route protocol type.
   525  func DeleteIPsecEncryptRoute() {
   526  	filter := &netlink.Route{
   527  		Protocol: route.EncryptRouteProtocol,
   528  	}
   529  
   530  	for _, family := range []int{netlink.FAMILY_V4, netlink.FAMILY_V6} {
   531  		routes, err := netlink.RouteListFiltered(family, filter, netlink.RT_FILTER_PROTOCOL)
   532  		if err != nil {
   533  			log.WithError(err).Error("Unable to list direct routes")
   534  			return
   535  		}
   536  
   537  		for _, rt := range routes {
   538  			if err := netlink.RouteDel(&rt); err != nil {
   539  				log.WithError(err).Warningf("Unable to delete direct node route %s", rt.String())
   540  			}
   541  		}
   542  	}
   543  }