github.com/cilium/cilium@v1.16.2/pkg/mtu/mtu.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package mtu
     5  
     6  import (
     7  	"net"
     8  )
     9  
    10  const (
    11  	// MaxMTU is the highest MTU that can be used for devices and routes
    12  	// handled by Cilium. It will typically be used to configure inbound
    13  	// paths towards containers where it is guaranteed that the packet will
    14  	// not be rerouted to another node, and therefore will not lead to
    15  	// any form of IP fragmentation.
    16  	// One might expect this to be 65535, however Linux seems to cap the
    17  	// MTU of routes at 65520, so we use this value below.
    18  	MaxMTU = 65520
    19  
    20  	// EthernetMTU is the standard MTU for Ethernet devices. It is used
    21  	// as the MTU for container devices when running direct routing mode.
    22  	EthernetMTU = 1500
    23  
    24  	// TunnelOverhead is an approximation for bytes used for tunnel
    25  	// encapsulation. It accounts for:
    26  	//    (Outer ethernet is not accounted against MTU size)
    27  	//    Outer IPv4 header:  20B
    28  	//    Outer UDP header:    8B
    29  	//    Outer VXLAN header:  8B
    30  	//    Original Ethernet:  14B
    31  	//                        ---
    32  	//    Total extra bytes:  50B
    33  	TunnelOverhead = 50
    34  
    35  	// DsrTunnelOverhead is about the GENEVE DSR option that gets inserted
    36  	// by the LB, when addressing a Service in hs-ipcache mode
    37  	DsrTunnelOverhead = 12
    38  
    39  	// EncryptionIPsecOverhead is an approximation for bytes used for
    40  	// encryption. Depending on key size and encryption type the actual
    41  	// size may vary here we do calculations for 128B keys and Auth. The
    42  	// overhead is accounted for as:
    43  	//    Outer IP header:    20B
    44  	//    SPI:		   4B
    45  	//    Sequece Numbers:	   4B
    46  	//    Next Header:         1B
    47  	//    ICV:		  16B
    48  	//    Padding:            16B
    49  	//    128bit Auth:        16B
    50  	//			  ---
    51  	//    Total extra bytes:  77B
    52  	EncryptionIPsecOverhead = 77
    53  
    54  	// EncryptionDefaultAuthKeyLength is 16 representing 128B key recommended
    55  	// size for GCM(AES*) in RFC4106. Users may input other lengths via
    56  	// key secrets.
    57  	EncryptionDefaultAuthKeyLength = 16
    58  
    59  	// WireguardOverhead is an approximation for the overhead of WireGuard
    60  	// encapsulation.
    61  	//
    62  	// https://github.com/torvalds/linux/blob/v5.12/drivers/net/wireguard/device.c#L262:
    63  	//      MESSAGE_MINIMUM_LENGTH:    32B
    64  	//      Outer IPv4 or IPv6 header: 40B
    65  	//      Outer UDP header:           8B
    66  	//                                 ---
    67  	//      Total extra bytes:         80B
    68  	WireguardOverhead = 80
    69  )
    70  
    71  // Configuration is an MTU configuration as returned by NewConfiguration
    72  type Configuration struct {
    73  	// standardMTU is the regular MTU used for configuring devices and
    74  	// routes where packets are expected to be delivered outside the node.
    75  	//
    76  	// Note that this is a singleton for the process including this
    77  	// package. This means, for instance, that when using this from the
    78  	// ``pkg/plugins/*`` sources, it will not respect the settings
    79  	// configured inside the ``daemon/``.
    80  	standardMTU int
    81  
    82  	// tunnelMTU is the MTU used for configuring a tunnel mesh for
    83  	// inter-node connectivity.
    84  	//
    85  	// Similar to StandardMTU, this is a singleton for the process.
    86  	tunnelMTU int
    87  
    88  	// preEncrypMTU is the MTU used for configurations of a encryption route.
    89  	// If tunneling is enabled the tunnelMTU is used which will include
    90  	// additional encryption overhead if needed.
    91  	preEncryptMTU int
    92  
    93  	// postEncryptMTU is the MTU used for configurations of a encryption
    94  	// route _after_ encryption tags have been addded. These will be used
    95  	// in the encryption routing table. The MTU accounts for the tunnel
    96  	// overhead, if any, but assumes packets are already encrypted.
    97  	postEncryptMTU int
    98  
    99  	encapEnabled     bool
   100  	encryptEnabled   bool
   101  	wireguardEnabled bool
   102  
   103  	// Enable route MTU for pod netns when CNI chaining is used
   104  	enableRouteMTUForCNIChaining bool
   105  }
   106  
   107  // NewConfiguration returns a new MTU configuration. The MTU can be manually
   108  // specified, otherwise it will be automatically detected. if encapEnabled is
   109  // true, the MTU is adjusted to account for encapsulation overhead for all
   110  // routes involved in node to node communication.
   111  func NewConfiguration(authKeySize int, encryptEnabled bool, encapEnabled bool, wireguardEnabled bool, hsIpcacheDSRenabled bool, mtu int, mtuDetectIP net.IP, enableRouteMTUForCNIChaining bool) Configuration {
   112  	encryptOverhead := 0
   113  
   114  	if mtu == 0 {
   115  		var err error
   116  
   117  		if mtuDetectIP != nil {
   118  			mtu, err = getMTUFromIf(mtuDetectIP)
   119  		} else {
   120  			mtu, err = autoDetect()
   121  		}
   122  		if err != nil {
   123  			log.WithError(err).Warning("Unable to automatically detect MTU")
   124  			mtu = EthernetMTU
   125  		}
   126  	}
   127  
   128  	if encryptEnabled {
   129  		// Add the difference between the default and the actual key sizes here
   130  		// to account for users specifying non-default auth key lengths.
   131  		encryptOverhead = EncryptionIPsecOverhead + (authKeySize - EncryptionDefaultAuthKeyLength)
   132  	}
   133  
   134  	fullTunnelOverhead := TunnelOverhead
   135  	if hsIpcacheDSRenabled {
   136  		fullTunnelOverhead += DsrTunnelOverhead
   137  	}
   138  
   139  	conf := Configuration{
   140  		standardMTU:                  mtu,
   141  		tunnelMTU:                    mtu - (fullTunnelOverhead + encryptOverhead),
   142  		postEncryptMTU:               mtu - TunnelOverhead,
   143  		preEncryptMTU:                mtu - encryptOverhead,
   144  		encapEnabled:                 encapEnabled,
   145  		encryptEnabled:               encryptEnabled,
   146  		wireguardEnabled:             wireguardEnabled,
   147  		enableRouteMTUForCNIChaining: enableRouteMTUForCNIChaining,
   148  	}
   149  
   150  	if conf.tunnelMTU < 0 {
   151  		conf.tunnelMTU = 0
   152  	}
   153  
   154  	return conf
   155  }
   156  
   157  // GetRoutePostEncryptMTU return the MTU to be used on the encryption routing
   158  // table. This is the MTU without encryption overhead and in the tunnel
   159  // case accounts for the tunnel overhead.
   160  func (c *Configuration) GetRoutePostEncryptMTU() int {
   161  	if c.encapEnabled {
   162  		if c.postEncryptMTU == 0 {
   163  			return EthernetMTU - TunnelOverhead
   164  		}
   165  		return c.postEncryptMTU
   166  
   167  	}
   168  	return c.GetDeviceMTU()
   169  }
   170  
   171  // GetRouteMTU returns the MTU to be used on the network. When running in
   172  // tunneling mode and/or with encryption enabled, this will have tunnel and
   173  // encryption overhead accounted for.
   174  func (c *Configuration) GetRouteMTU() int {
   175  	if c.wireguardEnabled {
   176  		if c.encapEnabled {
   177  			return c.GetDeviceMTU() - (WireguardOverhead + TunnelOverhead)
   178  		}
   179  		return c.GetDeviceMTU() - WireguardOverhead
   180  	}
   181  
   182  	if !c.encapEnabled && !c.encryptEnabled {
   183  		return c.GetDeviceMTU()
   184  	}
   185  
   186  	if c.encryptEnabled && !c.encapEnabled {
   187  		if c.preEncryptMTU == 0 {
   188  			return EthernetMTU - EncryptionIPsecOverhead
   189  		}
   190  		return c.preEncryptMTU
   191  	}
   192  
   193  	if c.tunnelMTU == 0 {
   194  		if c.encryptEnabled {
   195  			return EthernetMTU - (TunnelOverhead + EncryptionIPsecOverhead)
   196  		}
   197  		return EthernetMTU - TunnelOverhead
   198  	}
   199  
   200  	return c.tunnelMTU
   201  }
   202  
// IsEnableRouteMTUForCNIChaining reports the value of the
// enableRouteMTUForCNIChaining flag, i.e. whether the route MTU is enabled
// for the pod netns when CNI chaining is used.
func (c *Configuration) IsEnableRouteMTUForCNIChaining() bool {
	return c.enableRouteMTUForCNIChaining
}
   206  
   207  // GetDeviceMTU returns the MTU to be used on workload facing devices.
   208  func (c *Configuration) GetDeviceMTU() int {
   209  	if c.standardMTU == 0 {
   210  		return EthernetMTU
   211  	}
   212  
   213  	return c.standardMTU
   214  }