k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/cmd/kube-proxy/app/conntrack.go (about)

     1  /*
     2  Copyright 2015 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package app
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"os"
    23  	"strconv"
    24  	"strings"
    25  
    26  	"k8s.io/component-helpers/node/util/sysctl"
    27  	"k8s.io/klog/v2"
    28  	"k8s.io/mount-utils"
    29  )
    30  
    31  // Conntracker is an interface to the global sysctl. Descriptions of the various
    32  // sysctl fields can be found here:
    33  //
    34  // https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt
    35  type Conntracker interface {
    36  	// SetMax adjusts nf_conntrack_max.
    37  	SetMax(ctx context.Context, max int) error
    38  	// SetTCPEstablishedTimeout adjusts nf_conntrack_tcp_timeout_established.
    39  	SetTCPEstablishedTimeout(ctx context.Context, seconds int) error
    40  	// SetTCPCloseWaitTimeout adjusts nf_conntrack_tcp_timeout_close_wait.
    41  	SetTCPCloseWaitTimeout(ctx context.Context, seconds int) error
    42  	// SetTCPBeLiberal adjusts nf_conntrack_tcp_be_liberal.
    43  	SetTCPBeLiberal(ctx context.Context, value int) error
    44  	// SetUDPTimeout adjusts nf_conntrack_udp_timeout.
    45  	SetUDPTimeout(ctx context.Context, seconds int) error
    46  	// SetUDPStreamTimeout adjusts nf_conntrack_udp_timeout_stream.
    47  	SetUDPStreamTimeout(ctx context.Context, seconds int) error
    48  }
    49  
    50  type realConntracker struct {
    51  }
    52  
    53  var errReadOnlySysFS = errors.New("readOnlySysFS")
    54  
    55  func (rct realConntracker) SetMax(ctx context.Context, max int) error {
    56  	logger := klog.FromContext(ctx)
    57  	if err := rct.setIntSysCtl(ctx, "nf_conntrack_max", max); err != nil {
    58  		return err
    59  	}
    60  	logger.Info("Setting nf_conntrack_max", "nfConntrackMax", max)
    61  
    62  	// Linux does not support writing to /sys/module/nf_conntrack/parameters/hashsize
    63  	// when the writer process is not in the initial network namespace
    64  	// (https://github.com/torvalds/linux/blob/v4.10/net/netfilter/nf_conntrack_core.c#L1795-L1796).
    65  	// Usually that's fine. But in some configurations such as with github.com/kinvolk/kubeadm-nspawn,
    66  	// kube-proxy is in another netns.
    67  	// Therefore, check if writing in hashsize is necessary and skip the writing if not.
    68  	hashsize, err := readIntStringFile("/sys/module/nf_conntrack/parameters/hashsize")
    69  	if err != nil {
    70  		return err
    71  	}
    72  	if hashsize >= (max / 4) {
    73  		return nil
    74  	}
    75  
    76  	// sysfs is expected to be mounted as 'rw'. However, it may be
    77  	// unexpectedly mounted as 'ro' by docker because of a known docker
    78  	// issue (https://github.com/docker/docker/issues/24000). Setting
    79  	// conntrack will fail when sysfs is readonly. When that happens, we
    80  	// don't set conntrack hashsize and return a special error
    81  	// errReadOnlySysFS here. The caller should deal with
    82  	// errReadOnlySysFS differently.
    83  	writable, err := rct.isSysFSWritable(ctx)
    84  	if err != nil {
    85  		return err
    86  	}
    87  	if !writable {
    88  		return errReadOnlySysFS
    89  	}
    90  	// TODO: generify this and sysctl to a new sysfs.WriteInt()
    91  	logger.Info("Setting conntrack hashsize", "conntrackHashsize", max/4)
    92  	return writeIntStringFile("/sys/module/nf_conntrack/parameters/hashsize", max/4)
    93  }
    94  
    95  func (rct realConntracker) SetTCPEstablishedTimeout(ctx context.Context, seconds int) error {
    96  	return rct.setIntSysCtl(ctx, "nf_conntrack_tcp_timeout_established", seconds)
    97  }
    98  
    99  func (rct realConntracker) SetTCPCloseWaitTimeout(ctx context.Context, seconds int) error {
   100  	return rct.setIntSysCtl(ctx, "nf_conntrack_tcp_timeout_close_wait", seconds)
   101  }
   102  
   103  func (rct realConntracker) SetTCPBeLiberal(ctx context.Context, value int) error {
   104  	return rct.setIntSysCtl(ctx, "nf_conntrack_tcp_be_liberal", value)
   105  }
   106  
   107  func (rct realConntracker) SetUDPTimeout(ctx context.Context, seconds int) error {
   108  	return rct.setIntSysCtl(ctx, "nf_conntrack_udp_timeout", seconds)
   109  }
   110  
   111  func (rct realConntracker) SetUDPStreamTimeout(ctx context.Context, seconds int) error {
   112  	return rct.setIntSysCtl(ctx, "nf_conntrack_udp_timeout_stream", seconds)
   113  }
   114  
   115  func (rct realConntracker) setIntSysCtl(ctx context.Context, name string, value int) error {
   116  	logger := klog.FromContext(ctx)
   117  	entry := "net/netfilter/" + name
   118  
   119  	sys := sysctl.New()
   120  	if val, _ := sys.GetSysctl(entry); val != value {
   121  		logger.Info("Set sysctl", "entry", entry, "value", value)
   122  		if err := sys.SetSysctl(entry, value); err != nil {
   123  			return err
   124  		}
   125  	}
   126  	return nil
   127  }
   128  
   129  // isSysFSWritable checks /proc/mounts to see whether sysfs is 'rw' or not.
   130  func (rct realConntracker) isSysFSWritable(ctx context.Context) (bool, error) {
   131  	logger := klog.FromContext(ctx)
   132  	const permWritable = "rw"
   133  	const sysfsDevice = "sysfs"
   134  	m := mount.New("" /* default mount path */)
   135  	mountPoints, err := m.List()
   136  	if err != nil {
   137  		logger.Error(err, "Failed to list mount points")
   138  		return false, err
   139  	}
   140  
   141  	for _, mountPoint := range mountPoints {
   142  		if mountPoint.Type != sysfsDevice {
   143  			continue
   144  		}
   145  		// Check whether sysfs is 'rw'
   146  		if len(mountPoint.Opts) > 0 && mountPoint.Opts[0] == permWritable {
   147  			return true, nil
   148  		}
   149  		logger.Error(nil, "Sysfs is not writable", "mountPoint", mountPoint, "mountOptions", mountPoint.Opts)
   150  		return false, errReadOnlySysFS
   151  	}
   152  
   153  	return false, errors.New("no sysfs mounted")
   154  }
   155  
   156  func readIntStringFile(filename string) (int, error) {
   157  	b, err := os.ReadFile(filename)
   158  	if err != nil {
   159  		return -1, err
   160  	}
   161  	return strconv.Atoi(strings.TrimSpace(string(b)))
   162  }
   163  
   164  func writeIntStringFile(filename string, value int) error {
   165  	return os.WriteFile(filename, []byte(strconv.Itoa(value)), 0640)
   166  }