k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/cmd/kube-proxy/app/conntrack.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package app 18 19 import ( 20 "context" 21 "errors" 22 "os" 23 "strconv" 24 "strings" 25 26 "k8s.io/component-helpers/node/util/sysctl" 27 "k8s.io/klog/v2" 28 "k8s.io/mount-utils" 29 ) 30 31 // Conntracker is an interface to the global sysctl. Descriptions of the various 32 // sysctl fields can be found here: 33 // 34 // https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt 35 type Conntracker interface { 36 // SetMax adjusts nf_conntrack_max. 37 SetMax(ctx context.Context, max int) error 38 // SetTCPEstablishedTimeout adjusts nf_conntrack_tcp_timeout_established. 39 SetTCPEstablishedTimeout(ctx context.Context, seconds int) error 40 // SetTCPCloseWaitTimeout adjusts nf_conntrack_tcp_timeout_close_wait. 41 SetTCPCloseWaitTimeout(ctx context.Context, seconds int) error 42 // SetTCPBeLiberal adjusts nf_conntrack_tcp_be_liberal. 43 SetTCPBeLiberal(ctx context.Context, value int) error 44 // SetUDPTimeout adjusts nf_conntrack_udp_timeout. 45 SetUDPTimeout(ctx context.Context, seconds int) error 46 // SetUDPStreamTimeout adjusts nf_conntrack_udp_timeout_stream. 47 SetUDPStreamTimeout(ctx context.Context, seconds int) error 48 } 49 50 type realConntracker struct { 51 } 52 53 var errReadOnlySysFS = errors.New("readOnlySysFS") 54 55 func (rct realConntracker) SetMax(ctx context.Context, max int) error { 56 logger := klog.FromContext(ctx) 57 if err := rct.setIntSysCtl(ctx, "nf_conntrack_max", max); err != nil { 58 return err 59 } 60 logger.Info("Setting nf_conntrack_max", "nfConntrackMax", max) 61 62 // Linux does not support writing to /sys/module/nf_conntrack/parameters/hashsize 63 // when the writer process is not in the initial network namespace 64 // (https://github.com/torvalds/linux/blob/v4.10/net/netfilter/nf_conntrack_core.c#L1795-L1796). 65 // Usually that's fine. But in some configurations such as with github.com/kinvolk/kubeadm-nspawn, 66 // kube-proxy is in another netns. 67 // Therefore, check if writing in hashsize is necessary and skip the writing if not. 68 hashsize, err := readIntStringFile("/sys/module/nf_conntrack/parameters/hashsize") 69 if err != nil { 70 return err 71 } 72 if hashsize >= (max / 4) { 73 return nil 74 } 75 76 // sysfs is expected to be mounted as 'rw'. However, it may be 77 // unexpectedly mounted as 'ro' by docker because of a known docker 78 // issue (https://github.com/docker/docker/issues/24000). Setting 79 // conntrack will fail when sysfs is readonly. When that happens, we 80 // don't set conntrack hashsize and return a special error 81 // errReadOnlySysFS here. The caller should deal with 82 // errReadOnlySysFS differently. 83 writable, err := rct.isSysFSWritable(ctx) 84 if err != nil { 85 return err 86 } 87 if !writable { 88 return errReadOnlySysFS 89 } 90 // TODO: generify this and sysctl to a new sysfs.WriteInt() 91 logger.Info("Setting conntrack hashsize", "conntrackHashsize", max/4) 92 return writeIntStringFile("/sys/module/nf_conntrack/parameters/hashsize", max/4) 93 } 94 95 func (rct realConntracker) SetTCPEstablishedTimeout(ctx context.Context, seconds int) error { 96 return rct.setIntSysCtl(ctx, "nf_conntrack_tcp_timeout_established", seconds) 97 } 98 99 func (rct realConntracker) SetTCPCloseWaitTimeout(ctx context.Context, seconds int) error { 100 return rct.setIntSysCtl(ctx, "nf_conntrack_tcp_timeout_close_wait", seconds) 101 } 102 103 func (rct realConntracker) SetTCPBeLiberal(ctx context.Context, value int) error { 104 return rct.setIntSysCtl(ctx, "nf_conntrack_tcp_be_liberal", value) 105 } 106 107 func (rct realConntracker) SetUDPTimeout(ctx context.Context, seconds int) error { 108 return rct.setIntSysCtl(ctx, "nf_conntrack_udp_timeout", seconds) 109 } 110 111 func (rct realConntracker) SetUDPStreamTimeout(ctx context.Context, seconds int) error { 112 return rct.setIntSysCtl(ctx, "nf_conntrack_udp_timeout_stream", seconds) 113 } 114 115 func (rct realConntracker) setIntSysCtl(ctx context.Context, name string, value int) error { 116 logger := klog.FromContext(ctx) 117 entry := "net/netfilter/" + name 118 119 sys := sysctl.New() 120 if val, _ := sys.GetSysctl(entry); val != value { 121 logger.Info("Set sysctl", "entry", entry, "value", value) 122 if err := sys.SetSysctl(entry, value); err != nil { 123 return err 124 } 125 } 126 return nil 127 } 128 129 // isSysFSWritable checks /proc/mounts to see whether sysfs is 'rw' or not. 130 func (rct realConntracker) isSysFSWritable(ctx context.Context) (bool, error) { 131 logger := klog.FromContext(ctx) 132 const permWritable = "rw" 133 const sysfsDevice = "sysfs" 134 m := mount.New("" /* default mount path */) 135 mountPoints, err := m.List() 136 if err != nil { 137 logger.Error(err, "Failed to list mount points") 138 return false, err 139 } 140 141 for _, mountPoint := range mountPoints { 142 if mountPoint.Type != sysfsDevice { 143 continue 144 } 145 // Check whether sysfs is 'rw' 146 if len(mountPoint.Opts) > 0 && mountPoint.Opts[0] == permWritable { 147 return true, nil 148 } 149 logger.Error(nil, "Sysfs is not writable", "mountPoint", mountPoint, "mountOptions", mountPoint.Opts) 150 return false, errReadOnlySysFS 151 } 152 153 return false, errors.New("no sysfs mounted") 154 } 155 156 func readIntStringFile(filename string) (int, error) { 157 b, err := os.ReadFile(filename) 158 if err != nil { 159 return -1, err 160 } 161 return strconv.Atoi(strings.TrimSpace(string(b))) 162 } 163 164 func writeIntStringFile(filename string, value int) error { 165 return os.WriteFile(filename, []byte(strconv.Itoa(value)), 0640) 166 }