github.com/cilium/cilium@v1.16.2/pkg/datapath/linux/bandwidth/bandwidth.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 //go:build linux 5 6 // NOTE: We can only build on linux because we import bwmap which in turn imports pkg/ebpf and pkg/bpf 7 // which throw build errors when building on non-linux platforms. 8 9 package bandwidth 10 11 import ( 12 "fmt" 13 "strings" 14 15 "github.com/cilium/ebpf" 16 "github.com/cilium/ebpf/asm" 17 "k8s.io/apimachinery/pkg/api/resource" 18 19 "github.com/cilium/cilium/pkg/datapath/linux/config/defines" 20 "github.com/cilium/cilium/pkg/datapath/linux/probes" 21 "github.com/cilium/cilium/pkg/datapath/tables" 22 "github.com/cilium/cilium/pkg/datapath/types" 23 "github.com/cilium/cilium/pkg/logging/logfields" 24 "github.com/cilium/cilium/pkg/maps/bwmap" 25 ) 26 27 const ( 28 // EgressBandwidth is the K8s Pod annotation. 29 EgressBandwidth = "kubernetes.io/egress-bandwidth" 30 // IngressBandwidth is the K8s Pod annotation. 31 IngressBandwidth = "kubernetes.io/ingress-bandwidth" 32 33 // FqDefaultHorizon represents maximum allowed departure 34 // time delta in future. Given applications can set SO_TXTIME 35 // from user space this is a limit to prevent buggy applications 36 // to fill the FQ qdisc. 37 FqDefaultHorizon = bwmap.DefaultDropHorizon 38 // FqDefaultBuckets is the default 32k (2^15) bucket limit for bwm. 39 // Too low bucket limit can cause scalability issue. 40 FqDefaultBuckets = 15 41 ) 42 43 type manager struct { 44 resetQueues, enabled bool 45 46 params bandwidthManagerParams 47 } 48 49 func (m *manager) Enabled() bool { 50 return m.enabled 51 } 52 53 func (m *manager) BBREnabled() bool { 54 return m.params.Config.EnableBBR 55 } 56 57 func (m *manager) defines() (defines.Map, error) { 58 cDefinesMap := make(defines.Map) 59 if m.resetQueues { 60 cDefinesMap["RESET_QUEUES"] = "1" 61 } 62 63 if m.Enabled() { 64 cDefinesMap["ENABLE_BANDWIDTH_MANAGER"] = "1" 65 cDefinesMap["THROTTLE_MAP"] = bwmap.MapName 66 cDefinesMap["THROTTLE_MAP_SIZE"] = fmt.Sprintf("%d", bwmap.MapSize) 67 } 68 69 return cDefinesMap, nil 70 } 71 72 func (m *manager) UpdateBandwidthLimit(epID uint16, bytesPerSecond uint64) { 73 if m.enabled { 74 txn := m.params.DB.WriteTxn(m.params.EdtTable) 75 m.params.EdtTable.Insert( 76 txn, 77 bwmap.NewEdt(epID, bytesPerSecond), 78 ) 79 txn.Commit() 80 } 81 } 82 83 func (m *manager) DeleteBandwidthLimit(epID uint16) { 84 if m.enabled { 85 txn := m.params.DB.WriteTxn(m.params.EdtTable) 86 obj, _, found := m.params.EdtTable.Get(txn, bwmap.EdtIDIndex.Query(epID)) 87 if found { 88 m.params.EdtTable.Delete(txn, obj) 89 } 90 txn.Commit() 91 } 92 } 93 94 func GetBytesPerSec(bandwidth string) (uint64, error) { 95 res, err := resource.ParseQuantity(bandwidth) 96 if err != nil { 97 return 0, err 98 } 99 return uint64(res.Value() / 8), err 100 } 101 102 // probe checks the various system requirements of the bandwidth manager and disables it if they are 103 // not met. 104 func (m *manager) probe() error { 105 // We at least need 5.1 kernel for native TCP EDT integration 106 // and writable queue_mapping that we use. Below helper is 107 // available for 5.1 kernels and onwards. 108 kernelGood := probes.HaveProgramHelper(ebpf.SchedCLS, asm.FnSkbEcnSetCe) == nil 109 m.resetQueues = kernelGood 110 if !m.params.Config.EnableBandwidthManager { 111 return nil 112 } 113 if _, err := m.params.Sysctl.Read([]string{"net", "core", "default_qdisc"}); err != nil { 114 m.params.Log.Warn("BPF bandwidth manager could not read procfs. Disabling the feature.", logfields.Error, err) 115 return nil 116 } 117 if !kernelGood { 118 m.params.Log.Warn("BPF bandwidth manager needs kernel 5.1 or newer. Disabling the feature.") 119 return nil 120 } 121 if m.params.Config.EnableBBR { 122 // We at least need 5.18 kernel for Pod-based BBR TCP congestion 123 // control since earlier kernels just clear the skb->tstamp upon 124 // netns traversal. See also: 125 // 126 // - https://lpc.events/event/11/contributions/953/ 127 // - https://lore.kernel.org/bpf/20220302195519.3479274-1-kafai@fb.com/ 128 if probes.HaveProgramHelper(ebpf.SchedCLS, asm.FnSkbSetTstamp) != nil { 129 return fmt.Errorf("cannot enable --%s, needs kernel 5.18 or newer", types.EnableBBRFlag) 130 } 131 } 132 133 // Going via host stack will orphan skb->sk, so we do need BPF host 134 // routing for it to work properly. 135 if m.params.Config.EnableBBR && m.params.DaemonConfig.EnableHostLegacyRouting { 136 return fmt.Errorf("BPF bandwidth manager's BBR setup requires BPF host routing.") 137 } 138 139 if m.params.Config.EnableBandwidthManager && m.params.DaemonConfig.EnableIPSec { 140 m.params.Log.Warn("The bandwidth manager cannot be used with IPSec. Disabling the bandwidth manager.") 141 return nil 142 } 143 144 m.enabled = true 145 return nil 146 } 147 148 func (m *manager) init() error { 149 m.params.Log.Info("Setting up BPF bandwidth manager") 150 151 if err := bwmap.ThrottleMap().OpenOrCreate(); err != nil { 152 return fmt.Errorf("failed to access ThrottleMap: %w", err) 153 } 154 155 if err := setBaselineSysctls(m.params); err != nil { 156 return fmt.Errorf("failed to set sysctl needed by BPF bandwidth manager: %w", err) 157 } 158 return nil 159 } 160 161 func setBaselineSysctls(p bandwidthManagerParams) error { 162 // Ensure interger type sysctls are no smaller than our baseline settings 163 baseIntSettings := []struct { 164 name []string 165 val int64 166 }{ 167 {[]string{"net", "core", "netdev_max_backlog"}, 1000}, 168 {[]string{"net", "core", "somaxconn"}, 4096}, 169 {[]string{"net", "ipv4", "tcp_max_syn_backlog"}, 4096}, 170 } 171 172 for _, setting := range baseIntSettings { 173 currentValue, err := p.Sysctl.ReadInt(setting.name) 174 if err != nil { 175 return fmt.Errorf("read sysctl %s failed: %w", strings.Join(setting.name, "."), err) 176 } 177 178 scopedLog := p.Log.With( 179 logfields.SysParamName, strings.Join(setting.name, "."), 180 logfields.SysParamValue, currentValue, 181 "baselineValue", setting.val, 182 ) 183 184 if currentValue >= setting.val { 185 scopedLog.Info("Skip setting sysctl as it already meets baseline") 186 continue 187 } 188 189 scopedLog.Info("Setting sysctl to baseline for BPF bandwidth manager") 190 if err := p.Sysctl.WriteInt(setting.name, setting.val); err != nil { 191 return fmt.Errorf("set sysctl %s=%d failed: %w", strings.Join(setting.name, "."), setting.val, err) 192 } 193 } 194 195 // Ensure string type sysctls 196 congctl := "cubic" 197 if p.Config.EnableBBR { 198 congctl = "bbr" 199 } 200 201 sysctls := []tables.Sysctl{ 202 {Name: []string{"net", "core", "default_qdisc"}, Val: "fq"}, 203 {Name: []string{"net", "ipv4", "tcp_congestion_control"}, Val: congctl}, 204 } 205 206 // Few extra knobs which can be turned on along with pacing. EnableBBR 207 // also provides the right kernel dependency implicitly as well. 208 if p.Config.EnableBBR { 209 sysctls = append(sysctls, tables.Sysctl{ 210 Name: []string{"net", "ipv4", "tcp_slow_start_after_idle"}, Val: "0", 211 }) 212 } 213 214 if err := p.Sysctl.ApplySettings(sysctls); err != nil { 215 return fmt.Errorf("failed to apply sysctls: %w", err) 216 } 217 218 return nil 219 }