github.com/timstclair/heapster@v0.20.0-alpha1/Godeps/_workspace/src/k8s.io/kubernetes/pkg/util/bandwidth/linux.go (about) 1 // +build linux 2 3 /* 4 Copyright 2015 The Kubernetes Authors All rights reserved. 5 6 Licensed under the Apache License, Version 2.0 (the "License"); 7 you may not use this file except in compliance with the License. 8 You may obtain a copy of the License at 9 10 http://www.apache.org/licenses/LICENSE-2.0 11 12 Unless required by applicable law or agreed to in writing, software 13 distributed under the License is distributed on an "AS IS" BASIS, 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 See the License for the specific language governing permissions and 16 limitations under the License. 17 */ 18 19 package bandwidth 20 21 import ( 22 "bufio" 23 "bytes" 24 "encoding/hex" 25 "fmt" 26 "net" 27 "strings" 28 29 "k8s.io/kubernetes/pkg/api/resource" 30 "k8s.io/kubernetes/pkg/util/exec" 31 "k8s.io/kubernetes/pkg/util/sets" 32 33 "github.com/golang/glog" 34 ) 35 36 // tcShaper provides an implementation of the BandwidthShaper interface on Linux using the 'tc' tool. 37 // In general, using this requires that the caller posses the NET_CAP_ADMIN capability, though if you 38 // do this within an container, it only requires the NS_CAPABLE capability for manipulations to that 39 // container's network namespace. 40 // Uses the hierarchical token bucket queueing discipline (htb), this requires Linux 2.4.20 or newer 41 // or a custom kernel with that queueing discipline backported. 42 type tcShaper struct { 43 e exec.Interface 44 iface string 45 } 46 47 func NewTCShaper(iface string) BandwidthShaper { 48 shaper := &tcShaper{ 49 e: exec.New(), 50 iface: iface, 51 } 52 return shaper 53 } 54 55 func (t *tcShaper) execAndLog(cmdStr string, args ...string) error { 56 glog.V(6).Infof("Running: %s %s", cmdStr, strings.Join(args, " ")) 57 cmd := t.e.Command(cmdStr, args...) 58 out, err := cmd.CombinedOutput() 59 glog.V(6).Infof("Output from tc: %s", string(out)) 60 return err 61 } 62 63 func (t *tcShaper) nextClassID() (int, error) { 64 data, err := t.e.Command("tc", "class", "show", "dev", t.iface).CombinedOutput() 65 if err != nil { 66 return -1, err 67 } 68 69 scanner := bufio.NewScanner(bytes.NewBuffer(data)) 70 classes := sets.String{} 71 for scanner.Scan() { 72 line := strings.TrimSpace(scanner.Text()) 73 // skip empty lines 74 if len(line) == 0 { 75 continue 76 } 77 parts := strings.Split(line, " ") 78 // expected tc line: 79 // class htb 1:1 root prio 0 rate 1000Kbit ceil 1000Kbit burst 1600b cburst 1600b 80 if len(parts) != 14 { 81 return -1, fmt.Errorf("unexpected output from tc: %s (%v)", scanner.Text(), parts) 82 } 83 classes.Insert(parts[2]) 84 } 85 86 // Make sure it doesn't go forever 87 for nextClass := 1; nextClass < 10000; nextClass++ { 88 if !classes.Has(fmt.Sprintf("1:%d", nextClass)) { 89 return nextClass, nil 90 } 91 } 92 // This should really never happen 93 return -1, fmt.Errorf("exhausted class space, please try again") 94 } 95 96 // Convert a CIDR from text to a hex representation 97 // Strips any masked parts of the IP, so 1.2.3.4/16 becomes hex(1.2.0.0)/ffffffff 98 func hexCIDR(cidr string) (string, error) { 99 ip, ipnet, err := net.ParseCIDR(cidr) 100 if err != nil { 101 return "", err 102 } 103 ip = ip.Mask(ipnet.Mask) 104 hexIP := hex.EncodeToString([]byte(ip.To4())) 105 hexMask := ipnet.Mask.String() 106 return hexIP + "/" + hexMask, nil 107 } 108 109 // Convert a CIDR from hex representation to text, opposite of the above. 110 func asciiCIDR(cidr string) (string, error) { 111 parts := strings.Split(cidr, "/") 112 if len(parts) != 2 { 113 return "", fmt.Errorf("unexpected CIDR format: %s", cidr) 114 } 115 ipData, err := hex.DecodeString(parts[0]) 116 if err != nil { 117 return "", err 118 } 119 ip := net.IP(ipData) 120 121 maskData, err := hex.DecodeString(parts[1]) 122 mask := net.IPMask(maskData) 123 size, _ := mask.Size() 124 125 return fmt.Sprintf("%s/%d", ip.String(), size), nil 126 } 127 128 func (t *tcShaper) findCIDRClass(cidr string) (class, handle string, found bool, err error) { 129 data, err := t.e.Command("tc", "filter", "show", "dev", t.iface).CombinedOutput() 130 if err != nil { 131 return "", "", false, err 132 } 133 134 hex, err := hexCIDR(cidr) 135 if err != nil { 136 return "", "", false, err 137 } 138 spec := fmt.Sprintf("match %s", hex) 139 140 scanner := bufio.NewScanner(bytes.NewBuffer(data)) 141 filter := "" 142 for scanner.Scan() { 143 line := strings.TrimSpace(scanner.Text()) 144 if len(line) == 0 { 145 continue 146 } 147 if strings.HasPrefix(line, "filter") { 148 filter = line 149 continue 150 } 151 if strings.Contains(line, spec) { 152 parts := strings.Split(filter, " ") 153 // expected tc line: 154 // filter parent 1: protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 155 if len(parts) != 19 { 156 return "", "", false, fmt.Errorf("unexpected output from tc: %s %d (%v)", filter, len(parts), parts) 157 } 158 return parts[18], parts[9], true, nil 159 } 160 } 161 return "", "", false, nil 162 } 163 164 func makeKBitString(rsrc *resource.Quantity) string { 165 return fmt.Sprintf("%dkbit", (rsrc.Value() / 1000)) 166 } 167 168 func (t *tcShaper) makeNewClass(rate string) (int, error) { 169 class, err := t.nextClassID() 170 if err != nil { 171 return -1, err 172 } 173 if err := t.execAndLog("tc", "class", "add", 174 "dev", t.iface, 175 "parent", "1:", 176 "classid", fmt.Sprintf("1:%d", class), 177 "htb", "rate", rate); err != nil { 178 return -1, err 179 } 180 return class, nil 181 } 182 183 func (t *tcShaper) Limit(cidr string, upload, download *resource.Quantity) (err error) { 184 var downloadClass, uploadClass int 185 if download != nil { 186 if downloadClass, err = t.makeNewClass(makeKBitString(download)); err != nil { 187 return err 188 } 189 if err := t.execAndLog("tc", "filter", "add", 190 "dev", t.iface, 191 "protocol", "ip", 192 "parent", "1:0", 193 "prio", "1", "u32", 194 "match", "ip", "dst", cidr, 195 "flowid", fmt.Sprintf("1:%d", downloadClass)); err != nil { 196 return err 197 } 198 } 199 if upload != nil { 200 if uploadClass, err = t.makeNewClass(makeKBitString(upload)); err != nil { 201 return err 202 } 203 if err := t.execAndLog("tc", "filter", "add", 204 "dev", t.iface, 205 "protocol", "ip", 206 "parent", "1:0", 207 "prio", "1", "u32", 208 "match", "ip", "src", cidr, 209 "flowid", fmt.Sprintf("1:%d", uploadClass)); err != nil { 210 return err 211 } 212 } 213 return nil 214 } 215 216 // tests to see if an interface exists, if it does, return true and the status line for the interface 217 // returns false, "", <err> if an error occurs. 218 func (t *tcShaper) interfaceExists() (bool, string, error) { 219 data, err := t.e.Command("tc", "qdisc", "show", "dev", t.iface).CombinedOutput() 220 if err != nil { 221 return false, "", err 222 } 223 value := strings.TrimSpace(string(data)) 224 if len(value) == 0 { 225 return false, "", nil 226 } 227 return true, value, nil 228 } 229 230 func (t *tcShaper) ReconcileCIDR(cidr string, upload, download *resource.Quantity) error { 231 _, _, found, err := t.findCIDRClass(cidr) 232 if err != nil { 233 return err 234 } 235 if !found { 236 return t.Limit(cidr, upload, download) 237 } 238 // TODO: actually check bandwidth limits here 239 return nil 240 } 241 242 func (t *tcShaper) ReconcileInterface() error { 243 exists, output, err := t.interfaceExists() 244 if err != nil { 245 return err 246 } 247 if !exists { 248 glog.V(4).Info("Didn't find bandwidth interface, creating") 249 return t.initializeInterface() 250 } 251 fields := strings.Split(output, " ") 252 if len(fields) != 12 || fields[1] != "htb" || fields[2] != "1:" { 253 if err := t.deleteInterface(fields[2]); err != nil { 254 return err 255 } 256 return t.initializeInterface() 257 } 258 return nil 259 } 260 261 func (t *tcShaper) initializeInterface() error { 262 return t.execAndLog("tc", "qdisc", "add", "dev", t.iface, "root", "handle", "1:", "htb", "default", "30") 263 } 264 265 func (t *tcShaper) Reset(cidr string) error { 266 class, handle, found, err := t.findCIDRClass(cidr) 267 if err != nil { 268 return err 269 } 270 if !found { 271 return fmt.Errorf("Failed to find cidr: %s on interface: %s", cidr, t.iface) 272 } 273 if err := t.execAndLog("tc", "filter", "del", 274 "dev", t.iface, 275 "parent", "1:", 276 "proto", "ip", 277 "prio", "1", 278 "handle", handle, "u32"); err != nil { 279 return err 280 } 281 return t.execAndLog("tc", "class", "del", "dev", t.iface, "parent", "1:", "classid", class) 282 } 283 284 func (t *tcShaper) deleteInterface(class string) error { 285 return t.execAndLog("tc", "qdisc", "delete", "dev", t.iface, "root", "handle", class) 286 } 287 288 func (t *tcShaper) GetCIDRs() ([]string, error) { 289 data, err := t.e.Command("tc", "filter", "show", "dev", t.iface).CombinedOutput() 290 if err != nil { 291 return nil, err 292 } 293 294 result := []string{} 295 scanner := bufio.NewScanner(bytes.NewBuffer(data)) 296 for scanner.Scan() { 297 line := strings.TrimSpace(scanner.Text()) 298 if len(line) == 0 { 299 continue 300 } 301 if strings.Contains(line, "match") { 302 parts := strings.Split(line, " ") 303 // expected tc line: 304 // match <cidr> at <number> 305 if len(parts) != 4 { 306 return nil, fmt.Errorf("unexpected output: %v", parts) 307 } 308 cidr, err := asciiCIDR(parts[1]) 309 if err != nil { 310 return nil, err 311 } 312 result = append(result, cidr) 313 } 314 } 315 return result, nil 316 }