github.com/timstclair/heapster@v0.20.0-alpha1/Godeps/_workspace/src/k8s.io/kubernetes/pkg/util/bandwidth/linux.go (about)

     1  // +build linux
     2  
     3  /*
     4  Copyright 2015 The Kubernetes Authors All rights reserved.
     5  
     6  Licensed under the Apache License, Version 2.0 (the "License");
     7  you may not use this file except in compliance with the License.
     8  You may obtain a copy of the License at
     9  
    10      http://www.apache.org/licenses/LICENSE-2.0
    11  
    12  Unless required by applicable law or agreed to in writing, software
    13  distributed under the License is distributed on an "AS IS" BASIS,
    14  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  See the License for the specific language governing permissions and
    16  limitations under the License.
    17  */
    18  
    19  package bandwidth
    20  
    21  import (
    22  	"bufio"
    23  	"bytes"
    24  	"encoding/hex"
    25  	"fmt"
    26  	"net"
    27  	"strings"
    28  
    29  	"k8s.io/kubernetes/pkg/api/resource"
    30  	"k8s.io/kubernetes/pkg/util/exec"
    31  	"k8s.io/kubernetes/pkg/util/sets"
    32  
    33  	"github.com/golang/glog"
    34  )
    35  
// tcShaper provides an implementation of the BandwidthShaper interface on Linux using the 'tc' tool.
// In general, using this requires that the caller possess the CAP_NET_ADMIN capability, though if you
// do this within a container, it only requires the NS_CAPABLE capability for manipulations to that
// container's network namespace.
// Uses the hierarchical token bucket queueing discipline (htb), this requires Linux 2.4.20 or newer
// or a custom kernel with that queueing discipline backported.
type tcShaper struct {
	e     exec.Interface // used to run every tc command
	iface string         // passed to tc as the "dev" argument
}
    46  
    47  func NewTCShaper(iface string) BandwidthShaper {
    48  	shaper := &tcShaper{
    49  		e:     exec.New(),
    50  		iface: iface,
    51  	}
    52  	return shaper
    53  }
    54  
    55  func (t *tcShaper) execAndLog(cmdStr string, args ...string) error {
    56  	glog.V(6).Infof("Running: %s %s", cmdStr, strings.Join(args, " "))
    57  	cmd := t.e.Command(cmdStr, args...)
    58  	out, err := cmd.CombinedOutput()
    59  	glog.V(6).Infof("Output from tc: %s", string(out))
    60  	return err
    61  }
    62  
    63  func (t *tcShaper) nextClassID() (int, error) {
    64  	data, err := t.e.Command("tc", "class", "show", "dev", t.iface).CombinedOutput()
    65  	if err != nil {
    66  		return -1, err
    67  	}
    68  
    69  	scanner := bufio.NewScanner(bytes.NewBuffer(data))
    70  	classes := sets.String{}
    71  	for scanner.Scan() {
    72  		line := strings.TrimSpace(scanner.Text())
    73  		// skip empty lines
    74  		if len(line) == 0 {
    75  			continue
    76  		}
    77  		parts := strings.Split(line, " ")
    78  		// expected tc line:
    79  		// class htb 1:1 root prio 0 rate 1000Kbit ceil 1000Kbit burst 1600b cburst 1600b
    80  		if len(parts) != 14 {
    81  			return -1, fmt.Errorf("unexpected output from tc: %s (%v)", scanner.Text(), parts)
    82  		}
    83  		classes.Insert(parts[2])
    84  	}
    85  
    86  	// Make sure it doesn't go forever
    87  	for nextClass := 1; nextClass < 10000; nextClass++ {
    88  		if !classes.Has(fmt.Sprintf("1:%d", nextClass)) {
    89  			return nextClass, nil
    90  		}
    91  	}
    92  	// This should really never happen
    93  	return -1, fmt.Errorf("exhausted class space, please try again")
    94  }
    95  
    96  // Convert a CIDR from text to a hex representation
    97  // Strips any masked parts of the IP, so 1.2.3.4/16 becomes hex(1.2.0.0)/ffffffff
    98  func hexCIDR(cidr string) (string, error) {
    99  	ip, ipnet, err := net.ParseCIDR(cidr)
   100  	if err != nil {
   101  		return "", err
   102  	}
   103  	ip = ip.Mask(ipnet.Mask)
   104  	hexIP := hex.EncodeToString([]byte(ip.To4()))
   105  	hexMask := ipnet.Mask.String()
   106  	return hexIP + "/" + hexMask, nil
   107  }
   108  
   109  // Convert a CIDR from hex representation to text, opposite of the above.
   110  func asciiCIDR(cidr string) (string, error) {
   111  	parts := strings.Split(cidr, "/")
   112  	if len(parts) != 2 {
   113  		return "", fmt.Errorf("unexpected CIDR format: %s", cidr)
   114  	}
   115  	ipData, err := hex.DecodeString(parts[0])
   116  	if err != nil {
   117  		return "", err
   118  	}
   119  	ip := net.IP(ipData)
   120  
   121  	maskData, err := hex.DecodeString(parts[1])
   122  	mask := net.IPMask(maskData)
   123  	size, _ := mask.Size()
   124  
   125  	return fmt.Sprintf("%s/%d", ip.String(), size), nil
   126  }
   127  
   128  func (t *tcShaper) findCIDRClass(cidr string) (class, handle string, found bool, err error) {
   129  	data, err := t.e.Command("tc", "filter", "show", "dev", t.iface).CombinedOutput()
   130  	if err != nil {
   131  		return "", "", false, err
   132  	}
   133  
   134  	hex, err := hexCIDR(cidr)
   135  	if err != nil {
   136  		return "", "", false, err
   137  	}
   138  	spec := fmt.Sprintf("match %s", hex)
   139  
   140  	scanner := bufio.NewScanner(bytes.NewBuffer(data))
   141  	filter := ""
   142  	for scanner.Scan() {
   143  		line := strings.TrimSpace(scanner.Text())
   144  		if len(line) == 0 {
   145  			continue
   146  		}
   147  		if strings.HasPrefix(line, "filter") {
   148  			filter = line
   149  			continue
   150  		}
   151  		if strings.Contains(line, spec) {
   152  			parts := strings.Split(filter, " ")
   153  			// expected tc line:
   154  			// filter parent 1: protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
   155  			if len(parts) != 19 {
   156  				return "", "", false, fmt.Errorf("unexpected output from tc: %s %d (%v)", filter, len(parts), parts)
   157  			}
   158  			return parts[18], parts[9], true, nil
   159  		}
   160  	}
   161  	return "", "", false, nil
   162  }
   163  
   164  func makeKBitString(rsrc *resource.Quantity) string {
   165  	return fmt.Sprintf("%dkbit", (rsrc.Value() / 1000))
   166  }
   167  
   168  func (t *tcShaper) makeNewClass(rate string) (int, error) {
   169  	class, err := t.nextClassID()
   170  	if err != nil {
   171  		return -1, err
   172  	}
   173  	if err := t.execAndLog("tc", "class", "add",
   174  		"dev", t.iface,
   175  		"parent", "1:",
   176  		"classid", fmt.Sprintf("1:%d", class),
   177  		"htb", "rate", rate); err != nil {
   178  		return -1, err
   179  	}
   180  	return class, nil
   181  }
   182  
   183  func (t *tcShaper) Limit(cidr string, upload, download *resource.Quantity) (err error) {
   184  	var downloadClass, uploadClass int
   185  	if download != nil {
   186  		if downloadClass, err = t.makeNewClass(makeKBitString(download)); err != nil {
   187  			return err
   188  		}
   189  		if err := t.execAndLog("tc", "filter", "add",
   190  			"dev", t.iface,
   191  			"protocol", "ip",
   192  			"parent", "1:0",
   193  			"prio", "1", "u32",
   194  			"match", "ip", "dst", cidr,
   195  			"flowid", fmt.Sprintf("1:%d", downloadClass)); err != nil {
   196  			return err
   197  		}
   198  	}
   199  	if upload != nil {
   200  		if uploadClass, err = t.makeNewClass(makeKBitString(upload)); err != nil {
   201  			return err
   202  		}
   203  		if err := t.execAndLog("tc", "filter", "add",
   204  			"dev", t.iface,
   205  			"protocol", "ip",
   206  			"parent", "1:0",
   207  			"prio", "1", "u32",
   208  			"match", "ip", "src", cidr,
   209  			"flowid", fmt.Sprintf("1:%d", uploadClass)); err != nil {
   210  			return err
   211  		}
   212  	}
   213  	return nil
   214  }
   215  
   216  // tests to see if an interface exists, if it does, return true and the status line for the interface
   217  // returns false, "", <err> if an error occurs.
   218  func (t *tcShaper) interfaceExists() (bool, string, error) {
   219  	data, err := t.e.Command("tc", "qdisc", "show", "dev", t.iface).CombinedOutput()
   220  	if err != nil {
   221  		return false, "", err
   222  	}
   223  	value := strings.TrimSpace(string(data))
   224  	if len(value) == 0 {
   225  		return false, "", nil
   226  	}
   227  	return true, value, nil
   228  }
   229  
   230  func (t *tcShaper) ReconcileCIDR(cidr string, upload, download *resource.Quantity) error {
   231  	_, _, found, err := t.findCIDRClass(cidr)
   232  	if err != nil {
   233  		return err
   234  	}
   235  	if !found {
   236  		return t.Limit(cidr, upload, download)
   237  	}
   238  	// TODO: actually check bandwidth limits here
   239  	return nil
   240  }
   241  
   242  func (t *tcShaper) ReconcileInterface() error {
   243  	exists, output, err := t.interfaceExists()
   244  	if err != nil {
   245  		return err
   246  	}
   247  	if !exists {
   248  		glog.V(4).Info("Didn't find bandwidth interface, creating")
   249  		return t.initializeInterface()
   250  	}
   251  	fields := strings.Split(output, " ")
   252  	if len(fields) != 12 || fields[1] != "htb" || fields[2] != "1:" {
   253  		if err := t.deleteInterface(fields[2]); err != nil {
   254  			return err
   255  		}
   256  		return t.initializeInterface()
   257  	}
   258  	return nil
   259  }
   260  
   261  func (t *tcShaper) initializeInterface() error {
   262  	return t.execAndLog("tc", "qdisc", "add", "dev", t.iface, "root", "handle", "1:", "htb", "default", "30")
   263  }
   264  
   265  func (t *tcShaper) Reset(cidr string) error {
   266  	class, handle, found, err := t.findCIDRClass(cidr)
   267  	if err != nil {
   268  		return err
   269  	}
   270  	if !found {
   271  		return fmt.Errorf("Failed to find cidr: %s on interface: %s", cidr, t.iface)
   272  	}
   273  	if err := t.execAndLog("tc", "filter", "del",
   274  		"dev", t.iface,
   275  		"parent", "1:",
   276  		"proto", "ip",
   277  		"prio", "1",
   278  		"handle", handle, "u32"); err != nil {
   279  		return err
   280  	}
   281  	return t.execAndLog("tc", "class", "del", "dev", t.iface, "parent", "1:", "classid", class)
   282  }
   283  
   284  func (t *tcShaper) deleteInterface(class string) error {
   285  	return t.execAndLog("tc", "qdisc", "delete", "dev", t.iface, "root", "handle", class)
   286  }
   287  
   288  func (t *tcShaper) GetCIDRs() ([]string, error) {
   289  	data, err := t.e.Command("tc", "filter", "show", "dev", t.iface).CombinedOutput()
   290  	if err != nil {
   291  		return nil, err
   292  	}
   293  
   294  	result := []string{}
   295  	scanner := bufio.NewScanner(bytes.NewBuffer(data))
   296  	for scanner.Scan() {
   297  		line := strings.TrimSpace(scanner.Text())
   298  		if len(line) == 0 {
   299  			continue
   300  		}
   301  		if strings.Contains(line, "match") {
   302  			parts := strings.Split(line, " ")
   303  			// expected tc line:
   304  			// match <cidr> at <number>
   305  			if len(parts) != 4 {
   306  				return nil, fmt.Errorf("unexpected output: %v", parts)
   307  			}
   308  			cidr, err := asciiCIDR(parts[1])
   309  			if err != nil {
   310  				return nil, err
   311  			}
   312  			result = append(result, cidr)
   313  		}
   314  	}
   315  	return result, nil
   316  }