github.com/fcwu/docker@v1.4.2-0.20150115145920-2a69ca89f0df/daemon/networkdriver/bridge/driver.go (about)

     1  package bridge
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"net"
     7  	"os"
     8  	"sync"
     9  
    10  	log "github.com/Sirupsen/logrus"
    11  	"github.com/docker/docker/daemon/networkdriver"
    12  	"github.com/docker/docker/daemon/networkdriver/ipallocator"
    13  	"github.com/docker/docker/daemon/networkdriver/portmapper"
    14  	"github.com/docker/docker/engine"
    15  	"github.com/docker/docker/nat"
    16  	"github.com/docker/docker/pkg/iptables"
    17  	"github.com/docker/docker/pkg/networkfs/resolvconf"
    18  	"github.com/docker/docker/pkg/parsers/kernel"
    19  	"github.com/docker/libcontainer/netlink"
    20  )
    21  
const (
	DefaultNetworkBridge     = "docker0" // bridge name InitDriver uses when the job sets no "BridgeIface"
	MaxAllocatedPortAttempts = 10        // upper bound on portmapper.Map attempts in AllocatePort
)
    26  
    27  // Network interface represents the networking stack of a container
    28  type networkInterface struct {
    29  	IP           net.IP
    30  	PortMappings []net.Addr // there are mappings to the host interfaces
    31  }
    32  
    33  type ifaces struct {
    34  	c map[string]*networkInterface
    35  	sync.Mutex
    36  }
    37  
    38  func (i *ifaces) Set(key string, n *networkInterface) {
    39  	i.Lock()
    40  	i.c[key] = n
    41  	i.Unlock()
    42  }
    43  
    44  func (i *ifaces) Get(key string) *networkInterface {
    45  	i.Lock()
    46  	res := i.c[key]
    47  	i.Unlock()
    48  	return res
    49  }
    50  
var (
	// addrs lists the candidate bridge addresses, in order of preference.
	// configureBridge walks this list when no bridge IP was supplied and uses
	// the first entry that passes the nameserver and route overlap checks.
	addrs = []string{
		// Here we don't follow the convention of using the 1st IP of the range for the gateway.
		// This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
		// In theory this shouldn't matter - in practice there's bound to be a few scripts relying
		// on the internal addressing or other stupid things like that.
		// They shouldn't, but hey, let's not break them unless we really have to.
		"172.17.42.1/16", // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
		"10.0.42.1/16",   // Don't even try using the entire /8, that's too intrusive
		"10.1.42.1/16",
		"10.42.42.1/16",
		"172.16.42.1/24",
		"172.16.43.1/24",
		"172.16.44.1/24",
		"10.0.42.1/24",
		"10.0.43.1/24",
		"192.168.42.1/24",
		"192.168.43.1/24",
		"192.168.44.1/24",
	}

	// bridgeIface is the name of the bridge interface in use and bridgeNetwork
	// is the address/network read back from it; both are assigned by InitDriver.
	bridgeIface   string
	bridgeNetwork *net.IPNet

	// defaultBindingIP is the host address AllocatePort maps ports on when the
	// job carries no "HostIP"; InitDriver replaces it when "DefaultBindingIP"
	// is set. currentInterfaces holds the records created by Allocate, keyed by
	// the job's first argument.
	defaultBindingIP  = net.ParseIP("0.0.0.0")
	currentInterfaces = ifaces{c: make(map[string]*networkInterface)}
)
    78  
    79  func InitDriver(job *engine.Job) engine.Status {
    80  	var (
    81  		network        *net.IPNet
    82  		enableIPTables = job.GetenvBool("EnableIptables")
    83  		icc            = job.GetenvBool("InterContainerCommunication")
    84  		ipMasq         = job.GetenvBool("EnableIpMasq")
    85  		ipForward      = job.GetenvBool("EnableIpForward")
    86  		bridgeIP       = job.Getenv("BridgeIP")
    87  		fixedCIDR      = job.Getenv("FixedCIDR")
    88  	)
    89  
    90  	if defaultIP := job.Getenv("DefaultBindingIP"); defaultIP != "" {
    91  		defaultBindingIP = net.ParseIP(defaultIP)
    92  	}
    93  
    94  	bridgeIface = job.Getenv("BridgeIface")
    95  	usingDefaultBridge := false
    96  	if bridgeIface == "" {
    97  		usingDefaultBridge = true
    98  		bridgeIface = DefaultNetworkBridge
    99  	}
   100  
   101  	addr, err := networkdriver.GetIfaceAddr(bridgeIface)
   102  	if err != nil {
   103  		// If we're not using the default bridge, fail without trying to create it
   104  		if !usingDefaultBridge {
   105  			return job.Error(err)
   106  		}
   107  		// If the bridge interface is not found (or has no address), try to create it and/or add an address
   108  		if err := configureBridge(bridgeIP); err != nil {
   109  			return job.Error(err)
   110  		}
   111  
   112  		addr, err = networkdriver.GetIfaceAddr(bridgeIface)
   113  		if err != nil {
   114  			return job.Error(err)
   115  		}
   116  		network = addr.(*net.IPNet)
   117  	} else {
   118  		network = addr.(*net.IPNet)
   119  		// validate that the bridge ip matches the ip specified by BridgeIP
   120  		if bridgeIP != "" {
   121  			bip, _, err := net.ParseCIDR(bridgeIP)
   122  			if err != nil {
   123  				return job.Error(err)
   124  			}
   125  			if !network.IP.Equal(bip) {
   126  				return job.Errorf("bridge ip (%s) does not match existing bridge configuration %s", network.IP, bip)
   127  			}
   128  		}
   129  	}
   130  
   131  	// Configure iptables for link support
   132  	if enableIPTables {
   133  		if err := setupIPTables(addr, icc, ipMasq); err != nil {
   134  			return job.Error(err)
   135  		}
   136  	}
   137  
   138  	if ipForward {
   139  		// Enable IPv4 forwarding
   140  		if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil {
   141  			job.Logf("WARNING: unable to enable IPv4 forwarding: %s\n", err)
   142  		}
   143  	}
   144  
   145  	// We can always try removing the iptables
   146  	if err := iptables.RemoveExistingChain("DOCKER", iptables.Nat); err != nil {
   147  		return job.Error(err)
   148  	}
   149  
   150  	if enableIPTables {
   151  		_, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Nat)
   152  		if err != nil {
   153  			return job.Error(err)
   154  		}
   155  		chain, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Filter)
   156  		if err != nil {
   157  			return job.Error(err)
   158  		}
   159  		portmapper.SetIptablesChain(chain)
   160  	}
   161  
   162  	bridgeNetwork = network
   163  	if fixedCIDR != "" {
   164  		_, subnet, err := net.ParseCIDR(fixedCIDR)
   165  		if err != nil {
   166  			return job.Error(err)
   167  		}
   168  		log.Debugf("Subnet: %v", subnet)
   169  		if err := ipallocator.RegisterSubnet(bridgeNetwork, subnet); err != nil {
   170  			return job.Error(err)
   171  		}
   172  	}
   173  
   174  	// https://github.com/docker/docker/issues/2768
   175  	job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", bridgeNetwork.IP)
   176  
   177  	for name, f := range map[string]engine.Handler{
   178  		"allocate_interface": Allocate,
   179  		"release_interface":  Release,
   180  		"allocate_port":      AllocatePort,
   181  		"link":               LinkContainers,
   182  	} {
   183  		if err := job.Eng.Register(name, f); err != nil {
   184  			return job.Error(err)
   185  		}
   186  	}
   187  	return engine.StatusOK
   188  }
   189  
   190  func setupIPTables(addr net.Addr, icc, ipmasq bool) error {
   191  	// Enable NAT
   192  
   193  	if ipmasq {
   194  		natArgs := []string{"POSTROUTING", "-t", "nat", "-s", addr.String(), "!", "-o", bridgeIface, "-j", "MASQUERADE"}
   195  
   196  		if !iptables.Exists(natArgs...) {
   197  			if output, err := iptables.Raw(append([]string{"-I"}, natArgs...)...); err != nil {
   198  				return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
   199  			} else if len(output) != 0 {
   200  				return &iptables.ChainError{Chain: "POSTROUTING", Output: output}
   201  			}
   202  		}
   203  	}
   204  
   205  	var (
   206  		args       = []string{"FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-j"}
   207  		acceptArgs = append(args, "ACCEPT")
   208  		dropArgs   = append(args, "DROP")
   209  	)
   210  
   211  	if !icc {
   212  		iptables.Raw(append([]string{"-D"}, acceptArgs...)...)
   213  
   214  		if !iptables.Exists(dropArgs...) {
   215  			log.Debugf("Disable inter-container communication")
   216  			if output, err := iptables.Raw(append([]string{"-I"}, dropArgs...)...); err != nil {
   217  				return fmt.Errorf("Unable to prevent intercontainer communication: %s", err)
   218  			} else if len(output) != 0 {
   219  				return fmt.Errorf("Error disabling intercontainer communication: %s", output)
   220  			}
   221  		}
   222  	} else {
   223  		iptables.Raw(append([]string{"-D"}, dropArgs...)...)
   224  
   225  		if !iptables.Exists(acceptArgs...) {
   226  			log.Debugf("Enable inter-container communication")
   227  			if output, err := iptables.Raw(append([]string{"-I"}, acceptArgs...)...); err != nil {
   228  				return fmt.Errorf("Unable to allow intercontainer communication: %s", err)
   229  			} else if len(output) != 0 {
   230  				return fmt.Errorf("Error enabling intercontainer communication: %s", output)
   231  			}
   232  		}
   233  	}
   234  
   235  	// Accept all non-intercontainer outgoing packets
   236  	outgoingArgs := []string{"FORWARD", "-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}
   237  	if !iptables.Exists(outgoingArgs...) {
   238  		if output, err := iptables.Raw(append([]string{"-I"}, outgoingArgs...)...); err != nil {
   239  			return fmt.Errorf("Unable to allow outgoing packets: %s", err)
   240  		} else if len(output) != 0 {
   241  			return &iptables.ChainError{Chain: "FORWARD outgoing", Output: output}
   242  		}
   243  	}
   244  
   245  	// Accept incoming packets for existing connections
   246  	existingArgs := []string{"FORWARD", "-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"}
   247  
   248  	if !iptables.Exists(existingArgs...) {
   249  		if output, err := iptables.Raw(append([]string{"-I"}, existingArgs...)...); err != nil {
   250  			return fmt.Errorf("Unable to allow incoming packets: %s", err)
   251  		} else if len(output) != 0 {
   252  			return &iptables.ChainError{Chain: "FORWARD incoming", Output: output}
   253  		}
   254  	}
   255  	return nil
   256  }
   257  
   258  // configureBridge attempts to create and configure a network bridge interface named `bridgeIface` on the host
   259  // If bridgeIP is empty, it will try to find a non-conflicting IP from the Docker-specified private ranges
   260  // If the bridge `bridgeIface` already exists, it will only perform the IP address association with the existing
   261  // bridge (fixes issue #8444)
   262  // If an address which doesn't conflict with existing interfaces can't be found, an error is returned.
   263  func configureBridge(bridgeIP string) error {
   264  	nameservers := []string{}
   265  	resolvConf, _ := resolvconf.Get()
   266  	// we don't check for an error here, because we don't really care
   267  	// if we can't read /etc/resolv.conf. So instead we skip the append
   268  	// if resolvConf is nil. It either doesn't exist, or we can't read it
   269  	// for some reason.
   270  	if resolvConf != nil {
   271  		nameservers = append(nameservers, resolvconf.GetNameserversAsCIDR(resolvConf)...)
   272  	}
   273  
   274  	var ifaceAddr string
   275  	if len(bridgeIP) != 0 {
   276  		_, _, err := net.ParseCIDR(bridgeIP)
   277  		if err != nil {
   278  			return err
   279  		}
   280  		ifaceAddr = bridgeIP
   281  	} else {
   282  		for _, addr := range addrs {
   283  			_, dockerNetwork, err := net.ParseCIDR(addr)
   284  			if err != nil {
   285  				return err
   286  			}
   287  			if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err == nil {
   288  				if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err == nil {
   289  					ifaceAddr = addr
   290  					break
   291  				} else {
   292  					log.Debugf("%s %s", addr, err)
   293  				}
   294  			}
   295  		}
   296  	}
   297  
   298  	if ifaceAddr == "" {
   299  		return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface)
   300  	}
   301  	log.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr)
   302  
   303  	if err := createBridgeIface(bridgeIface); err != nil {
   304  		// the bridge may already exist, therefore we can ignore an "exists" error
   305  		if !os.IsExist(err) {
   306  			return err
   307  		}
   308  	}
   309  
   310  	iface, err := net.InterfaceByName(bridgeIface)
   311  	if err != nil {
   312  		return err
   313  	}
   314  
   315  	ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr)
   316  	if err != nil {
   317  		return err
   318  	}
   319  
   320  	if netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil {
   321  		return fmt.Errorf("Unable to add private network: %s", err)
   322  	}
   323  	if err := netlink.NetworkLinkUp(iface); err != nil {
   324  		return fmt.Errorf("Unable to start network bridge: %s", err)
   325  	}
   326  	return nil
   327  }
   328  
   329  func createBridgeIface(name string) error {
   330  	kv, err := kernel.GetKernelVersion()
   331  	// only set the bridge's mac address if the kernel version is > 3.3
   332  	// before that it was not supported
   333  	setBridgeMacAddr := err == nil && (kv.Kernel >= 3 && kv.Major >= 3)
   334  	log.Debugf("setting bridge mac address = %v", setBridgeMacAddr)
   335  	return netlink.CreateBridge(name, setBridgeMacAddr)
   336  }
   337  
   338  // Generate a IEEE802 compliant MAC address from the given IP address.
   339  //
   340  // The generator is guaranteed to be consistent: the same IP will always yield the same
   341  // MAC address. This is to avoid ARP cache issues.
   342  func generateMacAddr(ip net.IP) net.HardwareAddr {
   343  	hw := make(net.HardwareAddr, 6)
   344  
   345  	// The first byte of the MAC address has to comply with these rules:
   346  	// 1. Unicast: Set the least-significant bit to 0.
   347  	// 2. Address is locally administered: Set the second-least-significant bit (U/L) to 1.
   348  	// 3. As "small" as possible: The veth address has to be "smaller" than the bridge address.
   349  	hw[0] = 0x02
   350  
   351  	// The first 24 bits of the MAC represent the Organizationally Unique Identifier (OUI).
   352  	// Since this address is locally administered, we can do whatever we want as long as
   353  	// it doesn't conflict with other addresses.
   354  	hw[1] = 0x42
   355  
   356  	// Insert the IP address into the last 32 bits of the MAC address.
   357  	// This is a simple way to guarantee the address will be consistent and unique.
   358  	copy(hw[2:], ip.To4())
   359  
   360  	return hw
   361  }
   362  
   363  // Allocate a network interface
   364  func Allocate(job *engine.Job) engine.Status {
   365  	var (
   366  		ip          net.IP
   367  		mac         net.HardwareAddr
   368  		err         error
   369  		id          = job.Args[0]
   370  		requestedIP = net.ParseIP(job.Getenv("RequestedIP"))
   371  	)
   372  
   373  	if requestedIP != nil {
   374  		ip, err = ipallocator.RequestIP(bridgeNetwork, requestedIP)
   375  	} else {
   376  		ip, err = ipallocator.RequestIP(bridgeNetwork, nil)
   377  	}
   378  	if err != nil {
   379  		return job.Error(err)
   380  	}
   381  
   382  	// If no explicit mac address was given, generate a random one.
   383  	if mac, err = net.ParseMAC(job.Getenv("RequestedMac")); err != nil {
   384  		mac = generateMacAddr(ip)
   385  	}
   386  
   387  	out := engine.Env{}
   388  	out.Set("IP", ip.String())
   389  	out.Set("Mask", bridgeNetwork.Mask.String())
   390  	out.Set("Gateway", bridgeNetwork.IP.String())
   391  	out.Set("MacAddress", mac.String())
   392  	out.Set("Bridge", bridgeIface)
   393  
   394  	size, _ := bridgeNetwork.Mask.Size()
   395  	out.SetInt("IPPrefixLen", size)
   396  
   397  	currentInterfaces.Set(id, &networkInterface{
   398  		IP: ip,
   399  	})
   400  
   401  	out.WriteTo(job.Stdout)
   402  
   403  	return engine.StatusOK
   404  }
   405  
   406  // release an interface for a select ip
   407  func Release(job *engine.Job) engine.Status {
   408  	var (
   409  		id                 = job.Args[0]
   410  		containerInterface = currentInterfaces.Get(id)
   411  	)
   412  
   413  	if containerInterface == nil {
   414  		return job.Errorf("No network information to release for %s", id)
   415  	}
   416  
   417  	for _, nat := range containerInterface.PortMappings {
   418  		if err := portmapper.Unmap(nat); err != nil {
   419  			log.Infof("Unable to unmap port %s: %s", nat, err)
   420  		}
   421  	}
   422  
   423  	if err := ipallocator.ReleaseIP(bridgeNetwork, containerInterface.IP); err != nil {
   424  		log.Infof("Unable to release ip %s", err)
   425  	}
   426  	return engine.StatusOK
   427  }
   428  
   429  // Allocate an external port and map it to the interface
   430  func AllocatePort(job *engine.Job) engine.Status {
   431  	var (
   432  		err error
   433  
   434  		ip            = defaultBindingIP
   435  		id            = job.Args[0]
   436  		hostIP        = job.Getenv("HostIP")
   437  		hostPort      = job.GetenvInt("HostPort")
   438  		containerPort = job.GetenvInt("ContainerPort")
   439  		proto         = job.Getenv("Proto")
   440  		network       = currentInterfaces.Get(id)
   441  	)
   442  
   443  	if hostIP != "" {
   444  		ip = net.ParseIP(hostIP)
   445  		if ip == nil {
   446  			return job.Errorf("Bad parameter: invalid host ip %s", hostIP)
   447  		}
   448  	}
   449  
   450  	// host ip, proto, and host port
   451  	var container net.Addr
   452  	switch proto {
   453  	case "tcp":
   454  		container = &net.TCPAddr{IP: network.IP, Port: containerPort}
   455  	case "udp":
   456  		container = &net.UDPAddr{IP: network.IP, Port: containerPort}
   457  	default:
   458  		return job.Errorf("unsupported address type %s", proto)
   459  	}
   460  
   461  	//
   462  	// Try up to 10 times to get a port that's not already allocated.
   463  	//
   464  	// In the event of failure to bind, return the error that portmapper.Map
   465  	// yields.
   466  	//
   467  
   468  	var host net.Addr
   469  	for i := 0; i < MaxAllocatedPortAttempts; i++ {
   470  		if host, err = portmapper.Map(container, ip, hostPort); err == nil {
   471  			break
   472  		}
   473  		// There is no point in immediately retrying to map an explicitly
   474  		// chosen port.
   475  		if hostPort != 0 {
   476  			job.Logf("Failed to allocate and map port %d: %s", hostPort, err)
   477  			break
   478  		}
   479  		job.Logf("Failed to allocate and map port: %s, retry: %d", err, i+1)
   480  	}
   481  
   482  	if err != nil {
   483  		return job.Error(err)
   484  	}
   485  
   486  	network.PortMappings = append(network.PortMappings, host)
   487  
   488  	out := engine.Env{}
   489  	switch netAddr := host.(type) {
   490  	case *net.TCPAddr:
   491  		out.Set("HostIP", netAddr.IP.String())
   492  		out.SetInt("HostPort", netAddr.Port)
   493  	case *net.UDPAddr:
   494  		out.Set("HostIP", netAddr.IP.String())
   495  		out.SetInt("HostPort", netAddr.Port)
   496  	}
   497  	if _, err := out.WriteTo(job.Stdout); err != nil {
   498  		return job.Error(err)
   499  	}
   500  
   501  	return engine.StatusOK
   502  }
   503  
   504  func LinkContainers(job *engine.Job) engine.Status {
   505  	var (
   506  		action       = job.Args[0]
   507  		nfAction     iptables.Action
   508  		childIP      = job.Getenv("ChildIP")
   509  		parentIP     = job.Getenv("ParentIP")
   510  		ignoreErrors = job.GetenvBool("IgnoreErrors")
   511  		ports        = job.GetenvList("Ports")
   512  	)
   513  
   514  	switch action {
   515  	case "-A":
   516  		nfAction = iptables.Append
   517  	case "-I":
   518  		nfAction = iptables.Insert
   519  	case "-D":
   520  		nfAction = iptables.Delete
   521  	default:
   522  		return job.Errorf("Invalid action '%s' specified", action)
   523  	}
   524  
   525  	ip1 := net.ParseIP(parentIP)
   526  	if ip1 == nil {
   527  		return job.Errorf("parent IP '%s' is invalid", parentIP)
   528  	}
   529  	ip2 := net.ParseIP(childIP)
   530  	if ip2 == nil {
   531  		return job.Errorf("child IP '%s' is invalid", childIP)
   532  	}
   533  
   534  	chain := iptables.Chain{Name: "DOCKER", Bridge: bridgeIface}
   535  	for _, p := range ports {
   536  		port := nat.Port(p)
   537  		if err := chain.Link(nfAction, ip1, ip2, port.Int(), port.Proto()); !ignoreErrors && err != nil {
   538  			return job.Error(err)
   539  		}
   540  	}
   541  	return engine.StatusOK
   542  }