github.com/titanous/docker@v1.4.1/daemon/networkdriver/bridge/driver.go (about)

     1  package bridge
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"net"
     7  	"os"
     8  	"strconv"
     9  	"sync"
    10  
    11  	log "github.com/Sirupsen/logrus"
    12  	"github.com/docker/docker/daemon/networkdriver"
    13  	"github.com/docker/docker/daemon/networkdriver/ipallocator"
    14  	"github.com/docker/docker/daemon/networkdriver/portmapper"
    15  	"github.com/docker/docker/engine"
    16  	"github.com/docker/docker/nat"
    17  	"github.com/docker/docker/pkg/iptables"
    18  	"github.com/docker/docker/pkg/networkfs/resolvconf"
    19  	"github.com/docker/docker/pkg/parsers/kernel"
    20  	"github.com/docker/libcontainer/netlink"
    21  )
    22  
const (
	// DefaultNetworkBridge is the bridge interface name InitDriver falls back
	// to when the job does not provide a "BridgeIface" value.
	DefaultNetworkBridge     = "docker0"
	// MaxAllocatedPortAttempts bounds how many times AllocatePort calls
	// portmapper.Map before giving up.
	MaxAllocatedPortAttempts = 10
)
    27  
    28  // Network interface represents the networking stack of a container
    29  type networkInterface struct {
    30  	IP           net.IP
    31  	PortMappings []net.Addr // there are mappings to the host interfaces
    32  }
    33  
    34  type ifaces struct {
    35  	c map[string]*networkInterface
    36  	sync.Mutex
    37  }
    38  
    39  func (i *ifaces) Set(key string, n *networkInterface) {
    40  	i.Lock()
    41  	i.c[key] = n
    42  	i.Unlock()
    43  }
    44  
    45  func (i *ifaces) Get(key string) *networkInterface {
    46  	i.Lock()
    47  	res := i.c[key]
    48  	i.Unlock()
    49  	return res
    50  }
    51  
var (
	// addrs lists the candidate bridge addresses (in CIDR form) that
	// configureBridge tries, in order, when no explicit bridge IP is given.
	addrs = []string{
		// Here we don't follow the convention of using the 1st IP of the range for the gateway.
		// This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges.
		// In theory this shouldn't matter - in practice there's bound to be a few scripts relying
		// on the internal addressing or other stupid things like that.
		// They shouldn't, but hey, let's not break them unless we really have to.
		"172.17.42.1/16", // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23
		"10.0.42.1/16",   // Don't even try using the entire /8, that's too intrusive
		"10.1.42.1/16",
		"10.42.42.1/16",
		"172.16.42.1/24",
		"172.16.43.1/24",
		"172.16.44.1/24",
		"10.0.42.1/24",
		"10.0.43.1/24",
		"192.168.42.1/24",
		"192.168.43.1/24",
		"192.168.44.1/24",
	}

	// bridgeIface is the name of the bridge interface in use; set by InitDriver.
	bridgeIface   string
	// bridgeNetwork is the address/network of the bridge; set by InitDriver and
	// used by the allocation handlers.
	bridgeNetwork *net.IPNet

	// defaultBindingIP is the host IP AllocatePort binds to when the job gives
	// no "HostIP"; InitDriver may override it from "DefaultBindingIP".
	defaultBindingIP  = net.ParseIP("0.0.0.0")
	// currentInterfaces tracks the interfaces handed out by Allocate, keyed by
	// the id passed as the job's first argument.
	currentInterfaces = ifaces{c: make(map[string]*networkInterface)}
)
    79  
    80  func InitDriver(job *engine.Job) engine.Status {
    81  	var (
    82  		network        *net.IPNet
    83  		enableIPTables = job.GetenvBool("EnableIptables")
    84  		icc            = job.GetenvBool("InterContainerCommunication")
    85  		ipMasq         = job.GetenvBool("EnableIpMasq")
    86  		ipForward      = job.GetenvBool("EnableIpForward")
    87  		bridgeIP       = job.Getenv("BridgeIP")
    88  		fixedCIDR      = job.Getenv("FixedCIDR")
    89  	)
    90  
    91  	if defaultIP := job.Getenv("DefaultBindingIP"); defaultIP != "" {
    92  		defaultBindingIP = net.ParseIP(defaultIP)
    93  	}
    94  
    95  	bridgeIface = job.Getenv("BridgeIface")
    96  	usingDefaultBridge := false
    97  	if bridgeIface == "" {
    98  		usingDefaultBridge = true
    99  		bridgeIface = DefaultNetworkBridge
   100  	}
   101  
   102  	addr, err := networkdriver.GetIfaceAddr(bridgeIface)
   103  	if err != nil {
   104  		// If we're not using the default bridge, fail without trying to create it
   105  		if !usingDefaultBridge {
   106  			return job.Error(err)
   107  		}
   108  		// If the bridge interface is not found (or has no address), try to create it and/or add an address
   109  		if err := configureBridge(bridgeIP); err != nil {
   110  			return job.Error(err)
   111  		}
   112  
   113  		addr, err = networkdriver.GetIfaceAddr(bridgeIface)
   114  		if err != nil {
   115  			return job.Error(err)
   116  		}
   117  		network = addr.(*net.IPNet)
   118  	} else {
   119  		network = addr.(*net.IPNet)
   120  		// validate that the bridge ip matches the ip specified by BridgeIP
   121  		if bridgeIP != "" {
   122  			bip, _, err := net.ParseCIDR(bridgeIP)
   123  			if err != nil {
   124  				return job.Error(err)
   125  			}
   126  			if !network.IP.Equal(bip) {
   127  				return job.Errorf("bridge ip (%s) does not match existing bridge configuration %s", network.IP, bip)
   128  			}
   129  		}
   130  	}
   131  
   132  	// Configure iptables for link support
   133  	if enableIPTables {
   134  		if err := setupIPTables(addr, icc, ipMasq); err != nil {
   135  			return job.Error(err)
   136  		}
   137  	}
   138  
   139  	if ipForward {
   140  		// Enable IPv4 forwarding
   141  		if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil {
   142  			job.Logf("WARNING: unable to enable IPv4 forwarding: %s\n", err)
   143  		}
   144  	}
   145  
   146  	// We can always try removing the iptables
   147  	if err := iptables.RemoveExistingChain("DOCKER"); err != nil {
   148  		return job.Error(err)
   149  	}
   150  
   151  	if enableIPTables {
   152  		chain, err := iptables.NewChain("DOCKER", bridgeIface)
   153  		if err != nil {
   154  			return job.Error(err)
   155  		}
   156  		portmapper.SetIptablesChain(chain)
   157  	}
   158  
   159  	bridgeNetwork = network
   160  	if fixedCIDR != "" {
   161  		_, subnet, err := net.ParseCIDR(fixedCIDR)
   162  		if err != nil {
   163  			return job.Error(err)
   164  		}
   165  		log.Debugf("Subnet: %v", subnet)
   166  		if err := ipallocator.RegisterSubnet(bridgeNetwork, subnet); err != nil {
   167  			return job.Error(err)
   168  		}
   169  	}
   170  
   171  	// https://github.com/docker/docker/issues/2768
   172  	job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", bridgeNetwork.IP)
   173  
   174  	for name, f := range map[string]engine.Handler{
   175  		"allocate_interface": Allocate,
   176  		"release_interface":  Release,
   177  		"allocate_port":      AllocatePort,
   178  		"link":               LinkContainers,
   179  	} {
   180  		if err := job.Eng.Register(name, f); err != nil {
   181  			return job.Error(err)
   182  		}
   183  	}
   184  	return engine.StatusOK
   185  }
   186  
   187  func setupIPTables(addr net.Addr, icc, ipmasq bool) error {
   188  	// Enable NAT
   189  
   190  	if ipmasq {
   191  		natArgs := []string{"POSTROUTING", "-t", "nat", "-s", addr.String(), "!", "-o", bridgeIface, "-j", "MASQUERADE"}
   192  
   193  		if !iptables.Exists(natArgs...) {
   194  			if output, err := iptables.Raw(append([]string{"-I"}, natArgs...)...); err != nil {
   195  				return fmt.Errorf("Unable to enable network bridge NAT: %s", err)
   196  			} else if len(output) != 0 {
   197  				return &iptables.ChainError{Chain: "POSTROUTING", Output: output}
   198  			}
   199  		}
   200  	}
   201  
   202  	var (
   203  		args       = []string{"FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-j"}
   204  		acceptArgs = append(args, "ACCEPT")
   205  		dropArgs   = append(args, "DROP")
   206  	)
   207  
   208  	if !icc {
   209  		iptables.Raw(append([]string{"-D"}, acceptArgs...)...)
   210  
   211  		if !iptables.Exists(dropArgs...) {
   212  			log.Debugf("Disable inter-container communication")
   213  			if output, err := iptables.Raw(append([]string{"-I"}, dropArgs...)...); err != nil {
   214  				return fmt.Errorf("Unable to prevent intercontainer communication: %s", err)
   215  			} else if len(output) != 0 {
   216  				return fmt.Errorf("Error disabling intercontainer communication: %s", output)
   217  			}
   218  		}
   219  	} else {
   220  		iptables.Raw(append([]string{"-D"}, dropArgs...)...)
   221  
   222  		if !iptables.Exists(acceptArgs...) {
   223  			log.Debugf("Enable inter-container communication")
   224  			if output, err := iptables.Raw(append([]string{"-I"}, acceptArgs...)...); err != nil {
   225  				return fmt.Errorf("Unable to allow intercontainer communication: %s", err)
   226  			} else if len(output) != 0 {
   227  				return fmt.Errorf("Error enabling intercontainer communication: %s", output)
   228  			}
   229  		}
   230  	}
   231  
   232  	// Accept all non-intercontainer outgoing packets
   233  	outgoingArgs := []string{"FORWARD", "-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}
   234  	if !iptables.Exists(outgoingArgs...) {
   235  		if output, err := iptables.Raw(append([]string{"-I"}, outgoingArgs...)...); err != nil {
   236  			return fmt.Errorf("Unable to allow outgoing packets: %s", err)
   237  		} else if len(output) != 0 {
   238  			return &iptables.ChainError{Chain: "FORWARD outgoing", Output: output}
   239  		}
   240  	}
   241  
   242  	// Accept incoming packets for existing connections
   243  	existingArgs := []string{"FORWARD", "-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"}
   244  
   245  	if !iptables.Exists(existingArgs...) {
   246  		if output, err := iptables.Raw(append([]string{"-I"}, existingArgs...)...); err != nil {
   247  			return fmt.Errorf("Unable to allow incoming packets: %s", err)
   248  		} else if len(output) != 0 {
   249  			return &iptables.ChainError{Chain: "FORWARD incoming", Output: output}
   250  		}
   251  	}
   252  	return nil
   253  }
   254  
   255  // configureBridge attempts to create and configure a network bridge interface named `bridgeIface` on the host
   256  // If bridgeIP is empty, it will try to find a non-conflicting IP from the Docker-specified private ranges
   257  // If the bridge `bridgeIface` already exists, it will only perform the IP address association with the existing
   258  // bridge (fixes issue #8444)
   259  // If an address which doesn't conflict with existing interfaces can't be found, an error is returned.
   260  func configureBridge(bridgeIP string) error {
   261  	nameservers := []string{}
   262  	resolvConf, _ := resolvconf.Get()
   263  	// we don't check for an error here, because we don't really care
   264  	// if we can't read /etc/resolv.conf. So instead we skip the append
   265  	// if resolvConf is nil. It either doesn't exist, or we can't read it
   266  	// for some reason.
   267  	if resolvConf != nil {
   268  		nameservers = append(nameservers, resolvconf.GetNameserversAsCIDR(resolvConf)...)
   269  	}
   270  
   271  	var ifaceAddr string
   272  	if len(bridgeIP) != 0 {
   273  		_, _, err := net.ParseCIDR(bridgeIP)
   274  		if err != nil {
   275  			return err
   276  		}
   277  		ifaceAddr = bridgeIP
   278  	} else {
   279  		for _, addr := range addrs {
   280  			_, dockerNetwork, err := net.ParseCIDR(addr)
   281  			if err != nil {
   282  				return err
   283  			}
   284  			if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err == nil {
   285  				if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err == nil {
   286  					ifaceAddr = addr
   287  					break
   288  				} else {
   289  					log.Debugf("%s %s", addr, err)
   290  				}
   291  			}
   292  		}
   293  	}
   294  
   295  	if ifaceAddr == "" {
   296  		return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface)
   297  	}
   298  	log.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr)
   299  
   300  	if err := createBridgeIface(bridgeIface); err != nil {
   301  		// the bridge may already exist, therefore we can ignore an "exists" error
   302  		if !os.IsExist(err) {
   303  			return err
   304  		}
   305  	}
   306  
   307  	iface, err := net.InterfaceByName(bridgeIface)
   308  	if err != nil {
   309  		return err
   310  	}
   311  
   312  	ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr)
   313  	if err != nil {
   314  		return err
   315  	}
   316  
   317  	if netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil {
   318  		return fmt.Errorf("Unable to add private network: %s", err)
   319  	}
   320  	if err := netlink.NetworkLinkUp(iface); err != nil {
   321  		return fmt.Errorf("Unable to start network bridge: %s", err)
   322  	}
   323  	return nil
   324  }
   325  
   326  func createBridgeIface(name string) error {
   327  	kv, err := kernel.GetKernelVersion()
   328  	// only set the bridge's mac address if the kernel version is > 3.3
   329  	// before that it was not supported
   330  	setBridgeMacAddr := err == nil && (kv.Kernel >= 3 && kv.Major >= 3)
   331  	log.Debugf("setting bridge mac address = %v", setBridgeMacAddr)
   332  	return netlink.CreateBridge(name, setBridgeMacAddr)
   333  }
   334  
   335  // Generate a IEEE802 compliant MAC address from the given IP address.
   336  //
   337  // The generator is guaranteed to be consistent: the same IP will always yield the same
   338  // MAC address. This is to avoid ARP cache issues.
   339  func generateMacAddr(ip net.IP) net.HardwareAddr {
   340  	hw := make(net.HardwareAddr, 6)
   341  
   342  	// The first byte of the MAC address has to comply with these rules:
   343  	// 1. Unicast: Set the least-significant bit to 0.
   344  	// 2. Address is locally administered: Set the second-least-significant bit (U/L) to 1.
   345  	// 3. As "small" as possible: The veth address has to be "smaller" than the bridge address.
   346  	hw[0] = 0x02
   347  
   348  	// The first 24 bits of the MAC represent the Organizationally Unique Identifier (OUI).
   349  	// Since this address is locally administered, we can do whatever we want as long as
   350  	// it doesn't conflict with other addresses.
   351  	hw[1] = 0x42
   352  
   353  	// Insert the IP address into the last 32 bits of the MAC address.
   354  	// This is a simple way to guarantee the address will be consistent and unique.
   355  	copy(hw[2:], ip.To4())
   356  
   357  	return hw
   358  }
   359  
   360  // Allocate a network interface
   361  func Allocate(job *engine.Job) engine.Status {
   362  	var (
   363  		ip          net.IP
   364  		mac         net.HardwareAddr
   365  		err         error
   366  		id          = job.Args[0]
   367  		requestedIP = net.ParseIP(job.Getenv("RequestedIP"))
   368  	)
   369  
   370  	if requestedIP != nil {
   371  		ip, err = ipallocator.RequestIP(bridgeNetwork, requestedIP)
   372  	} else {
   373  		ip, err = ipallocator.RequestIP(bridgeNetwork, nil)
   374  	}
   375  	if err != nil {
   376  		return job.Error(err)
   377  	}
   378  
   379  	// If no explicit mac address was given, generate a random one.
   380  	if mac, err = net.ParseMAC(job.Getenv("RequestedMac")); err != nil {
   381  		mac = generateMacAddr(ip)
   382  	}
   383  
   384  	out := engine.Env{}
   385  	out.Set("IP", ip.String())
   386  	out.Set("Mask", bridgeNetwork.Mask.String())
   387  	out.Set("Gateway", bridgeNetwork.IP.String())
   388  	out.Set("MacAddress", mac.String())
   389  	out.Set("Bridge", bridgeIface)
   390  
   391  	size, _ := bridgeNetwork.Mask.Size()
   392  	out.SetInt("IPPrefixLen", size)
   393  
   394  	currentInterfaces.Set(id, &networkInterface{
   395  		IP: ip,
   396  	})
   397  
   398  	out.WriteTo(job.Stdout)
   399  
   400  	return engine.StatusOK
   401  }
   402  
   403  // release an interface for a select ip
   404  func Release(job *engine.Job) engine.Status {
   405  	var (
   406  		id                 = job.Args[0]
   407  		containerInterface = currentInterfaces.Get(id)
   408  	)
   409  
   410  	if containerInterface == nil {
   411  		return job.Errorf("No network information to release for %s", id)
   412  	}
   413  
   414  	for _, nat := range containerInterface.PortMappings {
   415  		if err := portmapper.Unmap(nat); err != nil {
   416  			log.Infof("Unable to unmap port %s: %s", nat, err)
   417  		}
   418  	}
   419  
   420  	if err := ipallocator.ReleaseIP(bridgeNetwork, containerInterface.IP); err != nil {
   421  		log.Infof("Unable to release ip %s", err)
   422  	}
   423  	return engine.StatusOK
   424  }
   425  
   426  // Allocate an external port and map it to the interface
   427  func AllocatePort(job *engine.Job) engine.Status {
   428  	var (
   429  		err error
   430  
   431  		ip            = defaultBindingIP
   432  		id            = job.Args[0]
   433  		hostIP        = job.Getenv("HostIP")
   434  		hostPort      = job.GetenvInt("HostPort")
   435  		containerPort = job.GetenvInt("ContainerPort")
   436  		proto         = job.Getenv("Proto")
   437  		network       = currentInterfaces.Get(id)
   438  	)
   439  
   440  	if hostIP != "" {
   441  		ip = net.ParseIP(hostIP)
   442  		if ip == nil {
   443  			return job.Errorf("Bad parameter: invalid host ip %s", hostIP)
   444  		}
   445  	}
   446  
   447  	// host ip, proto, and host port
   448  	var container net.Addr
   449  	switch proto {
   450  	case "tcp":
   451  		container = &net.TCPAddr{IP: network.IP, Port: containerPort}
   452  	case "udp":
   453  		container = &net.UDPAddr{IP: network.IP, Port: containerPort}
   454  	default:
   455  		return job.Errorf("unsupported address type %s", proto)
   456  	}
   457  
   458  	//
   459  	// Try up to 10 times to get a port that's not already allocated.
   460  	//
   461  	// In the event of failure to bind, return the error that portmapper.Map
   462  	// yields.
   463  	//
   464  
   465  	var host net.Addr
   466  	for i := 0; i < MaxAllocatedPortAttempts; i++ {
   467  		if host, err = portmapper.Map(container, ip, hostPort); err == nil {
   468  			break
   469  		}
   470  		// There is no point in immediately retrying to map an explicitly
   471  		// chosen port.
   472  		if hostPort != 0 {
   473  			job.Logf("Failed to allocate and map port %d: %s", hostPort, err)
   474  			break
   475  		}
   476  		job.Logf("Failed to allocate and map port: %s, retry: %d", err, i+1)
   477  	}
   478  
   479  	if err != nil {
   480  		return job.Error(err)
   481  	}
   482  
   483  	network.PortMappings = append(network.PortMappings, host)
   484  
   485  	out := engine.Env{}
   486  	switch netAddr := host.(type) {
   487  	case *net.TCPAddr:
   488  		out.Set("HostIP", netAddr.IP.String())
   489  		out.SetInt("HostPort", netAddr.Port)
   490  	case *net.UDPAddr:
   491  		out.Set("HostIP", netAddr.IP.String())
   492  		out.SetInt("HostPort", netAddr.Port)
   493  	}
   494  	if _, err := out.WriteTo(job.Stdout); err != nil {
   495  		return job.Error(err)
   496  	}
   497  
   498  	return engine.StatusOK
   499  }
   500  
   501  func LinkContainers(job *engine.Job) engine.Status {
   502  	var (
   503  		action       = job.Args[0]
   504  		childIP      = job.Getenv("ChildIP")
   505  		parentIP     = job.Getenv("ParentIP")
   506  		ignoreErrors = job.GetenvBool("IgnoreErrors")
   507  		ports        = job.GetenvList("Ports")
   508  	)
   509  	for _, value := range ports {
   510  		port := nat.Port(value)
   511  		if output, err := iptables.Raw(action, "FORWARD",
   512  			"-i", bridgeIface, "-o", bridgeIface,
   513  			"-p", port.Proto(),
   514  			"-s", parentIP,
   515  			"--dport", strconv.Itoa(port.Int()),
   516  			"-d", childIP,
   517  			"-j", "ACCEPT"); !ignoreErrors && err != nil {
   518  			return job.Error(err)
   519  		} else if len(output) != 0 {
   520  			return job.Errorf("Error toggle iptables forward: %s", output)
   521  		}
   522  
   523  		if output, err := iptables.Raw(action, "FORWARD",
   524  			"-i", bridgeIface, "-o", bridgeIface,
   525  			"-p", port.Proto(),
   526  			"-s", childIP,
   527  			"--sport", strconv.Itoa(port.Int()),
   528  			"-d", parentIP,
   529  			"-j", "ACCEPT"); !ignoreErrors && err != nil {
   530  			return job.Error(err)
   531  		} else if len(output) != 0 {
   532  			return job.Errorf("Error toggle iptables forward: %s", output)
   533  		}
   534  	}
   535  	return engine.StatusOK
   536  }