github.com/fcwu/docker@v1.4.2-0.20150115145920-2a69ca89f0df/daemon/networkdriver/bridge/driver.go (about) 1 package bridge 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "net" 7 "os" 8 "sync" 9 10 log "github.com/Sirupsen/logrus" 11 "github.com/docker/docker/daemon/networkdriver" 12 "github.com/docker/docker/daemon/networkdriver/ipallocator" 13 "github.com/docker/docker/daemon/networkdriver/portmapper" 14 "github.com/docker/docker/engine" 15 "github.com/docker/docker/nat" 16 "github.com/docker/docker/pkg/iptables" 17 "github.com/docker/docker/pkg/networkfs/resolvconf" 18 "github.com/docker/docker/pkg/parsers/kernel" 19 "github.com/docker/libcontainer/netlink" 20 ) 21 22 const ( 23 DefaultNetworkBridge = "docker0" 24 MaxAllocatedPortAttempts = 10 25 ) 26 27 // Network interface represents the networking stack of a container 28 type networkInterface struct { 29 IP net.IP 30 PortMappings []net.Addr // there are mappings to the host interfaces 31 } 32 33 type ifaces struct { 34 c map[string]*networkInterface 35 sync.Mutex 36 } 37 38 func (i *ifaces) Set(key string, n *networkInterface) { 39 i.Lock() 40 i.c[key] = n 41 i.Unlock() 42 } 43 44 func (i *ifaces) Get(key string) *networkInterface { 45 i.Lock() 46 res := i.c[key] 47 i.Unlock() 48 return res 49 } 50 51 var ( 52 addrs = []string{ 53 // Here we don't follow the convention of using the 1st IP of the range for the gateway. 54 // This is to use the same gateway IPs as the /24 ranges, which predate the /16 ranges. 55 // In theory this shouldn't matter - in practice there's bound to be a few scripts relying 56 // on the internal addressing or other stupid things like that. 57 // They shouldn't, but hey, let's not break them unless we really have to. 58 "172.17.42.1/16", // Don't use 172.16.0.0/16, it conflicts with EC2 DNS 172.16.0.23 59 "10.0.42.1/16", // Don't even try using the entire /8, that's too intrusive 60 "10.1.42.1/16", 61 "10.42.42.1/16", 62 "172.16.42.1/24", 63 "172.16.43.1/24", 64 "172.16.44.1/24", 65 "10.0.42.1/24", 66 "10.0.43.1/24", 67 "192.168.42.1/24", 68 "192.168.43.1/24", 69 "192.168.44.1/24", 70 } 71 72 bridgeIface string 73 bridgeNetwork *net.IPNet 74 75 defaultBindingIP = net.ParseIP("0.0.0.0") 76 currentInterfaces = ifaces{c: make(map[string]*networkInterface)} 77 ) 78 79 func InitDriver(job *engine.Job) engine.Status { 80 var ( 81 network *net.IPNet 82 enableIPTables = job.GetenvBool("EnableIptables") 83 icc = job.GetenvBool("InterContainerCommunication") 84 ipMasq = job.GetenvBool("EnableIpMasq") 85 ipForward = job.GetenvBool("EnableIpForward") 86 bridgeIP = job.Getenv("BridgeIP") 87 fixedCIDR = job.Getenv("FixedCIDR") 88 ) 89 90 if defaultIP := job.Getenv("DefaultBindingIP"); defaultIP != "" { 91 defaultBindingIP = net.ParseIP(defaultIP) 92 } 93 94 bridgeIface = job.Getenv("BridgeIface") 95 usingDefaultBridge := false 96 if bridgeIface == "" { 97 usingDefaultBridge = true 98 bridgeIface = DefaultNetworkBridge 99 } 100 101 addr, err := networkdriver.GetIfaceAddr(bridgeIface) 102 if err != nil { 103 // If we're not using the default bridge, fail without trying to create it 104 if !usingDefaultBridge { 105 return job.Error(err) 106 } 107 // If the bridge interface is not found (or has no address), try to create it and/or add an address 108 if err := configureBridge(bridgeIP); err != nil { 109 return job.Error(err) 110 } 111 112 addr, err = networkdriver.GetIfaceAddr(bridgeIface) 113 if err != nil { 114 return job.Error(err) 115 } 116 network = addr.(*net.IPNet) 117 } else { 118 network = addr.(*net.IPNet) 119 // validate that the bridge ip matches the ip specified by BridgeIP 120 if bridgeIP != "" { 121 bip, _, err := net.ParseCIDR(bridgeIP) 122 if err != nil { 123 return job.Error(err) 124 } 125 if !network.IP.Equal(bip) { 126 return job.Errorf("bridge ip (%s) does not match existing bridge configuration %s", network.IP, bip) 127 } 128 } 129 } 130 131 // Configure iptables for link support 132 if enableIPTables { 133 if err := setupIPTables(addr, icc, ipMasq); err != nil { 134 return job.Error(err) 135 } 136 } 137 138 if ipForward { 139 // Enable IPv4 forwarding 140 if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte{'1', '\n'}, 0644); err != nil { 141 job.Logf("WARNING: unable to enable IPv4 forwarding: %s\n", err) 142 } 143 } 144 145 // We can always try removing the iptables 146 if err := iptables.RemoveExistingChain("DOCKER", iptables.Nat); err != nil { 147 return job.Error(err) 148 } 149 150 if enableIPTables { 151 _, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Nat) 152 if err != nil { 153 return job.Error(err) 154 } 155 chain, err := iptables.NewChain("DOCKER", bridgeIface, iptables.Filter) 156 if err != nil { 157 return job.Error(err) 158 } 159 portmapper.SetIptablesChain(chain) 160 } 161 162 bridgeNetwork = network 163 if fixedCIDR != "" { 164 _, subnet, err := net.ParseCIDR(fixedCIDR) 165 if err != nil { 166 return job.Error(err) 167 } 168 log.Debugf("Subnet: %v", subnet) 169 if err := ipallocator.RegisterSubnet(bridgeNetwork, subnet); err != nil { 170 return job.Error(err) 171 } 172 } 173 174 // https://github.com/docker/docker/issues/2768 175 job.Eng.Hack_SetGlobalVar("httpapi.bridgeIP", bridgeNetwork.IP) 176 177 for name, f := range map[string]engine.Handler{ 178 "allocate_interface": Allocate, 179 "release_interface": Release, 180 "allocate_port": AllocatePort, 181 "link": LinkContainers, 182 } { 183 if err := job.Eng.Register(name, f); err != nil { 184 return job.Error(err) 185 } 186 } 187 return engine.StatusOK 188 } 189 190 func setupIPTables(addr net.Addr, icc, ipmasq bool) error { 191 // Enable NAT 192 193 if ipmasq { 194 natArgs := []string{"POSTROUTING", "-t", "nat", "-s", addr.String(), "!", "-o", bridgeIface, "-j", "MASQUERADE"} 195 196 if !iptables.Exists(natArgs...) { 197 if output, err := iptables.Raw(append([]string{"-I"}, natArgs...)...); err != nil { 198 return fmt.Errorf("Unable to enable network bridge NAT: %s", err) 199 } else if len(output) != 0 { 200 return &iptables.ChainError{Chain: "POSTROUTING", Output: output} 201 } 202 } 203 } 204 205 var ( 206 args = []string{"FORWARD", "-i", bridgeIface, "-o", bridgeIface, "-j"} 207 acceptArgs = append(args, "ACCEPT") 208 dropArgs = append(args, "DROP") 209 ) 210 211 if !icc { 212 iptables.Raw(append([]string{"-D"}, acceptArgs...)...) 213 214 if !iptables.Exists(dropArgs...) { 215 log.Debugf("Disable inter-container communication") 216 if output, err := iptables.Raw(append([]string{"-I"}, dropArgs...)...); err != nil { 217 return fmt.Errorf("Unable to prevent intercontainer communication: %s", err) 218 } else if len(output) != 0 { 219 return fmt.Errorf("Error disabling intercontainer communication: %s", output) 220 } 221 } 222 } else { 223 iptables.Raw(append([]string{"-D"}, dropArgs...)...) 224 225 if !iptables.Exists(acceptArgs...) { 226 log.Debugf("Enable inter-container communication") 227 if output, err := iptables.Raw(append([]string{"-I"}, acceptArgs...)...); err != nil { 228 return fmt.Errorf("Unable to allow intercontainer communication: %s", err) 229 } else if len(output) != 0 { 230 return fmt.Errorf("Error enabling intercontainer communication: %s", output) 231 } 232 } 233 } 234 235 // Accept all non-intercontainer outgoing packets 236 outgoingArgs := []string{"FORWARD", "-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"} 237 if !iptables.Exists(outgoingArgs...) { 238 if output, err := iptables.Raw(append([]string{"-I"}, outgoingArgs...)...); err != nil { 239 return fmt.Errorf("Unable to allow outgoing packets: %s", err) 240 } else if len(output) != 0 { 241 return &iptables.ChainError{Chain: "FORWARD outgoing", Output: output} 242 } 243 } 244 245 // Accept incoming packets for existing connections 246 existingArgs := []string{"FORWARD", "-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"} 247 248 if !iptables.Exists(existingArgs...) { 249 if output, err := iptables.Raw(append([]string{"-I"}, existingArgs...)...); err != nil { 250 return fmt.Errorf("Unable to allow incoming packets: %s", err) 251 } else if len(output) != 0 { 252 return &iptables.ChainError{Chain: "FORWARD incoming", Output: output} 253 } 254 } 255 return nil 256 } 257 258 // configureBridge attempts to create and configure a network bridge interface named `bridgeIface` on the host 259 // If bridgeIP is empty, it will try to find a non-conflicting IP from the Docker-specified private ranges 260 // If the bridge `bridgeIface` already exists, it will only perform the IP address association with the existing 261 // bridge (fixes issue #8444) 262 // If an address which doesn't conflict with existing interfaces can't be found, an error is returned. 263 func configureBridge(bridgeIP string) error { 264 nameservers := []string{} 265 resolvConf, _ := resolvconf.Get() 266 // we don't check for an error here, because we don't really care 267 // if we can't read /etc/resolv.conf. So instead we skip the append 268 // if resolvConf is nil. It either doesn't exist, or we can't read it 269 // for some reason. 270 if resolvConf != nil { 271 nameservers = append(nameservers, resolvconf.GetNameserversAsCIDR(resolvConf)...) 272 } 273 274 var ifaceAddr string 275 if len(bridgeIP) != 0 { 276 _, _, err := net.ParseCIDR(bridgeIP) 277 if err != nil { 278 return err 279 } 280 ifaceAddr = bridgeIP 281 } else { 282 for _, addr := range addrs { 283 _, dockerNetwork, err := net.ParseCIDR(addr) 284 if err != nil { 285 return err 286 } 287 if err := networkdriver.CheckNameserverOverlaps(nameservers, dockerNetwork); err == nil { 288 if err := networkdriver.CheckRouteOverlaps(dockerNetwork); err == nil { 289 ifaceAddr = addr 290 break 291 } else { 292 log.Debugf("%s %s", addr, err) 293 } 294 } 295 } 296 } 297 298 if ifaceAddr == "" { 299 return fmt.Errorf("Could not find a free IP address range for interface '%s'. Please configure its address manually and run 'docker -b %s'", bridgeIface, bridgeIface) 300 } 301 log.Debugf("Creating bridge %s with network %s", bridgeIface, ifaceAddr) 302 303 if err := createBridgeIface(bridgeIface); err != nil { 304 // the bridge may already exist, therefore we can ignore an "exists" error 305 if !os.IsExist(err) { 306 return err 307 } 308 } 309 310 iface, err := net.InterfaceByName(bridgeIface) 311 if err != nil { 312 return err 313 } 314 315 ipAddr, ipNet, err := net.ParseCIDR(ifaceAddr) 316 if err != nil { 317 return err 318 } 319 320 if netlink.NetworkLinkAddIp(iface, ipAddr, ipNet); err != nil { 321 return fmt.Errorf("Unable to add private network: %s", err) 322 } 323 if err := netlink.NetworkLinkUp(iface); err != nil { 324 return fmt.Errorf("Unable to start network bridge: %s", err) 325 } 326 return nil 327 } 328 329 func createBridgeIface(name string) error { 330 kv, err := kernel.GetKernelVersion() 331 // only set the bridge's mac address if the kernel version is > 3.3 332 // before that it was not supported 333 setBridgeMacAddr := err == nil && (kv.Kernel >= 3 && kv.Major >= 3) 334 log.Debugf("setting bridge mac address = %v", setBridgeMacAddr) 335 return netlink.CreateBridge(name, setBridgeMacAddr) 336 } 337 338 // Generate a IEEE802 compliant MAC address from the given IP address. 339 // 340 // The generator is guaranteed to be consistent: the same IP will always yield the same 341 // MAC address. This is to avoid ARP cache issues. 342 func generateMacAddr(ip net.IP) net.HardwareAddr { 343 hw := make(net.HardwareAddr, 6) 344 345 // The first byte of the MAC address has to comply with these rules: 346 // 1. Unicast: Set the least-significant bit to 0. 347 // 2. Address is locally administered: Set the second-least-significant bit (U/L) to 1. 348 // 3. As "small" as possible: The veth address has to be "smaller" than the bridge address. 349 hw[0] = 0x02 350 351 // The first 24 bits of the MAC represent the Organizationally Unique Identifier (OUI). 352 // Since this address is locally administered, we can do whatever we want as long as 353 // it doesn't conflict with other addresses. 354 hw[1] = 0x42 355 356 // Insert the IP address into the last 32 bits of the MAC address. 357 // This is a simple way to guarantee the address will be consistent and unique. 358 copy(hw[2:], ip.To4()) 359 360 return hw 361 } 362 363 // Allocate a network interface 364 func Allocate(job *engine.Job) engine.Status { 365 var ( 366 ip net.IP 367 mac net.HardwareAddr 368 err error 369 id = job.Args[0] 370 requestedIP = net.ParseIP(job.Getenv("RequestedIP")) 371 ) 372 373 if requestedIP != nil { 374 ip, err = ipallocator.RequestIP(bridgeNetwork, requestedIP) 375 } else { 376 ip, err = ipallocator.RequestIP(bridgeNetwork, nil) 377 } 378 if err != nil { 379 return job.Error(err) 380 } 381 382 // If no explicit mac address was given, generate a random one. 383 if mac, err = net.ParseMAC(job.Getenv("RequestedMac")); err != nil { 384 mac = generateMacAddr(ip) 385 } 386 387 out := engine.Env{} 388 out.Set("IP", ip.String()) 389 out.Set("Mask", bridgeNetwork.Mask.String()) 390 out.Set("Gateway", bridgeNetwork.IP.String()) 391 out.Set("MacAddress", mac.String()) 392 out.Set("Bridge", bridgeIface) 393 394 size, _ := bridgeNetwork.Mask.Size() 395 out.SetInt("IPPrefixLen", size) 396 397 currentInterfaces.Set(id, &networkInterface{ 398 IP: ip, 399 }) 400 401 out.WriteTo(job.Stdout) 402 403 return engine.StatusOK 404 } 405 406 // release an interface for a select ip 407 func Release(job *engine.Job) engine.Status { 408 var ( 409 id = job.Args[0] 410 containerInterface = currentInterfaces.Get(id) 411 ) 412 413 if containerInterface == nil { 414 return job.Errorf("No network information to release for %s", id) 415 } 416 417 for _, nat := range containerInterface.PortMappings { 418 if err := portmapper.Unmap(nat); err != nil { 419 log.Infof("Unable to unmap port %s: %s", nat, err) 420 } 421 } 422 423 if err := ipallocator.ReleaseIP(bridgeNetwork, containerInterface.IP); err != nil { 424 log.Infof("Unable to release ip %s", err) 425 } 426 return engine.StatusOK 427 } 428 429 // Allocate an external port and map it to the interface 430 func AllocatePort(job *engine.Job) engine.Status { 431 var ( 432 err error 433 434 ip = defaultBindingIP 435 id = job.Args[0] 436 hostIP = job.Getenv("HostIP") 437 hostPort = job.GetenvInt("HostPort") 438 containerPort = job.GetenvInt("ContainerPort") 439 proto = job.Getenv("Proto") 440 network = currentInterfaces.Get(id) 441 ) 442 443 if hostIP != "" { 444 ip = net.ParseIP(hostIP) 445 if ip == nil { 446 return job.Errorf("Bad parameter: invalid host ip %s", hostIP) 447 } 448 } 449 450 // host ip, proto, and host port 451 var container net.Addr 452 switch proto { 453 case "tcp": 454 container = &net.TCPAddr{IP: network.IP, Port: containerPort} 455 case "udp": 456 container = &net.UDPAddr{IP: network.IP, Port: containerPort} 457 default: 458 return job.Errorf("unsupported address type %s", proto) 459 } 460 461 // 462 // Try up to 10 times to get a port that's not already allocated. 463 // 464 // In the event of failure to bind, return the error that portmapper.Map 465 // yields. 466 // 467 468 var host net.Addr 469 for i := 0; i < MaxAllocatedPortAttempts; i++ { 470 if host, err = portmapper.Map(container, ip, hostPort); err == nil { 471 break 472 } 473 // There is no point in immediately retrying to map an explicitly 474 // chosen port. 475 if hostPort != 0 { 476 job.Logf("Failed to allocate and map port %d: %s", hostPort, err) 477 break 478 } 479 job.Logf("Failed to allocate and map port: %s, retry: %d", err, i+1) 480 } 481 482 if err != nil { 483 return job.Error(err) 484 } 485 486 network.PortMappings = append(network.PortMappings, host) 487 488 out := engine.Env{} 489 switch netAddr := host.(type) { 490 case *net.TCPAddr: 491 out.Set("HostIP", netAddr.IP.String()) 492 out.SetInt("HostPort", netAddr.Port) 493 case *net.UDPAddr: 494 out.Set("HostIP", netAddr.IP.String()) 495 out.SetInt("HostPort", netAddr.Port) 496 } 497 if _, err := out.WriteTo(job.Stdout); err != nil { 498 return job.Error(err) 499 } 500 501 return engine.StatusOK 502 } 503 504 func LinkContainers(job *engine.Job) engine.Status { 505 var ( 506 action = job.Args[0] 507 nfAction iptables.Action 508 childIP = job.Getenv("ChildIP") 509 parentIP = job.Getenv("ParentIP") 510 ignoreErrors = job.GetenvBool("IgnoreErrors") 511 ports = job.GetenvList("Ports") 512 ) 513 514 switch action { 515 case "-A": 516 nfAction = iptables.Append 517 case "-I": 518 nfAction = iptables.Insert 519 case "-D": 520 nfAction = iptables.Delete 521 default: 522 return job.Errorf("Invalid action '%s' specified", action) 523 } 524 525 ip1 := net.ParseIP(parentIP) 526 if ip1 == nil { 527 return job.Errorf("parent IP '%s' is invalid", parentIP) 528 } 529 ip2 := net.ParseIP(childIP) 530 if ip2 == nil { 531 return job.Errorf("child IP '%s' is invalid", childIP) 532 } 533 534 chain := iptables.Chain{Name: "DOCKER", Bridge: bridgeIface} 535 for _, p := range ports { 536 port := nat.Port(p) 537 if err := chain.Link(nfAction, ip1, ip2, port.Int(), port.Proto()); !ignoreErrors && err != nil { 538 return job.Error(err) 539 } 540 } 541 return engine.StatusOK 542 }