github.com/jfrazelle/docker@v1.1.2-0.20210712172922-bf78e25fe508/libnetwork/service_linux.go (about) 1 package libnetwork 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 "net" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "runtime" 12 "strconv" 13 "strings" 14 "sync" 15 "syscall" 16 17 "github.com/docker/docker/libnetwork/iptables" 18 "github.com/docker/docker/libnetwork/ns" 19 "github.com/docker/docker/pkg/reexec" 20 "github.com/gogo/protobuf/proto" 21 "github.com/ishidawataru/sctp" 22 "github.com/moby/ipvs" 23 "github.com/sirupsen/logrus" 24 "github.com/vishvananda/netlink/nl" 25 "github.com/vishvananda/netns" 26 ) 27 28 func init() { 29 reexec.Register("fwmarker", fwMarker) 30 reexec.Register("redirector", redirector) 31 } 32 33 // Populate all loadbalancers on the network that the passed endpoint 34 // belongs to, into this sandbox. 35 func (sb *sandbox) populateLoadBalancers(ep *endpoint) { 36 // This is an interface less endpoint. Nothing to do. 37 if ep.Iface() == nil { 38 return 39 } 40 41 n := ep.getNetwork() 42 eIP := ep.Iface().Address() 43 44 if n.ingress { 45 if err := addRedirectRules(sb.Key(), eIP, ep.ingressPorts); err != nil { 46 logrus.Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err) 47 } 48 } 49 } 50 51 func (n *network) findLBEndpointSandbox() (*endpoint, *sandbox, error) { 52 // TODO: get endpoint from store? See EndpointInfo() 53 var ep *endpoint 54 // Find this node's LB sandbox endpoint: there should be exactly one 55 for _, e := range n.Endpoints() { 56 epi := e.Info() 57 if epi != nil && epi.LoadBalancer() { 58 ep = e.(*endpoint) 59 break 60 } 61 } 62 if ep == nil { 63 return nil, nil, fmt.Errorf("Unable to find load balancing endpoint for network %s", n.ID()) 64 } 65 // Get the load balancer sandbox itself as well 66 sb, ok := ep.getSandbox() 67 if !ok { 68 return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID()) 69 } 70 sep := sb.getEndpoint(ep.ID()) 71 if sep == nil { 72 return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID()) 73 } 74 return sep, sb, nil 75 } 76 77 // Searches the OS sandbox for the name of the endpoint interface 78 // within the sandbox. This is required for adding/removing IP 79 // aliases to the interface. 80 func findIfaceDstName(sb *sandbox, ep *endpoint) string { 81 srcName := ep.Iface().SrcName() 82 for _, i := range sb.osSbox.Info().Interfaces() { 83 if i.SrcName() == srcName { 84 return i.DstName() 85 } 86 } 87 return "" 88 } 89 90 // Add loadbalancer backend to the loadbalncer sandbox for the network. 91 // If needed add the service as well. 92 func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) { 93 if len(lb.vip) == 0 { 94 return 95 } 96 ep, sb, err := n.findLBEndpointSandbox() 97 if err != nil { 98 logrus.Errorf("addLBBackend %s/%s: %v", n.ID(), n.Name(), err) 99 return 100 } 101 if sb.osSbox == nil { 102 return 103 } 104 105 eIP := ep.Iface().Address() 106 107 i, err := ipvs.New(sb.Key()) 108 if err != nil { 109 logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb addition: %v", sb.ID(), sb.ContainerID(), sb.Key(), err) 110 return 111 } 112 defer i.Close() 113 114 s := &ipvs.Service{ 115 AddressFamily: nl.FAMILY_V4, 116 FWMark: lb.fwMark, 117 SchedName: ipvs.RoundRobin, 118 } 119 120 if !i.IsServicePresent(s) { 121 // Add IP alias for the VIP to the endpoint 122 ifName := findIfaceDstName(sb, ep) 123 if ifName == "" { 124 logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name()) 125 return 126 } 127 err := sb.osSbox.AddAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)}) 128 if err != nil { 129 logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err) 130 return 131 } 132 133 if sb.ingress { 134 var gwIP net.IP 135 if ep := sb.getGatewayEndpoint(); ep != nil { 136 gwIP = ep.Iface().Address().IP 137 } 138 if err := programIngress(gwIP, lb.service.ingressPorts, false); err != nil { 139 logrus.Errorf("Failed to add ingress: %v", err) 140 return 141 } 142 } 143 144 logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID()) 145 if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil { 146 logrus.Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err) 147 return 148 } 149 150 if err := i.NewService(s); err != nil && err != syscall.EEXIST { 151 logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err) 152 return 153 } 154 } 155 156 d := &ipvs.Destination{ 157 AddressFamily: nl.FAMILY_V4, 158 Address: ip, 159 Weight: 1, 160 } 161 if n.loadBalancerMode == loadBalancerModeDSR { 162 d.ConnectionFlags = ipvs.ConnFwdDirectRoute 163 } 164 165 // Remove the sched name before using the service to add 166 // destination. 167 s.SchedName = "" 168 if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST { 169 logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err) 170 } 171 } 172 173 // Remove loadbalancer backend the load balancing endpoint for this 174 // network. If 'rmService' is true, then remove the service entry as well. 175 // If 'fullRemove' is true then completely remove the entry, otherwise 176 // just deweight it for now. 177 func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) { 178 if len(lb.vip) == 0 { 179 return 180 } 181 ep, sb, err := n.findLBEndpointSandbox() 182 if err != nil { 183 logrus.Debugf("rmLBBackend for %s/%s: %v -- probably transient state", n.ID(), n.Name(), err) 184 return 185 } 186 if sb.osSbox == nil { 187 return 188 } 189 190 eIP := ep.Iface().Address() 191 192 i, err := ipvs.New(sb.Key()) 193 if err != nil { 194 logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb removal: %v", sb.ID(), sb.ContainerID(), sb.Key(), err) 195 return 196 } 197 defer i.Close() 198 199 s := &ipvs.Service{ 200 AddressFamily: nl.FAMILY_V4, 201 FWMark: lb.fwMark, 202 } 203 204 d := &ipvs.Destination{ 205 AddressFamily: nl.FAMILY_V4, 206 Address: ip, 207 Weight: 1, 208 } 209 if n.loadBalancerMode == loadBalancerModeDSR { 210 d.ConnectionFlags = ipvs.ConnFwdDirectRoute 211 } 212 213 if fullRemove { 214 if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT { 215 logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err) 216 } 217 } else { 218 d.Weight = 0 219 if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT { 220 logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err) 221 } 222 } 223 224 if rmService { 225 s.SchedName = ipvs.RoundRobin 226 if err := i.DelService(s); err != nil && err != syscall.ENOENT { 227 logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err) 228 } 229 230 if sb.ingress { 231 var gwIP net.IP 232 if ep := sb.getGatewayEndpoint(); ep != nil { 233 gwIP = ep.Iface().Address().IP 234 } 235 if err := programIngress(gwIP, lb.service.ingressPorts, true); err != nil { 236 logrus.Errorf("Failed to delete ingress: %v", err) 237 } 238 } 239 240 if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil { 241 logrus.Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err) 242 } 243 244 // Remove IP alias from the VIP to the endpoint 245 ifName := findIfaceDstName(sb, ep) 246 if ifName == "" { 247 logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name()) 248 return 249 } 250 err := sb.osSbox.RemoveAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)}) 251 if err != nil { 252 logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err) 253 } 254 } 255 } 256 257 const ingressChain = "DOCKER-INGRESS" 258 259 var ( 260 ingressOnce sync.Once 261 ingressMu sync.Mutex // lock for operations on ingress 262 ingressProxyTbl = make(map[string]io.Closer) 263 portConfigMu sync.Mutex 264 portConfigTbl = make(map[PortConfig]int) 265 ) 266 267 func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig { 268 portConfigMu.Lock() 269 iPorts := make([]*PortConfig, 0, len(ingressPorts)) 270 for _, pc := range ingressPorts { 271 if isDelete { 272 if cnt, ok := portConfigTbl[*pc]; ok { 273 // This is the last reference to this 274 // port config. Delete the port config 275 // and add it to filtered list to be 276 // plumbed. 277 if cnt == 1 { 278 delete(portConfigTbl, *pc) 279 iPorts = append(iPorts, pc) 280 continue 281 } 282 283 portConfigTbl[*pc] = cnt - 1 284 } 285 286 continue 287 } 288 289 if cnt, ok := portConfigTbl[*pc]; ok { 290 portConfigTbl[*pc] = cnt + 1 291 continue 292 } 293 294 // We are adding it for the first time. Add it to the 295 // filter list to be plumbed. 296 portConfigTbl[*pc] = 1 297 iPorts = append(iPorts, pc) 298 } 299 portConfigMu.Unlock() 300 301 return iPorts 302 } 303 304 func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error { 305 // TODO IPv6 support 306 iptable := iptables.GetIptable(iptables.IPv4) 307 308 addDelOpt := "-I" 309 rollbackAddDelOpt := "-D" 310 if isDelete { 311 addDelOpt = "-D" 312 rollbackAddDelOpt = "-I" 313 } 314 315 ingressMu.Lock() 316 defer ingressMu.Unlock() 317 318 chainExists := iptable.ExistChain(ingressChain, iptables.Nat) 319 filterChainExists := iptable.ExistChain(ingressChain, iptables.Filter) 320 321 ingressOnce.Do(func() { 322 // Flush nat table and filter table ingress chain rules during init if it 323 // exists. It might contain stale rules from previous life. 324 if chainExists { 325 if err := iptable.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil { 326 logrus.Errorf("Could not flush nat table ingress chain rules during init: %v", err) 327 } 328 } 329 if filterChainExists { 330 if err := iptable.RawCombinedOutput("-F", ingressChain); err != nil { 331 logrus.Errorf("Could not flush filter table ingress chain rules during init: %v", err) 332 } 333 } 334 }) 335 336 if !isDelete { 337 if !chainExists { 338 if err := iptable.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil { 339 return fmt.Errorf("failed to create ingress chain: %v", err) 340 } 341 } 342 if !filterChainExists { 343 if err := iptable.RawCombinedOutput("-N", ingressChain); err != nil { 344 return fmt.Errorf("failed to create filter table ingress chain: %v", err) 345 } 346 } 347 348 if !iptable.Exists(iptables.Nat, ingressChain, "-j", "RETURN") { 349 if err := iptable.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil { 350 return fmt.Errorf("failed to add return rule in nat table ingress chain: %v", err) 351 } 352 } 353 354 if !iptable.Exists(iptables.Filter, ingressChain, "-j", "RETURN") { 355 if err := iptable.RawCombinedOutput("-A", ingressChain, "-j", "RETURN"); err != nil { 356 return fmt.Errorf("failed to add return rule to filter table ingress chain: %v", err) 357 } 358 } 359 360 for _, chain := range []string{"OUTPUT", "PREROUTING"} { 361 if !iptable.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) { 362 if err := iptable.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil { 363 return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err) 364 } 365 } 366 } 367 368 if !iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) { 369 if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil { 370 return fmt.Errorf("failed to add jump rule to %s in filter table forward chain: %v", ingressChain, err) 371 } 372 arrangeUserFilterRule() 373 } 374 375 oifName, err := findOIFName(gwIP) 376 if err != nil { 377 return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err) 378 } 379 380 path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet") 381 if err := ioutil.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil { //nolint:gosec // gosec complains about perms here, which must be 0644 in this case 382 return fmt.Errorf("could not write to %s: %v", path, err) 383 } 384 385 ruleArgs := strings.Fields(fmt.Sprintf("-m addrtype --src-type LOCAL -o %s -j MASQUERADE", oifName)) 386 if !iptable.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) { 387 if err := iptable.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil { 388 return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err) 389 } 390 } 391 } 392 393 //Filter the ingress ports until port rules start to be added/deleted 394 filteredPorts := filterPortConfigs(ingressPorts, isDelete) 395 rollbackRules := make([][]string, 0, len(filteredPorts)*3) 396 var portErr error 397 defer func() { 398 if portErr != nil && !isDelete { 399 filterPortConfigs(filteredPorts, !isDelete) 400 for _, rule := range rollbackRules { 401 if err := iptable.RawCombinedOutput(rule...); err != nil { 402 logrus.Warnf("roll back rule failed, %v: %v", rule, err) 403 } 404 } 405 } 406 }() 407 408 for _, iPort := range filteredPorts { 409 if iptable.ExistChain(ingressChain, iptables.Nat) { 410 rule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d", 411 addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort)) 412 if portErr = iptable.RawCombinedOutput(rule...); portErr != nil { 413 errStr := fmt.Sprintf("set up rule failed, %v: %v", rule, portErr) 414 if !isDelete { 415 return fmt.Errorf("%s", errStr) 416 } 417 logrus.Infof("%s", errStr) 418 } 419 rollbackRule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d", rollbackAddDelOpt, 420 ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort)) 421 rollbackRules = append(rollbackRules, rollbackRule) 422 } 423 424 // Filter table rules to allow a published service to be accessible in the local node from.. 425 // 1) service tasks attached to other networks 426 // 2) unmanaged containers on bridge networks 427 rule := strings.Fields(fmt.Sprintf("%s %s -m state -p %s --sport %d --state ESTABLISHED,RELATED -j ACCEPT", 428 addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort)) 429 if portErr = iptable.RawCombinedOutput(rule...); portErr != nil { 430 errStr := fmt.Sprintf("set up rule failed, %v: %v", rule, portErr) 431 if !isDelete { 432 return fmt.Errorf("%s", errStr) 433 } 434 logrus.Warnf("%s", errStr) 435 } 436 rollbackRule := strings.Fields(fmt.Sprintf("%s %s -m state -p %s --sport %d --state ESTABLISHED,RELATED -j ACCEPT", rollbackAddDelOpt, 437 ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort)) 438 rollbackRules = append(rollbackRules, rollbackRule) 439 440 rule = strings.Fields(fmt.Sprintf("%s %s -p %s --dport %d -j ACCEPT", 441 addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort)) 442 if portErr = iptable.RawCombinedOutput(rule...); portErr != nil { 443 errStr := fmt.Sprintf("set up rule failed, %v: %v", rule, portErr) 444 if !isDelete { 445 return fmt.Errorf("%s", errStr) 446 } 447 logrus.Warnf("%s", errStr) 448 } 449 rollbackRule = strings.Fields(fmt.Sprintf("%s %s -p %s --dport %d -j ACCEPT", rollbackAddDelOpt, 450 ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort)) 451 rollbackRules = append(rollbackRules, rollbackRule) 452 453 if err := plumbProxy(iPort, isDelete); err != nil { 454 logrus.Warnf("failed to create proxy for port %d: %v", iPort.PublishedPort, err) 455 } 456 } 457 458 return nil 459 } 460 461 // In the filter table FORWARD chain the first rule should be to jump to 462 // DOCKER-USER so the user is able to filter packet first. 463 // The second rule should be jump to INGRESS-CHAIN. 464 // This chain has the rules to allow access to the published ports for swarm tasks 465 // from local bridge networks and docker_gwbridge (ie:taks on other swarm networks) 466 func arrangeIngressFilterRule() { 467 // TODO IPv6 support 468 iptable := iptables.GetIptable(iptables.IPv4) 469 if iptable.ExistChain(ingressChain, iptables.Filter) { 470 if iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) { 471 if err := iptable.RawCombinedOutput("-D", "FORWARD", "-j", ingressChain); err != nil { 472 logrus.Warnf("failed to delete jump rule to ingressChain in filter table: %v", err) 473 } 474 } 475 if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil { 476 logrus.Warnf("failed to add jump rule to ingressChain in filter table: %v", err) 477 } 478 } 479 } 480 481 func findOIFName(ip net.IP) (string, error) { 482 nlh := ns.NlHandle() 483 484 routes, err := nlh.RouteGet(ip) 485 if err != nil { 486 return "", err 487 } 488 489 if len(routes) == 0 { 490 return "", fmt.Errorf("no route to %s", ip) 491 } 492 493 // Pick the first route(typically there is only one route). We 494 // don't support multipath. 495 link, err := nlh.LinkByIndex(routes[0].LinkIndex) 496 if err != nil { 497 return "", err 498 } 499 500 return link.Attrs().Name, nil 501 } 502 503 func plumbProxy(iPort *PortConfig, isDelete bool) error { 504 var ( 505 err error 506 l io.Closer 507 ) 508 509 portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])) 510 if isDelete { 511 if listener, ok := ingressProxyTbl[portSpec]; ok { 512 if listener != nil { 513 listener.Close() 514 } 515 } 516 517 return nil 518 } 519 520 switch iPort.Protocol { 521 case ProtocolTCP: 522 l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)}) 523 case ProtocolUDP: 524 l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)}) 525 case ProtocolSCTP: 526 l, err = sctp.ListenSCTP("sctp", &sctp.SCTPAddr{Port: int(iPort.PublishedPort)}) 527 default: 528 err = fmt.Errorf("unknown protocol %v", iPort.Protocol) 529 } 530 531 if err != nil { 532 return err 533 } 534 535 ingressProxyTbl[portSpec] = l 536 537 return nil 538 } 539 540 func writePortsToFile(ports []*PortConfig) (string, error) { 541 f, err := ioutil.TempFile("", "port_configs") 542 if err != nil { 543 return "", err 544 } 545 defer f.Close() //nolint:gosec 546 547 buf, _ := proto.Marshal(&EndpointRecord{ 548 IngressPorts: ports, 549 }) 550 551 n, err := f.Write(buf) 552 if err != nil { 553 return "", err 554 } 555 556 if n < len(buf) { 557 return "", io.ErrShortWrite 558 } 559 560 return f.Name(), nil 561 } 562 563 func readPortsFromFile(fileName string) ([]*PortConfig, error) { 564 buf, err := ioutil.ReadFile(fileName) 565 if err != nil { 566 return nil, err 567 } 568 569 var epRec EndpointRecord 570 err = proto.Unmarshal(buf, &epRec) 571 if err != nil { 572 return nil, err 573 } 574 575 return epRec.IngressPorts, nil 576 } 577 578 // Invoke fwmarker reexec routine to mark vip destined packets with 579 // the passed firewall mark. 580 func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error { 581 var ingressPortsFile string 582 583 if len(ingressPorts) != 0 { 584 var err error 585 ingressPortsFile, err = writePortsToFile(ingressPorts) 586 if err != nil { 587 return err 588 } 589 590 defer os.Remove(ingressPortsFile) 591 } 592 593 addDelOpt := "-A" 594 if isDelete { 595 addDelOpt = "-D" 596 } 597 598 cmd := &exec.Cmd{ 599 Path: reexec.Self(), 600 Args: append([]string{"fwmarker"}, path, vip.String(), fmt.Sprintf("%d", fwMark), addDelOpt, ingressPortsFile, eIP.String(), lbMode), 601 Stdout: os.Stdout, 602 Stderr: os.Stderr, 603 } 604 605 if err := cmd.Run(); err != nil { 606 return fmt.Errorf("reexec failed: %v", err) 607 } 608 609 return nil 610 } 611 612 // Firewall marker reexec function. 613 func fwMarker() { 614 // TODO IPv6 support 615 iptable := iptables.GetIptable(iptables.IPv4) 616 runtime.LockOSThread() 617 defer runtime.UnlockOSThread() 618 619 if len(os.Args) < 8 { 620 logrus.Error("invalid number of arguments..") 621 os.Exit(1) 622 } 623 624 var ingressPorts []*PortConfig 625 if os.Args[5] != "" { 626 var err error 627 ingressPorts, err = readPortsFromFile(os.Args[5]) 628 if err != nil { 629 logrus.Errorf("Failed reading ingress ports file: %v", err) 630 os.Exit(2) 631 } 632 } 633 634 vip := os.Args[2] 635 fwMark, err := strconv.ParseUint(os.Args[3], 10, 32) 636 if err != nil { 637 logrus.Errorf("bad fwmark value(%s) passed: %v", os.Args[3], err) 638 os.Exit(3) 639 } 640 addDelOpt := os.Args[4] 641 642 rules := [][]string{} 643 for _, iPort := range ingressPorts { 644 rule := strings.Fields(fmt.Sprintf("-t mangle %s PREROUTING -p %s --dport %d -j MARK --set-mark %d", 645 addDelOpt, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, fwMark)) 646 rules = append(rules, rule) 647 } 648 649 ns, err := netns.GetFromPath(os.Args[1]) 650 if err != nil { 651 logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err) 652 os.Exit(4) 653 } 654 defer ns.Close() 655 656 if err := netns.Set(ns); err != nil { 657 logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err) 658 os.Exit(5) 659 } 660 661 lbMode := os.Args[7] 662 if addDelOpt == "-A" && lbMode == loadBalancerModeNAT { 663 eIP, subnet, err := net.ParseCIDR(os.Args[6]) 664 if err != nil { 665 logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err) 666 os.Exit(6) 667 } 668 669 ruleParams := strings.Fields(fmt.Sprintf("-m ipvs --ipvs -d %s -j SNAT --to-source %s", subnet, eIP)) 670 if !iptable.Exists("nat", "POSTROUTING", ruleParams...) { 671 rule := append(strings.Fields("-t nat -A POSTROUTING"), ruleParams...) 672 rules = append(rules, rule) 673 674 err := ioutil.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644) 675 if err != nil { 676 logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err) 677 os.Exit(7) 678 } 679 } 680 } 681 682 rule := strings.Fields(fmt.Sprintf("-t mangle %s INPUT -d %s/32 -j MARK --set-mark %d", addDelOpt, vip, fwMark)) 683 rules = append(rules, rule) 684 685 for _, rule := range rules { 686 if err := iptable.RawCombinedOutputNative(rule...); err != nil { 687 logrus.Errorf("set up rule failed, %v: %v", rule, err) 688 os.Exit(8) 689 } 690 } 691 } 692 693 func addRedirectRules(path string, eIP *net.IPNet, ingressPorts []*PortConfig) error { 694 var ingressPortsFile string 695 696 if len(ingressPorts) != 0 { 697 var err error 698 ingressPortsFile, err = writePortsToFile(ingressPorts) 699 if err != nil { 700 return err 701 } 702 defer os.Remove(ingressPortsFile) 703 } 704 705 cmd := &exec.Cmd{ 706 Path: reexec.Self(), 707 Args: append([]string{"redirector"}, path, eIP.String(), ingressPortsFile), 708 Stdout: os.Stdout, 709 Stderr: os.Stderr, 710 } 711 712 if err := cmd.Run(); err != nil { 713 return fmt.Errorf("reexec failed: %v", err) 714 } 715 716 return nil 717 } 718 719 // Redirector reexec function. 720 func redirector() { 721 // TODO IPv6 support 722 iptable := iptables.GetIptable(iptables.IPv4) 723 runtime.LockOSThread() 724 defer runtime.UnlockOSThread() 725 726 if len(os.Args) < 4 { 727 logrus.Error("invalid number of arguments..") 728 os.Exit(1) 729 } 730 731 var ingressPorts []*PortConfig 732 if os.Args[3] != "" { 733 var err error 734 ingressPorts, err = readPortsFromFile(os.Args[3]) 735 if err != nil { 736 logrus.Errorf("Failed reading ingress ports file: %v", err) 737 os.Exit(2) 738 } 739 } 740 741 eIP, _, err := net.ParseCIDR(os.Args[2]) 742 if err != nil { 743 logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[2], err) 744 os.Exit(3) 745 } 746 747 rules := [][]string{} 748 for _, iPort := range ingressPorts { 749 rule := strings.Fields(fmt.Sprintf("-t nat -A PREROUTING -d %s -p %s --dport %d -j REDIRECT --to-port %d", 750 eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, iPort.TargetPort)) 751 rules = append(rules, rule) 752 // Allow only incoming connections to exposed ports 753 iRule := strings.Fields(fmt.Sprintf("-I INPUT -d %s -p %s --dport %d -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT", 754 eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.TargetPort)) 755 rules = append(rules, iRule) 756 // Allow only outgoing connections from exposed ports 757 oRule := strings.Fields(fmt.Sprintf("-I OUTPUT -s %s -p %s --sport %d -m conntrack --ctstate ESTABLISHED -j ACCEPT", 758 eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.TargetPort)) 759 rules = append(rules, oRule) 760 } 761 762 ns, err := netns.GetFromPath(os.Args[1]) 763 if err != nil { 764 logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err) 765 os.Exit(4) 766 } 767 defer ns.Close() 768 769 if err := netns.Set(ns); err != nil { 770 logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err) 771 os.Exit(5) 772 } 773 774 for _, rule := range rules { 775 if err := iptable.RawCombinedOutputNative(rule...); err != nil { 776 logrus.Errorf("set up rule failed, %v: %v", rule, err) 777 os.Exit(6) 778 } 779 } 780 781 if len(ingressPorts) == 0 { 782 return 783 } 784 785 // Ensure blocking rules for anything else in/to ingress network 786 for _, rule := range [][]string{ 787 {"-d", eIP.String(), "-p", "sctp", "-j", "DROP"}, 788 {"-d", eIP.String(), "-p", "udp", "-j", "DROP"}, 789 {"-d", eIP.String(), "-p", "tcp", "-j", "DROP"}, 790 } { 791 if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) { 792 if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil { 793 logrus.Errorf("set up rule failed, %v: %v", rule, err) 794 os.Exit(7) 795 } 796 } 797 rule[0] = "-s" 798 if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) { 799 if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil { 800 logrus.Errorf("set up rule failed, %v: %v", rule, err) 801 os.Exit(8) 802 } 803 } 804 } 805 }