github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/libnetwork/service_linux.go (about)

     1  package libnetwork
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"io/ioutil"
     7  	"net"
     8  	"os"
     9  	"os/exec"
    10  	"path/filepath"
    11  	"runtime"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"syscall"
    16  
    17  	"github.com/docker/docker/pkg/reexec"
    18  	"github.com/docker/libnetwork/iptables"
    19  	"github.com/docker/libnetwork/ns"
    20  	"github.com/gogo/protobuf/proto"
    21  	"github.com/ishidawataru/sctp"
    22  	"github.com/moby/ipvs"
    23  	"github.com/sirupsen/logrus"
    24  	"github.com/vishvananda/netlink/nl"
    25  	"github.com/vishvananda/netns"
    26  )
    27  
    28  func init() {
    29  	reexec.Register("fwmarker", fwMarker)
    30  	reexec.Register("redirector", redirector)
    31  }
    32  
    33  // Populate all loadbalancers on the network that the passed endpoint
    34  // belongs to, into this sandbox.
    35  func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
    36  	// This is an interface less endpoint. Nothing to do.
    37  	if ep.Iface() == nil {
    38  		return
    39  	}
    40  
    41  	n := ep.getNetwork()
    42  	eIP := ep.Iface().Address()
    43  
    44  	if n.ingress {
    45  		if err := addRedirectRules(sb.Key(), eIP, ep.ingressPorts); err != nil {
    46  			logrus.Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err)
    47  		}
    48  	}
    49  }
    50  
    51  func (n *network) findLBEndpointSandbox() (*endpoint, *sandbox, error) {
    52  	// TODO: get endpoint from store?  See EndpointInfo()
    53  	var ep *endpoint
    54  	// Find this node's LB sandbox endpoint:  there should be exactly one
    55  	for _, e := range n.Endpoints() {
    56  		epi := e.Info()
    57  		if epi != nil && epi.LoadBalancer() {
    58  			ep = e.(*endpoint)
    59  			break
    60  		}
    61  	}
    62  	if ep == nil {
    63  		return nil, nil, fmt.Errorf("Unable to find load balancing endpoint for network %s", n.ID())
    64  	}
    65  	// Get the load balancer sandbox itself as well
    66  	sb, ok := ep.getSandbox()
    67  	if !ok {
    68  		return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
    69  	}
    70  	var sep *endpoint
    71  	sep = sb.getEndpoint(ep.ID())
    72  	if sep == nil {
    73  		return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
    74  	}
    75  	return sep, sb, nil
    76  }
    77  
    78  // Searches the OS sandbox for the name of the endpoint interface
    79  // within the sandbox.   This is required for adding/removing IP
    80  // aliases to the interface.
    81  func findIfaceDstName(sb *sandbox, ep *endpoint) string {
    82  	srcName := ep.Iface().SrcName()
    83  	for _, i := range sb.osSbox.Info().Interfaces() {
    84  		if i.SrcName() == srcName {
    85  			return i.DstName()
    86  		}
    87  	}
    88  	return ""
    89  }
    90  
    91  // Add loadbalancer backend to the loadbalncer sandbox for the network.
    92  // If needed add the service as well.
    93  func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
    94  	if len(lb.vip) == 0 {
    95  		return
    96  	}
    97  	ep, sb, err := n.findLBEndpointSandbox()
    98  	if err != nil {
    99  		logrus.Errorf("addLBBackend %s/%s: %v", n.ID(), n.Name(), err)
   100  		return
   101  	}
   102  	if sb.osSbox == nil {
   103  		return
   104  	}
   105  
   106  	eIP := ep.Iface().Address()
   107  
   108  	i, err := ipvs.New(sb.Key())
   109  	if err != nil {
   110  		logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb addition: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
   111  		return
   112  	}
   113  	defer i.Close()
   114  
   115  	s := &ipvs.Service{
   116  		AddressFamily: nl.FAMILY_V4,
   117  		FWMark:        lb.fwMark,
   118  		SchedName:     ipvs.RoundRobin,
   119  	}
   120  
   121  	if !i.IsServicePresent(s) {
   122  		// Add IP alias for the VIP to the endpoint
   123  		ifName := findIfaceDstName(sb, ep)
   124  		if ifName == "" {
   125  			logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
   126  			return
   127  		}
   128  		err := sb.osSbox.AddAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
   129  		if err != nil {
   130  			logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
   131  			return
   132  		}
   133  
   134  		if sb.ingress {
   135  			var gwIP net.IP
   136  			if ep := sb.getGatewayEndpoint(); ep != nil {
   137  				gwIP = ep.Iface().Address().IP
   138  			}
   139  			if err := programIngress(gwIP, lb.service.ingressPorts, false); err != nil {
   140  				logrus.Errorf("Failed to add ingress: %v", err)
   141  				return
   142  			}
   143  		}
   144  
   145  		logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
   146  		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
   147  			logrus.Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
   148  			return
   149  		}
   150  
   151  		if err := i.NewService(s); err != nil && err != syscall.EEXIST {
   152  			logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   153  			return
   154  		}
   155  	}
   156  
   157  	d := &ipvs.Destination{
   158  		AddressFamily: nl.FAMILY_V4,
   159  		Address:       ip,
   160  		Weight:        1,
   161  	}
   162  	if n.loadBalancerMode == loadBalancerModeDSR {
   163  		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
   164  	}
   165  
   166  	// Remove the sched name before using the service to add
   167  	// destination.
   168  	s.SchedName = ""
   169  	if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
   170  		logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   171  	}
   172  
   173  	// Ensure that kernel tweaks are applied in case this is the first time
   174  	// we've initialized ip_vs
   175  	sb.osSbox.ApplyOSTweaks(sb.oslTypes)
   176  }
   177  
   178  // Remove loadbalancer backend the load balancing endpoint for this
   179  // network. If 'rmService' is true, then remove the service entry as well.
   180  // If 'fullRemove' is true then completely remove the entry, otherwise
   181  // just deweight it for now.
   182  func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) {
   183  	if len(lb.vip) == 0 {
   184  		return
   185  	}
   186  	ep, sb, err := n.findLBEndpointSandbox()
   187  	if err != nil {
   188  		logrus.Debugf("rmLBBackend for %s/%s: %v -- probably transient state", n.ID(), n.Name(), err)
   189  		return
   190  	}
   191  	if sb.osSbox == nil {
   192  		return
   193  	}
   194  
   195  	eIP := ep.Iface().Address()
   196  
   197  	i, err := ipvs.New(sb.Key())
   198  	if err != nil {
   199  		logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb removal: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
   200  		return
   201  	}
   202  	defer i.Close()
   203  
   204  	s := &ipvs.Service{
   205  		AddressFamily: nl.FAMILY_V4,
   206  		FWMark:        lb.fwMark,
   207  	}
   208  
   209  	d := &ipvs.Destination{
   210  		AddressFamily: nl.FAMILY_V4,
   211  		Address:       ip,
   212  		Weight:        1,
   213  	}
   214  	if n.loadBalancerMode == loadBalancerModeDSR {
   215  		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
   216  	}
   217  
   218  	if fullRemove {
   219  		if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
   220  			logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   221  		}
   222  	} else {
   223  		d.Weight = 0
   224  		if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
   225  			logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   226  		}
   227  	}
   228  
   229  	if rmService {
   230  		s.SchedName = ipvs.RoundRobin
   231  		if err := i.DelService(s); err != nil && err != syscall.ENOENT {
   232  			logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   233  		}
   234  
   235  		if sb.ingress {
   236  			var gwIP net.IP
   237  			if ep := sb.getGatewayEndpoint(); ep != nil {
   238  				gwIP = ep.Iface().Address().IP
   239  			}
   240  			if err := programIngress(gwIP, lb.service.ingressPorts, true); err != nil {
   241  				logrus.Errorf("Failed to delete ingress: %v", err)
   242  			}
   243  		}
   244  
   245  		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
   246  			logrus.Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
   247  		}
   248  
   249  		// Remove IP alias from the VIP to the endpoint
   250  		ifName := findIfaceDstName(sb, ep)
   251  		if ifName == "" {
   252  			logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
   253  			return
   254  		}
   255  		err := sb.osSbox.RemoveAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
   256  		if err != nil {
   257  			logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
   258  		}
   259  	}
   260  }
   261  
   262  const ingressChain = "DOCKER-INGRESS"
   263  
   264  var (
   265  	ingressOnce     sync.Once
   266  	ingressMu       sync.Mutex // lock for operations on ingress
   267  	ingressProxyTbl = make(map[string]io.Closer)
   268  	portConfigMu    sync.Mutex
   269  	portConfigTbl   = make(map[PortConfig]int)
   270  )
   271  
   272  func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig {
   273  	portConfigMu.Lock()
   274  	iPorts := make([]*PortConfig, 0, len(ingressPorts))
   275  	for _, pc := range ingressPorts {
   276  		if isDelete {
   277  			if cnt, ok := portConfigTbl[*pc]; ok {
   278  				// This is the last reference to this
   279  				// port config. Delete the port config
   280  				// and add it to filtered list to be
   281  				// plumbed.
   282  				if cnt == 1 {
   283  					delete(portConfigTbl, *pc)
   284  					iPorts = append(iPorts, pc)
   285  					continue
   286  				}
   287  
   288  				portConfigTbl[*pc] = cnt - 1
   289  			}
   290  
   291  			continue
   292  		}
   293  
   294  		if cnt, ok := portConfigTbl[*pc]; ok {
   295  			portConfigTbl[*pc] = cnt + 1
   296  			continue
   297  		}
   298  
   299  		// We are adding it for the first time. Add it to the
   300  		// filter list to be plumbed.
   301  		portConfigTbl[*pc] = 1
   302  		iPorts = append(iPorts, pc)
   303  	}
   304  	portConfigMu.Unlock()
   305  
   306  	return iPorts
   307  }
   308  
   309  func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error {
   310  	// TODO IPv6 support
   311  	iptable := iptables.GetIptable(iptables.IPv4)
   312  
   313  	addDelOpt := "-I"
   314  	rollbackAddDelOpt := "-D"
   315  	if isDelete {
   316  		addDelOpt = "-D"
   317  		rollbackAddDelOpt = "-I"
   318  	}
   319  
   320  	ingressMu.Lock()
   321  	defer ingressMu.Unlock()
   322  
   323  	chainExists := iptable.ExistChain(ingressChain, iptables.Nat)
   324  	filterChainExists := iptable.ExistChain(ingressChain, iptables.Filter)
   325  
   326  	ingressOnce.Do(func() {
   327  		// Flush nat table and filter table ingress chain rules during init if it
   328  		// exists. It might contain stale rules from previous life.
   329  		if chainExists {
   330  			if err := iptable.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil {
   331  				logrus.Errorf("Could not flush nat table ingress chain rules during init: %v", err)
   332  			}
   333  		}
   334  		if filterChainExists {
   335  			if err := iptable.RawCombinedOutput("-F", ingressChain); err != nil {
   336  				logrus.Errorf("Could not flush filter table ingress chain rules during init: %v", err)
   337  			}
   338  		}
   339  	})
   340  
   341  	if !isDelete {
   342  		if !chainExists {
   343  			if err := iptable.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil {
   344  				return fmt.Errorf("failed to create ingress chain: %v", err)
   345  			}
   346  		}
   347  		if !filterChainExists {
   348  			if err := iptable.RawCombinedOutput("-N", ingressChain); err != nil {
   349  				return fmt.Errorf("failed to create filter table ingress chain: %v", err)
   350  			}
   351  		}
   352  
   353  		if !iptable.Exists(iptables.Nat, ingressChain, "-j", "RETURN") {
   354  			if err := iptable.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil {
   355  				return fmt.Errorf("failed to add return rule in nat table ingress chain: %v", err)
   356  			}
   357  		}
   358  
   359  		if !iptable.Exists(iptables.Filter, ingressChain, "-j", "RETURN") {
   360  			if err := iptable.RawCombinedOutput("-A", ingressChain, "-j", "RETURN"); err != nil {
   361  				return fmt.Errorf("failed to add return rule to filter table ingress chain: %v", err)
   362  			}
   363  		}
   364  
   365  		for _, chain := range []string{"OUTPUT", "PREROUTING"} {
   366  			if !iptable.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) {
   367  				if err := iptable.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil {
   368  					return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err)
   369  				}
   370  			}
   371  		}
   372  
   373  		if !iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
   374  			if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
   375  				return fmt.Errorf("failed to add jump rule to %s in filter table forward chain: %v", ingressChain, err)
   376  			}
   377  			arrangeUserFilterRule()
   378  		}
   379  
   380  		oifName, err := findOIFName(gwIP)
   381  		if err != nil {
   382  			return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err)
   383  		}
   384  
   385  		path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
   386  		if err := ioutil.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil {
   387  			return fmt.Errorf("could not write to %s: %v", path, err)
   388  		}
   389  
   390  		ruleArgs := strings.Fields(fmt.Sprintf("-m addrtype --src-type LOCAL -o %s -j MASQUERADE", oifName))
   391  		if !iptable.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) {
   392  			if err := iptable.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil {
   393  				return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err)
   394  			}
   395  		}
   396  	}
   397  
   398  	//Filter the ingress ports until port rules start to be added/deleted
   399  	filteredPorts := filterPortConfigs(ingressPorts, isDelete)
   400  	rollbackRules := make([][]string, 0, len(filteredPorts)*3)
   401  	var portErr error
   402  	defer func() {
   403  		if portErr != nil && !isDelete {
   404  			filterPortConfigs(filteredPorts, !isDelete)
   405  			for _, rule := range rollbackRules {
   406  				if err := iptable.RawCombinedOutput(rule...); err != nil {
   407  					logrus.Warnf("roll back rule failed, %v: %v", rule, err)
   408  				}
   409  			}
   410  		}
   411  	}()
   412  
   413  	for _, iPort := range filteredPorts {
   414  		if iptable.ExistChain(ingressChain, iptables.Nat) {
   415  			rule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d",
   416  				addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort))
   417  			if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
   418  				errStr := fmt.Sprintf("set up rule failed, %v: %v", rule, portErr)
   419  				if !isDelete {
   420  					return fmt.Errorf("%s", errStr)
   421  				}
   422  				logrus.Infof("%s", errStr)
   423  			}
   424  			rollbackRule := strings.Fields(fmt.Sprintf("-t nat %s %s -p %s --dport %d -j DNAT --to-destination %s:%d", rollbackAddDelOpt,
   425  				ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, gwIP, iPort.PublishedPort))
   426  			rollbackRules = append(rollbackRules, rollbackRule)
   427  		}
   428  
   429  		// Filter table rules to allow a published service to be accessible in the local node from..
   430  		// 1) service tasks attached to other networks
   431  		// 2) unmanaged containers on bridge networks
   432  		rule := strings.Fields(fmt.Sprintf("%s %s -m state -p %s --sport %d --state ESTABLISHED,RELATED -j ACCEPT",
   433  			addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
   434  		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
   435  			errStr := fmt.Sprintf("set up rule failed, %v: %v", rule, portErr)
   436  			if !isDelete {
   437  				return fmt.Errorf("%s", errStr)
   438  			}
   439  			logrus.Warnf("%s", errStr)
   440  		}
   441  		rollbackRule := strings.Fields(fmt.Sprintf("%s %s -m state -p %s --sport %d --state ESTABLISHED,RELATED -j ACCEPT", rollbackAddDelOpt,
   442  			ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
   443  		rollbackRules = append(rollbackRules, rollbackRule)
   444  
   445  		rule = strings.Fields(fmt.Sprintf("%s %s -p %s --dport %d -j ACCEPT",
   446  			addDelOpt, ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
   447  		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
   448  			errStr := fmt.Sprintf("set up rule failed, %v: %v", rule, portErr)
   449  			if !isDelete {
   450  				return fmt.Errorf("%s", errStr)
   451  			}
   452  			logrus.Warnf("%s", errStr)
   453  		}
   454  		rollbackRule = strings.Fields(fmt.Sprintf("%s %s -p %s --dport %d -j ACCEPT", rollbackAddDelOpt,
   455  			ingressChain, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort))
   456  		rollbackRules = append(rollbackRules, rollbackRule)
   457  
   458  		if err := plumbProxy(iPort, isDelete); err != nil {
   459  			logrus.Warnf("failed to create proxy for port %d: %v", iPort.PublishedPort, err)
   460  		}
   461  	}
   462  
   463  	return nil
   464  }
   465  
   466  // In the filter table FORWARD chain the first rule should be to jump to
   467  // DOCKER-USER so the user is able to filter packet first.
   468  // The second rule should be jump to INGRESS-CHAIN.
   469  // This chain has the rules to allow access to the published ports for swarm tasks
   470  // from local bridge networks and docker_gwbridge (ie:taks on other swarm networks)
   471  func arrangeIngressFilterRule() {
   472  	// TODO IPv6 support
   473  	iptable := iptables.GetIptable(iptables.IPv4)
   474  	if iptable.ExistChain(ingressChain, iptables.Filter) {
   475  		if iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
   476  			if err := iptable.RawCombinedOutput("-D", "FORWARD", "-j", ingressChain); err != nil {
   477  				logrus.Warnf("failed to delete jump rule to ingressChain in filter table: %v", err)
   478  			}
   479  		}
   480  		if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
   481  			logrus.Warnf("failed to add jump rule to ingressChain in filter table: %v", err)
   482  		}
   483  	}
   484  }
   485  
   486  func findOIFName(ip net.IP) (string, error) {
   487  	nlh := ns.NlHandle()
   488  
   489  	routes, err := nlh.RouteGet(ip)
   490  	if err != nil {
   491  		return "", err
   492  	}
   493  
   494  	if len(routes) == 0 {
   495  		return "", fmt.Errorf("no route to %s", ip)
   496  	}
   497  
   498  	// Pick the first route(typically there is only one route). We
   499  	// don't support multipath.
   500  	link, err := nlh.LinkByIndex(routes[0].LinkIndex)
   501  	if err != nil {
   502  		return "", err
   503  	}
   504  
   505  	return link.Attrs().Name, nil
   506  }
   507  
   508  func plumbProxy(iPort *PortConfig, isDelete bool) error {
   509  	var (
   510  		err error
   511  		l   io.Closer
   512  	)
   513  
   514  	portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]))
   515  	if isDelete {
   516  		if listener, ok := ingressProxyTbl[portSpec]; ok {
   517  			if listener != nil {
   518  				listener.Close()
   519  			}
   520  		}
   521  
   522  		return nil
   523  	}
   524  
   525  	switch iPort.Protocol {
   526  	case ProtocolTCP:
   527  		l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)})
   528  	case ProtocolUDP:
   529  		l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)})
   530  	case ProtocolSCTP:
   531  		l, err = sctp.ListenSCTP("sctp", &sctp.SCTPAddr{Port: int(iPort.PublishedPort)})
   532  	default:
   533  		err = fmt.Errorf("unknown protocol %v", iPort.Protocol)
   534  	}
   535  
   536  	if err != nil {
   537  		return err
   538  	}
   539  
   540  	ingressProxyTbl[portSpec] = l
   541  
   542  	return nil
   543  }
   544  
   545  func writePortsToFile(ports []*PortConfig) (string, error) {
   546  	f, err := ioutil.TempFile("", "port_configs")
   547  	if err != nil {
   548  		return "", err
   549  	}
   550  	defer f.Close()
   551  
   552  	buf, _ := proto.Marshal(&EndpointRecord{
   553  		IngressPorts: ports,
   554  	})
   555  
   556  	n, err := f.Write(buf)
   557  	if err != nil {
   558  		return "", err
   559  	}
   560  
   561  	if n < len(buf) {
   562  		return "", io.ErrShortWrite
   563  	}
   564  
   565  	return f.Name(), nil
   566  }
   567  
   568  func readPortsFromFile(fileName string) ([]*PortConfig, error) {
   569  	buf, err := ioutil.ReadFile(fileName)
   570  	if err != nil {
   571  		return nil, err
   572  	}
   573  
   574  	var epRec EndpointRecord
   575  	err = proto.Unmarshal(buf, &epRec)
   576  	if err != nil {
   577  		return nil, err
   578  	}
   579  
   580  	return epRec.IngressPorts, nil
   581  }
   582  
   583  // Invoke fwmarker reexec routine to mark vip destined packets with
   584  // the passed firewall mark.
   585  func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
   586  	var ingressPortsFile string
   587  
   588  	if len(ingressPorts) != 0 {
   589  		var err error
   590  		ingressPortsFile, err = writePortsToFile(ingressPorts)
   591  		if err != nil {
   592  			return err
   593  		}
   594  
   595  		defer os.Remove(ingressPortsFile)
   596  	}
   597  
   598  	addDelOpt := "-A"
   599  	if isDelete {
   600  		addDelOpt = "-D"
   601  	}
   602  
   603  	cmd := &exec.Cmd{
   604  		Path:   reexec.Self(),
   605  		Args:   append([]string{"fwmarker"}, path, vip.String(), fmt.Sprintf("%d", fwMark), addDelOpt, ingressPortsFile, eIP.String(), lbMode),
   606  		Stdout: os.Stdout,
   607  		Stderr: os.Stderr,
   608  	}
   609  
   610  	if err := cmd.Run(); err != nil {
   611  		return fmt.Errorf("reexec failed: %v", err)
   612  	}
   613  
   614  	return nil
   615  }
   616  
   617  // Firewall marker reexec function.
   618  func fwMarker() {
   619  	// TODO IPv6 support
   620  	iptable := iptables.GetIptable(iptables.IPv4)
   621  	runtime.LockOSThread()
   622  	defer runtime.UnlockOSThread()
   623  
   624  	if len(os.Args) < 8 {
   625  		logrus.Error("invalid number of arguments..")
   626  		os.Exit(1)
   627  	}
   628  
   629  	var ingressPorts []*PortConfig
   630  	if os.Args[5] != "" {
   631  		var err error
   632  		ingressPorts, err = readPortsFromFile(os.Args[5])
   633  		if err != nil {
   634  			logrus.Errorf("Failed reading ingress ports file: %v", err)
   635  			os.Exit(2)
   636  		}
   637  	}
   638  
   639  	vip := os.Args[2]
   640  	fwMark, err := strconv.ParseUint(os.Args[3], 10, 32)
   641  	if err != nil {
   642  		logrus.Errorf("bad fwmark value(%s) passed: %v", os.Args[3], err)
   643  		os.Exit(3)
   644  	}
   645  	addDelOpt := os.Args[4]
   646  
   647  	rules := [][]string{}
   648  	for _, iPort := range ingressPorts {
   649  		rule := strings.Fields(fmt.Sprintf("-t mangle %s PREROUTING -p %s --dport %d -j MARK --set-mark %d",
   650  			addDelOpt, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, fwMark))
   651  		rules = append(rules, rule)
   652  	}
   653  
   654  	ns, err := netns.GetFromPath(os.Args[1])
   655  	if err != nil {
   656  		logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
   657  		os.Exit(4)
   658  	}
   659  	defer ns.Close()
   660  
   661  	if err := netns.Set(ns); err != nil {
   662  		logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
   663  		os.Exit(5)
   664  	}
   665  
   666  	lbMode := os.Args[7]
   667  	if addDelOpt == "-A" && lbMode == loadBalancerModeNAT {
   668  		eIP, subnet, err := net.ParseCIDR(os.Args[6])
   669  		if err != nil {
   670  			logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)
   671  			os.Exit(6)
   672  		}
   673  
   674  		ruleParams := strings.Fields(fmt.Sprintf("-m ipvs --ipvs -d %s -j SNAT --to-source %s", subnet, eIP))
   675  		if !iptable.Exists("nat", "POSTROUTING", ruleParams...) {
   676  			rule := append(strings.Fields("-t nat -A POSTROUTING"), ruleParams...)
   677  			rules = append(rules, rule)
   678  
   679  			err := ioutil.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
   680  			if err != nil {
   681  				logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err)
   682  				os.Exit(7)
   683  			}
   684  		}
   685  	}
   686  
   687  	rule := strings.Fields(fmt.Sprintf("-t mangle %s INPUT -d %s/32 -j MARK --set-mark %d", addDelOpt, vip, fwMark))
   688  	rules = append(rules, rule)
   689  
   690  	for _, rule := range rules {
   691  		if err := iptable.RawCombinedOutputNative(rule...); err != nil {
   692  			logrus.Errorf("set up rule failed, %v: %v", rule, err)
   693  			os.Exit(8)
   694  		}
   695  	}
   696  }
   697  
   698  func addRedirectRules(path string, eIP *net.IPNet, ingressPorts []*PortConfig) error {
   699  	var ingressPortsFile string
   700  
   701  	if len(ingressPorts) != 0 {
   702  		var err error
   703  		ingressPortsFile, err = writePortsToFile(ingressPorts)
   704  		if err != nil {
   705  			return err
   706  		}
   707  		defer os.Remove(ingressPortsFile)
   708  	}
   709  
   710  	cmd := &exec.Cmd{
   711  		Path:   reexec.Self(),
   712  		Args:   append([]string{"redirector"}, path, eIP.String(), ingressPortsFile),
   713  		Stdout: os.Stdout,
   714  		Stderr: os.Stderr,
   715  	}
   716  
   717  	if err := cmd.Run(); err != nil {
   718  		return fmt.Errorf("reexec failed: %v", err)
   719  	}
   720  
   721  	return nil
   722  }
   723  
   724  // Redirector reexec function.
   725  func redirector() {
   726  	// TODO IPv6 support
   727  	iptable := iptables.GetIptable(iptables.IPv4)
   728  	runtime.LockOSThread()
   729  	defer runtime.UnlockOSThread()
   730  
   731  	if len(os.Args) < 4 {
   732  		logrus.Error("invalid number of arguments..")
   733  		os.Exit(1)
   734  	}
   735  
   736  	var ingressPorts []*PortConfig
   737  	if os.Args[3] != "" {
   738  		var err error
   739  		ingressPorts, err = readPortsFromFile(os.Args[3])
   740  		if err != nil {
   741  			logrus.Errorf("Failed reading ingress ports file: %v", err)
   742  			os.Exit(2)
   743  		}
   744  	}
   745  
   746  	eIP, _, err := net.ParseCIDR(os.Args[2])
   747  	if err != nil {
   748  		logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[2], err)
   749  		os.Exit(3)
   750  	}
   751  
   752  	rules := [][]string{}
   753  	for _, iPort := range ingressPorts {
   754  		rule := strings.Fields(fmt.Sprintf("-t nat -A PREROUTING -d %s -p %s --dport %d -j REDIRECT --to-port %d",
   755  			eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.PublishedPort, iPort.TargetPort))
   756  		rules = append(rules, rule)
   757  		// Allow only incoming connections to exposed ports
   758  		iRule := strings.Fields(fmt.Sprintf("-I INPUT -d %s -p %s --dport %d -m conntrack --ctstate NEW,ESTABLISHED -j ACCEPT",
   759  			eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.TargetPort))
   760  		rules = append(rules, iRule)
   761  		// Allow only outgoing connections from exposed ports
   762  		oRule := strings.Fields(fmt.Sprintf("-I OUTPUT -s %s -p %s --sport %d -m conntrack --ctstate ESTABLISHED -j ACCEPT",
   763  			eIP.String(), strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]), iPort.TargetPort))
   764  		rules = append(rules, oRule)
   765  	}
   766  
   767  	ns, err := netns.GetFromPath(os.Args[1])
   768  	if err != nil {
   769  		logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
   770  		os.Exit(4)
   771  	}
   772  	defer ns.Close()
   773  
   774  	if err := netns.Set(ns); err != nil {
   775  		logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
   776  		os.Exit(5)
   777  	}
   778  
   779  	for _, rule := range rules {
   780  		if err := iptable.RawCombinedOutputNative(rule...); err != nil {
   781  			logrus.Errorf("set up rule failed, %v: %v", rule, err)
   782  			os.Exit(6)
   783  		}
   784  	}
   785  
   786  	if len(ingressPorts) == 0 {
   787  		return
   788  	}
   789  
   790  	// Ensure blocking rules for anything else in/to ingress network
   791  	for _, rule := range [][]string{
   792  		{"-d", eIP.String(), "-p", "sctp", "-j", "DROP"},
   793  		{"-d", eIP.String(), "-p", "udp", "-j", "DROP"},
   794  		{"-d", eIP.String(), "-p", "tcp", "-j", "DROP"},
   795  	} {
   796  		if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) {
   797  			if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
   798  				logrus.Errorf("set up rule failed, %v: %v", rule, err)
   799  				os.Exit(7)
   800  			}
   801  		}
   802  		rule[0] = "-s"
   803  		if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
   804  			if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
   805  				logrus.Errorf("set up rule failed, %v: %v", rule, err)
   806  				os.Exit(8)
   807  			}
   808  		}
   809  	}
   810  }