github.com/rumpl/bof@v23.0.0-rc.2+incompatible/libnetwork/service_linux.go (about)

     1  package libnetwork
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"net"
     7  	"os"
     8  	"os/exec"
     9  	"path/filepath"
    10  	"runtime"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"syscall"
    15  
    16  	"github.com/docker/docker/libnetwork/iptables"
    17  	"github.com/docker/docker/libnetwork/ns"
    18  	"github.com/docker/docker/pkg/reexec"
    19  	"github.com/gogo/protobuf/proto"
    20  	"github.com/ishidawataru/sctp"
    21  	"github.com/moby/ipvs"
    22  	"github.com/sirupsen/logrus"
    23  	"github.com/vishvananda/netlink/nl"
    24  	"github.com/vishvananda/netns"
    25  )
    26  
    27  func init() {
    28  	reexec.Register("fwmarker", fwMarker)
    29  	reexec.Register("redirector", redirector)
    30  }
    31  
    32  // Populate all loadbalancers on the network that the passed endpoint
    33  // belongs to, into this sandbox.
    34  func (sb *sandbox) populateLoadBalancers(ep *endpoint) {
    35  	// This is an interface less endpoint. Nothing to do.
    36  	if ep.Iface() == nil {
    37  		return
    38  	}
    39  
    40  	n := ep.getNetwork()
    41  	eIP := ep.Iface().Address()
    42  
    43  	if n.ingress {
    44  		if err := addRedirectRules(sb.Key(), eIP, ep.ingressPorts); err != nil {
    45  			logrus.Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err)
    46  		}
    47  	}
    48  }
    49  
    50  func (n *network) findLBEndpointSandbox() (*endpoint, *sandbox, error) {
    51  	// TODO: get endpoint from store?  See EndpointInfo()
    52  	var ep *endpoint
    53  	// Find this node's LB sandbox endpoint:  there should be exactly one
    54  	for _, e := range n.Endpoints() {
    55  		epi := e.Info()
    56  		if epi != nil && epi.LoadBalancer() {
    57  			ep = e.(*endpoint)
    58  			break
    59  		}
    60  	}
    61  	if ep == nil {
    62  		return nil, nil, fmt.Errorf("Unable to find load balancing endpoint for network %s", n.ID())
    63  	}
    64  	// Get the load balancer sandbox itself as well
    65  	sb, ok := ep.getSandbox()
    66  	if !ok {
    67  		return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
    68  	}
    69  	sep := sb.getEndpoint(ep.ID())
    70  	if sep == nil {
    71  		return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
    72  	}
    73  	return sep, sb, nil
    74  }
    75  
    76  // Searches the OS sandbox for the name of the endpoint interface
    77  // within the sandbox.   This is required for adding/removing IP
    78  // aliases to the interface.
    79  func findIfaceDstName(sb *sandbox, ep *endpoint) string {
    80  	srcName := ep.Iface().SrcName()
    81  	for _, i := range sb.osSbox.Info().Interfaces() {
    82  		if i.SrcName() == srcName {
    83  			return i.DstName()
    84  		}
    85  	}
    86  	return ""
    87  }
    88  
    89  // Add loadbalancer backend to the loadbalncer sandbox for the network.
    90  // If needed add the service as well.
    91  func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
    92  	if len(lb.vip) == 0 {
    93  		return
    94  	}
    95  	ep, sb, err := n.findLBEndpointSandbox()
    96  	if err != nil {
    97  		logrus.Errorf("addLBBackend %s/%s: %v", n.ID(), n.Name(), err)
    98  		return
    99  	}
   100  	if sb.osSbox == nil {
   101  		return
   102  	}
   103  
   104  	eIP := ep.Iface().Address()
   105  
   106  	i, err := ipvs.New(sb.Key())
   107  	if err != nil {
   108  		logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb addition: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
   109  		return
   110  	}
   111  	defer i.Close()
   112  
   113  	s := &ipvs.Service{
   114  		AddressFamily: nl.FAMILY_V4,
   115  		FWMark:        lb.fwMark,
   116  		SchedName:     ipvs.RoundRobin,
   117  	}
   118  
   119  	if !i.IsServicePresent(s) {
   120  		// Add IP alias for the VIP to the endpoint
   121  		ifName := findIfaceDstName(sb, ep)
   122  		if ifName == "" {
   123  			logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
   124  			return
   125  		}
   126  		err := sb.osSbox.AddAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
   127  		if err != nil {
   128  			logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
   129  			return
   130  		}
   131  
   132  		if sb.ingress {
   133  			var gwIP net.IP
   134  			if ep := sb.getGatewayEndpoint(); ep != nil {
   135  				gwIP = ep.Iface().Address().IP
   136  			}
   137  			if err := programIngress(gwIP, lb.service.ingressPorts, false); err != nil {
   138  				logrus.Errorf("Failed to add ingress: %v", err)
   139  				return
   140  			}
   141  		}
   142  
   143  		logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
   144  		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
   145  			logrus.Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
   146  			return
   147  		}
   148  
   149  		if err := i.NewService(s); err != nil && err != syscall.EEXIST {
   150  			logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   151  			return
   152  		}
   153  	}
   154  
   155  	d := &ipvs.Destination{
   156  		AddressFamily: nl.FAMILY_V4,
   157  		Address:       ip,
   158  		Weight:        1,
   159  	}
   160  	if n.loadBalancerMode == loadBalancerModeDSR {
   161  		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
   162  	}
   163  
   164  	// Remove the sched name before using the service to add
   165  	// destination.
   166  	s.SchedName = ""
   167  	if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
   168  		logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   169  	}
   170  
   171  	// Ensure that kernel tweaks are applied in case this is the first time
   172  	// we've initialized ip_vs
   173  	sb.osSbox.ApplyOSTweaks(sb.oslTypes)
   174  }
   175  
   176  // Remove loadbalancer backend the load balancing endpoint for this
   177  // network. If 'rmService' is true, then remove the service entry as well.
   178  // If 'fullRemove' is true then completely remove the entry, otherwise
   179  // just deweight it for now.
   180  func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) {
   181  	if len(lb.vip) == 0 {
   182  		return
   183  	}
   184  	ep, sb, err := n.findLBEndpointSandbox()
   185  	if err != nil {
   186  		logrus.Debugf("rmLBBackend for %s/%s: %v -- probably transient state", n.ID(), n.Name(), err)
   187  		return
   188  	}
   189  	if sb.osSbox == nil {
   190  		return
   191  	}
   192  
   193  	eIP := ep.Iface().Address()
   194  
   195  	i, err := ipvs.New(sb.Key())
   196  	if err != nil {
   197  		logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb removal: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
   198  		return
   199  	}
   200  	defer i.Close()
   201  
   202  	s := &ipvs.Service{
   203  		AddressFamily: nl.FAMILY_V4,
   204  		FWMark:        lb.fwMark,
   205  	}
   206  
   207  	d := &ipvs.Destination{
   208  		AddressFamily: nl.FAMILY_V4,
   209  		Address:       ip,
   210  		Weight:        1,
   211  	}
   212  	if n.loadBalancerMode == loadBalancerModeDSR {
   213  		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
   214  	}
   215  
   216  	if fullRemove {
   217  		if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
   218  			logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   219  		}
   220  	} else {
   221  		d.Weight = 0
   222  		if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
   223  			logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   224  		}
   225  	}
   226  
   227  	if rmService {
   228  		s.SchedName = ipvs.RoundRobin
   229  		if err := i.DelService(s); err != nil && err != syscall.ENOENT {
   230  			logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   231  		}
   232  
   233  		if sb.ingress {
   234  			var gwIP net.IP
   235  			if ep := sb.getGatewayEndpoint(); ep != nil {
   236  				gwIP = ep.Iface().Address().IP
   237  			}
   238  			if err := programIngress(gwIP, lb.service.ingressPorts, true); err != nil {
   239  				logrus.Errorf("Failed to delete ingress: %v", err)
   240  			}
   241  		}
   242  
   243  		if err := invokeFWMarker(sb.Key(), lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
   244  			logrus.Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
   245  		}
   246  
   247  		// Remove IP alias from the VIP to the endpoint
   248  		ifName := findIfaceDstName(sb, ep)
   249  		if ifName == "" {
   250  			logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
   251  			return
   252  		}
   253  		err := sb.osSbox.RemoveAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
   254  		if err != nil {
   255  			logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
   256  		}
   257  	}
   258  }
   259  
   260  const ingressChain = "DOCKER-INGRESS"
   261  
   262  var (
   263  	ingressOnce     sync.Once
   264  	ingressMu       sync.Mutex // lock for operations on ingress
   265  	ingressProxyTbl = make(map[string]io.Closer)
   266  	portConfigMu    sync.Mutex
   267  	portConfigTbl   = make(map[PortConfig]int)
   268  )
   269  
   270  func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig {
   271  	portConfigMu.Lock()
   272  	iPorts := make([]*PortConfig, 0, len(ingressPorts))
   273  	for _, pc := range ingressPorts {
   274  		if isDelete {
   275  			if cnt, ok := portConfigTbl[*pc]; ok {
   276  				// This is the last reference to this
   277  				// port config. Delete the port config
   278  				// and add it to filtered list to be
   279  				// plumbed.
   280  				if cnt == 1 {
   281  					delete(portConfigTbl, *pc)
   282  					iPorts = append(iPorts, pc)
   283  					continue
   284  				}
   285  
   286  				portConfigTbl[*pc] = cnt - 1
   287  			}
   288  
   289  			continue
   290  		}
   291  
   292  		if cnt, ok := portConfigTbl[*pc]; ok {
   293  			portConfigTbl[*pc] = cnt + 1
   294  			continue
   295  		}
   296  
   297  		// We are adding it for the first time. Add it to the
   298  		// filter list to be plumbed.
   299  		portConfigTbl[*pc] = 1
   300  		iPorts = append(iPorts, pc)
   301  	}
   302  	portConfigMu.Unlock()
   303  
   304  	return iPorts
   305  }
   306  
   307  func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error {
   308  	// TODO IPv6 support
   309  	iptable := iptables.GetIptable(iptables.IPv4)
   310  
   311  	addDelOpt := "-I"
   312  	rollbackAddDelOpt := "-D"
   313  	if isDelete {
   314  		addDelOpt = "-D"
   315  		rollbackAddDelOpt = "-I"
   316  	}
   317  
   318  	ingressMu.Lock()
   319  	defer ingressMu.Unlock()
   320  
   321  	chainExists := iptable.ExistChain(ingressChain, iptables.Nat)
   322  	filterChainExists := iptable.ExistChain(ingressChain, iptables.Filter)
   323  
   324  	ingressOnce.Do(func() {
   325  		// Flush nat table and filter table ingress chain rules during init if it
   326  		// exists. It might contain stale rules from previous life.
   327  		if chainExists {
   328  			if err := iptable.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil {
   329  				logrus.Errorf("Could not flush nat table ingress chain rules during init: %v", err)
   330  			}
   331  		}
   332  		if filterChainExists {
   333  			if err := iptable.RawCombinedOutput("-F", ingressChain); err != nil {
   334  				logrus.Errorf("Could not flush filter table ingress chain rules during init: %v", err)
   335  			}
   336  		}
   337  	})
   338  
   339  	if !isDelete {
   340  		if !chainExists {
   341  			if err := iptable.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil {
   342  				return fmt.Errorf("failed to create ingress chain: %v", err)
   343  			}
   344  		}
   345  		if !filterChainExists {
   346  			if err := iptable.RawCombinedOutput("-N", ingressChain); err != nil {
   347  				return fmt.Errorf("failed to create filter table ingress chain: %v", err)
   348  			}
   349  		}
   350  
   351  		if !iptable.Exists(iptables.Nat, ingressChain, "-j", "RETURN") {
   352  			if err := iptable.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil {
   353  				return fmt.Errorf("failed to add return rule in nat table ingress chain: %v", err)
   354  			}
   355  		}
   356  
   357  		if !iptable.Exists(iptables.Filter, ingressChain, "-j", "RETURN") {
   358  			if err := iptable.RawCombinedOutput("-A", ingressChain, "-j", "RETURN"); err != nil {
   359  				return fmt.Errorf("failed to add return rule to filter table ingress chain: %v", err)
   360  			}
   361  		}
   362  
   363  		for _, chain := range []string{"OUTPUT", "PREROUTING"} {
   364  			if !iptable.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) {
   365  				if err := iptable.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil {
   366  					return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err)
   367  				}
   368  			}
   369  		}
   370  
   371  		if !iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
   372  			if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
   373  				return fmt.Errorf("failed to add jump rule to %s in filter table forward chain: %v", ingressChain, err)
   374  			}
   375  			arrangeUserFilterRule()
   376  		}
   377  
   378  		oifName, err := findOIFName(gwIP)
   379  		if err != nil {
   380  			return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err)
   381  		}
   382  
   383  		path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
   384  		if err := os.WriteFile(path, []byte{'1', '\n'}, 0644); err != nil { //nolint:gosec // gosec complains about perms here, which must be 0644 in this case
   385  			return fmt.Errorf("could not write to %s: %v", path, err)
   386  		}
   387  
   388  		ruleArgs := []string{"-m", "addrtype", "--src-type", "LOCAL", "-o", oifName, "-j", "MASQUERADE"}
   389  		if !iptable.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) {
   390  			if err := iptable.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil {
   391  				return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err)
   392  			}
   393  		}
   394  	}
   395  
   396  	// Filter the ingress ports until port rules start to be added/deleted
   397  	filteredPorts := filterPortConfigs(ingressPorts, isDelete)
   398  	rollbackRules := make([][]string, 0, len(filteredPorts)*3)
   399  	var portErr error
   400  	defer func() {
   401  		if portErr != nil && !isDelete {
   402  			filterPortConfigs(filteredPorts, !isDelete)
   403  			for _, rule := range rollbackRules {
   404  				if err := iptable.RawCombinedOutput(rule...); err != nil {
   405  					logrus.Warnf("roll back rule failed, %v: %v", rule, err)
   406  				}
   407  			}
   408  		}
   409  	}()
   410  
   411  	for _, iPort := range filteredPorts {
   412  		var (
   413  			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
   414  			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
   415  			destination   = net.JoinHostPort(gwIP.String(), publishedPort)
   416  		)
   417  		if iptable.ExistChain(ingressChain, iptables.Nat) {
   418  			rule := []string{"-t", "nat", addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}
   419  
   420  			if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
   421  				err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
   422  				if !isDelete {
   423  					return err
   424  				}
   425  				logrus.Info(err)
   426  			}
   427  			rollbackRule := []string{"-t", "nat", rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}
   428  			rollbackRules = append(rollbackRules, rollbackRule)
   429  		}
   430  
   431  		// Filter table rules to allow a published service to be accessible in the local node from..
   432  		// 1) service tasks attached to other networks
   433  		// 2) unmanaged containers on bridge networks
   434  		rule := []string{addDelOpt, ingressChain, "-m", "state", "-p", protocol, "--sport", publishedPort, "--state", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
   435  		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
   436  			err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
   437  			if !isDelete {
   438  				return err
   439  			}
   440  			logrus.Warn(err)
   441  		}
   442  		rollbackRule := []string{rollbackAddDelOpt, ingressChain, "-m", "state", "-p", protocol, "--sport", publishedPort, "--state", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
   443  		rollbackRules = append(rollbackRules, rollbackRule)
   444  
   445  		rule = []string{addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
   446  		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
   447  			err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
   448  			if !isDelete {
   449  				return err
   450  			}
   451  			logrus.Warn(err)
   452  		}
   453  		rollbackRule = []string{rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
   454  		rollbackRules = append(rollbackRules, rollbackRule)
   455  
   456  		if err := plumbProxy(iPort, isDelete); err != nil {
   457  			logrus.Warnf("failed to create proxy for port %s: %v", publishedPort, err)
   458  		}
   459  	}
   460  
   461  	return nil
   462  }
   463  
   464  // In the filter table FORWARD chain the first rule should be to jump to
   465  // DOCKER-USER so the user is able to filter packet first.
   466  // The second rule should be jump to INGRESS-CHAIN.
   467  // This chain has the rules to allow access to the published ports for swarm tasks
   468  // from local bridge networks and docker_gwbridge (ie:taks on other swarm networks)
   469  func arrangeIngressFilterRule() {
   470  	// TODO IPv6 support
   471  	iptable := iptables.GetIptable(iptables.IPv4)
   472  	if iptable.ExistChain(ingressChain, iptables.Filter) {
   473  		if iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
   474  			if err := iptable.RawCombinedOutput("-D", "FORWARD", "-j", ingressChain); err != nil {
   475  				logrus.Warnf("failed to delete jump rule to ingressChain in filter table: %v", err)
   476  			}
   477  		}
   478  		if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
   479  			logrus.Warnf("failed to add jump rule to ingressChain in filter table: %v", err)
   480  		}
   481  	}
   482  }
   483  
   484  func findOIFName(ip net.IP) (string, error) {
   485  	nlh := ns.NlHandle()
   486  
   487  	routes, err := nlh.RouteGet(ip)
   488  	if err != nil {
   489  		return "", err
   490  	}
   491  
   492  	if len(routes) == 0 {
   493  		return "", fmt.Errorf("no route to %s", ip)
   494  	}
   495  
   496  	// Pick the first route(typically there is only one route). We
   497  	// don't support multipath.
   498  	link, err := nlh.LinkByIndex(routes[0].LinkIndex)
   499  	if err != nil {
   500  		return "", err
   501  	}
   502  
   503  	return link.Attrs().Name, nil
   504  }
   505  
   506  func plumbProxy(iPort *PortConfig, isDelete bool) error {
   507  	var (
   508  		err error
   509  		l   io.Closer
   510  	)
   511  
   512  	portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]))
   513  	if isDelete {
   514  		if listener, ok := ingressProxyTbl[portSpec]; ok {
   515  			if listener != nil {
   516  				listener.Close()
   517  			}
   518  		}
   519  
   520  		return nil
   521  	}
   522  
   523  	switch iPort.Protocol {
   524  	case ProtocolTCP:
   525  		l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)})
   526  	case ProtocolUDP:
   527  		l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)})
   528  	case ProtocolSCTP:
   529  		l, err = sctp.ListenSCTP("sctp", &sctp.SCTPAddr{Port: int(iPort.PublishedPort)})
   530  	default:
   531  		err = fmt.Errorf("unknown protocol %v", iPort.Protocol)
   532  	}
   533  
   534  	if err != nil {
   535  		return err
   536  	}
   537  
   538  	ingressProxyTbl[portSpec] = l
   539  
   540  	return nil
   541  }
   542  
   543  func writePortsToFile(ports []*PortConfig) (string, error) {
   544  	f, err := os.CreateTemp("", "port_configs")
   545  	if err != nil {
   546  		return "", err
   547  	}
   548  	defer f.Close() //nolint:gosec
   549  
   550  	buf, _ := proto.Marshal(&EndpointRecord{
   551  		IngressPorts: ports,
   552  	})
   553  
   554  	n, err := f.Write(buf)
   555  	if err != nil {
   556  		return "", err
   557  	}
   558  
   559  	if n < len(buf) {
   560  		return "", io.ErrShortWrite
   561  	}
   562  
   563  	return f.Name(), nil
   564  }
   565  
   566  func readPortsFromFile(fileName string) ([]*PortConfig, error) {
   567  	buf, err := os.ReadFile(fileName)
   568  	if err != nil {
   569  		return nil, err
   570  	}
   571  
   572  	var epRec EndpointRecord
   573  	err = proto.Unmarshal(buf, &epRec)
   574  	if err != nil {
   575  		return nil, err
   576  	}
   577  
   578  	return epRec.IngressPorts, nil
   579  }
   580  
   581  // Invoke fwmarker reexec routine to mark vip destined packets with
   582  // the passed firewall mark.
   583  func invokeFWMarker(path string, vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
   584  	var ingressPortsFile string
   585  
   586  	if len(ingressPorts) != 0 {
   587  		var err error
   588  		ingressPortsFile, err = writePortsToFile(ingressPorts)
   589  		if err != nil {
   590  			return err
   591  		}
   592  
   593  		defer os.Remove(ingressPortsFile)
   594  	}
   595  
   596  	addDelOpt := "-A"
   597  	if isDelete {
   598  		addDelOpt = "-D"
   599  	}
   600  
   601  	cmd := &exec.Cmd{
   602  		Path:   reexec.Self(),
   603  		Args:   append([]string{"fwmarker"}, path, vip.String(), fmt.Sprintf("%d", fwMark), addDelOpt, ingressPortsFile, eIP.String(), lbMode),
   604  		Stdout: os.Stdout,
   605  		Stderr: os.Stderr,
   606  	}
   607  
   608  	if err := cmd.Run(); err != nil {
   609  		return fmt.Errorf("reexec failed: %v", err)
   610  	}
   611  
   612  	return nil
   613  }
   614  
   615  // Firewall marker reexec function.
   616  func fwMarker() {
   617  	// TODO IPv6 support
   618  	iptable := iptables.GetIptable(iptables.IPv4)
   619  	runtime.LockOSThread()
   620  	defer runtime.UnlockOSThread()
   621  
   622  	if len(os.Args) < 8 {
   623  		logrus.Error("invalid number of arguments..")
   624  		os.Exit(1)
   625  	}
   626  
   627  	var ingressPorts []*PortConfig
   628  	if os.Args[5] != "" {
   629  		var err error
   630  		ingressPorts, err = readPortsFromFile(os.Args[5])
   631  		if err != nil {
   632  			logrus.Errorf("Failed reading ingress ports file: %v", err)
   633  			os.Exit(2)
   634  		}
   635  	}
   636  
   637  	vip := os.Args[2]
   638  	fwMark := os.Args[3]
   639  	if _, err := strconv.ParseUint(fwMark, 10, 32); err != nil {
   640  		logrus.Errorf("bad fwmark value(%s) passed: %v", fwMark, err)
   641  		os.Exit(3)
   642  	}
   643  	addDelOpt := os.Args[4]
   644  
   645  	rules := make([][]string, 0, len(ingressPorts))
   646  	for _, iPort := range ingressPorts {
   647  		var (
   648  			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
   649  			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
   650  		)
   651  		rule := []string{"-t", "mangle", addDelOpt, "PREROUTING", "-p", protocol, "--dport", publishedPort, "-j", "MARK", "--set-mark", fwMark}
   652  		rules = append(rules, rule)
   653  	}
   654  
   655  	ns, err := netns.GetFromPath(os.Args[1])
   656  	if err != nil {
   657  		logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
   658  		os.Exit(4)
   659  	}
   660  	defer ns.Close()
   661  
   662  	if err := netns.Set(ns); err != nil {
   663  		logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
   664  		os.Exit(5)
   665  	}
   666  
   667  	lbMode := os.Args[7]
   668  	if addDelOpt == "-A" && lbMode == loadBalancerModeNAT {
   669  		eIP, subnet, err := net.ParseCIDR(os.Args[6])
   670  		if err != nil {
   671  			logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[6], err)
   672  			os.Exit(6)
   673  		}
   674  
   675  		ruleParams := []string{"-m", "ipvs", "--ipvs", "-d", subnet.String(), "-j", "SNAT", "--to-source", eIP.String()}
   676  		if !iptable.Exists("nat", "POSTROUTING", ruleParams...) {
   677  			rule := append([]string{"-t", "nat", "-A", "POSTROUTING"}, ruleParams...)
   678  			rules = append(rules, rule)
   679  
   680  			err := os.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
   681  			if err != nil {
   682  				logrus.Errorf("Failed to write to /proc/sys/net/ipv4/vs/conntrack: %v", err)
   683  				os.Exit(7)
   684  			}
   685  		}
   686  	}
   687  
   688  	rule := []string{"-t", "mangle", addDelOpt, "INPUT", "-d", vip + "/32", "-j", "MARK", "--set-mark", fwMark}
   689  	rules = append(rules, rule)
   690  
   691  	for _, rule := range rules {
   692  		if err := iptable.RawCombinedOutputNative(rule...); err != nil {
   693  			logrus.Errorf("set up rule failed, %v: %v", rule, err)
   694  			os.Exit(8)
   695  		}
   696  	}
   697  }
   698  
   699  func addRedirectRules(path string, eIP *net.IPNet, ingressPorts []*PortConfig) error {
   700  	var ingressPortsFile string
   701  
   702  	if len(ingressPorts) != 0 {
   703  		var err error
   704  		ingressPortsFile, err = writePortsToFile(ingressPorts)
   705  		if err != nil {
   706  			return err
   707  		}
   708  		defer os.Remove(ingressPortsFile)
   709  	}
   710  
   711  	cmd := &exec.Cmd{
   712  		Path:   reexec.Self(),
   713  		Args:   append([]string{"redirector"}, path, eIP.String(), ingressPortsFile),
   714  		Stdout: os.Stdout,
   715  		Stderr: os.Stderr,
   716  	}
   717  
   718  	if err := cmd.Run(); err != nil {
   719  		return fmt.Errorf("reexec failed: %v", err)
   720  	}
   721  
   722  	return nil
   723  }
   724  
   725  // Redirector reexec function.
   726  func redirector() {
   727  	// TODO IPv6 support
   728  	iptable := iptables.GetIptable(iptables.IPv4)
   729  	runtime.LockOSThread()
   730  	defer runtime.UnlockOSThread()
   731  
   732  	if len(os.Args) < 4 {
   733  		logrus.Error("invalid number of arguments..")
   734  		os.Exit(1)
   735  	}
   736  
   737  	var ingressPorts []*PortConfig
   738  	if os.Args[3] != "" {
   739  		var err error
   740  		ingressPorts, err = readPortsFromFile(os.Args[3])
   741  		if err != nil {
   742  			logrus.Errorf("Failed reading ingress ports file: %v", err)
   743  			os.Exit(2)
   744  		}
   745  	}
   746  
   747  	eIP, _, err := net.ParseCIDR(os.Args[2])
   748  	if err != nil {
   749  		logrus.Errorf("Failed to parse endpoint IP %s: %v", os.Args[2], err)
   750  		os.Exit(3)
   751  	}
   752  	ipAddr := eIP.String()
   753  
   754  	rules := make([][]string, 0, len(ingressPorts)*3) // 3 rules per port
   755  	for _, iPort := range ingressPorts {
   756  		var (
   757  			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
   758  			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
   759  			targetPort    = strconv.FormatUint(uint64(iPort.TargetPort), 10)
   760  		)
   761  
   762  		rules = append(rules,
   763  			[]string{"-t", "nat", "-A", "PREROUTING", "-d", ipAddr, "-p", protocol, "--dport", publishedPort, "-j", "REDIRECT", "--to-port", targetPort},
   764  
   765  			// Allow only incoming connections to exposed ports
   766  			[]string{"-I", "INPUT", "-d", ipAddr, "-p", protocol, "--dport", targetPort, "-m", "conntrack", "--ctstate", "NEW,ESTABLISHED", "-j", "ACCEPT"},
   767  
   768  			// Allow only outgoing connections from exposed ports
   769  			[]string{"-I", "OUTPUT", "-s", ipAddr, "-p", protocol, "--sport", targetPort, "-m", "conntrack", "--ctstate", "ESTABLISHED", "-j", "ACCEPT"},
   770  		)
   771  	}
   772  
   773  	ns, err := netns.GetFromPath(os.Args[1])
   774  	if err != nil {
   775  		logrus.Errorf("failed get network namespace %q: %v", os.Args[1], err)
   776  		os.Exit(4)
   777  	}
   778  	defer ns.Close()
   779  
   780  	if err := netns.Set(ns); err != nil {
   781  		logrus.Errorf("setting into container net ns %v failed, %v", os.Args[1], err)
   782  		os.Exit(5)
   783  	}
   784  
   785  	for _, rule := range rules {
   786  		if err := iptable.RawCombinedOutputNative(rule...); err != nil {
   787  			logrus.Errorf("set up rule failed, %v: %v", rule, err)
   788  			os.Exit(6)
   789  		}
   790  	}
   791  
   792  	if len(ingressPorts) == 0 {
   793  		return
   794  	}
   795  
   796  	// Ensure blocking rules for anything else in/to ingress network
   797  	for _, rule := range [][]string{
   798  		{"-d", ipAddr, "-p", "sctp", "-j", "DROP"},
   799  		{"-d", ipAddr, "-p", "udp", "-j", "DROP"},
   800  		{"-d", ipAddr, "-p", "tcp", "-j", "DROP"},
   801  	} {
   802  		if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) {
   803  			if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
   804  				logrus.Errorf("set up rule failed, %v: %v", rule, err)
   805  				os.Exit(7)
   806  			}
   807  		}
   808  		rule[0] = "-s"
   809  		if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
   810  			if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
   811  				logrus.Errorf("set up rule failed, %v: %v", rule, err)
   812  				os.Exit(8)
   813  			}
   814  		}
   815  	}
   816  }