github.com/rawahars/moby@v24.0.4+incompatible/libnetwork/service_linux.go (about)

     1  package libnetwork
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"net"
     7  	"os"
     8  	"path/filepath"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"syscall"
    13  
    14  	"github.com/docker/docker/libnetwork/iptables"
    15  	"github.com/docker/docker/libnetwork/ns"
    16  	"github.com/ishidawataru/sctp"
    17  	"github.com/moby/ipvs"
    18  	"github.com/sirupsen/logrus"
    19  	"github.com/vishvananda/netlink/nl"
    20  )
    21  
    22  // Populate all loadbalancers on the network that the passed endpoint
    23  // belongs to, into this sandbox.
    24  func (sb *Sandbox) populateLoadBalancers(ep *Endpoint) {
    25  	// This is an interface less endpoint. Nothing to do.
    26  	if ep.Iface() == nil {
    27  		return
    28  	}
    29  
    30  	n := ep.getNetwork()
    31  	eIP := ep.Iface().Address()
    32  
    33  	if n.ingress {
    34  		if err := sb.addRedirectRules(eIP, ep.ingressPorts); err != nil {
    35  			logrus.Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err)
    36  		}
    37  	}
    38  }
    39  
    40  func (n *network) findLBEndpointSandbox() (*Endpoint, *Sandbox, error) {
    41  	// TODO: get endpoint from store?  See EndpointInfo()
    42  	var ep *Endpoint
    43  	// Find this node's LB sandbox endpoint:  there should be exactly one
    44  	for _, e := range n.Endpoints() {
    45  		epi := e.Info()
    46  		if epi != nil && epi.LoadBalancer() {
    47  			ep = e
    48  			break
    49  		}
    50  	}
    51  	if ep == nil {
    52  		return nil, nil, fmt.Errorf("Unable to find load balancing endpoint for network %s", n.ID())
    53  	}
    54  	// Get the load balancer sandbox itself as well
    55  	sb, ok := ep.getSandbox()
    56  	if !ok {
    57  		return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
    58  	}
    59  	sep := sb.getEndpoint(ep.ID())
    60  	if sep == nil {
    61  		return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
    62  	}
    63  	return sep, sb, nil
    64  }
    65  
    66  // Searches the OS sandbox for the name of the endpoint interface
    67  // within the sandbox.   This is required for adding/removing IP
    68  // aliases to the interface.
    69  func findIfaceDstName(sb *Sandbox, ep *Endpoint) string {
    70  	srcName := ep.Iface().SrcName()
    71  	for _, i := range sb.osSbox.Info().Interfaces() {
    72  		if i.SrcName() == srcName {
    73  			return i.DstName()
    74  		}
    75  	}
    76  	return ""
    77  }
    78  
    79  // Add loadbalancer backend to the loadbalncer sandbox for the network.
    80  // If needed add the service as well.
    81  func (n *network) addLBBackend(ip net.IP, lb *loadBalancer) {
    82  	if len(lb.vip) == 0 {
    83  		return
    84  	}
    85  	ep, sb, err := n.findLBEndpointSandbox()
    86  	if err != nil {
    87  		logrus.Errorf("addLBBackend %s/%s: %v", n.ID(), n.Name(), err)
    88  		return
    89  	}
    90  	if sb.osSbox == nil {
    91  		return
    92  	}
    93  
    94  	eIP := ep.Iface().Address()
    95  
    96  	i, err := ipvs.New(sb.Key())
    97  	if err != nil {
    98  		logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb addition: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
    99  		return
   100  	}
   101  	defer i.Close()
   102  
   103  	s := &ipvs.Service{
   104  		AddressFamily: nl.FAMILY_V4,
   105  		FWMark:        lb.fwMark,
   106  		SchedName:     ipvs.RoundRobin,
   107  	}
   108  
   109  	if !i.IsServicePresent(s) {
   110  		// Add IP alias for the VIP to the endpoint
   111  		ifName := findIfaceDstName(sb, ep)
   112  		if ifName == "" {
   113  			logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
   114  			return
   115  		}
   116  		err := sb.osSbox.AddAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
   117  		if err != nil {
   118  			logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
   119  			return
   120  		}
   121  
   122  		if sb.ingress {
   123  			var gwIP net.IP
   124  			if ep := sb.getGatewayEndpoint(); ep != nil {
   125  				gwIP = ep.Iface().Address().IP
   126  			}
   127  			if err := programIngress(gwIP, lb.service.ingressPorts, false); err != nil {
   128  				logrus.Errorf("Failed to add ingress: %v", err)
   129  				return
   130  			}
   131  		}
   132  
   133  		logrus.Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
   134  		if err := sb.configureFWMark(lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
   135  			logrus.Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
   136  			return
   137  		}
   138  
   139  		if err := i.NewService(s); err != nil && err != syscall.EEXIST {
   140  			logrus.Errorf("Failed to create a new service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   141  			return
   142  		}
   143  	}
   144  
   145  	d := &ipvs.Destination{
   146  		AddressFamily: nl.FAMILY_V4,
   147  		Address:       ip,
   148  		Weight:        1,
   149  	}
   150  	if n.loadBalancerMode == loadBalancerModeDSR {
   151  		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
   152  	}
   153  
   154  	// Remove the sched name before using the service to add
   155  	// destination.
   156  	s.SchedName = ""
   157  	if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
   158  		logrus.Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   159  	}
   160  
   161  	// Ensure that kernel tweaks are applied in case this is the first time
   162  	// we've initialized ip_vs
   163  	sb.osSbox.ApplyOSTweaks(sb.oslTypes)
   164  }
   165  
   166  // Remove loadbalancer backend the load balancing endpoint for this
   167  // network. If 'rmService' is true, then remove the service entry as well.
   168  // If 'fullRemove' is true then completely remove the entry, otherwise
   169  // just deweight it for now.
   170  func (n *network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) {
   171  	if len(lb.vip) == 0 {
   172  		return
   173  	}
   174  	ep, sb, err := n.findLBEndpointSandbox()
   175  	if err != nil {
   176  		logrus.Debugf("rmLBBackend for %s/%s: %v -- probably transient state", n.ID(), n.Name(), err)
   177  		return
   178  	}
   179  	if sb.osSbox == nil {
   180  		return
   181  	}
   182  
   183  	eIP := ep.Iface().Address()
   184  
   185  	i, err := ipvs.New(sb.Key())
   186  	if err != nil {
   187  		logrus.Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb removal: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
   188  		return
   189  	}
   190  	defer i.Close()
   191  
   192  	s := &ipvs.Service{
   193  		AddressFamily: nl.FAMILY_V4,
   194  		FWMark:        lb.fwMark,
   195  	}
   196  
   197  	d := &ipvs.Destination{
   198  		AddressFamily: nl.FAMILY_V4,
   199  		Address:       ip,
   200  		Weight:        1,
   201  	}
   202  	if n.loadBalancerMode == loadBalancerModeDSR {
   203  		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
   204  	}
   205  
   206  	if fullRemove {
   207  		if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
   208  			logrus.Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   209  		}
   210  	} else {
   211  		d.Weight = 0
   212  		if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
   213  			logrus.Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   214  		}
   215  	}
   216  
   217  	if rmService {
   218  		s.SchedName = ipvs.RoundRobin
   219  		if err := i.DelService(s); err != nil && err != syscall.ENOENT {
   220  			logrus.Errorf("Failed to delete service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   221  		}
   222  
   223  		if sb.ingress {
   224  			var gwIP net.IP
   225  			if ep := sb.getGatewayEndpoint(); ep != nil {
   226  				gwIP = ep.Iface().Address().IP
   227  			}
   228  			if err := programIngress(gwIP, lb.service.ingressPorts, true); err != nil {
   229  				logrus.Errorf("Failed to delete ingress: %v", err)
   230  			}
   231  		}
   232  
   233  		if err := sb.configureFWMark(lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
   234  			logrus.Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
   235  		}
   236  
   237  		// Remove IP alias from the VIP to the endpoint
   238  		ifName := findIfaceDstName(sb, ep)
   239  		if ifName == "" {
   240  			logrus.Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
   241  			return
   242  		}
   243  		err := sb.osSbox.RemoveAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
   244  		if err != nil {
   245  			logrus.Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
   246  		}
   247  	}
   248  }
   249  
// ingressChain is the name of the iptables chain that programIngress creates
// and populates in both the nat and the filter table.
const ingressChain = "DOCKER-INGRESS"

var (
	// ingressOnce makes programIngress flush pre-existing ingress chain
	// rules only on its first invocation.
	ingressOnce     sync.Once
	ingressMu       sync.Mutex // lock for operations on ingress
	// ingressProxyTbl maps a port spec of the form "<published port>/<protocol>"
	// to the listener opened for that port by plumbProxy.
	ingressProxyTbl = make(map[string]io.Closer)
	// portConfigMu guards portConfigTbl.
	portConfigMu    sync.Mutex
	// portConfigTbl holds a reference count per port configuration; it is
	// maintained by filterPortConfigs.
	portConfigTbl   = make(map[PortConfig]int)
)
   259  
   260  func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig {
   261  	portConfigMu.Lock()
   262  	iPorts := make([]*PortConfig, 0, len(ingressPorts))
   263  	for _, pc := range ingressPorts {
   264  		if isDelete {
   265  			if cnt, ok := portConfigTbl[*pc]; ok {
   266  				// This is the last reference to this
   267  				// port config. Delete the port config
   268  				// and add it to filtered list to be
   269  				// plumbed.
   270  				if cnt == 1 {
   271  					delete(portConfigTbl, *pc)
   272  					iPorts = append(iPorts, pc)
   273  					continue
   274  				}
   275  
   276  				portConfigTbl[*pc] = cnt - 1
   277  			}
   278  
   279  			continue
   280  		}
   281  
   282  		if cnt, ok := portConfigTbl[*pc]; ok {
   283  			portConfigTbl[*pc] = cnt + 1
   284  			continue
   285  		}
   286  
   287  		// We are adding it for the first time. Add it to the
   288  		// filter list to be plumbed.
   289  		portConfigTbl[*pc] = 1
   290  		iPorts = append(iPorts, pc)
   291  	}
   292  	portConfigMu.Unlock()
   293  
   294  	return iPorts
   295  }
   296  
// programIngress adds (isDelete == false) or removes (isDelete == true) the
// host-side iptables rules and the per-port listeners for the given ingress
// ports. gwIP is used as the DNAT destination address and, when adding, to
// find the outgoing interface whose route_localnet setting and MASQUERADE
// rule are configured.
//
// The whole operation runs with ingressMu held. On the first invocation any
// pre-existing rules in the ingress chains are flushed. When adding, the
// ingress chains and the jump rules leading to them are created if missing.
// Only ports selected by filterPortConfigs (first reference on add, last
// reference on delete) get per-port rules.
//
// When adding, the first failing rule aborts the call with an error and the
// deferred rollback undoes the reference counts and the per-port rules that
// were already applied. When deleting, rule failures are only logged and the
// function returns nil.
func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error {
	// TODO IPv6 support
	iptable := iptables.GetIptable(iptables.IPv4)

	// addDelOpt is the iptables action for the requested operation;
	// rollbackAddDelOpt is its inverse, used to build the rollback rules.
	addDelOpt := "-I"
	rollbackAddDelOpt := "-D"
	if isDelete {
		addDelOpt = "-D"
		rollbackAddDelOpt = "-I"
	}

	ingressMu.Lock()
	defer ingressMu.Unlock()

	chainExists := iptable.ExistChain(ingressChain, iptables.Nat)
	filterChainExists := iptable.ExistChain(ingressChain, iptables.Filter)

	ingressOnce.Do(func() {
		// Flush nat table and filter table ingress chain rules during init if it
		// exists. It might contain stale rules from previous life.
		if chainExists {
			if err := iptable.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil {
				logrus.Errorf("Could not flush nat table ingress chain rules during init: %v", err)
			}
		}
		if filterChainExists {
			if err := iptable.RawCombinedOutput("-F", ingressChain); err != nil {
				logrus.Errorf("Could not flush filter table ingress chain rules during init: %v", err)
			}
		}
	})

	if !isDelete {
		// Create the ingress chain in the nat and filter tables if missing.
		if !chainExists {
			if err := iptable.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil {
				return fmt.Errorf("failed to create ingress chain: %v", err)
			}
		}
		if !filterChainExists {
			if err := iptable.RawCombinedOutput("-N", ingressChain); err != nil {
				return fmt.Errorf("failed to create filter table ingress chain: %v", err)
			}
		}

		// Make sure both chains have a RETURN rule. It is appended (-A)
		// whereas the per-port rules below are inserted (-I).
		if !iptable.Exists(iptables.Nat, ingressChain, "-j", "RETURN") {
			if err := iptable.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil {
				return fmt.Errorf("failed to add return rule in nat table ingress chain: %v", err)
			}
		}

		if !iptable.Exists(iptables.Filter, ingressChain, "-j", "RETURN") {
			if err := iptable.RawCombinedOutput("-A", ingressChain, "-j", "RETURN"); err != nil {
				return fmt.Errorf("failed to add return rule to filter table ingress chain: %v", err)
			}
		}

		// Jump to the nat ingress chain from OUTPUT and PREROUTING for
		// packets whose destination address is local.
		for _, chain := range []string{"OUTPUT", "PREROUTING"} {
			if !iptable.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) {
				if err := iptable.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil {
					return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err)
				}
			}
		}

		// Jump to the filter ingress chain from FORWARD.
		if !iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
			if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
				return fmt.Errorf("failed to add jump rule to %s in filter table forward chain: %v", ingressChain, err)
			}
			// NOTE(review): arrangeUserFilterRule is defined outside this
			// file; presumably it re-orders FORWARD so the user chain stays
			// first -- confirm.
			arrangeUserFilterRule()
		}

		// Name of the interface that routes towards the gateway IP.
		oifName, err := findOIFName(gwIP)
		if err != nil {
			return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err)
		}

		// Enable route_localnet on that interface.
		path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
		if err := os.WriteFile(path, []byte{'1', '\n'}, 0o644); err != nil { //nolint:gosec // gosec complains about perms here, which must be 0644 in this case
			return fmt.Errorf("could not write to %s: %v", path, err)
		}

		// Masquerade locally sourced traffic leaving through that interface.
		ruleArgs := []string{"-m", "addrtype", "--src-type", "LOCAL", "-o", oifName, "-j", "MASQUERADE"}
		if !iptable.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) {
			if err := iptable.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil {
				return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err)
			}
		}
	}

	// Filter the ingress ports until port rules start to be added/deleted
	filteredPorts := filterPortConfigs(ingressPorts, isDelete)
	rollbackRules := make([][]string, 0, len(filteredPorts)*3)
	// portErr holds the result of the most recent per-port rule command; the
	// deferred function below inspects it when the function returns.
	var portErr error
	defer func() {
		// Roll back only failed additions: undo the reference counts taken
		// above and run the inverse of every rule applied so far.
		if portErr != nil && !isDelete {
			filterPortConfigs(filteredPorts, !isDelete)
			for _, rule := range rollbackRules {
				if err := iptable.RawCombinedOutput(rule...); err != nil {
					logrus.Warnf("roll back rule failed, %v: %v", rule, err)
				}
			}
		}
	}()

	for _, iPort := range filteredPorts {
		var (
			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
			destination   = net.JoinHostPort(gwIP.String(), publishedPort)
		)
		// DNAT traffic for the published port to the same port on gwIP.
		if iptable.ExistChain(ingressChain, iptables.Nat) {
			rule := []string{"-t", "nat", addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}

			if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
				err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
				if !isDelete {
					return err
				}
				logrus.Info(err)
			}
			rollbackRule := []string{"-t", "nat", rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}
			rollbackRules = append(rollbackRules, rollbackRule)
		}

		// Filter table rules to allow a published service to be accessible in the local node from..
		// 1) service tasks attached to other networks
		// 2) unmanaged containers on bridge networks
		rule := []string{addDelOpt, ingressChain, "-m", "state", "-p", protocol, "--sport", publishedPort, "--state", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
			err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
			if !isDelete {
				return err
			}
			logrus.Warn(err)
		}
		rollbackRule := []string{rollbackAddDelOpt, ingressChain, "-m", "state", "-p", protocol, "--sport", publishedPort, "--state", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
		rollbackRules = append(rollbackRules, rollbackRule)

		rule = []string{addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
			err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
			if !isDelete {
				return err
			}
			logrus.Warn(err)
		}
		rollbackRule = []string{rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
		rollbackRules = append(rollbackRules, rollbackRule)

		// Open (or, on delete, close) the listener for the published port.
		// A failure here is only logged and does not trigger the rollback.
		if err := plumbProxy(iPort, isDelete); err != nil {
			logrus.Warnf("failed to create proxy for port %s: %v", publishedPort, err)
		}
	}

	return nil
}
   453  
   454  // In the filter table FORWARD chain the first rule should be to jump to
   455  // DOCKER-USER so the user is able to filter packet first.
   456  // The second rule should be jump to INGRESS-CHAIN.
   457  // This chain has the rules to allow access to the published ports for swarm tasks
   458  // from local bridge networks and docker_gwbridge (ie:taks on other swarm networks)
   459  func arrangeIngressFilterRule() {
   460  	// TODO IPv6 support
   461  	iptable := iptables.GetIptable(iptables.IPv4)
   462  	if iptable.ExistChain(ingressChain, iptables.Filter) {
   463  		if iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
   464  			if err := iptable.RawCombinedOutput("-D", "FORWARD", "-j", ingressChain); err != nil {
   465  				logrus.Warnf("failed to delete jump rule to ingressChain in filter table: %v", err)
   466  			}
   467  		}
   468  		if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
   469  			logrus.Warnf("failed to add jump rule to ingressChain in filter table: %v", err)
   470  		}
   471  	}
   472  }
   473  
   474  func findOIFName(ip net.IP) (string, error) {
   475  	nlh := ns.NlHandle()
   476  
   477  	routes, err := nlh.RouteGet(ip)
   478  	if err != nil {
   479  		return "", err
   480  	}
   481  
   482  	if len(routes) == 0 {
   483  		return "", fmt.Errorf("no route to %s", ip)
   484  	}
   485  
   486  	// Pick the first route(typically there is only one route). We
   487  	// don't support multipath.
   488  	link, err := nlh.LinkByIndex(routes[0].LinkIndex)
   489  	if err != nil {
   490  		return "", err
   491  	}
   492  
   493  	return link.Attrs().Name, nil
   494  }
   495  
   496  func plumbProxy(iPort *PortConfig, isDelete bool) error {
   497  	var (
   498  		err error
   499  		l   io.Closer
   500  	)
   501  
   502  	portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]))
   503  	if isDelete {
   504  		if listener, ok := ingressProxyTbl[portSpec]; ok {
   505  			if listener != nil {
   506  				listener.Close()
   507  			}
   508  		}
   509  
   510  		return nil
   511  	}
   512  
   513  	switch iPort.Protocol {
   514  	case ProtocolTCP:
   515  		l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)})
   516  	case ProtocolUDP:
   517  		l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)})
   518  	case ProtocolSCTP:
   519  		l, err = sctp.ListenSCTP("sctp", &sctp.SCTPAddr{Port: int(iPort.PublishedPort)})
   520  	default:
   521  		err = fmt.Errorf("unknown protocol %v", iPort.Protocol)
   522  	}
   523  
   524  	if err != nil {
   525  		return err
   526  	}
   527  
   528  	ingressProxyTbl[portSpec] = l
   529  
   530  	return nil
   531  }
   532  
   533  // configureFWMark configures the sandbox firewall to mark vip destined packets
   534  // with the firewall mark fwMark.
   535  func (sb *Sandbox) configureFWMark(vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
   536  	// TODO IPv6 support
   537  	iptable := iptables.GetIptable(iptables.IPv4)
   538  
   539  	fwMarkStr := strconv.FormatUint(uint64(fwMark), 10)
   540  	addDelOpt := "-A"
   541  	if isDelete {
   542  		addDelOpt = "-D"
   543  	}
   544  
   545  	rules := make([][]string, 0, len(ingressPorts))
   546  	for _, iPort := range ingressPorts {
   547  		var (
   548  			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
   549  			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
   550  		)
   551  		rule := []string{"-t", "mangle", addDelOpt, "PREROUTING", "-p", protocol, "--dport", publishedPort, "-j", "MARK", "--set-mark", fwMarkStr}
   552  		rules = append(rules, rule)
   553  	}
   554  
   555  	var innerErr error
   556  	err := sb.ExecFunc(func() {
   557  		if !isDelete && lbMode == loadBalancerModeNAT {
   558  			subnet := net.IPNet{IP: eIP.IP.Mask(eIP.Mask), Mask: eIP.Mask}
   559  			ruleParams := []string{"-m", "ipvs", "--ipvs", "-d", subnet.String(), "-j", "SNAT", "--to-source", eIP.IP.String()}
   560  			if !iptable.Exists("nat", "POSTROUTING", ruleParams...) {
   561  				rule := append([]string{"-t", "nat", "-A", "POSTROUTING"}, ruleParams...)
   562  				rules = append(rules, rule)
   563  
   564  				err := os.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0644)
   565  				if err != nil {
   566  					innerErr = err
   567  					return
   568  				}
   569  			}
   570  		}
   571  
   572  		rule := []string{"-t", "mangle", addDelOpt, "INPUT", "-d", vip.String() + "/32", "-j", "MARK", "--set-mark", fwMarkStr}
   573  		rules = append(rules, rule)
   574  
   575  		for _, rule := range rules {
   576  			if err := iptable.RawCombinedOutputNative(rule...); err != nil {
   577  				innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
   578  				return
   579  			}
   580  		}
   581  	})
   582  	if err != nil {
   583  		return err
   584  	}
   585  	return innerErr
   586  }
   587  
   588  func (sb *Sandbox) addRedirectRules(eIP *net.IPNet, ingressPorts []*PortConfig) error {
   589  	// TODO IPv6 support
   590  	iptable := iptables.GetIptable(iptables.IPv4)
   591  	ipAddr := eIP.IP.String()
   592  
   593  	rules := make([][]string, 0, len(ingressPorts)*3) // 3 rules per port
   594  	for _, iPort := range ingressPorts {
   595  		var (
   596  			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
   597  			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
   598  			targetPort    = strconv.FormatUint(uint64(iPort.TargetPort), 10)
   599  		)
   600  
   601  		rules = append(rules,
   602  			[]string{"-t", "nat", "-A", "PREROUTING", "-d", ipAddr, "-p", protocol, "--dport", publishedPort, "-j", "REDIRECT", "--to-port", targetPort},
   603  
   604  			// Allow only incoming connections to exposed ports
   605  			[]string{"-I", "INPUT", "-d", ipAddr, "-p", protocol, "--dport", targetPort, "-m", "conntrack", "--ctstate", "NEW,ESTABLISHED", "-j", "ACCEPT"},
   606  
   607  			// Allow only outgoing connections from exposed ports
   608  			[]string{"-I", "OUTPUT", "-s", ipAddr, "-p", protocol, "--sport", targetPort, "-m", "conntrack", "--ctstate", "ESTABLISHED", "-j", "ACCEPT"},
   609  		)
   610  	}
   611  
   612  	var innerErr error
   613  	err := sb.ExecFunc(func() {
   614  		for _, rule := range rules {
   615  			if err := iptable.RawCombinedOutputNative(rule...); err != nil {
   616  				innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
   617  				return
   618  			}
   619  		}
   620  
   621  		if len(ingressPorts) == 0 {
   622  			return
   623  		}
   624  
   625  		// Ensure blocking rules for anything else in/to ingress network
   626  		for _, rule := range [][]string{
   627  			{"-d", ipAddr, "-p", "sctp", "-j", "DROP"},
   628  			{"-d", ipAddr, "-p", "udp", "-j", "DROP"},
   629  			{"-d", ipAddr, "-p", "tcp", "-j", "DROP"},
   630  		} {
   631  			if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) {
   632  				if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
   633  					innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
   634  					return
   635  				}
   636  			}
   637  			rule[0] = "-s"
   638  			if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
   639  				if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
   640  					innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
   641  					return
   642  				}
   643  			}
   644  		}
   645  	})
   646  	if err != nil {
   647  		return err
   648  	}
   649  	return innerErr
   650  }