github.com/tonistiigi/docker@v0.10.1-0.20240229224939-974013b0dc6a/libnetwork/service_linux.go (about)

     1  package libnetwork
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"net"
     8  	"os"
     9  	"path/filepath"
    10  	"strconv"
    11  	"strings"
    12  	"sync"
    13  	"syscall"
    14  
    15  	"github.com/containerd/log"
    16  	"github.com/docker/docker/libnetwork/iptables"
    17  	"github.com/docker/docker/libnetwork/ns"
    18  	"github.com/ishidawataru/sctp"
    19  	"github.com/moby/ipvs"
    20  	"github.com/vishvananda/netlink/nl"
    21  )
    22  
    23  // Populate all loadbalancers on the network that the passed endpoint
    24  // belongs to, into this sandbox.
    25  func (sb *Sandbox) populateLoadBalancers(ep *Endpoint) {
    26  	// This is an interface less endpoint. Nothing to do.
    27  	if ep.Iface() == nil {
    28  		return
    29  	}
    30  
    31  	n := ep.getNetwork()
    32  	eIP := ep.Iface().Address()
    33  
    34  	if n.ingress {
    35  		if err := sb.addRedirectRules(eIP, ep.ingressPorts); err != nil {
    36  			log.G(context.TODO()).Errorf("Failed to add redirect rules for ep %s (%.7s): %v", ep.Name(), ep.ID(), err)
    37  		}
    38  	}
    39  }
    40  
    41  func (n *Network) findLBEndpointSandbox() (*Endpoint, *Sandbox, error) {
    42  	// TODO: get endpoint from store?  See EndpointInfo()
    43  	var ep *Endpoint
    44  	// Find this node's LB sandbox endpoint:  there should be exactly one
    45  	for _, e := range n.Endpoints() {
    46  		epi := e.Info()
    47  		if epi != nil && epi.LoadBalancer() {
    48  			ep = e
    49  			break
    50  		}
    51  	}
    52  	if ep == nil {
    53  		return nil, nil, fmt.Errorf("Unable to find load balancing endpoint for network %s", n.ID())
    54  	}
    55  	// Get the load balancer sandbox itself as well
    56  	sb, ok := ep.getSandbox()
    57  	if !ok {
    58  		return nil, nil, fmt.Errorf("Unable to get sandbox for %s(%s) in for %s", ep.Name(), ep.ID(), n.ID())
    59  	}
    60  	sep := sb.GetEndpoint(ep.ID())
    61  	if sep == nil {
    62  		return nil, nil, fmt.Errorf("Load balancing endpoint %s(%s) removed from %s", ep.Name(), ep.ID(), n.ID())
    63  	}
    64  	return sep, sb, nil
    65  }
    66  
    67  // Searches the OS sandbox for the name of the endpoint interface
    68  // within the sandbox.   This is required for adding/removing IP
    69  // aliases to the interface.
    70  func findIfaceDstName(sb *Sandbox, ep *Endpoint) string {
    71  	srcName := ep.Iface().SrcName()
    72  	for _, i := range sb.osSbox.Interfaces() {
    73  		if i.SrcName() == srcName {
    74  			return i.DstName()
    75  		}
    76  	}
    77  	return ""
    78  }
    79  
    80  // Add loadbalancer backend to the loadbalncer sandbox for the network.
    81  // If needed add the service as well.
    82  func (n *Network) addLBBackend(ip net.IP, lb *loadBalancer) {
    83  	if len(lb.vip) == 0 {
    84  		return
    85  	}
    86  	ep, sb, err := n.findLBEndpointSandbox()
    87  	if err != nil {
    88  		log.G(context.TODO()).Errorf("addLBBackend %s/%s: %v", n.ID(), n.Name(), err)
    89  		return
    90  	}
    91  	if sb.osSbox == nil {
    92  		return
    93  	}
    94  
    95  	eIP := ep.Iface().Address()
    96  
    97  	i, err := ipvs.New(sb.Key())
    98  	if err != nil {
    99  		log.G(context.TODO()).Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb addition: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
   100  		return
   101  	}
   102  	defer i.Close()
   103  
   104  	s := &ipvs.Service{
   105  		AddressFamily: nl.FAMILY_V4,
   106  		FWMark:        lb.fwMark,
   107  		SchedName:     ipvs.RoundRobin,
   108  	}
   109  
   110  	if !i.IsServicePresent(s) {
   111  		// Add IP alias for the VIP to the endpoint
   112  		ifName := findIfaceDstName(sb, ep)
   113  		if ifName == "" {
   114  			log.G(context.TODO()).Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
   115  			return
   116  		}
   117  		err := sb.osSbox.AddAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
   118  		if err != nil {
   119  			log.G(context.TODO()).Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
   120  			return
   121  		}
   122  
   123  		if sb.ingress {
   124  			var gwIP net.IP
   125  			if ep := sb.getGatewayEndpoint(); ep != nil {
   126  				gwIP = ep.Iface().Address().IP
   127  			}
   128  			if err := programIngress(gwIP, lb.service.ingressPorts, false); err != nil {
   129  				log.G(context.TODO()).Errorf("Failed to add ingress: %v", err)
   130  				return
   131  			}
   132  		}
   133  
   134  		log.G(context.TODO()).Debugf("Creating service for vip %s fwMark %d ingressPorts %#v in sbox %.7s (%.7s)", lb.vip, lb.fwMark, lb.service.ingressPorts, sb.ID(), sb.ContainerID())
   135  		if err := sb.configureFWMark(lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, false, n.loadBalancerMode); err != nil {
   136  			log.G(context.TODO()).Errorf("Failed to add firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
   137  			return
   138  		}
   139  
   140  		if err := i.NewService(s); err != nil && err != syscall.EEXIST {
   141  			log.G(context.TODO()).Errorf("Failed to create a new service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   142  			return
   143  		}
   144  	}
   145  
   146  	d := &ipvs.Destination{
   147  		AddressFamily: nl.FAMILY_V4,
   148  		Address:       ip,
   149  		Weight:        1,
   150  	}
   151  	if n.loadBalancerMode == loadBalancerModeDSR {
   152  		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
   153  	}
   154  
   155  	// Remove the sched name before using the service to add
   156  	// destination.
   157  	s.SchedName = ""
   158  	if err := i.NewDestination(s, d); err != nil && err != syscall.EEXIST {
   159  		log.G(context.TODO()).Errorf("Failed to create real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   160  	}
   161  
   162  	// Ensure that kernel tweaks are applied in case this is the first time
   163  	// we've initialized ip_vs
   164  	sb.osSbox.ApplyOSTweaks(sb.oslTypes)
   165  }
   166  
   167  // Remove loadbalancer backend the load balancing endpoint for this
   168  // network. If 'rmService' is true, then remove the service entry as well.
   169  // If 'fullRemove' is true then completely remove the entry, otherwise
   170  // just deweight it for now.
   171  func (n *Network) rmLBBackend(ip net.IP, lb *loadBalancer, rmService bool, fullRemove bool) {
   172  	if len(lb.vip) == 0 {
   173  		return
   174  	}
   175  	ep, sb, err := n.findLBEndpointSandbox()
   176  	if err != nil {
   177  		log.G(context.TODO()).Debugf("rmLBBackend for %s/%s: %v -- probably transient state", n.ID(), n.Name(), err)
   178  		return
   179  	}
   180  	if sb.osSbox == nil {
   181  		return
   182  	}
   183  
   184  	eIP := ep.Iface().Address()
   185  
   186  	i, err := ipvs.New(sb.Key())
   187  	if err != nil {
   188  		log.G(context.TODO()).Errorf("Failed to create an ipvs handle for sbox %.7s (%.7s,%s) for lb removal: %v", sb.ID(), sb.ContainerID(), sb.Key(), err)
   189  		return
   190  	}
   191  	defer i.Close()
   192  
   193  	s := &ipvs.Service{
   194  		AddressFamily: nl.FAMILY_V4,
   195  		FWMark:        lb.fwMark,
   196  	}
   197  
   198  	d := &ipvs.Destination{
   199  		AddressFamily: nl.FAMILY_V4,
   200  		Address:       ip,
   201  		Weight:        1,
   202  	}
   203  	if n.loadBalancerMode == loadBalancerModeDSR {
   204  		d.ConnectionFlags = ipvs.ConnFwdDirectRoute
   205  	}
   206  
   207  	if fullRemove {
   208  		if err := i.DelDestination(s, d); err != nil && err != syscall.ENOENT {
   209  			log.G(context.TODO()).Errorf("Failed to delete real server %s for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   210  		}
   211  	} else {
   212  		d.Weight = 0
   213  		if err := i.UpdateDestination(s, d); err != nil && err != syscall.ENOENT {
   214  			log.G(context.TODO()).Errorf("Failed to set LB weight of real server %s to 0 for vip %s fwmark %d in sbox %.7s (%.7s): %v", ip, lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   215  		}
   216  	}
   217  
   218  	if rmService {
   219  		s.SchedName = ipvs.RoundRobin
   220  		if err := i.DelService(s); err != nil && err != syscall.ENOENT {
   221  			log.G(context.TODO()).Errorf("Failed to delete service for vip %s fwmark %d in sbox %.7s (%.7s): %v", lb.vip, lb.fwMark, sb.ID(), sb.ContainerID(), err)
   222  		}
   223  
   224  		if sb.ingress {
   225  			var gwIP net.IP
   226  			if ep := sb.getGatewayEndpoint(); ep != nil {
   227  				gwIP = ep.Iface().Address().IP
   228  			}
   229  			if err := programIngress(gwIP, lb.service.ingressPorts, true); err != nil {
   230  				log.G(context.TODO()).Errorf("Failed to delete ingress: %v", err)
   231  			}
   232  		}
   233  
   234  		if err := sb.configureFWMark(lb.vip, lb.fwMark, lb.service.ingressPorts, eIP, true, n.loadBalancerMode); err != nil {
   235  			log.G(context.TODO()).Errorf("Failed to delete firewall mark rule in sbox %.7s (%.7s): %v", sb.ID(), sb.ContainerID(), err)
   236  		}
   237  
   238  		// Remove IP alias from the VIP to the endpoint
   239  		ifName := findIfaceDstName(sb, ep)
   240  		if ifName == "" {
   241  			log.G(context.TODO()).Errorf("Failed find interface name for endpoint %s(%s) to create LB alias", ep.ID(), ep.Name())
   242  			return
   243  		}
   244  		err := sb.osSbox.RemoveAliasIP(ifName, &net.IPNet{IP: lb.vip, Mask: net.CIDRMask(32, 32)})
   245  		if err != nil {
   246  			log.G(context.TODO()).Errorf("Failed add IP alias %s to network %s LB endpoint interface %s: %v", lb.vip, n.ID(), ifName, err)
   247  		}
   248  	}
   249  }
   250  
// ingressChain is the name of the iptables chain (created in both the nat
// and the filter table by programIngress) that holds the ingress rules.
const ingressChain = "DOCKER-INGRESS"

var (
	// ingressOnce makes programIngress flush pre-existing ingress chain
	// rules only on its first invocation.
	ingressOnce     sync.Once
	ingressMu       sync.Mutex // lock for operations on ingress
	// ingressProxyTbl maps a "<publishedPort>/<protocol>" spec to the
	// listener opened for it by plumbProxy.
	ingressProxyTbl = make(map[string]io.Closer)
	// portConfigMu guards portConfigTbl.
	portConfigMu    sync.Mutex
	// portConfigTbl holds a reference count per port config; it is
	// maintained by filterPortConfigs.
	portConfigTbl   = make(map[PortConfig]int)
)
   260  
   261  func filterPortConfigs(ingressPorts []*PortConfig, isDelete bool) []*PortConfig {
   262  	portConfigMu.Lock()
   263  	iPorts := make([]*PortConfig, 0, len(ingressPorts))
   264  	for _, pc := range ingressPorts {
   265  		if isDelete {
   266  			if cnt, ok := portConfigTbl[*pc]; ok {
   267  				// This is the last reference to this
   268  				// port config. Delete the port config
   269  				// and add it to filtered list to be
   270  				// plumbed.
   271  				if cnt == 1 {
   272  					delete(portConfigTbl, *pc)
   273  					iPorts = append(iPorts, pc)
   274  					continue
   275  				}
   276  
   277  				portConfigTbl[*pc] = cnt - 1
   278  			}
   279  
   280  			continue
   281  		}
   282  
   283  		if cnt, ok := portConfigTbl[*pc]; ok {
   284  			portConfigTbl[*pc] = cnt + 1
   285  			continue
   286  		}
   287  
   288  		// We are adding it for the first time. Add it to the
   289  		// filter list to be plumbed.
   290  		portConfigTbl[*pc] = 1
   291  		iPorts = append(iPorts, pc)
   292  	}
   293  	portConfigMu.Unlock()
   294  
   295  	return iPorts
   296  }
   297  
// programIngress adds (isDelete == false) or removes (isDelete == true) the
// host iptables rules and port listeners for the given ingress ports. The
// whole operation runs with ingressMu held.
//
// On the first call ever, pre-existing rules in the nat and filter ingress
// chains are flushed. When adding, the chains, their RETURN rules, the jump
// rules into them, the route_localnet sysctl of the interface that routes to
// gwIP and a MASQUERADE rule for that interface are set up as needed.
//
// Port rules are only programmed for the port configs returned by
// filterPortConfigs (first reference on add, last reference on delete). For
// each of those a DNAT rule to gwIP:<publishedPort> is handled in the nat
// table (if the chain exists there), two ACCEPT rules are handled in the
// filter table, and plumbProxy is called.
//
// When adding, the first failing rule aborts the call with an error; the
// deferred rollback then reverts the reference counts and the port rules
// that had already been applied. When deleting, rule failures are only
// logged and the function returns nil. plumbProxy failures are logged in
// both cases.
func programIngress(gwIP net.IP, ingressPorts []*PortConfig, isDelete bool) error {
	// TODO IPv6 support
	iptable := iptables.GetIptable(iptables.IPv4)

	// rollbackAddDelOpt is the inverse operation of addDelOpt, used to build
	// the rules that undo what has been applied.
	addDelOpt := "-I"
	rollbackAddDelOpt := "-D"
	if isDelete {
		addDelOpt = "-D"
		rollbackAddDelOpt = "-I"
	}

	ingressMu.Lock()
	defer ingressMu.Unlock()

	// Snapshot of chain existence taken before anything is created below.
	chainExists := iptable.ExistChain(ingressChain, iptables.Nat)
	filterChainExists := iptable.ExistChain(ingressChain, iptables.Filter)

	ingressOnce.Do(func() {
		// Flush nat table and filter table ingress chain rules during init if it
		// exists. It might contain stale rules from previous life.
		if chainExists {
			if err := iptable.RawCombinedOutput("-t", "nat", "-F", ingressChain); err != nil {
				log.G(context.TODO()).Errorf("Could not flush nat table ingress chain rules during init: %v", err)
			}
		}
		if filterChainExists {
			if err := iptable.RawCombinedOutput("-F", ingressChain); err != nil {
				log.G(context.TODO()).Errorf("Could not flush filter table ingress chain rules during init: %v", err)
			}
		}
	})

	if !isDelete {
		// Create the ingress chain in both tables if it was missing.
		if !chainExists {
			if err := iptable.RawCombinedOutput("-t", "nat", "-N", ingressChain); err != nil {
				return fmt.Errorf("failed to create ingress chain: %v", err)
			}
		}
		if !filterChainExists {
			if err := iptable.RawCombinedOutput("-N", ingressChain); err != nil {
				return fmt.Errorf("failed to create filter table ingress chain: %v", err)
			}
		}

		// Both chains end with a RETURN rule; port rules are inserted
		// (-I) and therefore end up in front of it.
		if !iptable.Exists(iptables.Nat, ingressChain, "-j", "RETURN") {
			if err := iptable.RawCombinedOutput("-t", "nat", "-A", ingressChain, "-j", "RETURN"); err != nil {
				return fmt.Errorf("failed to add return rule in nat table ingress chain: %v", err)
			}
		}

		if !iptable.Exists(iptables.Filter, ingressChain, "-j", "RETURN") {
			if err := iptable.RawCombinedOutput("-A", ingressChain, "-j", "RETURN"); err != nil {
				return fmt.Errorf("failed to add return rule to filter table ingress chain: %v", err)
			}
		}

		// Send locally-destined traffic through the nat ingress chain.
		for _, chain := range []string{"OUTPUT", "PREROUTING"} {
			if !iptable.Exists(iptables.Nat, chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain) {
				if err := iptable.RawCombinedOutput("-t", "nat", "-I", chain, "-m", "addrtype", "--dst-type", "LOCAL", "-j", ingressChain); err != nil {
					return fmt.Errorf("failed to add jump rule in %s to ingress chain: %v", chain, err)
				}
			}
		}

		if !iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
			if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
				return fmt.Errorf("failed to add jump rule to %s in filter table forward chain: %v", ingressChain, err)
			}
			// NOTE(review): arrangeUserFilterRule is defined outside this
			// file; presumably it re-establishes the position of the
			// DOCKER-USER jump after the insert above -- confirm.
			arrangeUserFilterRule()
		}

		oifName, err := findOIFName(gwIP)
		if err != nil {
			return fmt.Errorf("failed to find gateway bridge interface name for %s: %v", gwIP, err)
		}

		// Enable route_localnet on the interface that routes to gwIP.
		path := filepath.Join("/proc/sys/net/ipv4/conf", oifName, "route_localnet")
		if err := os.WriteFile(path, []byte{'1', '\n'}, 0o644); err != nil { //nolint:gosec // gosec complains about perms here, which must be 0644 in this case
			return fmt.Errorf("could not write to %s: %v", path, err)
		}

		// Masquerade locally-sourced traffic leaving through that interface.
		ruleArgs := []string{"-m", "addrtype", "--src-type", "LOCAL", "-o", oifName, "-j", "MASQUERADE"}
		if !iptable.Exists(iptables.Nat, "POSTROUTING", ruleArgs...) {
			if err := iptable.RawCombinedOutput(append([]string{"-t", "nat", "-I", "POSTROUTING"}, ruleArgs...)...); err != nil {
				return fmt.Errorf("failed to add ingress localhost POSTROUTING rule for %s: %v", oifName, err)
			}
		}
	}

	// Filter the ingress ports until port rules start to be added/deleted
	filteredPorts := filterPortConfigs(ingressPorts, isDelete)
	rollbackRules := make([][]string, 0, len(filteredPorts)*3)
	var portErr error
	// Rollback runs only for a failed add: the reference counts taken above
	// are released again and every rule applied so far is undone.
	defer func() {
		if portErr != nil && !isDelete {
			filterPortConfigs(filteredPorts, !isDelete)
			for _, rule := range rollbackRules {
				if err := iptable.RawCombinedOutput(rule...); err != nil {
					log.G(context.TODO()).Warnf("roll back rule failed, %v: %v", rule, err)
				}
			}
		}
	}()

	for _, iPort := range filteredPorts {
		var (
			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
			destination   = net.JoinHostPort(gwIP.String(), publishedPort)
		)
		if iptable.ExistChain(ingressChain, iptables.Nat) {
			rule := []string{"-t", "nat", addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}

			if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
				err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
				if !isDelete {
					return err
				}
				log.G(context.TODO()).Info(err)
			}
			// A rollback rule is recorded only after its rule was attempted
			// without aborting, so the failing rule itself is never undone.
			rollbackRule := []string{"-t", "nat", rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "DNAT", "--to-destination", destination}
			rollbackRules = append(rollbackRules, rollbackRule)
		}

		// Filter table rules to allow a published service to be accessible in the local node from..
		// 1) service tasks attached to other networks
		// 2) unmanaged containers on bridge networks
		rule := []string{addDelOpt, ingressChain, "-p", protocol, "--sport", publishedPort, "-m", "conntrack", "--ctstate", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
			err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
			if !isDelete {
				return err
			}
			log.G(context.TODO()).Warn(err)
		}
		rollbackRule := []string{rollbackAddDelOpt, ingressChain, "-p", protocol, "--sport", publishedPort, "-m", "conntrack", "--ctstate", "ESTABLISHED,RELATED", "-j", "ACCEPT"}
		rollbackRules = append(rollbackRules, rollbackRule)

		rule = []string{addDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
		if portErr = iptable.RawCombinedOutput(rule...); portErr != nil {
			err := fmt.Errorf("set up rule failed, %v: %v", rule, portErr)
			if !isDelete {
				return err
			}
			log.G(context.TODO()).Warn(err)
		}
		rollbackRule = []string{rollbackAddDelOpt, ingressChain, "-p", protocol, "--dport", publishedPort, "-j", "ACCEPT"}
		rollbackRules = append(rollbackRules, rollbackRule)

		// A proxy failure is logged only; it does not set portErr and so
		// does not trigger the rollback.
		if err := plumbProxy(iPort, isDelete); err != nil {
			log.G(context.TODO()).Warnf("failed to create proxy for port %s: %v", publishedPort, err)
		}
	}

	return nil
}
   454  
   455  // In the filter table FORWARD chain the first rule should be to jump to
   456  // DOCKER-USER so the user is able to filter packet first.
   457  // The second rule should be jump to INGRESS-CHAIN.
   458  // This chain has the rules to allow access to the published ports for swarm tasks
   459  // from local bridge networks and docker_gwbridge (ie:taks on other swarm networks)
   460  func arrangeIngressFilterRule() {
   461  	// TODO IPv6 support
   462  	iptable := iptables.GetIptable(iptables.IPv4)
   463  	if iptable.ExistChain(ingressChain, iptables.Filter) {
   464  		if iptable.Exists(iptables.Filter, "FORWARD", "-j", ingressChain) {
   465  			if err := iptable.RawCombinedOutput("-D", "FORWARD", "-j", ingressChain); err != nil {
   466  				log.G(context.TODO()).Warnf("failed to delete jump rule to ingressChain in filter table: %v", err)
   467  			}
   468  		}
   469  		if err := iptable.RawCombinedOutput("-I", "FORWARD", "-j", ingressChain); err != nil {
   470  			log.G(context.TODO()).Warnf("failed to add jump rule to ingressChain in filter table: %v", err)
   471  		}
   472  	}
   473  }
   474  
   475  func findOIFName(ip net.IP) (string, error) {
   476  	nlh := ns.NlHandle()
   477  
   478  	routes, err := nlh.RouteGet(ip)
   479  	if err != nil {
   480  		return "", err
   481  	}
   482  
   483  	if len(routes) == 0 {
   484  		return "", fmt.Errorf("no route to %s", ip)
   485  	}
   486  
   487  	// Pick the first route(typically there is only one route). We
   488  	// don't support multipath.
   489  	link, err := nlh.LinkByIndex(routes[0].LinkIndex)
   490  	if err != nil {
   491  		return "", err
   492  	}
   493  
   494  	return link.Attrs().Name, nil
   495  }
   496  
   497  func plumbProxy(iPort *PortConfig, isDelete bool) error {
   498  	var (
   499  		err error
   500  		l   io.Closer
   501  	)
   502  
   503  	portSpec := fmt.Sprintf("%d/%s", iPort.PublishedPort, strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)]))
   504  	if isDelete {
   505  		if listener, ok := ingressProxyTbl[portSpec]; ok {
   506  			if listener != nil {
   507  				listener.Close()
   508  			}
   509  		}
   510  
   511  		return nil
   512  	}
   513  
   514  	switch iPort.Protocol {
   515  	case ProtocolTCP:
   516  		l, err = net.ListenTCP("tcp", &net.TCPAddr{Port: int(iPort.PublishedPort)})
   517  	case ProtocolUDP:
   518  		l, err = net.ListenUDP("udp", &net.UDPAddr{Port: int(iPort.PublishedPort)})
   519  	case ProtocolSCTP:
   520  		l, err = sctp.ListenSCTP("sctp", &sctp.SCTPAddr{Port: int(iPort.PublishedPort)})
   521  	default:
   522  		err = fmt.Errorf("unknown protocol %v", iPort.Protocol)
   523  	}
   524  
   525  	if err != nil {
   526  		return err
   527  	}
   528  
   529  	ingressProxyTbl[portSpec] = l
   530  
   531  	return nil
   532  }
   533  
   534  // configureFWMark configures the sandbox firewall to mark vip destined packets
   535  // with the firewall mark fwMark.
   536  func (sb *Sandbox) configureFWMark(vip net.IP, fwMark uint32, ingressPorts []*PortConfig, eIP *net.IPNet, isDelete bool, lbMode string) error {
   537  	// TODO IPv6 support
   538  	iptable := iptables.GetIptable(iptables.IPv4)
   539  
   540  	fwMarkStr := strconv.FormatUint(uint64(fwMark), 10)
   541  	addDelOpt := "-A"
   542  	if isDelete {
   543  		addDelOpt = "-D"
   544  	}
   545  
   546  	rules := make([][]string, 0, len(ingressPorts))
   547  	for _, iPort := range ingressPorts {
   548  		var (
   549  			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
   550  			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
   551  		)
   552  		rule := []string{"-t", "mangle", addDelOpt, "PREROUTING", "-p", protocol, "--dport", publishedPort, "-j", "MARK", "--set-mark", fwMarkStr}
   553  		rules = append(rules, rule)
   554  	}
   555  
   556  	var innerErr error
   557  	err := sb.ExecFunc(func() {
   558  		if !isDelete && lbMode == loadBalancerModeNAT {
   559  			subnet := net.IPNet{IP: eIP.IP.Mask(eIP.Mask), Mask: eIP.Mask}
   560  			ruleParams := []string{"-m", "ipvs", "--ipvs", "-d", subnet.String(), "-j", "SNAT", "--to-source", eIP.IP.String()}
   561  			if !iptable.Exists("nat", "POSTROUTING", ruleParams...) {
   562  				rule := append([]string{"-t", "nat", "-A", "POSTROUTING"}, ruleParams...)
   563  				rules = append(rules, rule)
   564  
   565  				err := os.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte{'1', '\n'}, 0o644)
   566  				if err != nil {
   567  					innerErr = err
   568  					return
   569  				}
   570  			}
   571  		}
   572  
   573  		rule := []string{"-t", "mangle", addDelOpt, "INPUT", "-d", vip.String() + "/32", "-j", "MARK", "--set-mark", fwMarkStr}
   574  		rules = append(rules, rule)
   575  
   576  		for _, rule := range rules {
   577  			if err := iptable.RawCombinedOutputNative(rule...); err != nil {
   578  				innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
   579  				return
   580  			}
   581  		}
   582  	})
   583  	if err != nil {
   584  		return err
   585  	}
   586  	return innerErr
   587  }
   588  
   589  func (sb *Sandbox) addRedirectRules(eIP *net.IPNet, ingressPorts []*PortConfig) error {
   590  	// TODO IPv6 support
   591  	iptable := iptables.GetIptable(iptables.IPv4)
   592  	ipAddr := eIP.IP.String()
   593  
   594  	rules := make([][]string, 0, len(ingressPorts)*3) // 3 rules per port
   595  	for _, iPort := range ingressPorts {
   596  		var (
   597  			protocol      = strings.ToLower(PortConfig_Protocol_name[int32(iPort.Protocol)])
   598  			publishedPort = strconv.FormatUint(uint64(iPort.PublishedPort), 10)
   599  			targetPort    = strconv.FormatUint(uint64(iPort.TargetPort), 10)
   600  		)
   601  
   602  		rules = append(rules,
   603  			[]string{"-t", "nat", "-A", "PREROUTING", "-d", ipAddr, "-p", protocol, "--dport", publishedPort, "-j", "REDIRECT", "--to-port", targetPort},
   604  
   605  			// Allow only incoming connections to exposed ports
   606  			[]string{"-I", "INPUT", "-d", ipAddr, "-p", protocol, "--dport", targetPort, "-m", "conntrack", "--ctstate", "NEW,ESTABLISHED", "-j", "ACCEPT"},
   607  
   608  			// Allow only outgoing connections from exposed ports
   609  			[]string{"-I", "OUTPUT", "-s", ipAddr, "-p", protocol, "--sport", targetPort, "-m", "conntrack", "--ctstate", "ESTABLISHED", "-j", "ACCEPT"},
   610  		)
   611  	}
   612  
   613  	var innerErr error
   614  	err := sb.ExecFunc(func() {
   615  		for _, rule := range rules {
   616  			if err := iptable.RawCombinedOutputNative(rule...); err != nil {
   617  				innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
   618  				return
   619  			}
   620  		}
   621  
   622  		if len(ingressPorts) == 0 {
   623  			return
   624  		}
   625  
   626  		// Ensure blocking rules for anything else in/to ingress network
   627  		for _, rule := range [][]string{
   628  			{"-d", ipAddr, "-p", "sctp", "-j", "DROP"},
   629  			{"-d", ipAddr, "-p", "udp", "-j", "DROP"},
   630  			{"-d", ipAddr, "-p", "tcp", "-j", "DROP"},
   631  		} {
   632  			if !iptable.ExistsNative(iptables.Filter, "INPUT", rule...) {
   633  				if err := iptable.RawCombinedOutputNative(append([]string{"-A", "INPUT"}, rule...)...); err != nil {
   634  					innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
   635  					return
   636  				}
   637  			}
   638  			rule[0] = "-s"
   639  			if !iptable.ExistsNative(iptables.Filter, "OUTPUT", rule...) {
   640  				if err := iptable.RawCombinedOutputNative(append([]string{"-A", "OUTPUT"}, rule...)...); err != nil {
   641  					innerErr = fmt.Errorf("set up rule failed, %v: %w", rule, err)
   642  					return
   643  				}
   644  			}
   645  		}
   646  	})
   647  	if err != nil {
   648  		return err
   649  	}
   650  	return innerErr
   651  }