github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/controller/internal/supervisor/iptablesctrl/iptables.go (about)

     1  package iptablesctrl
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"net"
     8  	"strconv"
     9  	"text/template"
    10  
    11  	"go.aporeto.io/enforcerd/trireme-lib/controller/constants"
    12  	provider "go.aporeto.io/enforcerd/trireme-lib/controller/pkg/aclprovider"
    13  	"go.aporeto.io/enforcerd/trireme-lib/controller/pkg/ebpf"
    14  	"go.aporeto.io/enforcerd/trireme-lib/controller/pkg/fqconfig"
    15  	"go.aporeto.io/enforcerd/trireme-lib/controller/pkg/ipsetmanager"
    16  	"go.aporeto.io/enforcerd/trireme-lib/controller/runtime"
    17  	"go.aporeto.io/enforcerd/trireme-lib/monitor/extractors"
    18  	"go.aporeto.io/enforcerd/trireme-lib/policy"
    19  	"go.uber.org/zap"
    20  )
    21  
    22  const (
    23  	mainAppChain        = constants.ChainPrefix + "App"
    24  	mainNetChain        = constants.ChainPrefix + "Net"
    25  	appChainPrefix      = constants.ChainPrefix + "App-"
    26  	netChainPrefix      = constants.ChainPrefix + "Net-"
    27  	natProxyOutputChain = constants.ChainPrefix + "Redir-App"
    28  	natProxyInputChain  = constants.ChainPrefix + "Redir-Net"
    29  	proxyOutputChain    = constants.ChainPrefix + "Prx-App"
    30  	proxyInputChain     = constants.ChainPrefix + "Prx-Net"
    31  	istioChain          = constants.ChainPrefix + "Istio"
    32  
    33  	// TriremeInput represent the chain that contains pu input rules.
    34  	TriremeInput = constants.ChainPrefix + "Pid-Net"
    35  	// TriremeOutput represent the chain that contains pu output rules.
    36  	TriremeOutput = constants.ChainPrefix + "Pid-App"
    37  
    38  	// NetworkSvcInput represent the chain that contains NetworkSvc input rules.
    39  	NetworkSvcInput = constants.ChainPrefix + "Svc-Net"
    40  
    41  	// NetworkSvcOutput represent the chain that contains NetworkSvc output rules.
    42  	NetworkSvcOutput = constants.ChainPrefix + "Svc-App"
    43  
    44  	// HostModeInput represent the chain that contains Hostmode input rules.
    45  	HostModeInput = constants.ChainPrefix + "Hst-Net"
    46  
    47  	// HostModeOutput represent the chain that contains Hostmode output rules.
    48  	HostModeOutput = constants.ChainPrefix + "Hst-App"
    49  	// NfqueueOutput represents the chain that contains the nfqueue output rules
    50  	NfqueueOutput = constants.ChainPrefix + "Nfq-OUT"
    51  	// NfqueueInput represents the chain that contains the nfqueue input rules
    52  	NfqueueInput = constants.ChainPrefix + "Nfq-IN"
    53  	// IstioUID is the UID of the istio-proxy(envoy) that is used in the iptables to identify the
    54  	// envoy generated traffic
    55  	IstioUID = "1337"
    56  	// IstioRedirPort is the port where the App traffic from the output chain
    57  	// is redirected into Istio-proxy, we need to accept this traffic as we don't to come in between
    58  	// APP --> Envoy traffic.
    59  	IstioRedirPort = "15001"
    60  )
    61  
// iptables carries the state shared by the rule-management methods of this
// package: the iptables provider, the ipset manager and the operating mode.
type iptables struct {
	// impl is the IP-version specific iptables provider. Chains are created
	// and pending changes are committed through it.
	impl IPImpl
	// fqc is the filter queue configuration passed to createIPInstance.
	fqc fqconfig.FilterQueue
	// mode is compared against constants.RemoteContainer and
	// constants.LocalServer to select mode-specific behavior.
	mode constants.ModeType
	// ipsetmanager creates, updates and destroys the ipsets used by the rules.
	ipsetmanager ipsetmanager.IPSetManager
	// bpf is the eBPF module passed to createIPInstance.
	bpf ebpf.BPFModule
	// serviceMeshType is the configured service mesh. Proxy sets are only
	// managed when it equals policy.None.
	serviceMeshType policy.ServiceMesh
}
    70  
    71  // IPImpl interface is to be used by the iptable implentors like ipv4 and ipv6.
    72  type IPImpl interface {
    73  	provider.IptablesProvider
    74  	IPVersion() int
    75  	ProtocolAllowed(proto string) bool
    76  	IPFilter() func(net.IP) bool
    77  	GetDefaultIP() string
    78  	NeedICMP() bool
    79  }
    80  
    81  type ipFilter func(net.IP) bool
    82  
    83  func createIPInstance(impl IPImpl, ipsetmanager ipsetmanager.IPSetManager, fqc fqconfig.FilterQueue, mode constants.ModeType, ebpf ebpf.BPFModule, ServiceMeshType policy.ServiceMesh) *iptables {
    84  
    85  	return &iptables{
    86  		impl:            impl,
    87  		fqc:             fqc,
    88  		mode:            mode,
    89  		ipsetmanager:    ipsetmanager,
    90  		bpf:             ebpf,
    91  		serviceMeshType: ServiceMeshType,
    92  	}
    93  }
    94  
    95  func (i *iptables) SetTargetNetworks(c *runtime.Configuration) error {
    96  	if c == nil {
    97  		return nil
    98  	}
    99  
   100  	tcp := c.TCPTargetNetworks
   101  	udp := c.UDPTargetNetworks
   102  	excluded := c.ExcludedNetworks
   103  
   104  	// If there are no target networks, capture all traffic
   105  	if len(tcp) == 0 {
   106  		tcp = []string{IPv4DefaultIP, IPv6DefaultIP}
   107  	}
   108  
   109  	return i.ipsetmanager.UpdateIPsetsForTargetAndExcludedNetworks(tcp, udp, excluded)
   110  }
   111  
   112  func (i *iptables) Run(ctx context.Context) error {
   113  
   114  	// Clean any previous ACLs. This is needed in case we crashed at some
   115  	// earlier point or there are other ACLs that create conflicts. We
   116  	// try to clean only ACLs related to Trireme.
   117  	if err := i.cleanACLs(); err != nil {
   118  		return fmt.Errorf("Unable to clean previous acls while starting the supervisor: %s", err)
   119  	}
   120  
   121  	if err := i.ipsetmanager.DestroyAllIPsets(); err != nil {
   122  		zap.L().Debug("ipset destroy all ipset returned error", zap.Error(err))
   123  	}
   124  
   125  	if err := i.ipsetmanager.CreateIPsetsForTargetAndExcludedNetworks(); err != nil {
   126  		if err1 := i.ipsetmanager.DestroyAllIPsets(); err1 != nil {
   127  			zap.L().Debug("ipset destroy all ipset returned error", zap.Error(err1))
   128  		}
   129  		return fmt.Errorf("unable to create target network ipsets: %s", err)
   130  	}
   131  
   132  	// Windows needs to initialize some ipsets
   133  	if err := i.platformInit(); err != nil {
   134  		return err
   135  	}
   136  
   137  	// Initialize all the global Trireme chains. There are several global chaims
   138  	// that apply to all PUs:
   139  	// Tri-App/Tri-Net are the main chains for the egress/ingress directions
   140  	// UID related chains for any UID PUs.
   141  	// Host, Service, Pid chains for the different modes of operation (host mode, pu mode, host service).
   142  	// The priority is explicit (Pid activations take precedence of Service activations and Host Services)
   143  	if err := i.initializeChains(); err != nil {
   144  		return fmt.Errorf("Unable to initialize chains: %s", err)
   145  	}
   146  
   147  	// Insert the global ACLS. These are the main ACLs that will direct traffic from
   148  	// the INPUT/OUTPUT chains to the Trireme chains. They also includes the main
   149  	// rules of the main chains. These rules are never touched again, unless
   150  	// if we gracefully terminate.
   151  	if err := i.setGlobalRules(); err != nil {
   152  		return fmt.Errorf("failed to update synack networks: %s", err)
   153  	}
   154  
   155  	if err := i.impl.Commit(); err != nil {
   156  		return err
   157  	}
   158  
   159  	return nil
   160  }
   161  
   162  func (i *iptables) ConfigureRules(version int, contextID string, pu *policy.PUInfo) error {
   163  	var err error
   164  	var cfg *ACLInfo
   165  
   166  	// First we create an IPSet for destination matching ports. This only
   167  	// applies to Linux type PUs. A port set is associated with every PU,
   168  	// and packets matching this destination get associated with the context
   169  	// of the PU.
   170  	if i.mode != constants.RemoteContainer {
   171  		if err = i.ipsetmanager.CreateServerPortSet(contextID); err != nil {
   172  			return err
   173  		}
   174  	}
   175  
   176  	// Create the proxy sets. These are the target sets that will match
   177  	// traffic towards the L4 and L4 services. There are two sets created
   178  	// for every PU in this context (for outgoing and incoming traffic).
   179  	// The outgoing sets capture all traffic towards specific destinations
   180  	// as proxied traffic. Incoming sets correspond to the listening
   181  	// services.
   182  	// create proxySets only if there is no serviceMesh.
   183  	if i.serviceMeshType == policy.None {
   184  		if err := i.ipsetmanager.CreateProxySets(contextID); err != nil {
   185  			return err
   186  		}
   187  	}
   188  
   189  	// We create the generic ACL object that is used for all the templates.
   190  	cfg, err = i.newACLInfo(version, contextID, pu, pu.Runtime.PUType())
   191  	if err != nil {
   192  		return err
   193  	}
   194  
   195  	// At this point we can install all the ACL rules that will direct
   196  	// traffic to user space, allow for external access or direct
   197  	// traffic towards the proxies
   198  	if err = i.installRules(cfg, pu); err != nil {
   199  		return err
   200  	}
   201  
   202  	// We commit the ACLs at the end. Note, that some of the ACLs in the
   203  	// NAT table are not committed as a group. The commit function only
   204  	// applies when newer versions of tables are installed (1.6.2 and above).
   205  	if err = i.impl.Commit(); err != nil {
   206  		zap.L().Error("unable to configure rules", zap.Error(err))
   207  		return err
   208  	}
   209  
   210  	return nil
   211  }
   212  
   213  func (i *iptables) DeleteRules(version int, contextID string, tcpPorts, udpPorts string, mark string, username string, containerInfo *policy.PUInfo) error {
   214  	cfg, err := i.newACLInfo(version, contextID, nil, containerInfo.Runtime.PUType())
   215  	if err != nil {
   216  		zap.L().Error("unable to create cleanup configuration", zap.Error(err))
   217  		return err
   218  	}
   219  	if i.mode == constants.LocalServer {
   220  		cfg.PacketMark = mark
   221  	}
   222  	cfg.UDPPorts = udpPorts
   223  	cfg.TCPPorts = tcpPorts
   224  	cfg.CgroupMark = mark
   225  	cfg.Mark = mark
   226  
   227  	cfg.PUType = containerInfo.Runtime.PUType()
   228  	cfg.ProxyPort = containerInfo.Policy.ServicesListeningPort()
   229  	cfg.DNSProxyPort = containerInfo.Policy.DNSProxyPort()
   230  	// We clean up the chain rules first, so that we can delete the chains.
   231  	// If any rule is not deleted, then the chain will show as busy.
   232  	if err := i.deleteChainRules(cfg); err != nil {
   233  		zap.L().Warn("Failed to clean rules", zap.Error(err))
   234  	}
   235  
   236  	// We can now delete the chains we have created for this PU. Note that
   237  	// in every case we only create two chains for every PU. All other
   238  	// chains are global.
   239  	if err = i.deletePUChains(cfg); err != nil {
   240  		zap.L().Warn("Failed to clean container chains while deleting the rules", zap.Error(err))
   241  	}
   242  
   243  	// We call commit to update all the changes, before destroying the ipsets.
   244  	// References must be deleted for ipset deletion to succeed.
   245  	if err := i.impl.Commit(); err != nil {
   246  		zap.L().Warn("Failed to commit ACL changes", zap.Error(err))
   247  	}
   248  
   249  	if i.mode != constants.RemoteContainer {
   250  		// We delete the set that captures all destination ports of the
   251  		// PU. This only holds for Linux PUs.
   252  		if err := i.ipsetmanager.DestroyServerPortSet(contextID); err != nil {
   253  			zap.L().Warn("Failed to remove port set")
   254  		}
   255  	}
   256  
   257  	// if serviceMesh is enabled then don't detroy the proxySets as we have not create them.
   258  	if i.serviceMeshType == policy.None {
   259  		// We delete the proxy port sets that were created for this PU.
   260  		i.ipsetmanager.DestroyProxySets(contextID)
   261  	}
   262  	return nil
   263  }
   264  
   265  func (i *iptables) UpdateRules(version int, contextID string, containerInfo *policy.PUInfo, oldContainerInfo *policy.PUInfo) error {
   266  	policyrules := containerInfo.Policy
   267  	if policyrules == nil {
   268  		return errors.New("policy rules cannot be nil")
   269  	}
   270  
   271  	// We cache the old config and we use it to delete the previous
   272  	// rules. Every time we update the policy the version changes to
   273  	// its binary complement.
   274  	newCfg, err := i.newACLInfo(version, contextID, containerInfo, containerInfo.Runtime.PUType())
   275  	if err != nil {
   276  		return err
   277  	}
   278  
   279  	oldCfg, err := i.newACLInfo(version^1, contextID, oldContainerInfo, containerInfo.Runtime.PUType())
   280  	if err != nil {
   281  		return err
   282  	}
   283  
   284  	// Install all the new rules. The hooks to the new chains are appended
   285  	// and do not take effect yet.
   286  	if err := i.installRules(newCfg, containerInfo); err != nil {
   287  		return err
   288  	}
   289  
   290  	// Remove mapping from old chain. By removing the old hooks the new
   291  	// hooks take priority.
   292  	if err := i.deleteChainRules(oldCfg); err != nil {
   293  		return err
   294  	}
   295  
   296  	// Delete the old chains, since there are not references any more.
   297  	if err := i.deletePUChains(oldCfg); err != nil {
   298  		return err
   299  	}
   300  
   301  	// Commit all actions in on iptables-restore function.
   302  	if err := i.impl.Commit(); err != nil {
   303  		return err
   304  	}
   305  
   306  	return nil
   307  }
   308  
   309  func (i *iptables) CleanUp() error {
   310  
   311  	if err := i.cleanACLs(); err != nil {
   312  		zap.L().Error("Failed to clean acls while stopping the supervisor", zap.Error(err))
   313  	}
   314  
   315  	if err := i.ipsetmanager.DestroyAllIPsets(); err != nil {
   316  		zap.L().Error("Failed to clean up ipsets", zap.Error(err))
   317  	}
   318  
   319  	i.ipsetmanager.Reset()
   320  
   321  	return nil
   322  }
   323  
   324  // InitializeChains initializes the chains.
   325  func (i *iptables) initializeChains() error {
   326  
   327  	cfg, err := i.newACLInfo(0, "", nil, 0)
   328  	if err != nil {
   329  		return err
   330  	}
   331  	tmpl := template.Must(template.New(triremChains).Funcs(template.FuncMap{
   332  		"isLocalServer": func() bool {
   333  			return i.mode == constants.LocalServer
   334  		},
   335  		"isIstioEnabled": func() bool {
   336  			return i.serviceMeshType == policy.Istio
   337  		},
   338  	}).Parse(triremChains))
   339  
   340  	rules, err := extractRulesFromTemplate(tmpl, cfg)
   341  	if err != nil {
   342  		return fmt.Errorf("unable to create trireme chains:%s", err)
   343  	}
   344  	for _, rule := range rules {
   345  		if len(rule) != 4 {
   346  			continue
   347  		}
   348  		if err := i.impl.NewChain(rule[1], rule[3]); err != nil {
   349  			return err
   350  		}
   351  	}
   352  
   353  	return nil
   354  }
   355  
   356  // configureContainerRules adds the chain rules for a container.
   357  // We separate in different methods to keep track of the changes
   358  // independently.
   359  func (i *iptables) configureContainerRules(cfg *ACLInfo) error {
   360  	return i.addChainRules(cfg)
   361  }
   362  
   363  // configureLinuxRules adds the chain rules for a linux process or a UID process.
   364  func (i *iptables) configureLinuxRules(cfg *ACLInfo) error {
   365  
   366  	// These checks are for rather unusal error scenarios. We should
   367  	// never see errors here. But better safe than sorry.
   368  	if cfg.CgroupMark == "" {
   369  		return errors.New("no mark value found")
   370  	}
   371  
   372  	if cfg.TCPPortSet == "" {
   373  		return fmt.Errorf("port set was not found for the contextID. This should not happen")
   374  	}
   375  
   376  	return i.addChainRules(cfg)
   377  }
   378  
// aclIPset pairs the name of an ipset with the IP rule it was derived from.
// The field order matters: values are built with unkeyed literals.
type aclIPset struct {
	// ipset is the name of the ipset.
	ipset string
	// IPRule is the policy rule associated with the ipset.
	*policy.IPRule
}
   383  
   384  func (i *iptables) getACLIPSets(ipRules policy.IPRuleList) []aclIPset {
   385  
   386  	ipsets := i.ipsetmanager.GetACLIPsetsNames(ipRules)
   387  
   388  	aclIPsets := make([]aclIPset, 0)
   389  
   390  	for i, ipset := range ipsets {
   391  		if len(ipset) > 0 {
   392  			aclIPsets = append(aclIPsets, aclIPset{ipset, &ipRules[i]})
   393  		}
   394  	}
   395  
   396  	return aclIPsets
   397  }
   398  
   399  // Install rules will install all the rules and update the port sets.
   400  func (i *iptables) installRules(cfg *ACLInfo, containerInfo *policy.PUInfo) error {
   401  
   402  	policyrules := containerInfo.Policy
   403  
   404  	// update the proxy set only if there is no serviceMesh enabled.
   405  	if i.serviceMeshType == policy.None {
   406  		if err := i.updateProxySet(cfg.ContextID, containerInfo.Policy); err != nil {
   407  			return err
   408  		}
   409  	}
   410  
   411  	appACLIPset := i.getACLIPSets(policyrules.ApplicationACLs())
   412  	netACLIPset := i.getACLIPSets(policyrules.NetworkACLs())
   413  
   414  	// Install the PU specific chain first.
   415  	if err := i.addContainerChain(cfg); err != nil {
   416  		return err
   417  	}
   418  
   419  	// If its a remote and thus container, configure container rules.
   420  	if i.mode == constants.RemoteContainer {
   421  		if err := i.configureContainerRules(cfg); err != nil {
   422  			return err
   423  		}
   424  	}
   425  
   426  	// If its a Linux process configure the Linux rules.
   427  	if i.mode == constants.LocalServer {
   428  		if err := i.configureLinuxRules(cfg); err != nil {
   429  			return err
   430  		}
   431  	}
   432  
   433  	isHostPU := extractors.IsHostPU(containerInfo.Runtime, i.mode)
   434  
   435  	if err := i.addPreNetworkACLRules(cfg); err != nil {
   436  		return err
   437  	}
   438  
   439  	if err := i.addExternalACLs(cfg, cfg.AppChain, cfg.NetChain, appACLIPset, true); err != nil {
   440  		return err
   441  	}
   442  
   443  	if err := i.addExternalACLs(cfg, cfg.NetChain, cfg.AppChain, netACLIPset, false); err != nil {
   444  		return err
   445  	}
   446  
   447  	appAnyRules, netAnyRules, err := i.getProtocolAnyRules(cfg, appACLIPset, netACLIPset)
   448  	if err != nil {
   449  		return err
   450  	}
   451  
   452  	return i.addPacketTrap(cfg, isHostPU, appAnyRules, netAnyRules)
   453  }
   454  
   455  func (i *iptables) updateProxySet(contextID string, policy *policy.PUPolicy) error {
   456  	i.ipsetmanager.FlushProxySets(contextID)
   457  
   458  	for _, dependentService := range policy.DependentServices() {
   459  		addresses := dependentService.NetworkInfo.Addresses
   460  		min, max := dependentService.NetworkInfo.Ports.Range()
   461  
   462  		for addrS := range addresses {
   463  			_, addr, _ := net.ParseCIDR(addrS)
   464  			for port := int(min); port <= int(max); port++ {
   465  				if err := i.ipsetmanager.AddIPPortToDependentService(contextID, addr, strconv.Itoa(port)); err != nil {
   466  					return fmt.Errorf("unable to add dependent ip %v to dependent networks ipset: %v", port, err)
   467  				}
   468  			}
   469  		}
   470  	}
   471  
   472  	for _, exposedService := range policy.ExposedServices() {
   473  		min, max := exposedService.PrivateNetworkInfo.Ports.Range()
   474  		for port := int(min); port <= int(max); port++ {
   475  			if err := i.ipsetmanager.AddPortToExposedService(contextID, strconv.Itoa(port)); err != nil {
   476  				zap.L().Error("Failed to add vip", zap.Error(err))
   477  				return fmt.Errorf("unable to add port %d to exposed ports ipset: %s", port, err)
   478  			}
   479  		}
   480  
   481  		if exposedService.PublicNetworkInfo != nil {
   482  			min, max := exposedService.PublicNetworkInfo.Ports.Range()
   483  			for port := int(min); port <= int(max); port++ {
   484  				if err := i.ipsetmanager.AddPortToExposedService(contextID, strconv.Itoa(port)); err != nil {
   485  					zap.L().Error("Failed to VIP for public network", zap.Error(err))
   486  					return fmt.Errorf("Failed to program VIP: %s", err)
   487  				}
   488  			}
   489  		}
   490  	}
   491  
   492  	return nil
   493  }