github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/controller/internal/supervisor/iptablesctrl/iptables.go (about) 1 package iptablesctrl 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "net" 8 "strconv" 9 "text/template" 10 11 "go.aporeto.io/enforcerd/trireme-lib/controller/constants" 12 provider "go.aporeto.io/enforcerd/trireme-lib/controller/pkg/aclprovider" 13 "go.aporeto.io/enforcerd/trireme-lib/controller/pkg/ebpf" 14 "go.aporeto.io/enforcerd/trireme-lib/controller/pkg/fqconfig" 15 "go.aporeto.io/enforcerd/trireme-lib/controller/pkg/ipsetmanager" 16 "go.aporeto.io/enforcerd/trireme-lib/controller/runtime" 17 "go.aporeto.io/enforcerd/trireme-lib/monitor/extractors" 18 "go.aporeto.io/enforcerd/trireme-lib/policy" 19 "go.uber.org/zap" 20 ) 21 22 const ( 23 mainAppChain = constants.ChainPrefix + "App" 24 mainNetChain = constants.ChainPrefix + "Net" 25 appChainPrefix = constants.ChainPrefix + "App-" 26 netChainPrefix = constants.ChainPrefix + "Net-" 27 natProxyOutputChain = constants.ChainPrefix + "Redir-App" 28 natProxyInputChain = constants.ChainPrefix + "Redir-Net" 29 proxyOutputChain = constants.ChainPrefix + "Prx-App" 30 proxyInputChain = constants.ChainPrefix + "Prx-Net" 31 istioChain = constants.ChainPrefix + "Istio" 32 33 // TriremeInput represent the chain that contains pu input rules. 34 TriremeInput = constants.ChainPrefix + "Pid-Net" 35 // TriremeOutput represent the chain that contains pu output rules. 36 TriremeOutput = constants.ChainPrefix + "Pid-App" 37 38 // NetworkSvcInput represent the chain that contains NetworkSvc input rules. 39 NetworkSvcInput = constants.ChainPrefix + "Svc-Net" 40 41 // NetworkSvcOutput represent the chain that contains NetworkSvc output rules. 42 NetworkSvcOutput = constants.ChainPrefix + "Svc-App" 43 44 // HostModeInput represent the chain that contains Hostmode input rules. 45 HostModeInput = constants.ChainPrefix + "Hst-Net" 46 47 // HostModeOutput represent the chain that contains Hostmode output rules. 48 HostModeOutput = constants.ChainPrefix + "Hst-App" 49 // NfqueueOutput represents the chain that contains the nfqueue output rules 50 NfqueueOutput = constants.ChainPrefix + "Nfq-OUT" 51 // NfqueueInput represents the chain that contains the nfqueue input rules 52 NfqueueInput = constants.ChainPrefix + "Nfq-IN" 53 // IstioUID is the UID of the istio-proxy(envoy) that is used in the iptables to identify the 54 // envoy generated traffic 55 IstioUID = "1337" 56 // IstioRedirPort is the port where the App traffic from the output chain 57 // is redirected into Istio-proxy, we need to accept this traffic as we don't to come in between 58 // APP --> Envoy traffic. 59 IstioRedirPort = "15001" 60 ) 61 62 type iptables struct { 63 impl IPImpl 64 fqc fqconfig.FilterQueue 65 mode constants.ModeType 66 ipsetmanager ipsetmanager.IPSetManager 67 bpf ebpf.BPFModule 68 serviceMeshType policy.ServiceMesh 69 } 70 71 // IPImpl interface is to be used by the iptable implentors like ipv4 and ipv6. 72 type IPImpl interface { 73 provider.IptablesProvider 74 IPVersion() int 75 ProtocolAllowed(proto string) bool 76 IPFilter() func(net.IP) bool 77 GetDefaultIP() string 78 NeedICMP() bool 79 } 80 81 type ipFilter func(net.IP) bool 82 83 func createIPInstance(impl IPImpl, ipsetmanager ipsetmanager.IPSetManager, fqc fqconfig.FilterQueue, mode constants.ModeType, ebpf ebpf.BPFModule, ServiceMeshType policy.ServiceMesh) *iptables { 84 85 return &iptables{ 86 impl: impl, 87 fqc: fqc, 88 mode: mode, 89 ipsetmanager: ipsetmanager, 90 bpf: ebpf, 91 serviceMeshType: ServiceMeshType, 92 } 93 } 94 95 func (i *iptables) SetTargetNetworks(c *runtime.Configuration) error { 96 if c == nil { 97 return nil 98 } 99 100 tcp := c.TCPTargetNetworks 101 udp := c.UDPTargetNetworks 102 excluded := c.ExcludedNetworks 103 104 // If there are no target networks, capture all traffic 105 if len(tcp) == 0 { 106 tcp = []string{IPv4DefaultIP, IPv6DefaultIP} 107 } 108 109 return i.ipsetmanager.UpdateIPsetsForTargetAndExcludedNetworks(tcp, udp, excluded) 110 } 111 112 func (i *iptables) Run(ctx context.Context) error { 113 114 // Clean any previous ACLs. This is needed in case we crashed at some 115 // earlier point or there are other ACLs that create conflicts. We 116 // try to clean only ACLs related to Trireme. 117 if err := i.cleanACLs(); err != nil { 118 return fmt.Errorf("Unable to clean previous acls while starting the supervisor: %s", err) 119 } 120 121 if err := i.ipsetmanager.DestroyAllIPsets(); err != nil { 122 zap.L().Debug("ipset destroy all ipset returned error", zap.Error(err)) 123 } 124 125 if err := i.ipsetmanager.CreateIPsetsForTargetAndExcludedNetworks(); err != nil { 126 if err1 := i.ipsetmanager.DestroyAllIPsets(); err1 != nil { 127 zap.L().Debug("ipset destroy all ipset returned error", zap.Error(err1)) 128 } 129 return fmt.Errorf("unable to create target network ipsets: %s", err) 130 } 131 132 // Windows needs to initialize some ipsets 133 if err := i.platformInit(); err != nil { 134 return err 135 } 136 137 // Initialize all the global Trireme chains. There are several global chaims 138 // that apply to all PUs: 139 // Tri-App/Tri-Net are the main chains for the egress/ingress directions 140 // UID related chains for any UID PUs. 141 // Host, Service, Pid chains for the different modes of operation (host mode, pu mode, host service). 142 // The priority is explicit (Pid activations take precedence of Service activations and Host Services) 143 if err := i.initializeChains(); err != nil { 144 return fmt.Errorf("Unable to initialize chains: %s", err) 145 } 146 147 // Insert the global ACLS. These are the main ACLs that will direct traffic from 148 // the INPUT/OUTPUT chains to the Trireme chains. They also includes the main 149 // rules of the main chains. These rules are never touched again, unless 150 // if we gracefully terminate. 151 if err := i.setGlobalRules(); err != nil { 152 return fmt.Errorf("failed to update synack networks: %s", err) 153 } 154 155 if err := i.impl.Commit(); err != nil { 156 return err 157 } 158 159 return nil 160 } 161 162 func (i *iptables) ConfigureRules(version int, contextID string, pu *policy.PUInfo) error { 163 var err error 164 var cfg *ACLInfo 165 166 // First we create an IPSet for destination matching ports. This only 167 // applies to Linux type PUs. A port set is associated with every PU, 168 // and packets matching this destination get associated with the context 169 // of the PU. 170 if i.mode != constants.RemoteContainer { 171 if err = i.ipsetmanager.CreateServerPortSet(contextID); err != nil { 172 return err 173 } 174 } 175 176 // Create the proxy sets. These are the target sets that will match 177 // traffic towards the L4 and L4 services. There are two sets created 178 // for every PU in this context (for outgoing and incoming traffic). 179 // The outgoing sets capture all traffic towards specific destinations 180 // as proxied traffic. Incoming sets correspond to the listening 181 // services. 182 // create proxySets only if there is no serviceMesh. 183 if i.serviceMeshType == policy.None { 184 if err := i.ipsetmanager.CreateProxySets(contextID); err != nil { 185 return err 186 } 187 } 188 189 // We create the generic ACL object that is used for all the templates. 190 cfg, err = i.newACLInfo(version, contextID, pu, pu.Runtime.PUType()) 191 if err != nil { 192 return err 193 } 194 195 // At this point we can install all the ACL rules that will direct 196 // traffic to user space, allow for external access or direct 197 // traffic towards the proxies 198 if err = i.installRules(cfg, pu); err != nil { 199 return err 200 } 201 202 // We commit the ACLs at the end. Note, that some of the ACLs in the 203 // NAT table are not committed as a group. The commit function only 204 // applies when newer versions of tables are installed (1.6.2 and above). 205 if err = i.impl.Commit(); err != nil { 206 zap.L().Error("unable to configure rules", zap.Error(err)) 207 return err 208 } 209 210 return nil 211 } 212 213 func (i *iptables) DeleteRules(version int, contextID string, tcpPorts, udpPorts string, mark string, username string, containerInfo *policy.PUInfo) error { 214 cfg, err := i.newACLInfo(version, contextID, nil, containerInfo.Runtime.PUType()) 215 if err != nil { 216 zap.L().Error("unable to create cleanup configuration", zap.Error(err)) 217 return err 218 } 219 if i.mode == constants.LocalServer { 220 cfg.PacketMark = mark 221 } 222 cfg.UDPPorts = udpPorts 223 cfg.TCPPorts = tcpPorts 224 cfg.CgroupMark = mark 225 cfg.Mark = mark 226 227 cfg.PUType = containerInfo.Runtime.PUType() 228 cfg.ProxyPort = containerInfo.Policy.ServicesListeningPort() 229 cfg.DNSProxyPort = containerInfo.Policy.DNSProxyPort() 230 // We clean up the chain rules first, so that we can delete the chains. 231 // If any rule is not deleted, then the chain will show as busy. 232 if err := i.deleteChainRules(cfg); err != nil { 233 zap.L().Warn("Failed to clean rules", zap.Error(err)) 234 } 235 236 // We can now delete the chains we have created for this PU. Note that 237 // in every case we only create two chains for every PU. All other 238 // chains are global. 239 if err = i.deletePUChains(cfg); err != nil { 240 zap.L().Warn("Failed to clean container chains while deleting the rules", zap.Error(err)) 241 } 242 243 // We call commit to update all the changes, before destroying the ipsets. 244 // References must be deleted for ipset deletion to succeed. 245 if err := i.impl.Commit(); err != nil { 246 zap.L().Warn("Failed to commit ACL changes", zap.Error(err)) 247 } 248 249 if i.mode != constants.RemoteContainer { 250 // We delete the set that captures all destination ports of the 251 // PU. This only holds for Linux PUs. 252 if err := i.ipsetmanager.DestroyServerPortSet(contextID); err != nil { 253 zap.L().Warn("Failed to remove port set") 254 } 255 } 256 257 // if serviceMesh is enabled then don't detroy the proxySets as we have not create them. 258 if i.serviceMeshType == policy.None { 259 // We delete the proxy port sets that were created for this PU. 260 i.ipsetmanager.DestroyProxySets(contextID) 261 } 262 return nil 263 } 264 265 func (i *iptables) UpdateRules(version int, contextID string, containerInfo *policy.PUInfo, oldContainerInfo *policy.PUInfo) error { 266 policyrules := containerInfo.Policy 267 if policyrules == nil { 268 return errors.New("policy rules cannot be nil") 269 } 270 271 // We cache the old config and we use it to delete the previous 272 // rules. Every time we update the policy the version changes to 273 // its binary complement. 274 newCfg, err := i.newACLInfo(version, contextID, containerInfo, containerInfo.Runtime.PUType()) 275 if err != nil { 276 return err 277 } 278 279 oldCfg, err := i.newACLInfo(version^1, contextID, oldContainerInfo, containerInfo.Runtime.PUType()) 280 if err != nil { 281 return err 282 } 283 284 // Install all the new rules. The hooks to the new chains are appended 285 // and do not take effect yet. 286 if err := i.installRules(newCfg, containerInfo); err != nil { 287 return err 288 } 289 290 // Remove mapping from old chain. By removing the old hooks the new 291 // hooks take priority. 292 if err := i.deleteChainRules(oldCfg); err != nil { 293 return err 294 } 295 296 // Delete the old chains, since there are not references any more. 297 if err := i.deletePUChains(oldCfg); err != nil { 298 return err 299 } 300 301 // Commit all actions in on iptables-restore function. 302 if err := i.impl.Commit(); err != nil { 303 return err 304 } 305 306 return nil 307 } 308 309 func (i *iptables) CleanUp() error { 310 311 if err := i.cleanACLs(); err != nil { 312 zap.L().Error("Failed to clean acls while stopping the supervisor", zap.Error(err)) 313 } 314 315 if err := i.ipsetmanager.DestroyAllIPsets(); err != nil { 316 zap.L().Error("Failed to clean up ipsets", zap.Error(err)) 317 } 318 319 i.ipsetmanager.Reset() 320 321 return nil 322 } 323 324 // InitializeChains initializes the chains. 325 func (i *iptables) initializeChains() error { 326 327 cfg, err := i.newACLInfo(0, "", nil, 0) 328 if err != nil { 329 return err 330 } 331 tmpl := template.Must(template.New(triremChains).Funcs(template.FuncMap{ 332 "isLocalServer": func() bool { 333 return i.mode == constants.LocalServer 334 }, 335 "isIstioEnabled": func() bool { 336 return i.serviceMeshType == policy.Istio 337 }, 338 }).Parse(triremChains)) 339 340 rules, err := extractRulesFromTemplate(tmpl, cfg) 341 if err != nil { 342 return fmt.Errorf("unable to create trireme chains:%s", err) 343 } 344 for _, rule := range rules { 345 if len(rule) != 4 { 346 continue 347 } 348 if err := i.impl.NewChain(rule[1], rule[3]); err != nil { 349 return err 350 } 351 } 352 353 return nil 354 } 355 356 // configureContainerRules adds the chain rules for a container. 357 // We separate in different methods to keep track of the changes 358 // independently. 359 func (i *iptables) configureContainerRules(cfg *ACLInfo) error { 360 return i.addChainRules(cfg) 361 } 362 363 // configureLinuxRules adds the chain rules for a linux process or a UID process. 364 func (i *iptables) configureLinuxRules(cfg *ACLInfo) error { 365 366 // These checks are for rather unusal error scenarios. We should 367 // never see errors here. But better safe than sorry. 368 if cfg.CgroupMark == "" { 369 return errors.New("no mark value found") 370 } 371 372 if cfg.TCPPortSet == "" { 373 return fmt.Errorf("port set was not found for the contextID. This should not happen") 374 } 375 376 return i.addChainRules(cfg) 377 } 378 379 type aclIPset struct { 380 ipset string 381 *policy.IPRule 382 } 383 384 func (i *iptables) getACLIPSets(ipRules policy.IPRuleList) []aclIPset { 385 386 ipsets := i.ipsetmanager.GetACLIPsetsNames(ipRules) 387 388 aclIPsets := make([]aclIPset, 0) 389 390 for i, ipset := range ipsets { 391 if len(ipset) > 0 { 392 aclIPsets = append(aclIPsets, aclIPset{ipset, &ipRules[i]}) 393 } 394 } 395 396 return aclIPsets 397 } 398 399 // Install rules will install all the rules and update the port sets. 400 func (i *iptables) installRules(cfg *ACLInfo, containerInfo *policy.PUInfo) error { 401 402 policyrules := containerInfo.Policy 403 404 // update the proxy set only if there is no serviceMesh enabled. 405 if i.serviceMeshType == policy.None { 406 if err := i.updateProxySet(cfg.ContextID, containerInfo.Policy); err != nil { 407 return err 408 } 409 } 410 411 appACLIPset := i.getACLIPSets(policyrules.ApplicationACLs()) 412 netACLIPset := i.getACLIPSets(policyrules.NetworkACLs()) 413 414 // Install the PU specific chain first. 415 if err := i.addContainerChain(cfg); err != nil { 416 return err 417 } 418 419 // If its a remote and thus container, configure container rules. 420 if i.mode == constants.RemoteContainer { 421 if err := i.configureContainerRules(cfg); err != nil { 422 return err 423 } 424 } 425 426 // If its a Linux process configure the Linux rules. 427 if i.mode == constants.LocalServer { 428 if err := i.configureLinuxRules(cfg); err != nil { 429 return err 430 } 431 } 432 433 isHostPU := extractors.IsHostPU(containerInfo.Runtime, i.mode) 434 435 if err := i.addPreNetworkACLRules(cfg); err != nil { 436 return err 437 } 438 439 if err := i.addExternalACLs(cfg, cfg.AppChain, cfg.NetChain, appACLIPset, true); err != nil { 440 return err 441 } 442 443 if err := i.addExternalACLs(cfg, cfg.NetChain, cfg.AppChain, netACLIPset, false); err != nil { 444 return err 445 } 446 447 appAnyRules, netAnyRules, err := i.getProtocolAnyRules(cfg, appACLIPset, netACLIPset) 448 if err != nil { 449 return err 450 } 451 452 return i.addPacketTrap(cfg, isHostPU, appAnyRules, netAnyRules) 453 } 454 455 func (i *iptables) updateProxySet(contextID string, policy *policy.PUPolicy) error { 456 i.ipsetmanager.FlushProxySets(contextID) 457 458 for _, dependentService := range policy.DependentServices() { 459 addresses := dependentService.NetworkInfo.Addresses 460 min, max := dependentService.NetworkInfo.Ports.Range() 461 462 for addrS := range addresses { 463 _, addr, _ := net.ParseCIDR(addrS) 464 for port := int(min); port <= int(max); port++ { 465 if err := i.ipsetmanager.AddIPPortToDependentService(contextID, addr, strconv.Itoa(port)); err != nil { 466 return fmt.Errorf("unable to add dependent ip %v to dependent networks ipset: %v", port, err) 467 } 468 } 469 } 470 } 471 472 for _, exposedService := range policy.ExposedServices() { 473 min, max := exposedService.PrivateNetworkInfo.Ports.Range() 474 for port := int(min); port <= int(max); port++ { 475 if err := i.ipsetmanager.AddPortToExposedService(contextID, strconv.Itoa(port)); err != nil { 476 zap.L().Error("Failed to add vip", zap.Error(err)) 477 return fmt.Errorf("unable to add port %d to exposed ports ipset: %s", port, err) 478 } 479 } 480 481 if exposedService.PublicNetworkInfo != nil { 482 min, max := exposedService.PublicNetworkInfo.Ports.Range() 483 for port := int(min); port <= int(max); port++ { 484 if err := i.ipsetmanager.AddPortToExposedService(contextID, strconv.Itoa(port)); err != nil { 485 zap.L().Error("Failed to VIP for public network", zap.Error(err)) 486 return fmt.Errorf("Failed to program VIP: %s", err) 487 } 488 } 489 } 490 } 491 492 return nil 493 }