github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/networking_cni.go (about) 1 // For now CNI is supported only on Linux. 2 // 3 //go:build linux 4 // +build linux 5 6 package allocrunner 7 8 import ( 9 "context" 10 "encoding/json" 11 "fmt" 12 "math/rand" 13 "os" 14 "path/filepath" 15 "regexp" 16 "sort" 17 "strings" 18 "time" 19 20 cni "github.com/containerd/go-cni" 21 cnilibrary "github.com/containernetworking/cni/libcni" 22 "github.com/coreos/go-iptables/iptables" 23 log "github.com/hashicorp/go-hclog" 24 "github.com/hashicorp/nomad/nomad/structs" 25 "github.com/hashicorp/nomad/plugins/drivers" 26 ) 27 28 const ( 29 30 // envCNIPath is the environment variable name to use to derive the CNI path 31 // when it is not explicitly set by the client 32 envCNIPath = "CNI_PATH" 33 34 // defaultCNIPath is the CNI path to use when it is not set by the client 35 // and is not set by environment variable 36 defaultCNIPath = "/opt/cni/bin" 37 38 // defaultCNIInterfacePrefix is the network interface to use if not set in 39 // client config 40 defaultCNIInterfacePrefix = "eth" 41 ) 42 43 type cniNetworkConfigurator struct { 44 cni cni.CNI 45 cniConf []byte 46 ignorePortMappingHostIP bool 47 48 rand *rand.Rand 49 logger log.Logger 50 } 51 52 func newCNINetworkConfigurator(logger log.Logger, cniPath, cniInterfacePrefix, cniConfDir, networkName string, ignorePortMappingHostIP bool) (*cniNetworkConfigurator, error) { 53 cniConf, err := loadCNIConf(cniConfDir, networkName) 54 if err != nil { 55 return nil, fmt.Errorf("failed to load CNI config: %v", err) 56 } 57 58 return newCNINetworkConfiguratorWithConf(logger, cniPath, cniInterfacePrefix, ignorePortMappingHostIP, cniConf) 59 } 60 61 func newCNINetworkConfiguratorWithConf(logger log.Logger, cniPath, cniInterfacePrefix string, ignorePortMappingHostIP bool, cniConf []byte) (*cniNetworkConfigurator, error) { 62 conf := &cniNetworkConfigurator{ 63 cniConf: cniConf, 64 rand: rand.New(rand.NewSource(time.Now().Unix())), 65 logger: logger, 66 ignorePortMappingHostIP: ignorePortMappingHostIP, 67 } 68 if cniPath == "" { 69 if cniPath = os.Getenv(envCNIPath); cniPath == "" { 70 cniPath = defaultCNIPath 71 } 72 } 73 74 if cniInterfacePrefix == "" { 75 cniInterfacePrefix = defaultCNIInterfacePrefix 76 } 77 78 c, err := cni.New(cni.WithPluginDir(filepath.SplitList(cniPath)), 79 cni.WithInterfacePrefix(cniInterfacePrefix)) 80 if err != nil { 81 return nil, err 82 } 83 conf.cni = c 84 85 return conf, nil 86 } 87 88 // Setup calls the CNI plugins with the add action 89 func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) { 90 if err := c.ensureCNIInitialized(); err != nil { 91 return nil, err 92 } 93 94 // Depending on the version of bridge cni plugin used, a known race could occure 95 // where two alloc attempt to create the nomad bridge at the same time, resulting 96 // in one of them to fail. This rety attempts to overcome those erroneous failures. 97 const retry = 3 98 var firstError error 99 var res *cni.CNIResult 100 for attempt := 1; ; attempt++ { 101 var err error 102 if res, err = c.cni.Setup(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))); err != nil { 103 c.logger.Warn("failed to configure network", "error", err, "attempt", attempt) 104 switch attempt { 105 case 1: 106 firstError = err 107 case retry: 108 return nil, fmt.Errorf("failed to configure network: %v", firstError) 109 } 110 111 // Sleep for 1 second + jitter 112 time.Sleep(time.Second + (time.Duration(c.rand.Int63n(1000)) * time.Millisecond)) 113 continue 114 } 115 break 116 } 117 118 if c.logger.IsDebug() { 119 resultJSON, _ := json.Marshal(res) 120 c.logger.Debug("received result from CNI", "result", string(resultJSON)) 121 } 122 123 return c.cniToAllocNet(res) 124 125 } 126 127 // cniToAllocNet converts a CNIResult to an AllocNetworkStatus or returns an 128 // error. The first interface and IP with a sandbox and address set are 129 // preferred. Failing that the first interface with an IP is selected. 130 // 131 // Unfortunately the go-cni library returns interfaces in an unordered map so 132 // the results may be nondeterministic depending on CNI plugin output. 133 func (c *cniNetworkConfigurator) cniToAllocNet(res *cni.CNIResult) (*structs.AllocNetworkStatus, error) { 134 netStatus := new(structs.AllocNetworkStatus) 135 136 // Use the first sandbox interface with an IP address 137 if len(res.Interfaces) > 0 { 138 for name, iface := range res.Interfaces { 139 if iface == nil { 140 // this should never happen but this value is coming from external 141 // plugins so we should guard against it 142 delete(res.Interfaces, name) 143 } 144 145 if iface.Sandbox != "" && len(iface.IPConfigs) > 0 { 146 netStatus.Address = iface.IPConfigs[0].IP.String() 147 netStatus.InterfaceName = name 148 break 149 } 150 } 151 } 152 153 // If no IP address was found, use the first interface with an address 154 // found as a fallback 155 if netStatus.Address == "" { 156 var found bool 157 for name, iface := range res.Interfaces { 158 if len(iface.IPConfigs) > 0 { 159 ip := iface.IPConfigs[0].IP.String() 160 c.logger.Debug("no sandbox interface with an address found CNI result, using first available", "interface", name, "ip", ip) 161 netStatus.Address = ip 162 netStatus.InterfaceName = name 163 found = true 164 break 165 } 166 } 167 if !found { 168 c.logger.Warn("no address could be found from CNI result") 169 } 170 } 171 172 // If no IP address could be found, return an error 173 if netStatus.Address == "" { 174 return nil, fmt.Errorf("failed to configure network: no interface with an address") 175 176 } 177 178 // Use the first DNS results. 179 if len(res.DNS) > 0 { 180 netStatus.DNS = &structs.DNSConfig{ 181 Servers: res.DNS[0].Nameservers, 182 Searches: res.DNS[0].Search, 183 Options: res.DNS[0].Options, 184 } 185 } 186 187 return netStatus, nil 188 } 189 190 func loadCNIConf(confDir, name string) ([]byte, error) { 191 files, err := cnilibrary.ConfFiles(confDir, []string{".conf", ".conflist", ".json"}) 192 switch { 193 case err != nil: 194 return nil, fmt.Errorf("failed to detect CNI config file: %v", err) 195 case len(files) == 0: 196 return nil, fmt.Errorf("no CNI network config found in %s", confDir) 197 } 198 199 // files contains the network config files associated with cni network. 200 // Use lexicographical way as a defined order for network config files. 201 sort.Strings(files) 202 for _, confFile := range files { 203 if strings.HasSuffix(confFile, ".conflist") { 204 confList, err := cnilibrary.ConfListFromFile(confFile) 205 if err != nil { 206 return nil, fmt.Errorf("failed to load CNI config list file %s: %v", confFile, err) 207 } 208 if confList.Name == name { 209 return confList.Bytes, nil 210 } 211 } else { 212 conf, err := cnilibrary.ConfFromFile(confFile) 213 if err != nil { 214 return nil, fmt.Errorf("failed to load CNI config file %s: %v", confFile, err) 215 } 216 if conf.Network.Name == name { 217 return conf.Bytes, nil 218 } 219 } 220 } 221 222 return nil, fmt.Errorf("CNI network config not found for name %q", name) 223 } 224 225 // Teardown calls the CNI plugins with the delete action 226 func (c *cniNetworkConfigurator) Teardown(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) error { 227 if err := c.ensureCNIInitialized(); err != nil { 228 return err 229 } 230 231 if err := c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))); err != nil { 232 // create a real handle to iptables 233 ipt, iptErr := iptables.New() 234 if iptErr != nil { 235 return fmt.Errorf("failed to detect iptables: %w", iptErr) 236 } 237 // most likely the pause container was removed from underneath nomad 238 return c.forceCleanup(ipt, alloc.ID) 239 } 240 241 return nil 242 } 243 244 // IPTables is a subset of iptables.IPTables 245 type IPTables interface { 246 List(table, chain string) ([]string, error) 247 Delete(table, chain string, rule ...string) error 248 ClearAndDeleteChain(table, chain string) error 249 } 250 251 var ( 252 // ipRuleRe is used to parse a postrouting iptables rule created by nomad, e.g. 253 // -A POSTROUTING -s 172.26.64.191/32 -m comment --comment "name: \"nomad\" id: \"6b235529-8111-4bbe-520b-d639b1d2a94e\"" -j CNI-50e58ea77dc52e0c731e3799 254 ipRuleRe = regexp.MustCompile(`-A POSTROUTING -s (\S+) -m comment --comment "name: \\"nomad\\" id: \\"([[:xdigit:]-]+)\\"" -j (CNI-[[:xdigit:]]+)`) 255 ) 256 257 // forceCleanup is the backup plan for removing the iptables rule and chain associated with 258 // an allocation that was using bridge networking. The cni library refuses to handle a 259 // dirty state - e.g. the pause container is removed out of band, and so we must cleanup 260 // iptables ourselves to avoid leaking rules. 261 func (c *cniNetworkConfigurator) forceCleanup(ipt IPTables, allocID string) error { 262 const ( 263 natTable = "nat" 264 postRoutingChain = "POSTROUTING" 265 commentFmt = `--comment "name: \"nomad\" id: \"%s\""` 266 ) 267 268 // list the rules on the POSTROUTING chain of the nat table 269 rules, err := ipt.List(natTable, postRoutingChain) 270 if err != nil { 271 return fmt.Errorf("failed to list iptables rules: %w", err) 272 } 273 274 // find the POSTROUTING rule associated with our allocation 275 matcher := fmt.Sprintf(commentFmt, allocID) 276 var ruleToPurge string 277 for _, rule := range rules { 278 if strings.Contains(rule, matcher) { 279 ruleToPurge = rule 280 break 281 } 282 } 283 284 // no rule found for our allocation, just give up 285 if ruleToPurge == "" { 286 return fmt.Errorf("failed to find postrouting rule for alloc %s", allocID) 287 } 288 289 // re-create the rule we need to delete, as tokens 290 subs := ipRuleRe.FindStringSubmatch(ruleToPurge) 291 if len(subs) != 4 { 292 return fmt.Errorf("failed to parse postrouting rule for alloc %s", allocID) 293 } 294 cidr := subs[1] 295 id := subs[2] 296 chainID := subs[3] 297 toDel := []string{ 298 `-s`, 299 cidr, 300 `-m`, 301 `comment`, 302 `--comment`, 303 `name: "nomad" id: "` + id + `"`, 304 `-j`, 305 chainID, 306 } 307 308 // remove the jump rule 309 ok := true 310 if err = ipt.Delete(natTable, postRoutingChain, toDel...); err != nil { 311 c.logger.Warn("failed to remove iptables nat.POSTROUTING rule", "alloc_id", allocID, "chain", chainID, "error", err) 312 ok = false 313 } 314 315 // remote the associated chain 316 if err = ipt.ClearAndDeleteChain(natTable, chainID); err != nil { 317 c.logger.Warn("failed to remove iptables nat chain", "chain", chainID, "error", err) 318 ok = false 319 } 320 321 if !ok { 322 return fmt.Errorf("failed to cleanup iptables rules for alloc %s", allocID) 323 } 324 325 return nil 326 } 327 328 func (c *cniNetworkConfigurator) ensureCNIInitialized() error { 329 if err := c.cni.Status(); cni.IsCNINotInitialized(err) { 330 return c.cni.Load(cni.WithConfListBytes(c.cniConf)) 331 } else { 332 return err 333 } 334 } 335 336 // getPortMapping builds a list of portMapping structs that are used as the 337 // portmapping capability arguments for the portmap CNI plugin 338 func getPortMapping(alloc *structs.Allocation, ignoreHostIP bool) []cni.PortMapping { 339 var ports []cni.PortMapping 340 341 if len(alloc.AllocatedResources.Shared.Ports) == 0 && len(alloc.AllocatedResources.Shared.Networks) > 0 { 342 for _, network := range alloc.AllocatedResources.Shared.Networks { 343 for _, port := range append(network.DynamicPorts, network.ReservedPorts...) { 344 if port.To < 1 { 345 port.To = port.Value 346 } 347 for _, proto := range []string{"tcp", "udp"} { 348 ports = append(ports, cni.PortMapping{ 349 HostPort: int32(port.Value), 350 ContainerPort: int32(port.To), 351 Protocol: proto, 352 }) 353 } 354 } 355 } 356 } else { 357 for _, port := range alloc.AllocatedResources.Shared.Ports { 358 if port.To < 1 { 359 port.To = port.Value 360 } 361 for _, proto := range []string{"tcp", "udp"} { 362 portMapping := cni.PortMapping{ 363 HostPort: int32(port.Value), 364 ContainerPort: int32(port.To), 365 Protocol: proto, 366 } 367 if !ignoreHostIP { 368 portMapping.HostIP = port.HostIP 369 } 370 ports = append(ports, portMapping) 371 } 372 } 373 } 374 return ports 375 }