istio.io/istio@v0.0.0-20240520182934-d79c90f27776/tools/istio-iptables/pkg/dependencies/implementation_linux.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package dependencies 16 17 import ( 18 "bytes" 19 "fmt" 20 "io" 21 "os/exec" 22 "runtime" 23 "strings" 24 "syscall" 25 26 netns "github.com/containernetworking/plugins/pkg/ns" 27 "golang.org/x/sys/unix" 28 utilversion "k8s.io/apimachinery/pkg/util/version" 29 30 "istio.io/istio/pkg/log" 31 "istio.io/istio/tools/istio-iptables/pkg/constants" 32 ) 33 34 // TODO the entire `istio-iptables` package is linux-specific, I'm not sure we really need 35 // platform-differentiators for the `dependencies` package itself. 36 37 // NoLocks returns true if this version does not use or support locks 38 func (v IptablesVersion) NoLocks() bool { 39 // nf_tables does not use locks 40 // legacy added locks in 1.6.2 41 return !v.Legacy || v.Version.LessThan(IptablesRestoreLocking) 42 } 43 44 var ( 45 // IptablesRestoreLocking is the version where locking and -w is added to iptables-restore 46 IptablesRestoreLocking = utilversion.MustParseGeneric("1.6.2") 47 // IptablesLockfileEnv is the version where XTABLES_LOCKFILE is added to iptables. 48 IptablesLockfileEnv = utilversion.MustParseGeneric("1.8.6") 49 ) 50 51 func shouldUseBinaryForCurrentContext(iptablesBin string) (IptablesVersion, error) { 52 // We assume that whatever `iptablesXXX` binary you pass us also has a `iptablesXXX-save` and `iptablesXXX-restore` 53 // binary - which should always be true for any valid iptables installation 54 // (we use both in our iptables code later on anyway) 55 // 56 // We could explicitly check for all 3 every time to be sure, but that's likely not necessary, 57 // if we find one unless the host OS is badly broken we will find the others. 58 iptablesSaveBin := fmt.Sprintf("%s-save", iptablesBin) 59 iptablesRestoreBin := fmt.Sprintf("%s-restore", iptablesBin) 60 var parsedVer *utilversion.Version 61 var isNft bool 62 // does the "xx-save" binary exist? 63 rulesDump, binExistsErr := exec.Command(iptablesSaveBin).CombinedOutput() 64 if binExistsErr != nil { 65 return IptablesVersion{}, fmt.Errorf("binary %s not found in path: %w", iptablesSaveBin, binExistsErr) 66 } 67 68 // Binary is there, so try to parse version 69 verCmd := exec.Command(iptablesSaveBin, "--version") 70 // shockingly, `iptables-save` returns 0 if you pass it an unrecognized/bad option, so 71 // `os/exec` will return a *nil* error, even if the command fails. So, we must slurp stderr, and check it to 72 // see if the command *actually* failed due to not recognizing the version flag. 73 var verStdOut bytes.Buffer 74 var verStdErr bytes.Buffer 75 verCmd.Stdout = &verStdOut 76 verCmd.Stderr = &verStdErr 77 78 verExec := verCmd.Run() 79 if verExec == nil && !strings.Contains(verStdErr.String(), "unrecognized option") { 80 var parseErr error 81 // we found the binary - extract the version, then try to detect if rules already exist for that variant 82 parsedVer, parseErr = parseIptablesVer(verStdOut.String()) 83 if parseErr != nil { 84 return IptablesVersion{}, fmt.Errorf("iptables version %q is not a valid version string: %v", verStdOut.Bytes(), parseErr) 85 } 86 // Legacy will have no marking or 'legacy', so just look for nf_tables 87 isNft = strings.Contains(verStdOut.String(), "nf_tables") 88 } else { 89 log.Warnf("found iptables binary %s, but it does not appear to support the '--version' flag, assuming very old legacy version", iptablesSaveBin) 90 // Some really old iptables-legacy-save versions (1.6.1, ubuntu bionic) don't support any arguments at all, including `--version` 91 // So if we get here, we found `iptables-save` in PATH, but it's too outdated to understand `--version`. 92 // 93 // We can eventually remove this. 94 // 95 // So assume it's legacy/an unknown version, but assume we can use it since it's in PATH 96 parsedVer = utilversion.MustParseGeneric("0.0.0") 97 isNft = false 98 } 99 100 // if binary seems to exist, check the dump of rules in our netns, and see if any rules exist there 101 // Note that this is highly dependent on context. 102 // new pod netns? probably no rules. Hostnetns? probably rules 103 // So this is mostly just a "hint"/heuristic as to which version we should be using, if more than one binary is present. 104 // `xx-save` should return _no_ output (0 lines) if no rules are defined in this netns for that binary variant. 105 // `xx-save` should return at least 3 output lines if at least one rule is defined in this netns for that binary variant. 106 existingRules := false 107 if strings.Count(string(rulesDump), "\n") >= 3 { 108 existingRules = true 109 log.Debugf("found existing rules for %s", iptablesSaveBin) 110 } 111 return IptablesVersion{ 112 DetectedBinary: iptablesBin, 113 DetectedSaveBinary: iptablesSaveBin, 114 DetectedRestoreBinary: iptablesRestoreBin, 115 Version: parsedVer, 116 Legacy: !isNft, 117 ExistingRules: existingRules, 118 }, nil 119 } 120 121 // runInSandbox builds a lightweight sandbox ("container") to build a suitable environment to run iptables commands in. 122 // This is used in CNI, where commands are executed from the host but from within the container network namespace. 123 // This puts us in somewhat unconventionally territory. 124 func runInSandbox(lockFile string, f func() error) error { 125 chErr := make(chan error, 1) 126 n, nerr := netns.GetCurrentNS() 127 if nerr != nil { 128 return fmt.Errorf("failed to get current namespace: %v", nerr) 129 } 130 // setupSandbox builds the sandbox. 131 setupSandbox := func() error { 132 // First, unshare the mount namespace. This allows us to create custom mounts without impacting the host 133 if err := unix.Unshare(unix.CLONE_NEWNS); err != nil { 134 return fmt.Errorf("failed to unshare to new mount namespace: %v", err) 135 } 136 if err := n.Set(); err != nil { 137 return fmt.Errorf("failed to reset network namespace: %v", err) 138 } 139 // Remount / as a private mount so that our mounts do not impact outside the namespace 140 // (see https://unix.stackexchange.com/questions/246312/why-is-my-bind-mount-visible-outside-its-mount-namespace). 141 if err := unix.Mount("", "/", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil { 142 return fmt.Errorf("failed to remount /: %v", err) 143 } 144 // In CNI, we are running the pod network namespace, but the host filesystem. Locking the host is both useless and harmful, 145 // as it opens the risk of lock contention with other node actors (such as kube-proxy), and isn't actually needed at all. 146 // Older iptables cannot turn off the lock explicitly, so we hack around it... 147 // Overwrite the lock file with the network namespace file (which is assumed to be unique). 148 // We are setting the lockfile to `r.NetworkNamespace`. 149 // /dev/null looks like a good option, but actually doesn't work (it will ensure only one actor can access it) 150 if lockFile != "" { 151 if err := mount(lockFile, "/run/xtables.lock"); err != nil { 152 return fmt.Errorf("bind mount of %q failed: %v", lockFile, err) 153 } 154 } 155 156 // In some setups, iptables can make remote network calls(!!). Since these come from a partially initialized pod network namespace, 157 // these calls can be blocked (or NetworkPolicy, etc could block them anyways). 158 // This is triggered by NSS, which allows various things to use arbitrary code to lookup configuration that typically comes from files. 159 // In our case, the culprit is the `xt_owner` (`-m owner`) module in iptables calls the `passwd` service to lookup the user. 160 // To disallow this, bindmount /dev/null over nsswitch.conf so this never happens. 161 // This should be safe to do, even if the user has an nsswitch entry that would work fine: we always use a numeric ID 162 // so the passwd lookup doesn't need to succeed at all for Istio to function. 163 // Effectively, we want a mini-container. In fact, running in a real container would be ideal but it is hard to do portably. 164 // See https://github.com/istio/istio/issues/48416 for a real world example of this case. 165 if err := mount("/dev/null", "/etc/nsswitch.conf"); err != nil { 166 return fmt.Errorf("bind mount to %q failed: %v", "/etc/nsswitch.conf", err) 167 } 168 return nil 169 } 170 171 executed := false 172 // Once we call unshare(CLONE_NEWNS), we cannot undo it explicitly. Instead, we need to unshare on a specific thread, 173 // then kill that thread when we are done (or rather, let Go runtime kill the thread). 174 // Unfortunately, making a new thread breaks us out of the network namespace we entered previously, so we need to restore that as well 175 go func() { 176 chErr <- func() error { 177 // We now have exclusive access to this thread. Once the goroutine exits without calling UnlockOSThread, the go runtime will kill the thread for us 178 // Warning: Do not call UnlockOSThread! Notably, netns.Do does call this. 179 runtime.LockOSThread() 180 if err := setupSandbox(); err != nil { 181 return err 182 } 183 // Mark we have actually run the command. This lets us distinguish from a failure in setupSandbox() vs f() 184 executed = true 185 return f() 186 }() 187 }() 188 err := <-chErr 189 if err != nil && !executed { 190 // We failed to setup the environment. Now we go into best effort mode. 191 // Users running into this may have IPTables lock used unexpectedly or make unexpected NSS calls. 192 // This is to support environments with restrictive access (from SELinux, but possibly others) that block these calls 193 // See https://github.com/istio/istio/issues/48746 194 log.Warnf("failed to setup execution environment, attempting to continue anyways: %v", err) 195 // Try to execute as-is 196 return f() 197 } 198 // Otherwise, we did execute; return the error from that execution. 199 return err 200 } 201 202 func mount(src, dst string) error { 203 return syscall.Mount(src, dst, "", syscall.MS_BIND|syscall.MS_RDONLY, "") 204 } 205 206 func (r *RealDependencies) executeXTables(cmd constants.IptablesCmd, iptVer *IptablesVersion, ignoreErrors bool, stdin io.ReadSeeker, args ...string) error { 207 mode := "without lock" 208 cmdBin := iptVer.CmdToString(cmd) 209 if cmdBin == "" { 210 return fmt.Errorf("called without iptables binary, cannot execute!: %+v", iptVer) 211 } 212 var c *exec.Cmd 213 needLock := iptVer.IsWriteCmd(cmd) && !iptVer.NoLocks() 214 run := func(c *exec.Cmd) error { 215 return c.Run() 216 } 217 if r.CNIMode { 218 c = exec.Command(cmdBin, args...) 219 // In CNI, we are running the pod network namespace, but the host filesystem, so we need to do some tricks 220 // Call our binary again, but with <original binary> "unshare (subcommand to trigger mounts)" --lock-file=<network namespace> <original command...> 221 // We do not shell out and call `mount` since this and sh are not available on all systems 222 var lockFile string 223 if needLock { 224 if iptVer.Version.LessThan(IptablesLockfileEnv) { 225 mode = "without lock by mount and nss" 226 lockFile = r.NetworkNamespace 227 } else { 228 mode = "without lock by env and nss" 229 c.Env = append(c.Env, "XTABLES_LOCKFILE="+r.NetworkNamespace) 230 } 231 } else { 232 mode = "without nss" 233 } 234 235 run = func(c *exec.Cmd) error { 236 return runInSandbox(lockFile, func() error { 237 return c.Run() 238 }) 239 } 240 } else { 241 if needLock { 242 // We want the lock. Wait up to 30s for it. 243 args = append(args, "--wait=30") 244 c = exec.Command(cmdBin, args...) 245 log.Debugf("running with lock") 246 mode = "with wait lock" 247 } else { 248 // No locking supported/needed, just run as is. Nothing special 249 c = exec.Command(cmdBin, args...) 250 } 251 } 252 253 log.Infof("Running command (%s): %s %s", mode, cmdBin, strings.Join(args, " ")) 254 stdout := &bytes.Buffer{} 255 stderr := &bytes.Buffer{} 256 c.Stdout = stdout 257 c.Stderr = stderr 258 c.Stdin = stdin 259 err := run(c) 260 if len(stdout.String()) != 0 { 261 log.Infof("Command output: \n%v", stdout.String()) 262 } 263 264 // TODO Check naming and redirection logic 265 if (err != nil || len(stderr.String()) != 0) && !ignoreErrors { 266 stderrStr := stderr.String() 267 268 // Transform to xtables-specific error messages with more useful and actionable hints. 269 if err != nil { 270 stderrStr = transformToXTablesErrorMessage(stderrStr, err) 271 } 272 273 log.Errorf("Command error output: %v", stderrStr) 274 } 275 276 return err 277 }