istio.io/istio@v0.0.0-20240520182934-d79c90f27776/tools/istio-iptables/pkg/dependencies/implementation_linux.go

istio.io/istio@v0.0.0-20240520182934-d79c90f27776/tools/istio-iptables/pkg/dependencies/implementation_linux.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dependencies
    16  
    17  import (
    18  	"bytes"
    19  	"fmt"
    20  	"io"
    21  	"os/exec"
    22  	"runtime"
    23  	"strings"
    24  	"syscall"
    25  
    26  	netns "github.com/containernetworking/plugins/pkg/ns"
    27  	"golang.org/x/sys/unix"
    28  	utilversion "k8s.io/apimachinery/pkg/util/version"
    29  
    30  	"istio.io/istio/pkg/log"
    31  	"istio.io/istio/tools/istio-iptables/pkg/constants"
    32  )
    33  
    34  // TODO the entire `istio-iptables` package is linux-specific, I'm not sure we really need
    35  // platform-differentiators for the `dependencies` package itself.
    36  
    37  // NoLocks returns true if this version does not use or support locks
    38  func (v IptablesVersion) NoLocks() bool {
    39  	// nf_tables does not use locks
    40  	// legacy added locks in 1.6.2
    41  	return !v.Legacy || v.Version.LessThan(IptablesRestoreLocking)
    42  }
    43  
    44  var (
    45  	// IptablesRestoreLocking is the version where locking and -w is added to iptables-restore
    46  	IptablesRestoreLocking = utilversion.MustParseGeneric("1.6.2")
    47  	// IptablesLockfileEnv is the version where XTABLES_LOCKFILE is added to iptables.
    48  	IptablesLockfileEnv = utilversion.MustParseGeneric("1.8.6")
    49  )
    50  
    51  func shouldUseBinaryForCurrentContext(iptablesBin string) (IptablesVersion, error) {
    52  	// We assume that whatever `iptablesXXX` binary you pass us also has a `iptablesXXX-save` and `iptablesXXX-restore`
    53  	// binary - which should always be true for any valid iptables installation
    54  	// (we use both in our iptables code later on anyway)
    55  	//
    56  	// We could explicitly check for all 3 every time to be sure, but that's likely not necessary,
    57  	// if we find one unless the host OS is badly broken we will find the others.
    58  	iptablesSaveBin := fmt.Sprintf("%s-save", iptablesBin)
    59  	iptablesRestoreBin := fmt.Sprintf("%s-restore", iptablesBin)
    60  	var parsedVer *utilversion.Version
    61  	var isNft bool
    62  	// does the "xx-save" binary exist?
    63  	rulesDump, binExistsErr := exec.Command(iptablesSaveBin).CombinedOutput()
    64  	if binExistsErr != nil {
    65  		return IptablesVersion{}, fmt.Errorf("binary %s not found in path: %w", iptablesSaveBin, binExistsErr)
    66  	}
    67  
    68  	// Binary is there, so try to parse version
    69  	verCmd := exec.Command(iptablesSaveBin, "--version")
    70  	// shockingly, `iptables-save` returns 0 if you pass it an unrecognized/bad option, so
    71  	// `os/exec` will return a *nil* error, even if the command fails. So, we must slurp stderr, and check it to
    72  	// see if the command *actually* failed due to not recognizing the version flag.
    73  	var verStdOut bytes.Buffer
    74  	var verStdErr bytes.Buffer
    75  	verCmd.Stdout = &verStdOut
    76  	verCmd.Stderr = &verStdErr
    77  
    78  	verExec := verCmd.Run()
    79  	if verExec == nil && !strings.Contains(verStdErr.String(), "unrecognized option") {
    80  		var parseErr error
    81  		// we found the binary - extract the version, then try to detect if rules already exist for that variant
    82  		parsedVer, parseErr = parseIptablesVer(verStdOut.String())
    83  		if parseErr != nil {
    84  			return IptablesVersion{}, fmt.Errorf("iptables version %q is not a valid version string: %v", verStdOut.Bytes(), parseErr)
    85  		}
    86  		// Legacy will have no marking or 'legacy', so just look for nf_tables
    87  		isNft = strings.Contains(verStdOut.String(), "nf_tables")
    88  	} else {
    89  		log.Warnf("found iptables binary %s, but it does not appear to support the '--version' flag, assuming very old legacy version", iptablesSaveBin)
    90  		// Some really old iptables-legacy-save versions (1.6.1, ubuntu bionic) don't support any arguments at all, including `--version`
    91  		// So if we get here, we found `iptables-save` in PATH, but it's too outdated to understand `--version`.
    92  		//
    93  		// We can eventually remove this.
    94  		//
    95  		// So assume it's legacy/an unknown version, but assume we can use it since it's in PATH
    96  		parsedVer = utilversion.MustParseGeneric("0.0.0")
    97  		isNft = false
    98  	}
    99  
   100  	// if binary seems to exist, check the dump of rules in our netns, and see if any rules exist there
   101  	// Note that this is highly dependent on context.
   102  	// new pod netns? probably no rules. Hostnetns? probably rules
   103  	// So this is mostly just a "hint"/heuristic as to which version we should be using, if more than one binary is present.
   104  	// `xx-save` should return _no_ output (0 lines) if no rules are defined in this netns for that binary variant.
   105  	// `xx-save` should return at least 3 output lines if at least one rule is defined in this netns for that binary variant.
   106  	existingRules := false
   107  	if strings.Count(string(rulesDump), "\n") >= 3 {
   108  		existingRules = true
   109  		log.Debugf("found existing rules for %s", iptablesSaveBin)
   110  	}
   111  	return IptablesVersion{
   112  		DetectedBinary:        iptablesBin,
   113  		DetectedSaveBinary:    iptablesSaveBin,
   114  		DetectedRestoreBinary: iptablesRestoreBin,
   115  		Version:               parsedVer,
   116  		Legacy:                !isNft,
   117  		ExistingRules:         existingRules,
   118  	}, nil
   119  }
   120  
   121  // runInSandbox builds a lightweight sandbox ("container") to build a suitable environment to run iptables commands in.
   122  // This is used in CNI, where commands are executed from the host but from within the container network namespace.
   123  // This puts us in somewhat unconventionally territory.
   124  func runInSandbox(lockFile string, f func() error) error {
   125  	chErr := make(chan error, 1)
   126  	n, nerr := netns.GetCurrentNS()
   127  	if nerr != nil {
   128  		return fmt.Errorf("failed to get current namespace: %v", nerr)
   129  	}
   130  	// setupSandbox builds the sandbox.
   131  	setupSandbox := func() error {
   132  		// First, unshare the mount namespace. This allows us to create custom mounts without impacting the host
   133  		if err := unix.Unshare(unix.CLONE_NEWNS); err != nil {
   134  			return fmt.Errorf("failed to unshare to new mount namespace: %v", err)
   135  		}
   136  		if err := n.Set(); err != nil {
   137  			return fmt.Errorf("failed to reset network namespace: %v", err)
   138  		}
   139  		// Remount / as a private mount so that our mounts do not impact outside the namespace
   140  		// (see https://unix.stackexchange.com/questions/246312/why-is-my-bind-mount-visible-outside-its-mount-namespace).
   141  		if err := unix.Mount("", "/", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil {
   142  			return fmt.Errorf("failed to remount /: %v", err)
   143  		}
   144  		// In CNI, we are running the pod network namespace, but the host filesystem. Locking the host is both useless and harmful,
   145  		// as it opens the risk of lock contention with other node actors (such as kube-proxy), and isn't actually needed at all.
   146  		// Older iptables cannot turn off the lock explicitly, so we hack around it...
   147  		// Overwrite the lock file with the network namespace file (which is assumed to be unique).
   148  		// We are setting the lockfile to `r.NetworkNamespace`.
   149  		// /dev/null looks like a good option, but actually doesn't work (it will ensure only one actor can access it)
   150  		if lockFile != "" {
   151  			if err := mount(lockFile, "/run/xtables.lock"); err != nil {
   152  				return fmt.Errorf("bind mount of %q failed: %v", lockFile, err)
   153  			}
   154  		}
   155  
   156  		// In some setups, iptables can make remote network calls(!!). Since these come from a partially initialized pod network namespace,
   157  		// these calls can be blocked (or NetworkPolicy, etc could block them anyways).
   158  		// This is triggered by NSS, which allows various things to use arbitrary code to lookup configuration that typically comes from files.
   159  		// In our case, the culprit is the `xt_owner` (`-m owner`) module in iptables calls the `passwd` service to lookup the user.
   160  		// To disallow this, bindmount /dev/null over nsswitch.conf so this never happens.
   161  		// This should be safe to do, even if the user has an nsswitch entry that would work fine: we always use a numeric ID
   162  		// so the passwd lookup doesn't need to succeed at all for Istio to function.
   163  		// Effectively, we want a mini-container. In fact, running in a real container would be ideal but it is hard to do portably.
   164  		// See https://github.com/istio/istio/issues/48416 for a real world example of this case.
   165  		if err := mount("/dev/null", "/etc/nsswitch.conf"); err != nil {
   166  			return fmt.Errorf("bind mount to %q failed: %v", "/etc/nsswitch.conf", err)
   167  		}
   168  		return nil
   169  	}
   170  
   171  	executed := false
   172  	// Once we call unshare(CLONE_NEWNS), we cannot undo it explicitly. Instead, we need to unshare on a specific thread,
   173  	// then kill that thread when we are done (or rather, let Go runtime kill the thread).
   174  	// Unfortunately, making a new thread breaks us out of the network namespace we entered previously, so we need to restore that as well
   175  	go func() {
   176  		chErr <- func() error {
   177  			// We now have exclusive access to this thread. Once the goroutine exits without calling UnlockOSThread, the go runtime will kill the thread for us
   178  			// Warning: Do not call UnlockOSThread! Notably, netns.Do does call this.
   179  			runtime.LockOSThread()
   180  			if err := setupSandbox(); err != nil {
   181  				return err
   182  			}
   183  			// Mark we have actually run the command. This lets us distinguish from a failure in setupSandbox() vs f()
   184  			executed = true
   185  			return f()
   186  		}()
   187  	}()
   188  	err := <-chErr
   189  	if err != nil && !executed {
   190  		// We failed to setup the environment. Now we go into best effort mode.
   191  		// Users running into this may have IPTables lock used unexpectedly or make unexpected NSS calls.
   192  		// This is to support environments with restrictive access (from SELinux, but possibly others) that block these calls
   193  		// See https://github.com/istio/istio/issues/48746
   194  		log.Warnf("failed to setup execution environment, attempting to continue anyways: %v", err)
   195  		// Try to execute as-is
   196  		return f()
   197  	}
   198  	// Otherwise, we did execute; return the error from that execution.
   199  	return err
   200  }
   201  
   202  func mount(src, dst string) error {
   203  	return syscall.Mount(src, dst, "", syscall.MS_BIND|syscall.MS_RDONLY, "")
   204  }
   205  
   206  func (r *RealDependencies) executeXTables(cmd constants.IptablesCmd, iptVer *IptablesVersion, ignoreErrors bool, stdin io.ReadSeeker, args ...string) error {
   207  	mode := "without lock"
   208  	cmdBin := iptVer.CmdToString(cmd)
   209  	if cmdBin == "" {
   210  		return fmt.Errorf("called without iptables binary, cannot execute!: %+v", iptVer)
   211  	}
   212  	var c *exec.Cmd
   213  	needLock := iptVer.IsWriteCmd(cmd) && !iptVer.NoLocks()
   214  	run := func(c *exec.Cmd) error {
   215  		return c.Run()
   216  	}
   217  	if r.CNIMode {
   218  		c = exec.Command(cmdBin, args...)
   219  		// In CNI, we are running the pod network namespace, but the host filesystem, so we need to do some tricks
   220  		// Call our binary again, but with <original binary> "unshare (subcommand to trigger mounts)" --lock-file=<network namespace> <original command...>
   221  		// We do not shell out and call `mount` since this and sh are not available on all systems
   222  		var lockFile string
   223  		if needLock {
   224  			if iptVer.Version.LessThan(IptablesLockfileEnv) {
   225  				mode = "without lock by mount and nss"
   226  				lockFile = r.NetworkNamespace
   227  			} else {
   228  				mode = "without lock by env and nss"
   229  				c.Env = append(c.Env, "XTABLES_LOCKFILE="+r.NetworkNamespace)
   230  			}
   231  		} else {
   232  			mode = "without nss"
   233  		}
   234  
   235  		run = func(c *exec.Cmd) error {
   236  			return runInSandbox(lockFile, func() error {
   237  				return c.Run()
   238  			})
   239  		}
   240  	} else {
   241  		if needLock {
   242  			// We want the lock. Wait up to 30s for it.
   243  			args = append(args, "--wait=30")
   244  			c = exec.Command(cmdBin, args...)
   245  			log.Debugf("running with lock")
   246  			mode = "with wait lock"
   247  		} else {
   248  			// No locking supported/needed, just run as is. Nothing special
   249  			c = exec.Command(cmdBin, args...)
   250  		}
   251  	}
   252  
   253  	log.Infof("Running command (%s): %s %s", mode, cmdBin, strings.Join(args, " "))
   254  	stdout := &bytes.Buffer{}
   255  	stderr := &bytes.Buffer{}
   256  	c.Stdout = stdout
   257  	c.Stderr = stderr
   258  	c.Stdin = stdin
   259  	err := run(c)
   260  	if len(stdout.String()) != 0 {
   261  		log.Infof("Command output: \n%v", stdout.String())
   262  	}
   263  
   264  	// TODO Check naming and redirection logic
   265  	if (err != nil || len(stderr.String()) != 0) && !ignoreErrors {
   266  		stderrStr := stderr.String()
   267  
   268  		// Transform to xtables-specific error messages with more useful and actionable hints.
   269  		if err != nil {
   270  			stderrStr = transformToXTablesErrorMessage(stderrStr, err)
   271  		}
   272  
   273  		log.Errorf("Command error output: %v", stderrStr)
   274  	}
   275  
   276  	return err
   277  }