github.com/cilium/cilium@v1.16.2/pkg/datapath/linux/sysctl/sysctl.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package sysctl
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"path/filepath"
    12  	"regexp"
    13  	"strconv"
    14  	"strings"
    15  
    16  	"github.com/spf13/afero"
    17  
    18  	"github.com/cilium/statedb"
    19  	"github.com/cilium/statedb/reconciler"
    20  
    21  	"github.com/cilium/cilium/pkg/datapath/tables"
    22  	"github.com/cilium/cilium/pkg/safeio"
    23  	"github.com/cilium/cilium/pkg/time"
    24  )
    25  
// reconciliationTimeout is the maximum time available for reconciling
// sysctl kernel parameters with the desired state in statedb[Sysctl].
// It is the duration of the timer armed by each waitForReconciliation call,
// so it bounds the wait for a single parameter.
const reconciliationTimeout = time.Second
    29  
// Sysctl reads and writes kernel sysctl parameters.
//
// A parameter is identified by the elements of its name, which are joined
// into a file path below "<procFs>/sys" (for example
// []string{"net", "ipv4", "ip_forward"}).
type Sysctl interface {
	// Disable disables the given sysctl parameter.
	// It blocks until the parameter has actually been set to "0",
	// or times out after reconciliationTimeout.
	Disable(name []string) error

	// Enable enables the given sysctl parameter.
	// It blocks until the parameter has actually been set to "1",
	// or times out after reconciliationTimeout.
	Enable(name []string) error

	// Write writes the given sysctl parameter.
	// It blocks until the parameter has actually been set to val,
	// or times out after reconciliationTimeout.
	Write(name []string, val string) error

	// WriteInt writes the given integer type sysctl parameter.
	// It blocks until the parameter has actually been set to val,
	// or times out after reconciliationTimeout.
	WriteInt(name []string, val int64) error

	// ApplySettings applies all settings in sysSettings.
	// After applying all settings, it blocks until the parameters have been
	// reconciled, or times out after reconciliationTimeout.
	ApplySettings(sysSettings []tables.Sysctl) error

	// Read reads the given sysctl parameter and returns its value as a string.
	Read(name []string) (string, error)

	// ReadInt reads the given sysctl parameter and returns its value
	// parsed as an int64.
	ReadInt(name []string) (int64, error)
}
    62  
// reconcilingSysctl is a Sysctl implementation that uses reconciliation to
// ensure that the desired state is applied. It is the preferred implementation
// for any binary with hive infrastructure.
type reconcilingSysctl struct {
	// db is the statedb database used to open read and write transactions.
	db *statedb.DB
	// settings is the table into which the desired sysctl values are
	// inserted with a pending status.
	settings statedb.RWTable[*tables.Sysctl]

	// fs is the filesystem through which parameter files are read.
	fs afero.Fs
	// procFs is the root under which "sys/<name...>" parameter paths are built.
	procFs string
}
    73  
    74  func newReconcilingSysctl(
    75  	db *statedb.DB,
    76  	settings statedb.RWTable[*tables.Sysctl],
    77  	cfg Config,
    78  	fs afero.Fs,
    79  	_ reconciler.Reconciler[*tables.Sysctl], // needed to enforce the correct hive ordering
    80  ) Sysctl {
    81  	db.RegisterTable(settings)
    82  	return &reconcilingSysctl{db, settings, fs, cfg.ProcFs}
    83  }
    84  
    85  func (sysctl *reconcilingSysctl) Disable(name []string) error {
    86  	txn := sysctl.db.WriteTxn(sysctl.settings)
    87  	_, _, _ = sysctl.settings.Insert(txn, &tables.Sysctl{
    88  		Name:   name,
    89  		Val:    "0",
    90  		Status: reconciler.StatusPending(),
    91  	})
    92  	txn.Commit()
    93  
    94  	return sysctl.waitForReconciliation(name)
    95  }
    96  
    97  func (sysctl *reconcilingSysctl) Enable(name []string) error {
    98  	txn := sysctl.db.WriteTxn(sysctl.settings)
    99  	_, _, _ = sysctl.settings.Insert(txn, &tables.Sysctl{
   100  		Name:   name,
   101  		Val:    "1",
   102  		Status: reconciler.StatusPending(),
   103  	})
   104  	txn.Commit()
   105  
   106  	return sysctl.waitForReconciliation(name)
   107  }
   108  
   109  func (sysctl *reconcilingSysctl) Write(name []string, val string) error {
   110  	txn := sysctl.db.WriteTxn(sysctl.settings)
   111  	_, _, _ = sysctl.settings.Insert(txn, &tables.Sysctl{
   112  		Name:   name,
   113  		Val:    val,
   114  		Status: reconciler.StatusPending(),
   115  	})
   116  	txn.Commit()
   117  
   118  	return sysctl.waitForReconciliation(name)
   119  }
   120  
   121  func (sysctl *reconcilingSysctl) WriteInt(name []string, val int64) error {
   122  	txn := sysctl.db.WriteTxn(sysctl.settings)
   123  	_, _, _ = sysctl.settings.Insert(txn, &tables.Sysctl{
   124  		Name:   name,
   125  		Val:    strconv.FormatInt(val, 10),
   126  		Status: reconciler.StatusPending(),
   127  	})
   128  	txn.Commit()
   129  
   130  	return sysctl.waitForReconciliation(name)
   131  }
   132  
   133  func (sysctl *reconcilingSysctl) ApplySettings(sysSettings []tables.Sysctl) error {
   134  	txn := sysctl.db.WriteTxn(sysctl.settings)
   135  	for _, s := range sysSettings {
   136  		_, _, _ = sysctl.settings.Insert(txn, s.Clone().SetStatus(reconciler.StatusPending()))
   137  	}
   138  	txn.Commit()
   139  
   140  	var errs []error
   141  	for _, s := range sysSettings {
   142  		errs = append(errs, sysctl.waitForReconciliation(s.Name))
   143  	}
   144  
   145  	return errors.Join(errs...)
   146  }
   147  
   148  func (sysctl *reconcilingSysctl) Read(name []string) (string, error) {
   149  	path, err := parameterPath(sysctl.procFs, name)
   150  	if err != nil {
   151  		return "", err
   152  	}
   153  
   154  	val, err := readSysctl(sysctl.fs, path)
   155  	if err != nil {
   156  		return "", err
   157  	}
   158  
   159  	return val, nil
   160  }
   161  
   162  func (sysctl *reconcilingSysctl) ReadInt(name []string) (int64, error) {
   163  	val, err := sysctl.Read(name)
   164  	if err != nil {
   165  		return -1, err
   166  	}
   167  
   168  	v, err := strconv.ParseInt(val, 10, 64)
   169  	if err != nil {
   170  		return -1, err
   171  	}
   172  
   173  	return v, nil
   174  }
   175  
// directSysctl is a Sysctl implementation that reads and writes the
// parameter files directly through fs, without any reconciliation.
type directSysctl struct {
	// fs is the filesystem used to open the parameter files.
	fs afero.Fs
	// procFs is the root under which "sys/<name...>" parameter paths are built.
	procFs string
}
   180  
   181  // NewDirectSysctl creates a Sysctl implementation that directly interacts with the given `fs`.
   182  // It doesn't reconcile and should only be used when using the reconciling variant is not available.
   183  func NewDirectSysctl(fs afero.Fs, procFs string) Sysctl {
   184  	return &directSysctl{fs, procFs}
   185  }
   186  
   187  func (ay *directSysctl) Disable(name []string) error {
   188  	return ay.WriteInt(name, 0)
   189  }
   190  
   191  func (ay *directSysctl) Enable(name []string) error {
   192  	return ay.WriteInt(name, 1)
   193  }
   194  
   195  func (ay *directSysctl) Write(name []string, value string) error {
   196  	path, err := parameterPath(ay.procFs, name)
   197  	if err != nil {
   198  		return err
   199  	}
   200  
   201  	// Check if the value is already set to the desired value.
   202  	val, err := ay.Read(name)
   203  	if err != nil {
   204  		return fmt.Errorf("could not read the sysctl file %s: %w", path, err)
   205  	}
   206  	// If the value is already set, return.
   207  	if strings.TrimRight(string(val), "\n") == value {
   208  		return nil
   209  	}
   210  
   211  	f, err := ay.fs.OpenFile(path, os.O_RDWR, 0644)
   212  	if err != nil {
   213  		return fmt.Errorf("could not open the sysctl file %s: %w", path, err)
   214  	}
   215  	defer f.Close()
   216  
   217  	if _, err := io.WriteString(f, value); err != nil {
   218  		return fmt.Errorf("could not write to the sysctl file %s: %w",
   219  			path, err)
   220  	}
   221  	return nil
   222  }
   223  
   224  func (ay *directSysctl) WriteInt(name []string, val int64) error {
   225  	return ay.Write(name, strconv.FormatInt(val, 10))
   226  }
   227  
   228  func (ay *directSysctl) ApplySettings(sysSettings []tables.Sysctl) error {
   229  	for _, s := range sysSettings {
   230  		if err := ay.Write(s.Name, s.Val); err != nil {
   231  			return err
   232  		}
   233  	}
   234  
   235  	return nil
   236  }
   237  
   238  func (ay *directSysctl) Read(name []string) (string, error) {
   239  	path, err := parameterPath(ay.procFs, name)
   240  	if err != nil {
   241  		return "", err
   242  	}
   243  
   244  	f, err := ay.fs.Open(path)
   245  	if err != nil {
   246  		return "", fmt.Errorf("could not open the sysctl file %s: %w", path, err)
   247  	}
   248  	defer f.Close()
   249  
   250  	val, err := safeio.ReadAllLimit(f, safeio.KB)
   251  	if err != nil {
   252  		return "", fmt.Errorf("could not read the systctl file %s: %w", path, err)
   253  	}
   254  
   255  	return strings.TrimRight(string(val), "\n"), nil
   256  }
   257  
   258  func (ay *directSysctl) ReadInt(name []string) (int64, error) {
   259  	val, err := ay.Read(name)
   260  	if err != nil {
   261  		return -1, err
   262  	}
   263  
   264  	v, err := strconv.ParseInt(val, 10, 64)
   265  	if err != nil {
   266  		return -1, err
   267  	}
   268  
   269  	return v, nil
   270  }
   271  
   272  // parameterElemRx matches an element of a sysctl parameter.
   273  var parameterElemRx = regexp.MustCompile(`(?i)\A[-0-9_a-z\.]+\z`)
   274  
   275  // parameterPath returns the path to the sysctl file for parameter name.
   276  //
   277  // It should by used directly only by binaries that do not rely on the
   278  // hive and cells framework, like cilium-cni and cilium-health.
   279  func parameterPath(procFs string, name []string) (string, error) {
   280  	for _, elem := range name {
   281  		if !parameterElemRx.MatchString(elem) {
   282  			return "", fmt.Errorf("invalid sysctl parameter: %q", strings.Join(name, "."))
   283  		}
   284  	}
   285  	return filepath.Join(append([]string{procFs, "sys"}, name...)...), nil
   286  }
   287  
   288  // writeSysctl writes a value in a sysctl parameter loacated at path.
   289  //
   290  // It should by used directly only by binaries that do not rely on the
   291  // hive and cells framework, like cilium-cni and cilium-health.
   292  func writeSysctl(fs afero.Fs, path, value string) error {
   293  	f, err := fs.OpenFile(path, os.O_RDWR, 0644)
   294  	if err != nil {
   295  		return fmt.Errorf("could not open the sysctl file %s: %w", path, err)
   296  	}
   297  	defer f.Close()
   298  
   299  	if _, err := io.WriteString(f, value); err != nil {
   300  		return fmt.Errorf("could not write to the sysctl file %s: %w",
   301  			path, err)
   302  	}
   303  	return nil
   304  }
   305  
   306  // readSysctl reads a value from a sysctl parameter located at path.
   307  //
   308  // It should by used directly only by binaries that do not rely on the
   309  // hive and cells framework, like cilium-cni and cilium-health.
   310  func readSysctl(fs afero.Fs, path string) (string, error) {
   311  	f, err := fs.Open(path)
   312  	if err != nil {
   313  		return "", fmt.Errorf("could not open the sysctl file %s: %w", path, err)
   314  	}
   315  	defer f.Close()
   316  
   317  	val, err := safeio.ReadAllLimit(f, safeio.KB)
   318  	if err != nil {
   319  		return "", fmt.Errorf("could not read the systctl file %s: %w", path, err)
   320  	}
   321  
   322  	return strings.TrimRight(string(val), "\n"), nil
   323  }
   324  
   325  func (sysctl *reconcilingSysctl) waitForReconciliation(name []string) error {
   326  	t := time.NewTimer(reconciliationTimeout)
   327  	defer t.Stop()
   328  
   329  	var err error
   330  	for {
   331  		obj, _, watch, _ := sysctl.settings.GetWatch(sysctl.db.ReadTxn(), tables.SysctlNameIndex.Query(strings.Join(name, ".")))
   332  		if obj.Status.Kind == reconciler.StatusKindDone {
   333  			// already reconciled
   334  			return nil
   335  		}
   336  
   337  		select {
   338  		case <-t.C:
   339  			return fmt.Errorf("timeout waiting for parameter %s reconciliation: %w", name, err)
   340  		case <-watch:
   341  			if obj.Status.Kind == reconciler.StatusKindDone {
   342  				return nil
   343  			}
   344  			if obj.Status.Kind == reconciler.StatusKindError {
   345  				err = errors.New(obj.Status.Error)
   346  			}
   347  		}
   348  	}
   349  }