github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/notify_socket.go (about)

     1  package main
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"io"
     7  	"net"
     8  	"os"
     9  	"path"
    10  	"path/filepath"
    11  	"strconv"
    12  	"time"
    13  
    14  	"github.com/opencontainers/runc/libcontainer"
    15  	"github.com/opencontainers/runtime-spec/specs-go"
    16  	"github.com/sirupsen/logrus"
    17  	"github.com/urfave/cli"
    18  	"golang.org/x/sys/unix"
    19  )
    20  
    21  type notifySocket struct {
    22  	socket     *net.UnixConn
    23  	host       string
    24  	socketPath string
    25  }
    26  
    27  func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *notifySocket {
    28  	if notifySocketHost == "" {
    29  		return nil
    30  	}
    31  
    32  	root := filepath.Join(context.GlobalString("root"), id)
    33  	socketPath := filepath.Join(root, "notify", "notify.sock")
    34  
    35  	notifySocket := &notifySocket{
    36  		socket:     nil,
    37  		host:       notifySocketHost,
    38  		socketPath: socketPath,
    39  	}
    40  
    41  	return notifySocket
    42  }
    43  
    44  func (s *notifySocket) Close() error {
    45  	return s.socket.Close()
    46  }
    47  
    48  // If systemd is supporting sd_notify protocol, this function will add support
    49  // for sd_notify protocol from within the container.
    50  func (s *notifySocket) setupSpec(spec *specs.Spec) {
    51  	pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath))
    52  	mount := specs.Mount{
    53  		Destination: path.Dir(pathInContainer),
    54  		Source:      path.Dir(s.socketPath),
    55  		Options:     []string{"bind", "nosuid", "noexec", "nodev", "ro"},
    56  	}
    57  	spec.Mounts = append(spec.Mounts, mount)
    58  	spec.Process.Env = append(spec.Process.Env, "NOTIFY_SOCKET="+pathInContainer)
    59  }
    60  
    61  func (s *notifySocket) bindSocket() error {
    62  	addr := net.UnixAddr{
    63  		Name: s.socketPath,
    64  		Net:  "unixgram",
    65  	}
    66  
    67  	socket, err := net.ListenUnixgram("unixgram", &addr)
    68  	if err != nil {
    69  		return err
    70  	}
    71  
    72  	err = os.Chmod(s.socketPath, 0o777)
    73  	if err != nil {
    74  		socket.Close()
    75  		return err
    76  	}
    77  
    78  	s.socket = socket
    79  	return nil
    80  }
    81  
    82  func (s *notifySocket) setupSocketDirectory() error {
    83  	return os.Mkdir(path.Dir(s.socketPath), 0o755)
    84  }
    85  
    86  func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) {
    87  	notifySocket := newNotifySocket(context, notifySocketHost, id)
    88  	if notifySocket == nil {
    89  		return nil, nil
    90  	}
    91  
    92  	if err := notifySocket.bindSocket(); err != nil {
    93  		return nil, err
    94  	}
    95  	return notifySocket, nil
    96  }
    97  
    98  func (s *notifySocket) waitForContainer(container *libcontainer.Container) error {
    99  	state, err := container.State()
   100  	if err != nil {
   101  		return err
   102  	}
   103  	return s.run(state.InitProcessPid)
   104  }
   105  
   106  func (n *notifySocket) run(pid1 int) error {
   107  	if n.socket == nil {
   108  		return nil
   109  	}
   110  	notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}
   111  	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
   112  	if err != nil {
   113  		return err
   114  	}
   115  
   116  	ticker := time.NewTicker(time.Millisecond * 100)
   117  	defer ticker.Stop()
   118  
   119  	fileChan := make(chan []byte)
   120  	go func() {
   121  		for {
   122  			buf := make([]byte, 4096)
   123  			r, err := n.socket.Read(buf)
   124  			if err != nil {
   125  				return
   126  			}
   127  			got := buf[0:r]
   128  			// systemd-ready sends a single datagram with the state string as payload,
   129  			// so we don't need to worry about partial messages.
   130  			for _, line := range bytes.Split(got, []byte{'\n'}) {
   131  				if bytes.HasPrefix(got, []byte("READY=")) {
   132  					fileChan <- line
   133  					return
   134  				}
   135  			}
   136  
   137  		}
   138  	}()
   139  
   140  	for {
   141  		select {
   142  		case <-ticker.C:
   143  			_, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))
   144  			if err != nil {
   145  				return nil
   146  			}
   147  		case b := <-fileChan:
   148  			return notifyHost(client, b, pid1)
   149  		}
   150  	}
   151  }
   152  
   153  // notifyHost tells the host (usually systemd) that the container reported READY.
   154  // Also sends MAINPID and BARRIER.
   155  func notifyHost(client *net.UnixConn, ready []byte, pid1 int) error {
   156  	_, err := client.Write(append(ready, '\n'))
   157  	if err != nil {
   158  		return err
   159  	}
   160  
   161  	// now we can inform systemd to use pid1 as the pid to monitor
   162  	newPid := "MAINPID=" + strconv.Itoa(pid1)
   163  	_, err = client.Write([]byte(newPid + "\n"))
   164  	if err != nil {
   165  		return err
   166  	}
   167  
   168  	// wait for systemd to acknowledge the communication
   169  	return sdNotifyBarrier(client)
   170  }
   171  
   172  // errUnexpectedRead is reported when actual data was read from the pipe used
   173  // to synchronize with systemd. Usually, that pipe is only closed.
   174  var errUnexpectedRead = errors.New("unexpected read from synchronization pipe")
   175  
   176  // sdNotifyBarrier performs synchronization with systemd by means of the sd_notify_barrier protocol.
   177  func sdNotifyBarrier(client *net.UnixConn) error {
   178  	// Create a pipe for communicating with systemd daemon.
   179  	pipeR, pipeW, err := os.Pipe()
   180  	if err != nil {
   181  		return err
   182  	}
   183  
   184  	// Get the FD for the unix socket file to be able to do perform syscall.Sendmsg.
   185  	clientFd, err := client.File()
   186  	if err != nil {
   187  		return err
   188  	}
   189  
   190  	// Send the write end of the pipe along with a BARRIER=1 message.
   191  	fdRights := unix.UnixRights(int(pipeW.Fd()))
   192  	err = unix.Sendmsg(int(clientFd.Fd()), []byte("BARRIER=1"), fdRights, nil, 0)
   193  	if err != nil {
   194  		return &os.SyscallError{Syscall: "sendmsg", Err: err}
   195  	}
   196  
   197  	// Close our copy of pipeW.
   198  	err = pipeW.Close()
   199  	if err != nil {
   200  		return err
   201  	}
   202  
   203  	// Expect the read end of the pipe to be closed after 30 seconds.
   204  	err = pipeR.SetReadDeadline(time.Now().Add(30 * time.Second))
   205  	if err != nil {
   206  		return nil
   207  	}
   208  
   209  	// Read a single byte expecting EOF.
   210  	var buf [1]byte
   211  	n, err := pipeR.Read(buf[:])
   212  	if n != 0 || err == nil {
   213  		return errUnexpectedRead
   214  	} else if errors.Is(err, os.ErrDeadlineExceeded) {
   215  		// Probably the other end doesn't support the sd_notify_barrier protocol.
   216  		logrus.Warn("Timeout after waiting 30s for barrier. Ignored.")
   217  		return nil
   218  	} else if err == io.EOF { //nolint:errorlint // https://github.com/polyfloyd/go-errorlint/issues/49
   219  		return nil
   220  	} else {
   221  		return err
   222  	}
   223  }