github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/notify_socket.go (about) 1 package main 2 3 import ( 4 "bytes" 5 "errors" 6 "io" 7 "net" 8 "os" 9 "path" 10 "path/filepath" 11 "strconv" 12 "time" 13 14 "github.com/opencontainers/runc/libcontainer" 15 "github.com/opencontainers/runtime-spec/specs-go" 16 "github.com/sirupsen/logrus" 17 "github.com/urfave/cli" 18 "golang.org/x/sys/unix" 19 ) 20 21 type notifySocket struct { 22 socket *net.UnixConn 23 host string 24 socketPath string 25 } 26 27 func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *notifySocket { 28 if notifySocketHost == "" { 29 return nil 30 } 31 32 root := filepath.Join(context.GlobalString("root"), id) 33 socketPath := filepath.Join(root, "notify", "notify.sock") 34 35 notifySocket := ¬ifySocket{ 36 socket: nil, 37 host: notifySocketHost, 38 socketPath: socketPath, 39 } 40 41 return notifySocket 42 } 43 44 func (s *notifySocket) Close() error { 45 return s.socket.Close() 46 } 47 48 // If systemd is supporting sd_notify protocol, this function will add support 49 // for sd_notify protocol from within the container. 50 func (s *notifySocket) setupSpec(spec *specs.Spec) { 51 pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath)) 52 mount := specs.Mount{ 53 Destination: path.Dir(pathInContainer), 54 Source: path.Dir(s.socketPath), 55 Options: []string{"bind", "nosuid", "noexec", "nodev", "ro"}, 56 } 57 spec.Mounts = append(spec.Mounts, mount) 58 spec.Process.Env = append(spec.Process.Env, "NOTIFY_SOCKET="+pathInContainer) 59 } 60 61 func (s *notifySocket) bindSocket() error { 62 addr := net.UnixAddr{ 63 Name: s.socketPath, 64 Net: "unixgram", 65 } 66 67 socket, err := net.ListenUnixgram("unixgram", &addr) 68 if err != nil { 69 return err 70 } 71 72 err = os.Chmod(s.socketPath, 0o777) 73 if err != nil { 74 socket.Close() 75 return err 76 } 77 78 s.socket = socket 79 return nil 80 } 81 82 func (s *notifySocket) setupSocketDirectory() error { 83 return os.Mkdir(path.Dir(s.socketPath), 0o755) 84 } 85 86 func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) { 87 notifySocket := newNotifySocket(context, notifySocketHost, id) 88 if notifySocket == nil { 89 return nil, nil 90 } 91 92 if err := notifySocket.bindSocket(); err != nil { 93 return nil, err 94 } 95 return notifySocket, nil 96 } 97 98 func (s *notifySocket) waitForContainer(container *libcontainer.Container) error { 99 state, err := container.State() 100 if err != nil { 101 return err 102 } 103 return s.run(state.InitProcessPid) 104 } 105 106 func (n *notifySocket) run(pid1 int) error { 107 if n.socket == nil { 108 return nil 109 } 110 notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"} 111 client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr) 112 if err != nil { 113 return err 114 } 115 116 ticker := time.NewTicker(time.Millisecond * 100) 117 defer ticker.Stop() 118 119 fileChan := make(chan []byte) 120 go func() { 121 for { 122 buf := make([]byte, 4096) 123 r, err := n.socket.Read(buf) 124 if err != nil { 125 return 126 } 127 got := buf[0:r] 128 // systemd-ready sends a single datagram with the state string as payload, 129 // so we don't need to worry about partial messages. 130 for _, line := range bytes.Split(got, []byte{'\n'}) { 131 if bytes.HasPrefix(got, []byte("READY=")) { 132 fileChan <- line 133 return 134 } 135 } 136 137 } 138 }() 139 140 for { 141 select { 142 case <-ticker.C: 143 _, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1))) 144 if err != nil { 145 return nil 146 } 147 case b := <-fileChan: 148 return notifyHost(client, b, pid1) 149 } 150 } 151 } 152 153 // notifyHost tells the host (usually systemd) that the container reported READY. 154 // Also sends MAINPID and BARRIER. 155 func notifyHost(client *net.UnixConn, ready []byte, pid1 int) error { 156 _, err := client.Write(append(ready, '\n')) 157 if err != nil { 158 return err 159 } 160 161 // now we can inform systemd to use pid1 as the pid to monitor 162 newPid := "MAINPID=" + strconv.Itoa(pid1) 163 _, err = client.Write([]byte(newPid + "\n")) 164 if err != nil { 165 return err 166 } 167 168 // wait for systemd to acknowledge the communication 169 return sdNotifyBarrier(client) 170 } 171 172 // errUnexpectedRead is reported when actual data was read from the pipe used 173 // to synchronize with systemd. Usually, that pipe is only closed. 174 var errUnexpectedRead = errors.New("unexpected read from synchronization pipe") 175 176 // sdNotifyBarrier performs synchronization with systemd by means of the sd_notify_barrier protocol. 177 func sdNotifyBarrier(client *net.UnixConn) error { 178 // Create a pipe for communicating with systemd daemon. 179 pipeR, pipeW, err := os.Pipe() 180 if err != nil { 181 return err 182 } 183 184 // Get the FD for the unix socket file to be able to do perform syscall.Sendmsg. 185 clientFd, err := client.File() 186 if err != nil { 187 return err 188 } 189 190 // Send the write end of the pipe along with a BARRIER=1 message. 191 fdRights := unix.UnixRights(int(pipeW.Fd())) 192 err = unix.Sendmsg(int(clientFd.Fd()), []byte("BARRIER=1"), fdRights, nil, 0) 193 if err != nil { 194 return &os.SyscallError{Syscall: "sendmsg", Err: err} 195 } 196 197 // Close our copy of pipeW. 198 err = pipeW.Close() 199 if err != nil { 200 return err 201 } 202 203 // Expect the read end of the pipe to be closed after 30 seconds. 204 err = pipeR.SetReadDeadline(time.Now().Add(30 * time.Second)) 205 if err != nil { 206 return nil 207 } 208 209 // Read a single byte expecting EOF. 210 var buf [1]byte 211 n, err := pipeR.Read(buf[:]) 212 if n != 0 || err == nil { 213 return errUnexpectedRead 214 } else if errors.Is(err, os.ErrDeadlineExceeded) { 215 // Probably the other end doesn't support the sd_notify_barrier protocol. 216 logrus.Warn("Timeout after waiting 30s for barrier. Ignored.") 217 return nil 218 } else if err == io.EOF { //nolint:errorlint // https://github.com/polyfloyd/go-errorlint/issues/49 219 return nil 220 } else { 221 return err 222 } 223 }