github.com/sijibomii/docker@v0.0.0-20231230191044-5cf6ca554647/libcontainerd/remote_linux.go (about) 1 package libcontainerd 2 3 import ( 4 "fmt" 5 "io" 6 "io/ioutil" 7 "log" 8 "net" 9 "os" 10 "os/exec" 11 "path/filepath" 12 "strconv" 13 "strings" 14 "sync" 15 "syscall" 16 "time" 17 18 "github.com/Sirupsen/logrus" 19 containerd "github.com/docker/containerd/api/grpc/types" 20 "github.com/docker/docker/pkg/locker" 21 sysinfo "github.com/docker/docker/pkg/system" 22 "github.com/docker/docker/utils" 23 "golang.org/x/net/context" 24 "google.golang.org/grpc" 25 "google.golang.org/grpc/grpclog" 26 ) 27 28 const ( 29 maxConnectionRetryCount = 3 30 connectionRetryDelay = 3 * time.Second 31 containerdShutdownTimeout = 15 * time.Second 32 containerdBinary = "docker-containerd" 33 containerdPidFilename = "docker-containerd.pid" 34 containerdSockFilename = "docker-containerd.sock" 35 eventTimestampFilename = "event.ts" 36 ) 37 38 type remote struct { 39 sync.RWMutex 40 apiClient containerd.APIClient 41 daemonPid int 42 stateDir string 43 rpcAddr string 44 startDaemon bool 45 debugLog bool 46 rpcConn *grpc.ClientConn 47 clients []*client 48 eventTsPath string 49 pastEvents map[string]*containerd.Event 50 runtimeArgs []string 51 } 52 53 // New creates a fresh instance of libcontainerd remote. 54 func New(stateDir string, options ...RemoteOption) (_ Remote, err error) { 55 defer func() { 56 if err != nil { 57 err = fmt.Errorf("Failed to connect to containerd. Please make sure containerd is installed in your PATH or you have specificed the correct address. Got error: %v", err) 58 } 59 }() 60 r := &remote{ 61 stateDir: stateDir, 62 daemonPid: -1, 63 eventTsPath: filepath.Join(stateDir, eventTimestampFilename), 64 pastEvents: make(map[string]*containerd.Event), 65 } 66 for _, option := range options { 67 if err := option.Apply(r); err != nil { 68 return nil, err 69 } 70 } 71 72 if err := sysinfo.MkdirAll(stateDir, 0700); err != nil { 73 return nil, err 74 } 75 76 if r.rpcAddr == "" { 77 r.rpcAddr = filepath.Join(stateDir, containerdSockFilename) 78 } 79 80 if r.startDaemon { 81 if err := r.runContainerdDaemon(); err != nil { 82 return nil, err 83 } 84 } 85 86 // don't output the grpc reconnect logging 87 grpclog.SetLogger(log.New(ioutil.Discard, "", log.LstdFlags)) 88 dialOpts := append([]grpc.DialOption{grpc.WithInsecure()}, 89 grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) { 90 return net.DialTimeout("unix", addr, timeout) 91 }), 92 ) 93 conn, err := grpc.Dial(r.rpcAddr, dialOpts...) 94 if err != nil { 95 return nil, fmt.Errorf("error connecting to containerd: %v", err) 96 } 97 98 r.rpcConn = conn 99 r.apiClient = containerd.NewAPIClient(conn) 100 101 go r.handleConnectionChange() 102 103 if err := r.startEventsMonitor(); err != nil { 104 return nil, err 105 } 106 107 return r, nil 108 } 109 110 func (r *remote) handleConnectionChange() { 111 var transientFailureCount = 0 112 state := grpc.Idle 113 for { 114 s, err := r.rpcConn.WaitForStateChange(context.Background(), state) 115 if err != nil { 116 break 117 } 118 state = s 119 logrus.Debugf("containerd connection state change: %v", s) 120 121 if r.daemonPid != -1 { 122 switch state { 123 case grpc.TransientFailure: 124 // Reset state to be notified of next failure 125 transientFailureCount++ 126 if transientFailureCount >= maxConnectionRetryCount { 127 transientFailureCount = 0 128 if utils.IsProcessAlive(r.daemonPid) { 129 utils.KillProcess(r.daemonPid) 130 } 131 if err := r.runContainerdDaemon(); err != nil { //FIXME: Handle error 132 logrus.Errorf("error restarting containerd: %v", err) 133 } 134 } else { 135 state = grpc.Idle 136 time.Sleep(connectionRetryDelay) 137 } 138 case grpc.Shutdown: 139 // Well, we asked for it to stop, just return 140 return 141 } 142 } 143 } 144 } 145 146 func (r *remote) Cleanup() { 147 if r.daemonPid == -1 { 148 return 149 } 150 r.rpcConn.Close() 151 // Ask the daemon to quit 152 syscall.Kill(r.daemonPid, syscall.SIGTERM) 153 154 // Wait up to 15secs for it to stop 155 for i := time.Duration(0); i < containerdShutdownTimeout; i += time.Second { 156 if !utils.IsProcessAlive(r.daemonPid) { 157 break 158 } 159 time.Sleep(time.Second) 160 } 161 162 if utils.IsProcessAlive(r.daemonPid) { 163 logrus.Warnf("libcontainerd: containerd (%d) didn't stop within 15 secs, killing it\n", r.daemonPid) 164 syscall.Kill(r.daemonPid, syscall.SIGKILL) 165 } 166 167 // cleanup some files 168 os.Remove(filepath.Join(r.stateDir, containerdPidFilename)) 169 os.Remove(filepath.Join(r.stateDir, containerdSockFilename)) 170 } 171 172 func (r *remote) Client(b Backend) (Client, error) { 173 c := &client{ 174 clientCommon: clientCommon{ 175 backend: b, 176 containers: make(map[string]*container), 177 locker: locker.New(), 178 }, 179 remote: r, 180 exitNotifiers: make(map[string]*exitNotifier), 181 } 182 183 r.Lock() 184 r.clients = append(r.clients, c) 185 r.Unlock() 186 return c, nil 187 } 188 189 func (r *remote) updateEventTimestamp(t time.Time) { 190 f, err := os.OpenFile(r.eventTsPath, syscall.O_CREAT|syscall.O_WRONLY|syscall.O_TRUNC, 0600) 191 defer f.Close() 192 if err != nil { 193 logrus.Warnf("libcontainerd: failed to open event timestamp file: %v", err) 194 return 195 } 196 197 b, err := t.MarshalText() 198 if err != nil { 199 logrus.Warnf("libcontainerd: failed to encode timestamp: %v", err) 200 return 201 } 202 203 n, err := f.Write(b) 204 if err != nil || n != len(b) { 205 logrus.Warnf("libcontainerd: failed to update event timestamp file: %v", err) 206 f.Truncate(0) 207 return 208 } 209 210 } 211 212 func (r *remote) getLastEventTimestamp() int64 { 213 t := time.Now() 214 215 fi, err := os.Stat(r.eventTsPath) 216 if os.IsNotExist(err) || fi.Size() == 0 { 217 return t.Unix() 218 } 219 220 f, err := os.Open(r.eventTsPath) 221 defer f.Close() 222 if err != nil { 223 logrus.Warn("libcontainerd: Unable to access last event ts: %v", err) 224 return t.Unix() 225 } 226 227 b := make([]byte, fi.Size()) 228 n, err := f.Read(b) 229 if err != nil || n != len(b) { 230 logrus.Warn("libcontainerd: Unable to read last event ts: %v", err) 231 return t.Unix() 232 } 233 234 t.UnmarshalText(b) 235 236 return t.Unix() 237 } 238 239 func (r *remote) startEventsMonitor() error { 240 // First, get past events 241 er := &containerd.EventsRequest{ 242 Timestamp: uint64(r.getLastEventTimestamp()), 243 } 244 events, err := r.apiClient.Events(context.Background(), er) 245 if err != nil { 246 return err 247 } 248 go r.handleEventStream(events) 249 return nil 250 } 251 252 func (r *remote) handleEventStream(events containerd.API_EventsClient) { 253 live := false 254 for { 255 e, err := events.Recv() 256 if err != nil { 257 logrus.Errorf("failed to receive event from containerd: %v", err) 258 go r.startEventsMonitor() 259 return 260 } 261 262 if live == false { 263 logrus.Debugf("received past containerd event: %#v", e) 264 265 // Pause/Resume events should never happens after exit one 266 switch e.Type { 267 case StateExit: 268 r.pastEvents[e.Id] = e 269 case StatePause: 270 r.pastEvents[e.Id] = e 271 case StateResume: 272 r.pastEvents[e.Id] = e 273 case stateLive: 274 live = true 275 r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0)) 276 } 277 } else { 278 logrus.Debugf("received containerd event: %#v", e) 279 280 var container *container 281 var c *client 282 r.RLock() 283 for _, c = range r.clients { 284 container, err = c.getContainer(e.Id) 285 if err == nil { 286 break 287 } 288 } 289 r.RUnlock() 290 if container == nil { 291 logrus.Errorf("no state for container: %q", err) 292 continue 293 } 294 295 if err := container.handleEvent(e); err != nil { 296 logrus.Errorf("error processing state change for %s: %v", e.Id, err) 297 } 298 299 r.updateEventTimestamp(time.Unix(int64(e.Timestamp), 0)) 300 } 301 } 302 } 303 304 func (r *remote) runContainerdDaemon() error { 305 pidFilename := filepath.Join(r.stateDir, containerdPidFilename) 306 f, err := os.OpenFile(pidFilename, os.O_RDWR|os.O_CREATE, 0600) 307 defer f.Close() 308 if err != nil { 309 return err 310 } 311 312 // File exist, check if the daemon is alive 313 b := make([]byte, 8) 314 n, err := f.Read(b) 315 if err != nil && err != io.EOF { 316 return err 317 } 318 319 if n > 0 { 320 pid, err := strconv.ParseUint(string(b[:n]), 10, 64) 321 if err != nil { 322 return err 323 } 324 if utils.IsProcessAlive(int(pid)) { 325 logrus.Infof("previous instance of containerd still alive (%d)", pid) 326 r.daemonPid = int(pid) 327 return nil 328 } 329 } 330 331 // rewind the file 332 _, err = f.Seek(0, os.SEEK_SET) 333 if err != nil { 334 return err 335 } 336 337 // Truncate it 338 err = f.Truncate(0) 339 if err != nil { 340 return err 341 } 342 343 // Start a new instance 344 args := []string{"-l", r.rpcAddr, "--runtime", "docker-runc"} 345 if r.debugLog { 346 args = append(args, "--debug", "--metrics-interval=0") 347 } 348 if len(r.runtimeArgs) > 0 { 349 for _, v := range r.runtimeArgs { 350 args = append(args, "--runtime-args") 351 args = append(args, v) 352 } 353 logrus.Debugf("runContainerdDaemon: runtimeArgs: %s", args) 354 } 355 356 cmd := exec.Command(containerdBinary, args...) 357 // redirect containerd logs to docker logs 358 cmd.Stdout = os.Stdout 359 cmd.Stderr = os.Stderr 360 cmd.SysProcAttr = &syscall.SysProcAttr{Setsid: true} 361 cmd.Env = nil 362 // clear the NOTIFY_SOCKET from the env when starting containerd 363 for _, e := range os.Environ() { 364 if !strings.HasPrefix(e, "NOTIFY_SOCKET") { 365 cmd.Env = append(cmd.Env, e) 366 } 367 } 368 if err := cmd.Start(); err != nil { 369 return err 370 } 371 logrus.Infof("New containerd process, pid: %d\n", cmd.Process.Pid) 372 373 if _, err := f.WriteString(fmt.Sprintf("%d", cmd.Process.Pid)); err != nil { 374 utils.KillProcess(cmd.Process.Pid) 375 return err 376 } 377 378 go cmd.Wait() // Reap our child when needed 379 r.daemonPid = cmd.Process.Pid 380 return nil 381 } 382 383 // WithRemoteAddr sets the external containerd socket to connect to. 384 func WithRemoteAddr(addr string) RemoteOption { 385 return rpcAddr(addr) 386 } 387 388 type rpcAddr string 389 390 func (a rpcAddr) Apply(r Remote) error { 391 if remote, ok := r.(*remote); ok { 392 remote.rpcAddr = string(a) 393 return nil 394 } 395 return fmt.Errorf("WithRemoteAddr option not supported for this remote") 396 } 397 398 // WithRuntimeArgs sets the list of runtime args passed to containerd 399 func WithRuntimeArgs(args []string) RemoteOption { 400 return runtimeArgs(args) 401 } 402 403 type runtimeArgs []string 404 405 func (rt runtimeArgs) Apply(r Remote) error { 406 if remote, ok := r.(*remote); ok { 407 remote.runtimeArgs = rt 408 return nil 409 } 410 return fmt.Errorf("WithRuntimeArgs option not supported for this remote") 411 } 412 413 // WithStartDaemon defines if libcontainerd should also run containerd daemon. 414 func WithStartDaemon(start bool) RemoteOption { 415 return startDaemon(start) 416 } 417 418 type startDaemon bool 419 420 func (s startDaemon) Apply(r Remote) error { 421 if remote, ok := r.(*remote); ok { 422 remote.startDaemon = bool(s) 423 return nil 424 } 425 return fmt.Errorf("WithStartDaemon option not supported for this remote") 426 } 427 428 // WithDebugLog defines if containerd debug logs will be enabled for daemon. 429 func WithDebugLog(debug bool) RemoteOption { 430 return debugLog(debug) 431 } 432 433 type debugLog bool 434 435 func (d debugLog) Apply(r Remote) error { 436 if remote, ok := r.(*remote); ok { 437 remote.debugLog = bool(d) 438 return nil 439 } 440 return fmt.Errorf("WithDebugLog option not supported for this remote") 441 }