github.com/hms58/moby@v1.13.1/libcontainerd/client_linux.go

package libcontainerd

import (
	"fmt"
	"os"
	"strings"
	"sync"
	"syscall"
	"time"

	"github.com/Sirupsen/logrus"
	containerd "github.com/docker/containerd/api/grpc/types"
	"github.com/docker/docker/pkg/ioutils"
	"github.com/docker/docker/pkg/mount"
	"github.com/golang/protobuf/ptypes"
	"github.com/golang/protobuf/ptypes/timestamp"
	specs "github.com/opencontainers/runtime-spec/specs-go"
	"golang.org/x/net/context"
)

type client struct {
	clientCommon

	// Platform specific properties below here.
	remote        *remote
	q             queue
	exitNotifiers map[string]*exitNotifier
	liveRestore   bool
}

// GetServerVersion returns the connected server version information
func (clnt *client) GetServerVersion(ctx context.Context) (*ServerVersion, error) {
	resp, err := clnt.remote.apiClient.GetServerVersion(ctx, &containerd.GetServerVersionRequest{})
	if err != nil {
		return nil, err
	}

	sv := &ServerVersion{
		GetServerVersionResponse: *resp,
	}

	return sv, nil
}

// AddProcess is the handler for adding a process to an already running
// container. It's called through docker exec. It returns the system pid of the
// exec'd process.
func (clnt *client) AddProcess(ctx context.Context, containerID, processFriendlyName string, specp Process, attachStdio StdioCallback) (pid int, err error) {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)
	container, err := clnt.getContainer(containerID)
	if err != nil {
		return -1, err
	}

	spec, err := container.spec()
	if err != nil {
		return -1, err
	}
	sp := spec.Process
	sp.Args = specp.Args
	sp.Terminal = specp.Terminal
	if len(specp.Env) > 0 {
		sp.Env = specp.Env
	}
	if specp.Cwd != nil {
		sp.Cwd = *specp.Cwd
	}
	if specp.User != nil {
		sp.User = specs.User{
			UID:            specp.User.UID,
			GID:            specp.User.GID,
			AdditionalGids: specp.User.AdditionalGids,
		}
	}
	if specp.Capabilities != nil {
		sp.Capabilities = specp.Capabilities
	}

	p := container.newProcess(processFriendlyName)

	r := &containerd.AddProcessRequest{
		Args:     sp.Args,
		Cwd:      sp.Cwd,
		Terminal: sp.Terminal,
		Id:       containerID,
		Env:      sp.Env,
		User: &containerd.User{
			Uid:            sp.User.UID,
			Gid:            sp.User.GID,
			AdditionalGids: sp.User.AdditionalGids,
		},
		Pid:             processFriendlyName,
		Stdin:           p.fifo(syscall.Stdin),
		Stdout:          p.fifo(syscall.Stdout),
		Stderr:          p.fifo(syscall.Stderr),
		Capabilities:    sp.Capabilities,
		ApparmorProfile: sp.ApparmorProfile,
		SelinuxLabel:    sp.SelinuxLabel,
		NoNewPrivileges: sp.NoNewPrivileges,
		Rlimits:         convertRlimits(sp.Rlimits),
	}

	fifoCtx, cancel := context.WithCancel(context.Background())
	defer func() {
		if err != nil {
			cancel()
		}
	}()

	iopipe, err := p.openFifos(fifoCtx, sp.Terminal)
	if err != nil {
		return -1, err
	}

	resp, err := clnt.remote.apiClient.AddProcess(ctx, r)
	if err != nil {
		p.closeFifos(iopipe)
		return -1, err
	}

	var stdinOnce sync.Once
	stdin := iopipe.Stdin
	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
		var err error
		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
			err = stdin.Close()
			if err2 := p.sendCloseStdin(); err == nil {
				err = err2
			}
		})
		return err
	})

	container.processes[processFriendlyName] = p

	if err := attachStdio(*iopipe); err != nil {
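		// The attach callback failed; close the fifos opened above so
		// they do not leak, then surface the error.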
		p.closeFifos(iopipe)
		return -1, err
	}

	return int(resp.SystemPid), nil
}

// SignalProcess sends the given signal to the named process of a container.
func (clnt *client) SignalProcess(containerID string, pid string, sig int) error {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)
	_, err := clnt.remote.apiClient.Signal(context.Background(), &containerd.SignalRequest{
		Id:     containerID,
		Pid:    pid,
		Signal: uint32(sig),
	})
	return err
}

// Resize changes the terminal dimensions of a container's process.
func (clnt *client) Resize(containerID, processFriendlyName string, width, height int) error {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)
	if _, err := clnt.getContainer(containerID); err != nil {
		return err
	}
	_, err := clnt.remote.apiClient.UpdateProcess(context.Background(), &containerd.UpdateProcessRequest{
		Id:     containerID,
		Pid:    processFriendlyName,
		Width:  uint32(width),
		Height: uint32(height),
	})
	return err
}

// Pause suspends all processes of a container.
func (clnt *client) Pause(containerID string) error {
	return clnt.setState(containerID, StatePause)
}

// setState asks containerd to move the container into the given state and
// blocks until the matching pause/resume event has been observed.
func (clnt *client) setState(containerID, state string) error {
	clnt.lock(containerID)
	container, err := clnt.getContainer(containerID)
	if err != nil {
		clnt.unlock(containerID)
		return err
	}
	if container.systemPid == 0 {
		clnt.unlock(containerID)
		return fmt.Errorf("No active process for container %s", containerID)
	}
	st := "running"
	if state == StatePause {
		st = "paused"
	}
	chstate := make(chan struct{})
	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
		Id:     containerID,
		Pid:    InitFriendlyName,
		Status: st,
	})
	if err != nil {
		clnt.unlock(containerID)
		return err
	}
	container.pauseMonitor.append(state, chstate)
	clnt.unlock(containerID)
	<-chstate
	return nil
}

// Resume unpauses all processes of a container.
func (clnt *client) Resume(containerID string) error {
	return clnt.setState(containerID, StateResume)
}

// Stats returns a live resource-usage snapshot for a container.
func (clnt *client) Stats(containerID string) (*Stats, error) {
	resp, err := clnt.remote.apiClient.Stats(context.Background(), &containerd.StatsRequest{Id: containerID})
	if err != nil {
		return nil, err
	}
	return (*Stats)(resp), nil
}

// cleanupOldRootfs takes care of the old 1.11.0 behavior in case the version
// upgrade happened without a clean daemon shutdown: it unmounts and deletes
// the leftover bundle folder.
func (clnt *client) cleanupOldRootfs(containerID string) {
	if mounts, err := mount.GetMounts(); err == nil {
		for _, m := range mounts {
			if strings.HasSuffix(m.Mountpoint, containerID+"/rootfs") {
				if err := syscall.Unmount(m.Mountpoint, syscall.MNT_DETACH); err == nil {
					os.RemoveAll(strings.TrimSuffix(m.Mountpoint, "/rootfs"))
				}
				break
			}
		}
	}
}

// setExited reports the container's exit to the daemon backend and cleans up
// any leftover pre-1.12 rootfs mount.
func (clnt *client) setExited(containerID string, exitCode uint32) error {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)

	err := clnt.backend.StateChanged(containerID, StateInfo{
		CommonStateInfo: CommonStateInfo{
			State:    StateExit,
			ExitCode: exitCode,
		}})

	clnt.cleanupOldRootfs(containerID)

	return err
}

// GetPidsForContainer returns the PIDs of all processes running inside a
// container.
func (clnt *client) GetPidsForContainer(containerID string) ([]int, error) {
	cont, err := clnt.getContainerdContainer(containerID)
	if err != nil {
		return nil, err
	}
	pids := make([]int, len(cont.Pids))
	for i, p := range cont.Pids {
		pids[i] = int(p)
	}
	return pids, nil
}
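
// Illustrative caller-side sketch, not part of the original file: a
// daemon-side caller (e.g. the code behind `docker top`) could use
// GetPidsForContainer roughly like this; `clnt` and `id` are assumed to
// already exist:
//
//	pids, err := clnt.GetPidsForContainer(id)
//	if err != nil {
//		return err
//	}
//	logrus.Debugf("container %s has %d processes: %v", id, len(pids), pids)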

// Summary returns a summary of the processes running in a container.
// This is a no-op on Linux.
func (clnt *client) Summary(containerID string) ([]Summary, error) {
	return nil, nil
}

// getContainerdContainer fetches the state containerd holds for a container.
func (clnt *client) getContainerdContainer(containerID string) (*containerd.Container, error) {
	resp, err := clnt.remote.apiClient.State(context.Background(), &containerd.StateRequest{Id: containerID})
	if err != nil {
		return nil, err
	}
	for _, cont := range resp.Containers {
		if cont.Id == containerID {
			return cont, nil
		}
	}
	return nil, fmt.Errorf("invalid state response")
}

// UpdateResources applies new resource limits to a running container.
func (clnt *client) UpdateResources(containerID string, resources Resources) error {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)
	container, err := clnt.getContainer(containerID)
	if err != nil {
		return err
	}
	if container.systemPid == 0 {
		return fmt.Errorf("No active process for container %s", containerID)
	}
	_, err = clnt.remote.apiClient.UpdateContainer(context.Background(), &containerd.UpdateContainerRequest{
		Id:        containerID,
		Pid:       InitFriendlyName,
		Resources: (*containerd.UpdateResource)(&resources),
	})
	if err != nil {
		return err
	}
	return nil
}

func (clnt *client) getExitNotifier(containerID string) *exitNotifier {
	clnt.mapMutex.RLock()
	defer clnt.mapMutex.RUnlock()
	return clnt.exitNotifiers[containerID]
}

func (clnt *client) getOrCreateExitNotifier(containerID string) *exitNotifier {
	clnt.mapMutex.Lock()
	defer clnt.mapMutex.Unlock()
	w, ok := clnt.exitNotifiers[containerID]
	if !ok {
		w = &exitNotifier{c: make(chan struct{}), client: clnt}
		clnt.exitNotifiers[containerID] = w
	}
	return w
}

// restore rebuilds the daemon-side state for a container that is still known
// to containerd, reattaching its stdio and replaying its last event.
func (clnt *client) restore(cont *containerd.Container, lastEvent *containerd.Event, attachStdio StdioCallback, options ...CreateOption) (err error) {
	clnt.lock(cont.Id)
	defer clnt.unlock(cont.Id)

	logrus.Debugf("libcontainerd: restore container %s state %s", cont.Id, cont.Status)

	containerID := cont.Id
	if _, err := clnt.getContainer(containerID); err == nil {
		return fmt.Errorf("container %s is already active", containerID)
	}

	defer func() {
		if err != nil {
			clnt.deleteContainer(cont.Id)
		}
	}()

	container := clnt.newContainer(cont.BundlePath, options...)
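	// Recover the PID of the container's init process from containerd's
	// state so the restored container can be signalled and monitored.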
	container.systemPid = systemPid(cont)

	var terminal bool
	for _, p := range cont.Processes {
		if p.Pid == InitFriendlyName {
			terminal = p.Terminal
		}
	}

	fifoCtx, cancel := context.WithCancel(context.Background())
	defer func() {
		if err != nil {
			cancel()
		}
	}()

	iopipe, err := container.openFifos(fifoCtx, terminal)
	if err != nil {
		return err
	}
	var stdinOnce sync.Once
	stdin := iopipe.Stdin
	iopipe.Stdin = ioutils.NewWriteCloserWrapper(stdin, func() error {
		var err error
		stdinOnce.Do(func() { // on error from attach we don't know if stdin was already closed
			err = stdin.Close()
		})
		return err
	})

	if err := attachStdio(*iopipe); err != nil {
		container.closeFifos(iopipe)
		return err
	}

	clnt.appendContainer(container)

	err = clnt.backend.StateChanged(containerID, StateInfo{
		CommonStateInfo: CommonStateInfo{
			State: StateRestore,
			Pid:   container.systemPid,
		}})

	if err != nil {
		container.closeFifos(iopipe)
		return err
	}

	if lastEvent != nil {
		// This should only be a pause or resume event
		if lastEvent.Type == StatePause || lastEvent.Type == StateResume {
			return clnt.backend.StateChanged(containerID, StateInfo{
				CommonStateInfo: CommonStateInfo{
					State: lastEvent.Type,
					Pid:   container.systemPid,
				}})
		}

		logrus.Warnf("libcontainerd: unexpected backlog event: %#v", lastEvent)
	}

	return nil
}

// getContainerLastEventSinceTime returns the most recent event containerd
// has stored for a container since the given timestamp.
func (clnt *client) getContainerLastEventSinceTime(id string, tsp *timestamp.Timestamp) (*containerd.Event, error) {
	er := &containerd.EventsRequest{
		Timestamp:  tsp,
		StoredOnly: true,
		Id:         id,
	}
	events, err := clnt.remote.apiClient.Events(context.Background(), er)
	if err != nil {
		logrus.Errorf("libcontainerd: failed to get container events stream for %s: %q", er.Id, err)
		return nil, err
	}

	var ev *containerd.Event
	for {
		e, err := events.Recv()
		if err != nil {
			if err.Error() == "EOF" {
				break
			}
			logrus.Errorf("libcontainerd: failed to get container event for %s: %q", id, err)
			return nil, err
		}
		ev = e
		logrus.Debugf("libcontainerd: received past event %#v", ev)
	}

	return ev, nil
}

func (clnt *client) getContainerLastEvent(id string) (*containerd.Event, error) {
	ev, err := clnt.getContainerLastEventSinceTime(id, clnt.remote.restoreFromTimestamp)
	if err == nil && ev == nil {
		// If ev is nil and the container is running in containerd,
		// we already consumed all the events of the
		// container, including the "exit" one.
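		// (That is, the stored-event backlog since
		// restoreFromTimestamp is empty for this container.)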
		// Thus, we request all events containerd has in memory for
		// this container in order to get the last one (which should
		// be an exit event).
		logrus.Warnf("libcontainerd: client is out of sync, restore was called on a fully synced container (%s).", id)
		// Request all events since beginning of time
		t := time.Unix(0, 0)
		tsp, err := ptypes.TimestampProto(t)
		if err != nil {
			logrus.Errorf("libcontainerd: getContainerLastEvent: failed to convert timestamp: %q", err)
			return nil, err
		}

		return clnt.getContainerLastEventSinceTime(id, tsp)
	}

	return ev, err
}

// Restore reattaches to, or cleans up after, a container that was running
// when the daemon went down.
func (clnt *client) Restore(containerID string, attachStdio StdioCallback, options ...CreateOption) error {
	// Synchronize with live events
	clnt.remote.Lock()
	defer clnt.remote.Unlock()
	// Check that containerd still knows this container.
	//
	// In the unlikely event that Restore for this container processes
	// its past events before the main loop does, those events will be
	// processed twice. However, this is not an issue, as all those
	// events do is set the state of the container to exactly the same
	// value.
	cont, err := clnt.getContainerdContainer(containerID)
	// Get its last event
	ev, eerr := clnt.getContainerLastEvent(containerID)
	if err != nil || cont.Status == "Stopped" {
		if err != nil {
			logrus.Warnf("libcontainerd: failed to retrieve container %s state: %v", containerID, err)
		}
		if ev != nil && (ev.Pid != InitFriendlyName || ev.Type != StateExit) {
			// Wait a while for the exit event
			timeout := time.NewTimer(10 * time.Second)
			tick := time.NewTicker(100 * time.Millisecond)
		stop:
			for {
				select {
				case <-timeout.C:
					break stop
				case <-tick.C:
					ev, eerr = clnt.getContainerLastEvent(containerID)
					if eerr != nil {
						break stop
					}
					if ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
						break stop
					}
				}
			}
			timeout.Stop()
			tick.Stop()
		}

		// get the exit status for this container, if we don't have
		// one, indicate an error
		ec := uint32(255)
		if eerr == nil && ev != nil && ev.Pid == InitFriendlyName && ev.Type == StateExit {
			ec = ev.Status
		}
		clnt.setExited(containerID, ec)

		return nil
	}

	// container is still alive
	if clnt.liveRestore {
		if err := clnt.restore(cont, ev, attachStdio, options...); err != nil {
			logrus.Errorf("libcontainerd: error restoring %s: %v", containerID, err)
		}
		return nil
	}

	// Kill the container if liveRestore == false
	w := clnt.getOrCreateExitNotifier(containerID)
	clnt.lock(cont.Id)
	container := clnt.newContainer(cont.BundlePath)
	container.systemPid = systemPid(cont)
	clnt.appendContainer(container)
	clnt.unlock(cont.Id)

	container.discardFifos()

	if err := clnt.Signal(containerID, int(syscall.SIGTERM)); err != nil {
		logrus.Errorf("libcontainerd: error sending sigterm to %v: %v", containerID, err)
	}
	// Let the main loop handle the exit event
	clnt.remote.Unlock()
	select {
	case <-time.After(10 * time.Second):
		if err := clnt.Signal(containerID, int(syscall.SIGKILL)); err != nil {
			logrus.Errorf("libcontainerd: error sending sigkill to %v: %v", containerID, err)
		}
		select {
		case <-time.After(2 * time.Second):
		case <-w.wait():
			// relock because of the defer
			clnt.remote.Lock()
			return nil
		}
	case <-w.wait():
		// relock because of the defer
		clnt.remote.Lock()
		return nil
	}
	// relock because of the defer
	clnt.remote.Lock()

	clnt.deleteContainer(containerID)

	return clnt.setExited(containerID, uint32(255))
}

// CreateCheckpoint creates a checkpoint of a container, optionally stopping
// the container once the checkpoint has been written.
func (clnt *client) CreateCheckpoint(containerID string, checkpointID string, checkpointDir string, exit bool) error {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)
	if _, err := clnt.getContainer(containerID); err != nil {
		return err
	}

	_, err := clnt.remote.apiClient.CreateCheckpoint(context.Background(), &containerd.CreateCheckpointRequest{
		Id: containerID,
		Checkpoint: &containerd.Checkpoint{
			Name:        checkpointID,
			Exit:        exit,
			Tcp:         true,
			UnixSockets: true,
			Shell:       false,
			EmptyNS:     []string{"network"},
		},
		CheckpointDir: checkpointDir,
	})
	return err
}

// DeleteCheckpoint removes a named checkpoint of a container.
func (clnt *client) DeleteCheckpoint(containerID string, checkpointID string, checkpointDir string) error {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)
	if _, err := clnt.getContainer(containerID); err != nil {
		return err
	}

	_, err := clnt.remote.apiClient.DeleteCheckpoint(context.Background(), &containerd.DeleteCheckpointRequest{
		Id:            containerID,
		Name:          checkpointID,
		CheckpointDir: checkpointDir,
	})
	return err
}

// ListCheckpoints lists the checkpoints stored for a container.
func (clnt *client) ListCheckpoints(containerID string, checkpointDir string) (*Checkpoints, error) {
	clnt.lock(containerID)
	defer clnt.unlock(containerID)
	if _, err := clnt.getContainer(containerID); err != nil {
		return nil, err
	}

	resp, err := clnt.remote.apiClient.ListCheckpoint(context.Background(), &containerd.ListCheckpointRequest{
		Id:            containerID,
		CheckpointDir: checkpointDir,
	})
	if err != nil {
		return nil, err
	}
	return (*Checkpoints)(resp), nil
}
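
// Illustrative usage sketch, not part of the original file. A minimal
// checkpoint round-trip with this client might look as follows; `clnt` is
// assumed to be a *client obtained from this package's remote, and `id` an
// existing container ID:
//
//	if err := clnt.CreateCheckpoint(id, "ckpt1", checkpointDir, false); err != nil {
//		logrus.Errorf("checkpoint failed: %v", err)
//	}
//	cps, err := clnt.ListCheckpoints(id, checkpointDir)
//	if err == nil {
//		logrus.Infof("checkpoints: %v", cps.Checkpoints)
//	}
//	if err := clnt.DeleteCheckpoint(id, "ckpt1", checkpointDir); err != nil {
//		logrus.Errorf("delete failed: %v", err)
//	}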