github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/engine/docker/container.go (about) 1 package docker 2 3 import ( 4 "archive/tar" 5 "context" 6 "encoding/json" 7 "fmt" 8 "io" 9 "math" 10 "os" 11 "path/filepath" 12 "strconv" 13 "strings" 14 "time" 15 16 dockertypes "github.com/docker/docker/api/types" 17 dockercontainer "github.com/docker/docker/api/types/container" 18 dockernetwork "github.com/docker/docker/api/types/network" 19 dockerslice "github.com/docker/docker/api/types/strslice" 20 "github.com/docker/go-connections/nat" 21 "github.com/docker/go-units" 22 "github.com/mitchellh/mapstructure" 23 "golang.org/x/sync/errgroup" 24 25 "github.com/projecteru2/core/engine" 26 enginetypes "github.com/projecteru2/core/engine/types" 27 "github.com/projecteru2/core/log" 28 resourcetypes "github.com/projecteru2/core/resource/types" 29 "github.com/projecteru2/core/types" 30 coretypes "github.com/projecteru2/core/types" 31 ) 32 33 const ( 34 minMemory = units.MiB * 4 35 maxMemory = math.MaxInt64 36 defaultCPUShare = 1024 37 root = "root" 38 ) 39 40 // RawArgs means some underlay args 41 type RawArgs struct { 42 PidMode dockercontainer.PidMode `json:"pid_mod"` 43 StorageOpt map[string]string `json:"storage_opt"` 44 CapAdd []string `json:"cap_add"` 45 CapDrop []string `json:"cap_drop"` 46 Ulimits []*units.Ulimit `json:"ulimits"` 47 Runtime string `json:"runtime"` 48 } 49 50 // ensureValues checks if value is nil, 51 // if so, initiate the value. 52 // Though a nil slice won't panic in this situation, 53 // still we initiate the values. 54 func (r *RawArgs) ensureValues() { 55 if r.StorageOpt == nil { 56 r.StorageOpt = map[string]string{} 57 } 58 if r.CapAdd == nil { 59 r.CapAdd = []string{} 60 } 61 if r.CapDrop == nil { 62 r.CapDrop = []string{} 63 } 64 if r.Ulimits == nil { 65 r.Ulimits = []*units.Ulimit{} 66 } 67 } 68 69 // loadRawArgs loads RawArgs, if b is given, 70 // values from b will over write default values. 71 func loadRawArgs(b []byte) (*RawArgs, error) { 72 r := &RawArgs{} 73 if len(b) > 0 { 74 if err := json.Unmarshal(b, r); err != nil { 75 return nil, err 76 } 77 } 78 r.ensureValues() 79 return r, nil 80 } 81 82 // VirtualizationCreate create a workload 83 func (e *Engine) VirtualizationCreate(ctx context.Context, opts *enginetypes.VirtualizationCreateOptions) (*enginetypes.VirtualizationCreated, error) { //nolint 84 logger := log.WithFunc("engine.docker.VirtualizationCreate") 85 r := &enginetypes.VirtualizationCreated{} 86 var err error 87 88 // parse engine args to resource options 89 resourceOpts := &engine.VirtualizationResource{} 90 if err = engine.MakeVirtualizationResource(opts.EngineParams, resourceOpts, func(p resourcetypes.Resources, d *engine.VirtualizationResource) error { 91 for _, v := range p { 92 if err := mapstructure.Decode(v, d); err != nil { 93 return err 94 } 95 } 96 return nil 97 }); err != nil { 98 logger.Errorf(ctx, err, "failed to parse engine args %+v", opts.EngineParams) 99 return r, coretypes.ErrInvalidEngineArgs 100 } 101 102 // memory should more than 4MiB 103 if resourceOpts.Memory > 0 && resourceOpts.Memory < minMemory || resourceOpts.Memory < 0 { 104 return r, coretypes.ErrInvaildMemory 105 } 106 // set default log driver if lambda 107 if opts.Lambda { 108 opts.LogType = "json-file" 109 } 110 111 restartPolicy := "" 112 restartRetry := 0 113 restartStr := strings.Split(opts.Restart, ":") 114 restartPolicy = restartStr[0] 115 if r, err := strconv.Atoi(restartStr[len(restartStr)-1]); err == nil { 116 restartRetry = r 117 } 118 // no longer use opts.Network as networkmode 119 // always get network name from networks 120 // ----------------------------------------- 121 // network mode 和 networks 互斥 122 // 没有 networks 的时候用 networkmode 的值 123 // 有 networks 的时候一律用用 networks 的值作为 mode 124 var networkMode dockercontainer.NetworkMode 125 networks := map[string]string{} 126 for name, network := range opts.Networks { 127 networkMode = dockercontainer.NetworkMode(name) 128 networks[name] = network 129 if networkMode.IsHost() { 130 networks[name] = "" 131 } 132 } 133 // 如果没有 network 用默认值替换 134 if networkMode == "" { 135 networkMode = dockercontainer.NetworkMode(e.config.Docker.NetworkMode) 136 } 137 // log config 138 if opts.LogConfig == nil { 139 opts.LogConfig = map[string]string{} 140 } 141 opts.LogConfig["mode"] = "non-blocking" 142 opts.LogConfig["max-buffer-size"] = "4m" 143 opts.LogConfig["tag"] = fmt.Sprintf("%s {{.ID}}", opts.Name) 144 if opts.Debug { 145 opts.LogType = e.config.Docker.Log.Type 146 for k, v := range e.config.Docker.Log.Config { 147 opts.LogConfig[k] = v 148 } 149 } 150 // add node IP 151 hostIP := GetIP(ctx, e.client.DaemonHost()) 152 opts.Env = append(opts.Env, fmt.Sprintf("ERU_NODE_IP=%s", hostIP)) 153 // 如果有给dns就优先用给定的dns. 154 // 没有给出dns的时候, 如果设定是用宿主机IP作为dns, 就会把宿主机IP设置过去. 155 // 其他情况就是默认值. 156 // 哦对, networkMode如果是host也不给dns. 157 if len(opts.DNS) == 0 && e.config.Docker.UseLocalDNS && hostIP != "" { 158 opts.DNS = []string{hostIP} 159 } 160 // mount paths 161 binds, volumes := makeMountPaths(ctx, opts, resourceOpts) 162 logger.Debugf(ctx, "App %s will bind %+v", opts.Name, binds) 163 164 config := &dockercontainer.Config{ 165 Env: opts.Env, 166 Cmd: dockerslice.StrSlice(opts.Cmd), 167 User: opts.User, 168 Image: opts.Image, 169 Volumes: volumes, 170 WorkingDir: opts.WorkingDir, 171 NetworkDisabled: networkMode == "", 172 Labels: opts.Labels, 173 OpenStdin: opts.Stdin, 174 Tty: opts.Stdin, 175 } 176 177 rArgs, err := loadRawArgs(opts.RawArgs) 178 if err != nil { 179 return r, err 180 } 181 182 resource := makeResourceSetting(resourceOpts.Quota, resourceOpts.Memory, resourceOpts.CPU, resourceOpts.NUMANode, resourceOpts.IOPSOptions, false) 183 // set ulimits 184 if len(rArgs.Ulimits) == 0 { 185 resource.Ulimits = []*units.Ulimit{ 186 {Name: "nofile", Soft: 65535, Hard: 65535}, 187 } 188 } else { 189 resource.Ulimits = rArgs.Ulimits 190 } 191 if networkMode.IsHost() { 192 opts.DNS = []string{} 193 opts.Sysctl = map[string]string{} 194 } 195 if resourceOpts.Storage > 0 { 196 volumeTotal := int64(0) 197 for _, v := range resourceOpts.Volumes { 198 parts := strings.Split(v, ":") 199 if len(parts) < 4 { 200 continue 201 } 202 size, err := strconv.ParseInt(parts[3], 10, 64) 203 if err != nil { 204 return nil, err 205 } 206 volumeTotal += size 207 } 208 if resourceOpts.Storage-volumeTotal > 0 { 209 rArgs.StorageOpt["size"] = fmt.Sprintf("%+v", resourceOpts.Storage-volumeTotal) 210 } 211 } 212 // 如果有指定用户,用指定用户 213 // 没有指定用户,用镜像自己的 214 // CapAdd and Privileged 215 capAdds := dockerslice.StrSlice(rArgs.CapAdd) 216 if opts.Privileged { 217 opts.User = root 218 capAdds = append(capAdds, "SYS_ADMIN") 219 } 220 hostConfig := &dockercontainer.HostConfig{ 221 Binds: binds, 222 DNS: opts.DNS, 223 LogConfig: dockercontainer.LogConfig{ 224 Type: opts.LogType, 225 Config: opts.LogConfig, 226 }, 227 NetworkMode: networkMode, 228 RestartPolicy: dockercontainer.RestartPolicy{ 229 Name: restartPolicy, 230 MaximumRetryCount: restartRetry, 231 }, 232 CapAdd: capAdds, 233 ExtraHosts: opts.Hosts, 234 Privileged: opts.Privileged, 235 Resources: resource, 236 Sysctls: opts.Sysctl, 237 PidMode: rArgs.PidMode, 238 StorageOpt: rArgs.StorageOpt, 239 Runtime: rArgs.Runtime, 240 } 241 242 if hostConfig.NetworkMode.IsBridge() { 243 portMapping := nat.PortMap{} 244 exposePorts := nat.PortSet{} 245 for _, p := range opts.Publish { 246 port, err := nat.NewPort("tcp", p) 247 if err != nil { 248 return r, err 249 } 250 exposePorts[port] = struct{}{} 251 portMapping[port] = []nat.PortBinding{} 252 portMapping[port] = append(portMapping[port], nat.PortBinding{HostPort: p}) 253 } 254 hostConfig.PortBindings = portMapping 255 config.ExposedPorts = exposePorts 256 } 257 258 networkConfig := &dockernetwork.NetworkingConfig{ 259 EndpointsConfig: map[string]*dockernetwork.EndpointSettings{}, 260 } 261 for networkID, ipv4 := range networks { 262 if useCNI(opts.Labels) && ipv4 != "" { 263 config.Labels["ipv4"] = ipv4 264 break 265 } 266 267 endpointSetting, err := e.makeIPV4EndpointSetting(ipv4) 268 if err != nil { 269 return r, err 270 } 271 ipForShow := ipv4 272 if ipForShow == "" { 273 ipForShow = "[AutoAlloc]" 274 } 275 networkConfig.EndpointsConfig[networkID] = endpointSetting 276 logger.Infof(ctx, "Connect to %+v with IP %+v", networkID, ipForShow) 277 } 278 279 workloadCreated, err := e.client.ContainerCreate(ctx, config, hostConfig, networkConfig, nil, opts.Name) 280 r.Name = opts.Name 281 r.ID = workloadCreated.ID 282 return r, err 283 } 284 285 // VirtualizationCopyTo copy things to virtualization 286 func (e *Engine) VirtualizationCopyTo(ctx context.Context, ID, target string, content []byte, uid, gid int, mode int64) error { 287 return withTarfileDump(ctx, target, content, uid, gid, mode, func(target, tarfile string) error { 288 content, err := os.Open(tarfile) 289 if err != nil { 290 return err 291 } 292 defer content.Close() 293 return e.client.CopyToContainer(ctx, ID, filepath.Dir(target), content, dockertypes.CopyToContainerOptions{AllowOverwriteDirWithFile: true, CopyUIDGID: false}) 294 }) 295 } 296 297 // VirtualizationCopyChunkTo copy chunk to virtualization 298 func (e *Engine) VirtualizationCopyChunkTo(ctx context.Context, ID, target string, size int64, content io.Reader, uid, gid int, mode int64) error { 299 pr, pw := io.Pipe() 300 tw := tar.NewWriter(pw) 301 defer tw.Close() 302 g, _ := errgroup.WithContext(ctx) 303 g.Go(func() error { 304 hdr := &tar.Header{ 305 Name: filepath.Base(target), 306 Size: size, 307 Mode: mode, 308 Uid: uid, 309 Gid: gid, 310 } 311 if taskErr := tw.WriteHeader(hdr); taskErr != nil { 312 log.Errorf(ctx, taskErr, "[VirtualizationCopyChunkTo] write header to %s err, err: %v", ID, taskErr) 313 return taskErr 314 } 315 for { 316 data := make([]byte, types.SendLargeFileChunkSize) 317 n, taskErr := content.Read(data) 318 if taskErr != nil { 319 if taskErr != io.EOF { 320 log.Errorf(ctx, taskErr, "[VirtualizationCopyChunkTo] read data from pipe err, err: %v", taskErr) 321 return taskErr 322 } 323 if closeErr := pw.Close(); closeErr != nil { 324 log.Errorf(ctx, closeErr, "[VirtualizationCopyChunkTo] close pipe writer, err: %v", closeErr) 325 return closeErr 326 } 327 return nil 328 } 329 if n < len(data) { 330 data = data[:n] 331 } 332 _, taskErr = tw.Write(data) 333 if taskErr != nil { 334 log.Debugf(ctx, "[VirtualizationCopyChunkTo] write data into %s err, err: %v", ID, taskErr) 335 if closeErr := pw.Close(); closeErr != nil { 336 log.Errorf(ctx, closeErr, "[VirtualizationCopyChunkTo] close pipe writer, err: %v", closeErr) 337 return closeErr 338 } 339 return taskErr 340 } 341 } 342 }) 343 err := e.client.CopyToContainer(ctx, ID, filepath.Dir(target), pr, dockertypes.CopyToContainerOptions{AllowOverwriteDirWithFile: true, CopyUIDGID: false}) 344 if err != nil { 345 log.Errorf(ctx, err, "[VirtualizationCopyChunkTo] copy %s to container %s err, err:%v", target, ID, err) 346 return err 347 } 348 return g.Wait() 349 } 350 351 // VirtualizationStart start virtualization 352 func (e *Engine) VirtualizationStart(ctx context.Context, ID string) error { 353 return e.client.ContainerStart(ctx, ID, dockertypes.ContainerStartOptions{}) 354 } 355 356 // VirtualizationStop stop virtualization 357 func (e *Engine) VirtualizationStop(ctx context.Context, ID string, gracefulTimeout time.Duration) error { 358 var timeout *int 359 if t := int(gracefulTimeout.Seconds()); t > 0 { 360 timeout = &t 361 } 362 return e.client.ContainerStop(ctx, ID, dockercontainer.StopOptions{Timeout: timeout}) 363 } 364 365 // VirtualizationSuspend suspends virtualization 366 func (e *Engine) VirtualizationSuspend(context.Context, string) error { 367 return nil 368 } 369 370 // VirtualizationResume resumes virtualization 371 func (e *Engine) VirtualizationResume(context.Context, string) error { 372 return nil 373 } 374 375 func (e *Engine) RawEngine(context.Context, *enginetypes.RawEngineOptions) (res *enginetypes.RawEngineResult, err error) { 376 return nil, nil 377 } 378 379 // VirtualizationRemove remove virtualization 380 func (e *Engine) VirtualizationRemove(ctx context.Context, ID string, removeVolumes, force bool) error { 381 if err := e.client.ContainerRemove(ctx, ID, dockertypes.ContainerRemoveOptions{RemoveVolumes: removeVolumes, Force: force}); err != nil { 382 if strings.Contains(err.Error(), "no such") { 383 err = types.ErrWorkloadNotExists 384 } 385 return err 386 } 387 return nil 388 } 389 390 // VirtualizationInspect get virtualization info 391 func (e *Engine) VirtualizationInspect(ctx context.Context, ID string) (*enginetypes.VirtualizationInfo, error) { 392 if e.client == nil { 393 return nil, coretypes.ErrNilEngine 394 } 395 396 workloadJSON, err := e.client.ContainerInspect(ctx, ID) 397 r := &enginetypes.VirtualizationInfo{} 398 if err != nil { 399 return r, err 400 } 401 r.ID = workloadJSON.ID 402 r.User = workloadJSON.Config.User 403 r.Image = workloadJSON.Config.Image 404 r.Env = workloadJSON.Config.Env 405 r.Labels = workloadJSON.Config.Labels 406 r.Running = workloadJSON.State.Running 407 r.Networks = map[string]string{} 408 for networkName, networkSetting := range workloadJSON.NetworkSettings.Networks { 409 ip := networkSetting.IPAddress 410 if dockercontainer.NetworkMode(networkName).IsHost() { 411 ip = GetIP(ctx, e.client.DaemonHost()) 412 } 413 r.Networks[networkName] = ip 414 } 415 return r, nil 416 } 417 418 // VirtualizationLogs show virtualization logs 419 func (e *Engine) VirtualizationLogs(ctx context.Context, opts *enginetypes.VirtualizationLogStreamOptions) (stdout, stderr io.ReadCloser, err error) { 420 logsOpts := dockertypes.ContainerLogsOptions{ 421 ShowStdout: opts.Stdout, 422 ShowStderr: opts.Stderr, 423 Tail: opts.Tail, 424 Follow: opts.Follow, 425 Since: opts.Since, 426 Until: opts.Until, 427 } 428 resp, err := e.client.ContainerLogs(ctx, opts.ID, logsOpts) 429 if err != nil { 430 return nil, nil, err 431 } 432 if !opts.Stderr { 433 return io.NopCloser(mergeStream(resp)), nil, nil 434 } 435 stdout, stderr = e.demultiplexStdStream(ctx, resp) 436 return stdout, stderr, nil 437 } 438 439 // VirtualizationAttach attach to a virtualization 440 func (e *Engine) VirtualizationAttach(ctx context.Context, ID string, stream, stdin bool) (stdout, stderr io.ReadCloser, _ io.WriteCloser, err error) { 441 opts := dockertypes.ContainerAttachOptions{ 442 Stream: stream, 443 Stdin: stdin, 444 Logs: true, 445 Stdout: true, 446 Stderr: true, 447 } 448 resp, err := e.client.ContainerAttach(ctx, ID, opts) 449 if err != nil { 450 return nil, nil, nil, err 451 } 452 if stdin { 453 return io.NopCloser(resp.Reader), nil, resp.Conn, nil 454 } 455 stdout, stderr = e.demultiplexStdStream(ctx, resp.Reader) 456 return stdout, stderr, resp.Conn, nil 457 } 458 459 // VirtualizationResize resizes remote terminal 460 func (e *Engine) VirtualizationResize(ctx context.Context, workloadID string, height, width uint) (err error) { 461 opts := dockertypes.ResizeOptions{ 462 Height: height, 463 Width: width, 464 } 465 466 return e.client.ContainerResize(ctx, workloadID, opts) 467 } 468 469 // VirtualizationWait wait virtualization exit 470 func (e *Engine) VirtualizationWait(ctx context.Context, ID, _ string) (*enginetypes.VirtualizationWaitResult, error) { 471 waitBody, errorCh := e.client.ContainerWait(ctx, ID, dockercontainer.WaitConditionNotRunning) 472 r := &enginetypes.VirtualizationWaitResult{} 473 select { 474 case b := <-waitBody: 475 if b.Error != nil { 476 r.Message = b.Error.Message 477 } 478 r.Code = b.StatusCode 479 return r, nil 480 case err := <-errorCh: 481 r.Message = err.Error() 482 r.Code = -1 483 return r, err 484 } 485 } 486 487 // VirtualizationUpdateResource update virtualization resource 488 func (e *Engine) VirtualizationUpdateResource(ctx context.Context, ID string, engineParams resourcetypes.Resources) error { 489 logger := log.WithFunc("engine.docker.VirtualizationUpdateResource") 490 491 // parse engine args to resource options 492 resourceOpts := &engine.VirtualizationResource{} 493 if err := engine.MakeVirtualizationResource(engineParams, resourceOpts, func(p resourcetypes.Resources, d *engine.VirtualizationResource) error { 494 for _, v := range p { 495 if err := mapstructure.Decode(v, d); err != nil { 496 return err 497 } 498 } 499 return nil 500 }); err != nil { 501 logger.WithField("ID", ID).Errorf(ctx, err, "failed to parse engine args %+v", engineParams) 502 return err 503 } 504 505 if resourceOpts.Memory > 0 && resourceOpts.Memory < minMemory || resourceOpts.Memory < 0 { 506 return coretypes.ErrInvaildMemory 507 } 508 if len(resourceOpts.Volumes) > 0 || resourceOpts.VolumeChanged { 509 logger.Warnf(ctx, "docker engine not support rebinding volume resource: %+v", resourceOpts.Volumes) 510 return coretypes.ErrInvalidVolumeBind 511 } 512 513 memory := resourceOpts.Memory 514 if memory == 0 { 515 memory = maxMemory 516 } 517 518 quota := resourceOpts.Quota 519 cpuMap := resourceOpts.CPU 520 numaNode := resourceOpts.NUMANode 521 // unlimited cpu 522 if quota == 0 || len(cpuMap) == 0 { 523 info, err := e.Info(ctx) // TODO can fixed in docker engine, support empty Cpusetcpus, or use cache to speed up 524 if err != nil { 525 return err 526 } 527 cpuMap = map[string]int64{} 528 for i := 0; i < info.NCPU; i++ { 529 cpuMap[strconv.Itoa(i)] = int64(e.config.Scheduler.ShareBase) 530 } 531 if quota == 0 { 532 quota = -1 533 numaNode = "" 534 } 535 } 536 537 newResource := makeResourceSetting(quota, memory, cpuMap, numaNode, resourceOpts.IOPSOptions, resourceOpts.Remap) 538 updateConfig := dockercontainer.UpdateConfig{Resources: newResource} 539 _, err := e.client.ContainerUpdate(ctx, ID, updateConfig) 540 return err 541 } 542 543 // VirtualizationCopyFrom copy thing from a virtualization 544 func (e *Engine) VirtualizationCopyFrom(ctx context.Context, ID, path string) (content []byte, uid, gid int, mode int64, err error) { 545 resp, _, err := e.client.CopyFromContainer(ctx, ID, path) 546 if err != nil { 547 return 548 } 549 tarReader := tar.NewReader(resp) 550 header, err := tarReader.Next() 551 if err != nil { 552 return 553 } 554 content, err = io.ReadAll(tarReader) 555 return content, header.Uid, header.Gid, header.Mode, err 556 }