github.com/projecteru2/core@v0.0.0-20240321043226-06bcc1c23f58/engine/docker/container.go (about)

     1  package docker
     2  
     3  import (
     4  	"archive/tar"
     5  	"context"
     6  	"encoding/json"
     7  	"fmt"
     8  	"io"
     9  	"math"
    10  	"os"
    11  	"path/filepath"
    12  	"strconv"
    13  	"strings"
    14  	"time"
    15  
    16  	dockertypes "github.com/docker/docker/api/types"
    17  	dockercontainer "github.com/docker/docker/api/types/container"
    18  	dockernetwork "github.com/docker/docker/api/types/network"
    19  	dockerslice "github.com/docker/docker/api/types/strslice"
    20  	"github.com/docker/go-connections/nat"
    21  	"github.com/docker/go-units"
    22  	"github.com/mitchellh/mapstructure"
    23  	"golang.org/x/sync/errgroup"
    24  
    25  	"github.com/projecteru2/core/engine"
    26  	enginetypes "github.com/projecteru2/core/engine/types"
    27  	"github.com/projecteru2/core/log"
    28  	resourcetypes "github.com/projecteru2/core/resource/types"
    29  	"github.com/projecteru2/core/types"
    30  	coretypes "github.com/projecteru2/core/types"
    31  )
    32  
    33  const (
    34  	minMemory       = units.MiB * 4
    35  	maxMemory       = math.MaxInt64
    36  	defaultCPUShare = 1024
    37  	root            = "root"
    38  )
    39  
// RawArgs holds low-level docker options that are passed to the engine as a
// JSON document (see loadRawArgs) instead of through dedicated create options.
type RawArgs struct {
	// PidMode is copied to HostConfig.PidMode.
	// NOTE(review): the JSON key is "pid_mod", not "pid_mode"; it looks like a
	// typo, but renaming it would break existing payloads — confirm before changing.
	PidMode    dockercontainer.PidMode `json:"pid_mod"`
	// StorageOpt is copied to HostConfig.StorageOpt; VirtualizationCreate may
	// add a "size" entry to it.
	StorageOpt map[string]string       `json:"storage_opt"`
	// CapAdd lists extra capabilities to add to the container.
	CapAdd     []string                `json:"cap_add"`
	// CapDrop lists capabilities to drop from the container.
	CapDrop    []string                `json:"cap_drop"`
	// Ulimits overrides the default ulimits when non-empty.
	Ulimits    []*units.Ulimit         `json:"ulimits"`
	// Runtime is copied to HostConfig.Runtime.
	Runtime    string                  `json:"runtime"`
}
    49  
    50  // ensureValues checks if value is nil,
    51  // if so, initiate the value.
    52  // Though a nil slice won't panic in this situation,
    53  // still we initiate the values.
    54  func (r *RawArgs) ensureValues() {
    55  	if r.StorageOpt == nil {
    56  		r.StorageOpt = map[string]string{}
    57  	}
    58  	if r.CapAdd == nil {
    59  		r.CapAdd = []string{}
    60  	}
    61  	if r.CapDrop == nil {
    62  		r.CapDrop = []string{}
    63  	}
    64  	if r.Ulimits == nil {
    65  		r.Ulimits = []*units.Ulimit{}
    66  	}
    67  }
    68  
    69  // loadRawArgs loads RawArgs, if b is given,
    70  // values from b will over write default values.
    71  func loadRawArgs(b []byte) (*RawArgs, error) {
    72  	r := &RawArgs{}
    73  	if len(b) > 0 {
    74  		if err := json.Unmarshal(b, r); err != nil {
    75  			return nil, err
    76  		}
    77  	}
    78  	r.ensureValues()
    79  	return r, nil
    80  }
    81  
    82  // VirtualizationCreate create a workload
    83  func (e *Engine) VirtualizationCreate(ctx context.Context, opts *enginetypes.VirtualizationCreateOptions) (*enginetypes.VirtualizationCreated, error) { //nolint
    84  	logger := log.WithFunc("engine.docker.VirtualizationCreate")
    85  	r := &enginetypes.VirtualizationCreated{}
    86  	var err error
    87  
    88  	// parse engine args to resource options
    89  	resourceOpts := &engine.VirtualizationResource{}
    90  	if err = engine.MakeVirtualizationResource(opts.EngineParams, resourceOpts, func(p resourcetypes.Resources, d *engine.VirtualizationResource) error {
    91  		for _, v := range p {
    92  			if err := mapstructure.Decode(v, d); err != nil {
    93  				return err
    94  			}
    95  		}
    96  		return nil
    97  	}); err != nil {
    98  		logger.Errorf(ctx, err, "failed to parse engine args %+v", opts.EngineParams)
    99  		return r, coretypes.ErrInvalidEngineArgs
   100  	}
   101  
   102  	// memory should more than 4MiB
   103  	if resourceOpts.Memory > 0 && resourceOpts.Memory < minMemory || resourceOpts.Memory < 0 {
   104  		return r, coretypes.ErrInvaildMemory
   105  	}
   106  	// set default log driver if lambda
   107  	if opts.Lambda {
   108  		opts.LogType = "json-file"
   109  	}
   110  
   111  	restartPolicy := ""
   112  	restartRetry := 0
   113  	restartStr := strings.Split(opts.Restart, ":")
   114  	restartPolicy = restartStr[0]
   115  	if r, err := strconv.Atoi(restartStr[len(restartStr)-1]); err == nil {
   116  		restartRetry = r
   117  	}
   118  	// no longer use opts.Network as networkmode
   119  	// always get network name from networks
   120  	// -----------------------------------------
   121  	// network mode 和 networks 互斥
   122  	// 没有 networks 的时候用 networkmode 的值
   123  	// 有 networks 的时候一律用用 networks 的值作为 mode
   124  	var networkMode dockercontainer.NetworkMode
   125  	networks := map[string]string{}
   126  	for name, network := range opts.Networks {
   127  		networkMode = dockercontainer.NetworkMode(name)
   128  		networks[name] = network
   129  		if networkMode.IsHost() {
   130  			networks[name] = ""
   131  		}
   132  	}
   133  	// 如果没有 network 用默认值替换
   134  	if networkMode == "" {
   135  		networkMode = dockercontainer.NetworkMode(e.config.Docker.NetworkMode)
   136  	}
   137  	// log config
   138  	if opts.LogConfig == nil {
   139  		opts.LogConfig = map[string]string{}
   140  	}
   141  	opts.LogConfig["mode"] = "non-blocking"
   142  	opts.LogConfig["max-buffer-size"] = "4m"
   143  	opts.LogConfig["tag"] = fmt.Sprintf("%s {{.ID}}", opts.Name)
   144  	if opts.Debug {
   145  		opts.LogType = e.config.Docker.Log.Type
   146  		for k, v := range e.config.Docker.Log.Config {
   147  			opts.LogConfig[k] = v
   148  		}
   149  	}
   150  	// add node IP
   151  	hostIP := GetIP(ctx, e.client.DaemonHost())
   152  	opts.Env = append(opts.Env, fmt.Sprintf("ERU_NODE_IP=%s", hostIP))
   153  	// 如果有给dns就优先用给定的dns.
   154  	// 没有给出dns的时候, 如果设定是用宿主机IP作为dns, 就会把宿主机IP设置过去.
   155  	// 其他情况就是默认值.
   156  	// 哦对, networkMode如果是host也不给dns.
   157  	if len(opts.DNS) == 0 && e.config.Docker.UseLocalDNS && hostIP != "" {
   158  		opts.DNS = []string{hostIP}
   159  	}
   160  	// mount paths
   161  	binds, volumes := makeMountPaths(ctx, opts, resourceOpts)
   162  	logger.Debugf(ctx, "App %s will bind %+v", opts.Name, binds)
   163  
   164  	config := &dockercontainer.Config{
   165  		Env:             opts.Env,
   166  		Cmd:             dockerslice.StrSlice(opts.Cmd),
   167  		User:            opts.User,
   168  		Image:           opts.Image,
   169  		Volumes:         volumes,
   170  		WorkingDir:      opts.WorkingDir,
   171  		NetworkDisabled: networkMode == "",
   172  		Labels:          opts.Labels,
   173  		OpenStdin:       opts.Stdin,
   174  		Tty:             opts.Stdin,
   175  	}
   176  
   177  	rArgs, err := loadRawArgs(opts.RawArgs)
   178  	if err != nil {
   179  		return r, err
   180  	}
   181  
   182  	resource := makeResourceSetting(resourceOpts.Quota, resourceOpts.Memory, resourceOpts.CPU, resourceOpts.NUMANode, resourceOpts.IOPSOptions, false)
   183  	// set ulimits
   184  	if len(rArgs.Ulimits) == 0 {
   185  		resource.Ulimits = []*units.Ulimit{
   186  			{Name: "nofile", Soft: 65535, Hard: 65535},
   187  		}
   188  	} else {
   189  		resource.Ulimits = rArgs.Ulimits
   190  	}
   191  	if networkMode.IsHost() {
   192  		opts.DNS = []string{}
   193  		opts.Sysctl = map[string]string{}
   194  	}
   195  	if resourceOpts.Storage > 0 {
   196  		volumeTotal := int64(0)
   197  		for _, v := range resourceOpts.Volumes {
   198  			parts := strings.Split(v, ":")
   199  			if len(parts) < 4 {
   200  				continue
   201  			}
   202  			size, err := strconv.ParseInt(parts[3], 10, 64)
   203  			if err != nil {
   204  				return nil, err
   205  			}
   206  			volumeTotal += size
   207  		}
   208  		if resourceOpts.Storage-volumeTotal > 0 {
   209  			rArgs.StorageOpt["size"] = fmt.Sprintf("%+v", resourceOpts.Storage-volumeTotal)
   210  		}
   211  	}
   212  	// 如果有指定用户,用指定用户
   213  	// 没有指定用户,用镜像自己的
   214  	// CapAdd and Privileged
   215  	capAdds := dockerslice.StrSlice(rArgs.CapAdd)
   216  	if opts.Privileged {
   217  		opts.User = root
   218  		capAdds = append(capAdds, "SYS_ADMIN")
   219  	}
   220  	hostConfig := &dockercontainer.HostConfig{
   221  		Binds: binds,
   222  		DNS:   opts.DNS,
   223  		LogConfig: dockercontainer.LogConfig{
   224  			Type:   opts.LogType,
   225  			Config: opts.LogConfig,
   226  		},
   227  		NetworkMode: networkMode,
   228  		RestartPolicy: dockercontainer.RestartPolicy{
   229  			Name:              restartPolicy,
   230  			MaximumRetryCount: restartRetry,
   231  		},
   232  		CapAdd:     capAdds,
   233  		ExtraHosts: opts.Hosts,
   234  		Privileged: opts.Privileged,
   235  		Resources:  resource,
   236  		Sysctls:    opts.Sysctl,
   237  		PidMode:    rArgs.PidMode,
   238  		StorageOpt: rArgs.StorageOpt,
   239  		Runtime:    rArgs.Runtime,
   240  	}
   241  
   242  	if hostConfig.NetworkMode.IsBridge() {
   243  		portMapping := nat.PortMap{}
   244  		exposePorts := nat.PortSet{}
   245  		for _, p := range opts.Publish {
   246  			port, err := nat.NewPort("tcp", p)
   247  			if err != nil {
   248  				return r, err
   249  			}
   250  			exposePorts[port] = struct{}{}
   251  			portMapping[port] = []nat.PortBinding{}
   252  			portMapping[port] = append(portMapping[port], nat.PortBinding{HostPort: p})
   253  		}
   254  		hostConfig.PortBindings = portMapping
   255  		config.ExposedPorts = exposePorts
   256  	}
   257  
   258  	networkConfig := &dockernetwork.NetworkingConfig{
   259  		EndpointsConfig: map[string]*dockernetwork.EndpointSettings{},
   260  	}
   261  	for networkID, ipv4 := range networks {
   262  		if useCNI(opts.Labels) && ipv4 != "" {
   263  			config.Labels["ipv4"] = ipv4
   264  			break
   265  		}
   266  
   267  		endpointSetting, err := e.makeIPV4EndpointSetting(ipv4)
   268  		if err != nil {
   269  			return r, err
   270  		}
   271  		ipForShow := ipv4
   272  		if ipForShow == "" {
   273  			ipForShow = "[AutoAlloc]"
   274  		}
   275  		networkConfig.EndpointsConfig[networkID] = endpointSetting
   276  		logger.Infof(ctx, "Connect to %+v with IP %+v", networkID, ipForShow)
   277  	}
   278  
   279  	workloadCreated, err := e.client.ContainerCreate(ctx, config, hostConfig, networkConfig, nil, opts.Name)
   280  	r.Name = opts.Name
   281  	r.ID = workloadCreated.ID
   282  	return r, err
   283  }
   284  
   285  // VirtualizationCopyTo copy things to virtualization
   286  func (e *Engine) VirtualizationCopyTo(ctx context.Context, ID, target string, content []byte, uid, gid int, mode int64) error {
   287  	return withTarfileDump(ctx, target, content, uid, gid, mode, func(target, tarfile string) error {
   288  		content, err := os.Open(tarfile)
   289  		if err != nil {
   290  			return err
   291  		}
   292  		defer content.Close()
   293  		return e.client.CopyToContainer(ctx, ID, filepath.Dir(target), content, dockertypes.CopyToContainerOptions{AllowOverwriteDirWithFile: true, CopyUIDGID: false})
   294  	})
   295  }
   296  
   297  // VirtualizationCopyChunkTo copy chunk to virtualization
   298  func (e *Engine) VirtualizationCopyChunkTo(ctx context.Context, ID, target string, size int64, content io.Reader, uid, gid int, mode int64) error {
   299  	pr, pw := io.Pipe()
   300  	tw := tar.NewWriter(pw)
   301  	defer tw.Close()
   302  	g, _ := errgroup.WithContext(ctx)
   303  	g.Go(func() error {
   304  		hdr := &tar.Header{
   305  			Name: filepath.Base(target),
   306  			Size: size,
   307  			Mode: mode,
   308  			Uid:  uid,
   309  			Gid:  gid,
   310  		}
   311  		if taskErr := tw.WriteHeader(hdr); taskErr != nil {
   312  			log.Errorf(ctx, taskErr, "[VirtualizationCopyChunkTo] write header to %s err, err: %v", ID, taskErr)
   313  			return taskErr
   314  		}
   315  		for {
   316  			data := make([]byte, types.SendLargeFileChunkSize)
   317  			n, taskErr := content.Read(data)
   318  			if taskErr != nil {
   319  				if taskErr != io.EOF {
   320  					log.Errorf(ctx, taskErr, "[VirtualizationCopyChunkTo] read data from pipe err, err: %v", taskErr)
   321  					return taskErr
   322  				}
   323  				if closeErr := pw.Close(); closeErr != nil {
   324  					log.Errorf(ctx, closeErr, "[VirtualizationCopyChunkTo] close pipe writer, err: %v", closeErr)
   325  					return closeErr
   326  				}
   327  				return nil
   328  			}
   329  			if n < len(data) {
   330  				data = data[:n]
   331  			}
   332  			_, taskErr = tw.Write(data)
   333  			if taskErr != nil {
   334  				log.Debugf(ctx, "[VirtualizationCopyChunkTo] write data into %s err, err: %v", ID, taskErr)
   335  				if closeErr := pw.Close(); closeErr != nil {
   336  					log.Errorf(ctx, closeErr, "[VirtualizationCopyChunkTo] close pipe writer, err: %v", closeErr)
   337  					return closeErr
   338  				}
   339  				return taskErr
   340  			}
   341  		}
   342  	})
   343  	err := e.client.CopyToContainer(ctx, ID, filepath.Dir(target), pr, dockertypes.CopyToContainerOptions{AllowOverwriteDirWithFile: true, CopyUIDGID: false})
   344  	if err != nil {
   345  		log.Errorf(ctx, err, "[VirtualizationCopyChunkTo] copy %s to container %s err, err:%v", target, ID, err)
   346  		return err
   347  	}
   348  	return g.Wait()
   349  }
   350  
   351  // VirtualizationStart start virtualization
   352  func (e *Engine) VirtualizationStart(ctx context.Context, ID string) error {
   353  	return e.client.ContainerStart(ctx, ID, dockertypes.ContainerStartOptions{})
   354  }
   355  
   356  // VirtualizationStop stop virtualization
   357  func (e *Engine) VirtualizationStop(ctx context.Context, ID string, gracefulTimeout time.Duration) error {
   358  	var timeout *int
   359  	if t := int(gracefulTimeout.Seconds()); t > 0 {
   360  		timeout = &t
   361  	}
   362  	return e.client.ContainerStop(ctx, ID, dockercontainer.StopOptions{Timeout: timeout})
   363  }
   364  
   365  // VirtualizationSuspend suspends virtualization
   366  func (e *Engine) VirtualizationSuspend(context.Context, string) error {
   367  	return nil
   368  }
   369  
   370  // VirtualizationResume resumes virtualization
   371  func (e *Engine) VirtualizationResume(context.Context, string) error {
   372  	return nil
   373  }
   374  
   375  func (e *Engine) RawEngine(context.Context, *enginetypes.RawEngineOptions) (res *enginetypes.RawEngineResult, err error) {
   376  	return nil, nil
   377  }
   378  
   379  // VirtualizationRemove remove virtualization
   380  func (e *Engine) VirtualizationRemove(ctx context.Context, ID string, removeVolumes, force bool) error {
   381  	if err := e.client.ContainerRemove(ctx, ID, dockertypes.ContainerRemoveOptions{RemoveVolumes: removeVolumes, Force: force}); err != nil {
   382  		if strings.Contains(err.Error(), "no such") {
   383  			err = types.ErrWorkloadNotExists
   384  		}
   385  		return err
   386  	}
   387  	return nil
   388  }
   389  
   390  // VirtualizationInspect get virtualization info
   391  func (e *Engine) VirtualizationInspect(ctx context.Context, ID string) (*enginetypes.VirtualizationInfo, error) {
   392  	if e.client == nil {
   393  		return nil, coretypes.ErrNilEngine
   394  	}
   395  
   396  	workloadJSON, err := e.client.ContainerInspect(ctx, ID)
   397  	r := &enginetypes.VirtualizationInfo{}
   398  	if err != nil {
   399  		return r, err
   400  	}
   401  	r.ID = workloadJSON.ID
   402  	r.User = workloadJSON.Config.User
   403  	r.Image = workloadJSON.Config.Image
   404  	r.Env = workloadJSON.Config.Env
   405  	r.Labels = workloadJSON.Config.Labels
   406  	r.Running = workloadJSON.State.Running
   407  	r.Networks = map[string]string{}
   408  	for networkName, networkSetting := range workloadJSON.NetworkSettings.Networks {
   409  		ip := networkSetting.IPAddress
   410  		if dockercontainer.NetworkMode(networkName).IsHost() {
   411  			ip = GetIP(ctx, e.client.DaemonHost())
   412  		}
   413  		r.Networks[networkName] = ip
   414  	}
   415  	return r, nil
   416  }
   417  
   418  // VirtualizationLogs show virtualization logs
   419  func (e *Engine) VirtualizationLogs(ctx context.Context, opts *enginetypes.VirtualizationLogStreamOptions) (stdout, stderr io.ReadCloser, err error) {
   420  	logsOpts := dockertypes.ContainerLogsOptions{
   421  		ShowStdout: opts.Stdout,
   422  		ShowStderr: opts.Stderr,
   423  		Tail:       opts.Tail,
   424  		Follow:     opts.Follow,
   425  		Since:      opts.Since,
   426  		Until:      opts.Until,
   427  	}
   428  	resp, err := e.client.ContainerLogs(ctx, opts.ID, logsOpts)
   429  	if err != nil {
   430  		return nil, nil, err
   431  	}
   432  	if !opts.Stderr {
   433  		return io.NopCloser(mergeStream(resp)), nil, nil
   434  	}
   435  	stdout, stderr = e.demultiplexStdStream(ctx, resp)
   436  	return stdout, stderr, nil
   437  }
   438  
   439  // VirtualizationAttach attach to a virtualization
   440  func (e *Engine) VirtualizationAttach(ctx context.Context, ID string, stream, stdin bool) (stdout, stderr io.ReadCloser, _ io.WriteCloser, err error) {
   441  	opts := dockertypes.ContainerAttachOptions{
   442  		Stream: stream,
   443  		Stdin:  stdin,
   444  		Logs:   true,
   445  		Stdout: true,
   446  		Stderr: true,
   447  	}
   448  	resp, err := e.client.ContainerAttach(ctx, ID, opts)
   449  	if err != nil {
   450  		return nil, nil, nil, err
   451  	}
   452  	if stdin {
   453  		return io.NopCloser(resp.Reader), nil, resp.Conn, nil
   454  	}
   455  	stdout, stderr = e.demultiplexStdStream(ctx, resp.Reader)
   456  	return stdout, stderr, resp.Conn, nil
   457  }
   458  
   459  // VirtualizationResize resizes remote terminal
   460  func (e *Engine) VirtualizationResize(ctx context.Context, workloadID string, height, width uint) (err error) {
   461  	opts := dockertypes.ResizeOptions{
   462  		Height: height,
   463  		Width:  width,
   464  	}
   465  
   466  	return e.client.ContainerResize(ctx, workloadID, opts)
   467  }
   468  
   469  // VirtualizationWait wait virtualization exit
   470  func (e *Engine) VirtualizationWait(ctx context.Context, ID, _ string) (*enginetypes.VirtualizationWaitResult, error) {
   471  	waitBody, errorCh := e.client.ContainerWait(ctx, ID, dockercontainer.WaitConditionNotRunning)
   472  	r := &enginetypes.VirtualizationWaitResult{}
   473  	select {
   474  	case b := <-waitBody:
   475  		if b.Error != nil {
   476  			r.Message = b.Error.Message
   477  		}
   478  		r.Code = b.StatusCode
   479  		return r, nil
   480  	case err := <-errorCh:
   481  		r.Message = err.Error()
   482  		r.Code = -1
   483  		return r, err
   484  	}
   485  }
   486  
   487  // VirtualizationUpdateResource update virtualization resource
   488  func (e *Engine) VirtualizationUpdateResource(ctx context.Context, ID string, engineParams resourcetypes.Resources) error {
   489  	logger := log.WithFunc("engine.docker.VirtualizationUpdateResource")
   490  
   491  	// parse engine args to resource options
   492  	resourceOpts := &engine.VirtualizationResource{}
   493  	if err := engine.MakeVirtualizationResource(engineParams, resourceOpts, func(p resourcetypes.Resources, d *engine.VirtualizationResource) error {
   494  		for _, v := range p {
   495  			if err := mapstructure.Decode(v, d); err != nil {
   496  				return err
   497  			}
   498  		}
   499  		return nil
   500  	}); err != nil {
   501  		logger.WithField("ID", ID).Errorf(ctx, err, "failed to parse engine args %+v", engineParams)
   502  		return err
   503  	}
   504  
   505  	if resourceOpts.Memory > 0 && resourceOpts.Memory < minMemory || resourceOpts.Memory < 0 {
   506  		return coretypes.ErrInvaildMemory
   507  	}
   508  	if len(resourceOpts.Volumes) > 0 || resourceOpts.VolumeChanged {
   509  		logger.Warnf(ctx, "docker engine not support rebinding volume resource: %+v", resourceOpts.Volumes)
   510  		return coretypes.ErrInvalidVolumeBind
   511  	}
   512  
   513  	memory := resourceOpts.Memory
   514  	if memory == 0 {
   515  		memory = maxMemory
   516  	}
   517  
   518  	quota := resourceOpts.Quota
   519  	cpuMap := resourceOpts.CPU
   520  	numaNode := resourceOpts.NUMANode
   521  	// unlimited cpu
   522  	if quota == 0 || len(cpuMap) == 0 {
   523  		info, err := e.Info(ctx) // TODO can fixed in docker engine, support empty Cpusetcpus, or use cache to speed up
   524  		if err != nil {
   525  			return err
   526  		}
   527  		cpuMap = map[string]int64{}
   528  		for i := 0; i < info.NCPU; i++ {
   529  			cpuMap[strconv.Itoa(i)] = int64(e.config.Scheduler.ShareBase)
   530  		}
   531  		if quota == 0 {
   532  			quota = -1
   533  			numaNode = ""
   534  		}
   535  	}
   536  
   537  	newResource := makeResourceSetting(quota, memory, cpuMap, numaNode, resourceOpts.IOPSOptions, resourceOpts.Remap)
   538  	updateConfig := dockercontainer.UpdateConfig{Resources: newResource}
   539  	_, err := e.client.ContainerUpdate(ctx, ID, updateConfig)
   540  	return err
   541  }
   542  
   543  // VirtualizationCopyFrom copy thing from a virtualization
   544  func (e *Engine) VirtualizationCopyFrom(ctx context.Context, ID, path string) (content []byte, uid, gid int, mode int64, err error) {
   545  	resp, _, err := e.client.CopyFromContainer(ctx, ID, path)
   546  	if err != nil {
   547  		return
   548  	}
   549  	tarReader := tar.NewReader(resp)
   550  	header, err := tarReader.Next()
   551  	if err != nil {
   552  		return
   553  	}
   554  	content, err = io.ReadAll(tarReader)
   555  	return content, header.Uid, header.Gid, header.Mode, err
   556  }