
     1  package docker
     3  import (
     4  	"archive/tar"
     5  	"context"
     6  	"encoding/json"
     7  	"fmt"
     8  	"io"
     9  	"math"
    10  	"os"
    11  	"path/filepath"
    12  	"strconv"
    13  	"strings"
    14  	"time"
    16  	dockertypes ""
    17  	dockercontainer ""
    18  	dockernetwork ""
    19  	dockerslice ""
    20  	""
    21  	""
    22  	""
    23  	""
    25  	""
    26  	enginetypes ""
    27  	""
    28  	resourcetypes ""
    29  	""
    30  	coretypes ""
    31  )
// Limits and defaults used by the docker engine implementation.
const (
	// minMemory is the smallest non-zero memory limit accepted when creating
	// or updating a workload (4 MiB).
	minMemory       = units.MiB * 4
	// maxMemory is substituted for a memory value of 0 when updating
	// resources.
	maxMemory       = math.MaxInt64
	// defaultCPUShare is not referenced in the visible part of this file;
	// presumably the default CPU shares value — confirm against its users.
	defaultCPUShare = 1024
	// root is the user name assigned to privileged workloads.
	root            = "root"
)
// RawArgs carries low-level container settings that are decoded from the
// JSON payload in VirtualizationCreateOptions.RawArgs (see loadRawArgs).
type RawArgs struct {
	// PidMode is copied to HostConfig.PidMode.
	// NOTE(review): the JSON key is "pid_mod" (not "pid_mode"); it is part of
	// the wire format, so confirm with producers before renaming.
	PidMode    dockercontainer.PidMode `json:"pid_mod"`
	// StorageOpt is copied to HostConfig.StorageOpt; VirtualizationCreate may
	// add a "size" entry derived from the storage resource.
	StorageOpt map[string]string       `json:"storage_opt"`
	// CapAdd lists capabilities added to the container.
	CapAdd     []string                `json:"cap_add"`
	// CapDrop is presumably the list of capabilities to drop — confirm
	// against how the host config consumes it.
	CapDrop    []string                `json:"cap_drop"`
	// Ulimits overrides the default ulimits when non-empty.
	Ulimits    []*units.Ulimit         `json:"ulimits"`
	// Runtime is copied to HostConfig.Runtime.
	Runtime    string                  `json:"runtime"`
}
    50  // ensureValues checks if value is nil,
    51  // if so, initiate the value.
    52  // Though a nil slice won't panic in this situation,
    53  // still we initiate the values.
    54  func (r *RawArgs) ensureValues() {
    55  	if r.StorageOpt == nil {
    56  		r.StorageOpt = map[string]string{}
    57  	}
    58  	if r.CapAdd == nil {
    59  		r.CapAdd = []string{}
    60  	}
    61  	if r.CapDrop == nil {
    62  		r.CapDrop = []string{}
    63  	}
    64  	if r.Ulimits == nil {
    65  		r.Ulimits = []*units.Ulimit{}
    66  	}
    67  }
    69  // loadRawArgs loads RawArgs, if b is given,
    70  // values from b will over write default values.
    71  func loadRawArgs(b []byte) (*RawArgs, error) {
    72  	r := &RawArgs{}
    73  	if len(b) > 0 {
    74  		if err := json.Unmarshal(b, r); err != nil {
    75  			return nil, err
    76  		}
    77  	}
    78  	r.ensureValues()
    79  	return r, nil
    80  }
    82  // VirtualizationCreate create a workload
    83  func (e *Engine) VirtualizationCreate(ctx context.Context, opts *enginetypes.VirtualizationCreateOptions) (*enginetypes.VirtualizationCreated, error) { //nolint
    84  	logger := log.WithFunc("engine.docker.VirtualizationCreate")
    85  	r := &enginetypes.VirtualizationCreated{}
    86  	var err error
    88  	// parse engine args to resource options
    89  	resourceOpts := &engine.VirtualizationResource{}
    90  	if err = engine.MakeVirtualizationResource(opts.EngineParams, resourceOpts, func(p resourcetypes.Resources, d *engine.VirtualizationResource) error {
    91  		for _, v := range p {
    92  			if err := mapstructure.Decode(v, d); err != nil {
    93  				return err
    94  			}
    95  		}
    96  		return nil
    97  	}); err != nil {
    98  		logger.Errorf(ctx, err, "failed to parse engine args %+v", opts.EngineParams)
    99  		return r, coretypes.ErrInvalidEngineArgs
   100  	}
   102  	// memory should more than 4MiB
   103  	if resourceOpts.Memory > 0 && resourceOpts.Memory < minMemory || resourceOpts.Memory < 0 {
   104  		return r, coretypes.ErrInvaildMemory
   105  	}
   106  	// set default log driver if lambda
   107  	if opts.Lambda {
   108  		opts.LogType = "json-file"
   109  	}
   111  	restartPolicy := ""
   112  	restartRetry := 0
   113  	restartStr := strings.Split(opts.Restart, ":")
   114  	restartPolicy = restartStr[0]
   115  	if r, err := strconv.Atoi(restartStr[len(restartStr)-1]); err == nil {
   116  		restartRetry = r
   117  	}
   118  	// no longer use opts.Network as networkmode
   119  	// always get network name from networks
   120  	// -----------------------------------------
   121  	// network mode 和 networks 互斥
   122  	// 没有 networks 的时候用 networkmode 的值
   123  	// 有 networks 的时候一律用用 networks 的值作为 mode
   124  	var networkMode dockercontainer.NetworkMode
   125  	networks := map[string]string{}
   126  	for name, network := range opts.Networks {
   127  		networkMode = dockercontainer.NetworkMode(name)
   128  		networks[name] = network
   129  		if networkMode.IsHost() {
   130  			networks[name] = ""
   131  		}
   132  	}
   133  	// 如果没有 network 用默认值替换
   134  	if networkMode == "" {
   135  		networkMode = dockercontainer.NetworkMode(e.config.Docker.NetworkMode)
   136  	}
   137  	// log config
   138  	if opts.LogConfig == nil {
   139  		opts.LogConfig = map[string]string{}
   140  	}
   141  	opts.LogConfig["mode"] = "non-blocking"
   142  	opts.LogConfig["max-buffer-size"] = "4m"
   143  	opts.LogConfig["tag"] = fmt.Sprintf("%s {{.ID}}", opts.Name)
   144  	if opts.Debug {
   145  		opts.LogType = e.config.Docker.Log.Type
   146  		for k, v := range e.config.Docker.Log.Config {
   147  			opts.LogConfig[k] = v
   148  		}
   149  	}
   150  	// add node IP
   151  	hostIP := GetIP(ctx, e.client.DaemonHost())
   152  	opts.Env = append(opts.Env, fmt.Sprintf("ERU_NODE_IP=%s", hostIP))
   153  	// 如果有给dns就优先用给定的dns.
   154  	// 没有给出dns的时候, 如果设定是用宿主机IP作为dns, 就会把宿主机IP设置过去.
   155  	// 其他情况就是默认值.
   156  	// 哦对, networkMode如果是host也不给dns.
   157  	if len(opts.DNS) == 0 && e.config.Docker.UseLocalDNS && hostIP != "" {
   158  		opts.DNS = []string{hostIP}
   159  	}
   160  	// mount paths
   161  	binds, volumes := makeMountPaths(ctx, opts, resourceOpts)
   162  	logger.Debugf(ctx, "App %s will bind %+v", opts.Name, binds)
   164  	config := &dockercontainer.Config{
   165  		Env:             opts.Env,
   166  		Cmd:             dockerslice.StrSlice(opts.Cmd),
   167  		User:            opts.User,
   168  		Image:           opts.Image,
   169  		Volumes:         volumes,
   170  		WorkingDir:      opts.WorkingDir,
   171  		NetworkDisabled: networkMode == "",
   172  		Labels:          opts.Labels,
   173  		OpenStdin:       opts.Stdin,
   174  		Tty:             opts.Stdin,
   175  	}
   177  	rArgs, err := loadRawArgs(opts.RawArgs)
   178  	if err != nil {
   179  		return r, err
   180  	}
   182  	resource := makeResourceSetting(resourceOpts.Quota, resourceOpts.Memory, resourceOpts.CPU, resourceOpts.NUMANode, resourceOpts.IOPSOptions, false)
   183  	// set ulimits
   184  	if len(rArgs.Ulimits) == 0 {
   185  		resource.Ulimits = []*units.Ulimit{
   186  			{Name: "nofile", Soft: 65535, Hard: 65535},
   187  		}
   188  	} else {
   189  		resource.Ulimits = rArgs.Ulimits
   190  	}
   191  	if networkMode.IsHost() {
   192  		opts.DNS = []string{}
   193  		opts.Sysctl = map[string]string{}
   194  	}
   195  	if resourceOpts.Storage > 0 {
   196  		volumeTotal := int64(0)
   197  		for _, v := range resourceOpts.Volumes {
   198  			parts := strings.Split(v, ":")
   199  			if len(parts) < 4 {
   200  				continue
   201  			}
   202  			size, err := strconv.ParseInt(parts[3], 10, 64)
   203  			if err != nil {
   204  				return nil, err
   205  			}
   206  			volumeTotal += size
   207  		}
   208  		if resourceOpts.Storage-volumeTotal > 0 {
   209  			rArgs.StorageOpt["size"] = fmt.Sprintf("%+v", resourceOpts.Storage-volumeTotal)
   210  		}
   211  	}
   212  	// 如果有指定用户,用指定用户
   213  	// 没有指定用户,用镜像自己的
   214  	// CapAdd and Privileged
   215  	capAdds := dockerslice.StrSlice(rArgs.CapAdd)
   216  	if opts.Privileged {
   217  		opts.User = root
   218  		capAdds = append(capAdds, "SYS_ADMIN")
   219  	}
   220  	hostConfig := &dockercontainer.HostConfig{
   221  		Binds: binds,
   222  		DNS:   opts.DNS,
   223  		LogConfig: dockercontainer.LogConfig{
   224  			Type:   opts.LogType,
   225  			Config: opts.LogConfig,
   226  		},
   227  		NetworkMode: networkMode,
   228  		RestartPolicy: dockercontainer.RestartPolicy{
   229  			Name:              restartPolicy,
   230  			MaximumRetryCount: restartRetry,
   231  		},
   232  		CapAdd:     capAdds,
   233  		ExtraHosts: opts.Hosts,
   234  		Privileged: opts.Privileged,
   235  		Resources:  resource,
   236  		Sysctls:    opts.Sysctl,
   237  		PidMode:    rArgs.PidMode,
   238  		StorageOpt: rArgs.StorageOpt,
   239  		Runtime:    rArgs.Runtime,
   240  	}
   242  	if hostConfig.NetworkMode.IsBridge() {
   243  		portMapping := nat.PortMap{}
   244  		exposePorts := nat.PortSet{}
   245  		for _, p := range opts.Publish {
   246  			port, err := nat.NewPort("tcp", p)
   247  			if err != nil {
   248  				return r, err
   249  			}
   250  			exposePorts[port] = struct{}{}
   251  			portMapping[port] = []nat.PortBinding{}
   252  			portMapping[port] = append(portMapping[port], nat.PortBinding{HostPort: p})
   253  		}
   254  		hostConfig.PortBindings = portMapping
   255  		config.ExposedPorts = exposePorts
   256  	}
   258  	networkConfig := &dockernetwork.NetworkingConfig{
   259  		EndpointsConfig: map[string]*dockernetwork.EndpointSettings{},
   260  	}
   261  	for networkID, ipv4 := range networks {
   262  		if useCNI(opts.Labels) && ipv4 != "" {
   263  			config.Labels["ipv4"] = ipv4
   264  			break
   265  		}
   267  		endpointSetting, err := e.makeIPV4EndpointSetting(ipv4)
   268  		if err != nil {
   269  			return r, err
   270  		}
   271  		ipForShow := ipv4
   272  		if ipForShow == "" {
   273  			ipForShow = "[AutoAlloc]"
   274  		}
   275  		networkConfig.EndpointsConfig[networkID] = endpointSetting
   276  		logger.Infof(ctx, "Connect to %+v with IP %+v", networkID, ipForShow)
   277  	}
   279  	workloadCreated, err := e.client.ContainerCreate(ctx, config, hostConfig, networkConfig, nil, opts.Name)
   280  	r.Name = opts.Name
   281  	r.ID = workloadCreated.ID
   282  	return r, err
   283  }
   285  // VirtualizationCopyTo copy things to virtualization
   286  func (e *Engine) VirtualizationCopyTo(ctx context.Context, ID, target string, content []byte, uid, gid int, mode int64) error {
   287  	return withTarfileDump(ctx, target, content, uid, gid, mode, func(target, tarfile string) error {
   288  		content, err := os.Open(tarfile)
   289  		if err != nil {
   290  			return err
   291  		}
   292  		defer content.Close()
   293  		return e.client.CopyToContainer(ctx, ID, filepath.Dir(target), content, dockertypes.CopyToContainerOptions{AllowOverwriteDirWithFile: true, CopyUIDGID: false})
   294  	})
   295  }
   297  // VirtualizationCopyChunkTo copy chunk to virtualization
   298  func (e *Engine) VirtualizationCopyChunkTo(ctx context.Context, ID, target string, size int64, content io.Reader, uid, gid int, mode int64) error {
   299  	pr, pw := io.Pipe()
   300  	tw := tar.NewWriter(pw)
   301  	defer tw.Close()
   302  	g, _ := errgroup.WithContext(ctx)
   303  	g.Go(func() error {
   304  		hdr := &tar.Header{
   305  			Name: filepath.Base(target),
   306  			Size: size,
   307  			Mode: mode,
   308  			Uid:  uid,
   309  			Gid:  gid,
   310  		}
   311  		if taskErr := tw.WriteHeader(hdr); taskErr != nil {
   312  			log.Errorf(ctx, taskErr, "[VirtualizationCopyChunkTo] write header to %s err, err: %v", ID, taskErr)
   313  			return taskErr
   314  		}
   315  		for {
   316  			data := make([]byte, types.SendLargeFileChunkSize)
   317  			n, taskErr := content.Read(data)
   318  			if taskErr != nil {
   319  				if taskErr != io.EOF {
   320  					log.Errorf(ctx, taskErr, "[VirtualizationCopyChunkTo] read data from pipe err, err: %v", taskErr)
   321  					return taskErr
   322  				}
   323  				if closeErr := pw.Close(); closeErr != nil {
   324  					log.Errorf(ctx, closeErr, "[VirtualizationCopyChunkTo] close pipe writer, err: %v", closeErr)
   325  					return closeErr
   326  				}
   327  				return nil
   328  			}
   329  			if n < len(data) {
   330  				data = data[:n]
   331  			}
   332  			_, taskErr = tw.Write(data)
   333  			if taskErr != nil {
   334  				log.Debugf(ctx, "[VirtualizationCopyChunkTo] write data into %s err, err: %v", ID, taskErr)
   335  				if closeErr := pw.Close(); closeErr != nil {
   336  					log.Errorf(ctx, closeErr, "[VirtualizationCopyChunkTo] close pipe writer, err: %v", closeErr)
   337  					return closeErr
   338  				}
   339  				return taskErr
   340  			}
   341  		}
   342  	})
   343  	err := e.client.CopyToContainer(ctx, ID, filepath.Dir(target), pr, dockertypes.CopyToContainerOptions{AllowOverwriteDirWithFile: true, CopyUIDGID: false})
   344  	if err != nil {
   345  		log.Errorf(ctx, err, "[VirtualizationCopyChunkTo] copy %s to container %s err, err:%v", target, ID, err)
   346  		return err
   347  	}
   348  	return g.Wait()
   349  }
   351  // VirtualizationStart start virtualization
   352  func (e *Engine) VirtualizationStart(ctx context.Context, ID string) error {
   353  	return e.client.ContainerStart(ctx, ID, dockertypes.ContainerStartOptions{})
   354  }
   356  // VirtualizationStop stop virtualization
   357  func (e *Engine) VirtualizationStop(ctx context.Context, ID string, gracefulTimeout time.Duration) error {
   358  	var timeout *int
   359  	if t := int(gracefulTimeout.Seconds()); t > 0 {
   360  		timeout = &t
   361  	}
   362  	return e.client.ContainerStop(ctx, ID, dockercontainer.StopOptions{Timeout: timeout})
   363  }
// VirtualizationSuspend suspends a virtualization.
// The docker engine implementation is a no-op: both arguments are ignored
// and nil is always returned.
func (e *Engine) VirtualizationSuspend(context.Context, string) error {
	return nil
}
// VirtualizationResume resumes a virtualization.
// The docker engine implementation is a no-op: both arguments are ignored
// and nil is always returned.
func (e *Engine) VirtualizationResume(context.Context, string) error {
	return nil
}
// RawEngine is a no-op for the docker engine: the arguments are ignored and
// a nil result is returned together with a nil error, so callers must not
// dereference the result.
func (e *Engine) RawEngine(context.Context, *enginetypes.RawEngineOptions) (res *enginetypes.RawEngineResult, err error) {
	return nil, nil
}
   379  // VirtualizationRemove remove virtualization
   380  func (e *Engine) VirtualizationRemove(ctx context.Context, ID string, removeVolumes, force bool) error {
   381  	if err := e.client.ContainerRemove(ctx, ID, dockertypes.ContainerRemoveOptions{RemoveVolumes: removeVolumes, Force: force}); err != nil {
   382  		if strings.Contains(err.Error(), "no such") {
   383  			err = types.ErrWorkloadNotExists
   384  		}
   385  		return err
   386  	}
   387  	return nil
   388  }
   390  // VirtualizationInspect get virtualization info
   391  func (e *Engine) VirtualizationInspect(ctx context.Context, ID string) (*enginetypes.VirtualizationInfo, error) {
   392  	if e.client == nil {
   393  		return nil, coretypes.ErrNilEngine
   394  	}
   396  	workloadJSON, err := e.client.ContainerInspect(ctx, ID)
   397  	r := &enginetypes.VirtualizationInfo{}
   398  	if err != nil {
   399  		return r, err
   400  	}
   401  	r.ID = workloadJSON.ID
   402  	r.User = workloadJSON.Config.User
   403  	r.Image = workloadJSON.Config.Image
   404  	r.Env = workloadJSON.Config.Env
   405  	r.Labels = workloadJSON.Config.Labels
   406  	r.Running = workloadJSON.State.Running
   407  	r.Networks = map[string]string{}
   408  	for networkName, networkSetting := range workloadJSON.NetworkSettings.Networks {
   409  		ip := networkSetting.IPAddress
   410  		if dockercontainer.NetworkMode(networkName).IsHost() {
   411  			ip = GetIP(ctx, e.client.DaemonHost())
   412  		}
   413  		r.Networks[networkName] = ip
   414  	}
   415  	return r, nil
   416  }
   418  // VirtualizationLogs show virtualization logs
   419  func (e *Engine) VirtualizationLogs(ctx context.Context, opts *enginetypes.VirtualizationLogStreamOptions) (stdout, stderr io.ReadCloser, err error) {
   420  	logsOpts := dockertypes.ContainerLogsOptions{
   421  		ShowStdout: opts.Stdout,
   422  		ShowStderr: opts.Stderr,
   423  		Tail:       opts.Tail,
   424  		Follow:     opts.Follow,
   425  		Since:      opts.Since,
   426  		Until:      opts.Until,
   427  	}
   428  	resp, err := e.client.ContainerLogs(ctx, opts.ID, logsOpts)
   429  	if err != nil {
   430  		return nil, nil, err
   431  	}
   432  	if !opts.Stderr {
   433  		return io.NopCloser(mergeStream(resp)), nil, nil
   434  	}
   435  	stdout, stderr = e.demultiplexStdStream(ctx, resp)
   436  	return stdout, stderr, nil
   437  }
   439  // VirtualizationAttach attach to a virtualization
   440  func (e *Engine) VirtualizationAttach(ctx context.Context, ID string, stream, stdin bool) (stdout, stderr io.ReadCloser, _ io.WriteCloser, err error) {
   441  	opts := dockertypes.ContainerAttachOptions{
   442  		Stream: stream,
   443  		Stdin:  stdin,
   444  		Logs:   true,
   445  		Stdout: true,
   446  		Stderr: true,
   447  	}
   448  	resp, err := e.client.ContainerAttach(ctx, ID, opts)
   449  	if err != nil {
   450  		return nil, nil, nil, err
   451  	}
   452  	if stdin {
   453  		return io.NopCloser(resp.Reader), nil, resp.Conn, nil
   454  	}
   455  	stdout, stderr = e.demultiplexStdStream(ctx, resp.Reader)
   456  	return stdout, stderr, resp.Conn, nil
   457  }
   459  // VirtualizationResize resizes remote terminal
   460  func (e *Engine) VirtualizationResize(ctx context.Context, workloadID string, height, width uint) (err error) {
   461  	opts := dockertypes.ResizeOptions{
   462  		Height: height,
   463  		Width:  width,
   464  	}
   466  	return e.client.ContainerResize(ctx, workloadID, opts)
   467  }
   469  // VirtualizationWait wait virtualization exit
   470  func (e *Engine) VirtualizationWait(ctx context.Context, ID, _ string) (*enginetypes.VirtualizationWaitResult, error) {
   471  	waitBody, errorCh := e.client.ContainerWait(ctx, ID, dockercontainer.WaitConditionNotRunning)
   472  	r := &enginetypes.VirtualizationWaitResult{}
   473  	select {
   474  	case b := <-waitBody:
   475  		if b.Error != nil {
   476  			r.Message = b.Error.Message
   477  		}
   478  		r.Code = b.StatusCode
   479  		return r, nil
   480  	case err := <-errorCh:
   481  		r.Message = err.Error()
   482  		r.Code = -1
   483  		return r, err
   484  	}
   485  }
   487  // VirtualizationUpdateResource update virtualization resource
   488  func (e *Engine) VirtualizationUpdateResource(ctx context.Context, ID string, engineParams resourcetypes.Resources) error {
   489  	logger := log.WithFunc("engine.docker.VirtualizationUpdateResource")
   491  	// parse engine args to resource options
   492  	resourceOpts := &engine.VirtualizationResource{}
   493  	if err := engine.MakeVirtualizationResource(engineParams, resourceOpts, func(p resourcetypes.Resources, d *engine.VirtualizationResource) error {
   494  		for _, v := range p {
   495  			if err := mapstructure.Decode(v, d); err != nil {
   496  				return err
   497  			}
   498  		}
   499  		return nil
   500  	}); err != nil {
   501  		logger.WithField("ID", ID).Errorf(ctx, err, "failed to parse engine args %+v", engineParams)
   502  		return err
   503  	}
   505  	if resourceOpts.Memory > 0 && resourceOpts.Memory < minMemory || resourceOpts.Memory < 0 {
   506  		return coretypes.ErrInvaildMemory
   507  	}
   508  	if len(resourceOpts.Volumes) > 0 || resourceOpts.VolumeChanged {
   509  		logger.Warnf(ctx, "docker engine not support rebinding volume resource: %+v", resourceOpts.Volumes)
   510  		return coretypes.ErrInvalidVolumeBind
   511  	}
   513  	memory := resourceOpts.Memory
   514  	if memory == 0 {
   515  		memory = maxMemory
   516  	}
   518  	quota := resourceOpts.Quota
   519  	cpuMap := resourceOpts.CPU
   520  	numaNode := resourceOpts.NUMANode
   521  	// unlimited cpu
   522  	if quota == 0 || len(cpuMap) == 0 {
   523  		info, err := e.Info(ctx) // TODO can fixed in docker engine, support empty Cpusetcpus, or use cache to speed up
   524  		if err != nil {
   525  			return err
   526  		}
   527  		cpuMap = map[string]int64{}
   528  		for i := 0; i < info.NCPU; i++ {
   529  			cpuMap[strconv.Itoa(i)] = int64(e.config.Scheduler.ShareBase)
   530  		}
   531  		if quota == 0 {
   532  			quota = -1
   533  			numaNode = ""
   534  		}
   535  	}
   537  	newResource := makeResourceSetting(quota, memory, cpuMap, numaNode, resourceOpts.IOPSOptions, resourceOpts.Remap)
   538  	updateConfig := dockercontainer.UpdateConfig{Resources: newResource}
   539  	_, err := e.client.ContainerUpdate(ctx, ID, updateConfig)
   540  	return err
   541  }
   543  // VirtualizationCopyFrom copy thing from a virtualization
   544  func (e *Engine) VirtualizationCopyFrom(ctx context.Context, ID, path string) (content []byte, uid, gid int, mode int64, err error) {
   545  	resp, _, err := e.client.CopyFromContainer(ctx, ID, path)
   546  	if err != nil {
   547  		return
   548  	}
   549  	tarReader := tar.NewReader(resp)
   550  	header, err := tarReader.Next()
   551  	if err != nil {
   552  		return
   553  	}
   554  	content, err = io.ReadAll(tarReader)
   555  	return content, header.Uid, header.Gid, header.Mode, err
   556  }