github.com/docker/docker@v299999999.0.0-20200612211812-aaf470eca7b5+incompatible/daemon/nvidia_linux.go (about)

     1  package daemon
     2  
     3  import (
     4  	"os"
     5  	"os/exec"
     6  	"strconv"
     7  	"strings"
     8  
     9  	"github.com/containerd/containerd/contrib/nvidia"
    10  	"github.com/docker/docker/pkg/capabilities"
    11  	specs "github.com/opencontainers/runtime-spec/specs-go"
    12  	"github.com/pkg/errors"
    13  )
    14  
    15  // TODO: nvidia should not be hard-coded, and should be a device plugin instead on the daemon object.
    16  // TODO: add list of device capabilities in daemon/node info
    17  
    18  var errConflictCountDeviceIDs = errors.New("cannot set both Count and DeviceIDs on device request")
    19  
    20  const nvidiaHook = "nvidia-container-runtime-hook"
    21  
    22  // These are NVIDIA-specific capabilities stolen from github.com/containerd/containerd/contrib/nvidia.allCaps
    23  var allNvidiaCaps = map[nvidia.Capability]struct{}{
    24  	nvidia.Compute:  {},
    25  	nvidia.Compat32: {},
    26  	nvidia.Graphics: {},
    27  	nvidia.Utility:  {},
    28  	nvidia.Video:    {},
    29  	nvidia.Display:  {},
    30  }
    31  
    32  func init() {
    33  	if _, err := exec.LookPath(nvidiaHook); err != nil {
    34  		// do not register Nvidia driver if helper binary is not present.
    35  		return
    36  	}
    37  	capset := capabilities.Set{"gpu": struct{}{}, "nvidia": struct{}{}}
    38  	nvidiaDriver := &deviceDriver{
    39  		capset:     capset,
    40  		updateSpec: setNvidiaGPUs,
    41  	}
    42  	for c := range allNvidiaCaps {
    43  		nvidiaDriver.capset[string(c)] = struct{}{}
    44  	}
    45  	registerDeviceDriver("nvidia", nvidiaDriver)
    46  }
    47  
    48  func setNvidiaGPUs(s *specs.Spec, dev *deviceInstance) error {
    49  	req := dev.req
    50  	if req.Count != 0 && len(req.DeviceIDs) > 0 {
    51  		return errConflictCountDeviceIDs
    52  	}
    53  
    54  	if len(req.DeviceIDs) > 0 {
    55  		s.Process.Env = append(s.Process.Env, "NVIDIA_VISIBLE_DEVICES="+strings.Join(req.DeviceIDs, ","))
    56  	} else if req.Count > 0 {
    57  		s.Process.Env = append(s.Process.Env, "NVIDIA_VISIBLE_DEVICES="+countToDevices(req.Count))
    58  	} else if req.Count < 0 {
    59  		s.Process.Env = append(s.Process.Env, "NVIDIA_VISIBLE_DEVICES=all")
    60  	}
    61  
    62  	var nvidiaCaps []string
    63  	// req.Capabilities contains device capabilities, some but not all are NVIDIA driver capabilities.
    64  	for _, c := range dev.selectedCaps {
    65  		nvcap := nvidia.Capability(c)
    66  		if _, isNvidiaCap := allNvidiaCaps[nvcap]; isNvidiaCap {
    67  			nvidiaCaps = append(nvidiaCaps, c)
    68  			continue
    69  		}
    70  		// TODO: nvidia.WithRequiredCUDAVersion
    71  		// for now we let the prestart hook verify cuda versions but errors are not pretty.
    72  	}
    73  
    74  	if nvidiaCaps != nil {
    75  		s.Process.Env = append(s.Process.Env, "NVIDIA_DRIVER_CAPABILITIES="+strings.Join(nvidiaCaps, ","))
    76  	}
    77  
    78  	path, err := exec.LookPath(nvidiaHook)
    79  	if err != nil {
    80  		return err
    81  	}
    82  
    83  	if s.Hooks == nil {
    84  		s.Hooks = &specs.Hooks{}
    85  	}
    86  	s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
    87  		Path: path,
    88  		Args: []string{
    89  			nvidiaHook,
    90  			"prestart",
    91  		},
    92  		Env: os.Environ(),
    93  	})
    94  
    95  	return nil
    96  }
    97  
    98  // countToDevices returns the list 0, 1, ... count-1 of deviceIDs.
    99  func countToDevices(count int) string {
   100  	devices := make([]string, count)
   101  	for i := range devices {
   102  		devices[i] = strconv.Itoa(i)
   103  	}
   104  	return strings.Join(devices, ",")
   105  }