github.com/docker/engine@v22.0.0-20211208180946-d456264580cf+incompatible/daemon/nvidia_linux.go (about) 1 package daemon 2 3 import ( 4 "os" 5 "os/exec" 6 "strconv" 7 "strings" 8 9 "github.com/containerd/containerd/contrib/nvidia" 10 "github.com/docker/docker/pkg/capabilities" 11 specs "github.com/opencontainers/runtime-spec/specs-go" 12 "github.com/pkg/errors" 13 ) 14 15 // TODO: nvidia should not be hard-coded, and should be a device plugin instead on the daemon object. 16 // TODO: add list of device capabilities in daemon/node info 17 18 var errConflictCountDeviceIDs = errors.New("cannot set both Count and DeviceIDs on device request") 19 20 const nvidiaHook = "nvidia-container-runtime-hook" 21 22 // These are NVIDIA-specific capabilities stolen from github.com/containerd/containerd/contrib/nvidia.allCaps 23 var allNvidiaCaps = map[nvidia.Capability]struct{}{ 24 nvidia.Compute: {}, 25 nvidia.Compat32: {}, 26 nvidia.Graphics: {}, 27 nvidia.Utility: {}, 28 nvidia.Video: {}, 29 nvidia.Display: {}, 30 } 31 32 func init() { 33 if _, err := exec.LookPath(nvidiaHook); err != nil { 34 // do not register Nvidia driver if helper binary is not present. 
35 return 36 } 37 capset := capabilities.Set{"gpu": struct{}{}, "nvidia": struct{}{}} 38 nvidiaDriver := &deviceDriver{ 39 capset: capset, 40 updateSpec: setNvidiaGPUs, 41 } 42 for c := range allNvidiaCaps { 43 nvidiaDriver.capset[string(c)] = struct{}{} 44 } 45 registerDeviceDriver("nvidia", nvidiaDriver) 46 } 47 48 func setNvidiaGPUs(s *specs.Spec, dev *deviceInstance) error { 49 req := dev.req 50 if req.Count != 0 && len(req.DeviceIDs) > 0 { 51 return errConflictCountDeviceIDs 52 } 53 54 if len(req.DeviceIDs) > 0 { 55 s.Process.Env = append(s.Process.Env, "NVIDIA_VISIBLE_DEVICES="+strings.Join(req.DeviceIDs, ",")) 56 } else if req.Count > 0 { 57 s.Process.Env = append(s.Process.Env, "NVIDIA_VISIBLE_DEVICES="+countToDevices(req.Count)) 58 } else if req.Count < 0 { 59 s.Process.Env = append(s.Process.Env, "NVIDIA_VISIBLE_DEVICES=all") 60 } 61 62 var nvidiaCaps []string 63 // req.Capabilities contains device capabilities, some but not all are NVIDIA driver capabilities. 64 for _, c := range dev.selectedCaps { 65 nvcap := nvidia.Capability(c) 66 if _, isNvidiaCap := allNvidiaCaps[nvcap]; isNvidiaCap { 67 nvidiaCaps = append(nvidiaCaps, c) 68 continue 69 } 70 // TODO: nvidia.WithRequiredCUDAVersion 71 // for now we let the prestart hook verify cuda versions but errors are not pretty. 72 } 73 74 if nvidiaCaps != nil { 75 s.Process.Env = append(s.Process.Env, "NVIDIA_DRIVER_CAPABILITIES="+strings.Join(nvidiaCaps, ",")) 76 } 77 78 path, err := exec.LookPath(nvidiaHook) 79 if err != nil { 80 return err 81 } 82 83 if s.Hooks == nil { 84 s.Hooks = &specs.Hooks{} 85 } 86 s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{ 87 Path: path, 88 Args: []string{ 89 nvidiaHook, 90 "prestart", 91 }, 92 Env: os.Environ(), 93 }) 94 95 return nil 96 } 97 98 // countToDevices returns the list 0, 1, ... count-1 of deviceIDs. 
func countToDevices(count int) string {
	// A non-positive count names no devices. Guarding here also avoids the
	// runtime panic that make raises for a negative length.
	if count <= 0 {
		return ""
	}

	// Device IDs are the decimal indices 0..count-1, joined with commas.
	devices := make([]string, count)
	for i := range devices {
		devices[i] = strconv.Itoa(i)
	}
	return strings.Join(devices, ",")
}