/*
   Copyright The containerd Authors.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/

// Package nvidia provides OCI spec options that add NVIDIA GPU support to a
// container by registering a prestart hook that runs the nvidia-container-cli
// helper.
package nvidia

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"strconv"
	"strings"

	"github.com/containerd/containerd/containers"
	"github.com/containerd/containerd/oci"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// NvidiaCLI is the name of the Nvidia helper binary. WithGPUs resolves it to
// a full path with exec.LookPath, so it must be discoverable on PATH.
const NvidiaCLI = "nvidia-container-cli"

// Capability specifies capabilities for the gpu inside the container.
// Each selected value is passed to the helper's configure command as a
// "--<value>" flag.
// Detailed explanation of options can be found:
// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
type Capability string

const (
	// Compute capability (flag "--compute").
	Compute Capability = "compute"
	// Compat32 capability (flag "--compat32").
	Compat32 Capability = "compat32"
	// Graphics capability (flag "--graphics").
	Graphics Capability = "graphics"
	// Utility capability (flag "--utility").
	Utility Capability = "utility"
	// Video capability (flag "--video").
	Video Capability = "video"
	// Display capability (flag "--display").
	Display Capability = "display"
)

// AllCaps returns the complete list of supported Nvidia capabilities.
56 func AllCaps() []Capability { 57 return []Capability{ 58 Compute, 59 Compat32, 60 Graphics, 61 Utility, 62 Video, 63 Display, 64 } 65 } 66 67 // WithGPUs adds NVIDIA gpu support to a container 68 func WithGPUs(opts ...Opts) oci.SpecOpts { 69 return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error { 70 c := &config{} 71 for _, o := range opts { 72 if err := o(c); err != nil { 73 return err 74 } 75 } 76 if c.OCIHookPath == "" { 77 path, err := exec.LookPath("containerd") 78 if err != nil { 79 return err 80 } 81 c.OCIHookPath = path 82 } 83 nvidiaPath, err := exec.LookPath(NvidiaCLI) 84 if err != nil { 85 return err 86 } 87 if s.Hooks == nil { 88 s.Hooks = &specs.Hooks{} 89 } 90 s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{ 91 Path: c.OCIHookPath, 92 Args: append([]string{ 93 "containerd", 94 "oci-hook", 95 "--", 96 nvidiaPath, 97 // ensures the required kernel modules are properly loaded 98 "--load-kmods", 99 }, c.args()...), 100 Env: os.Environ(), 101 }) 102 return nil 103 } 104 } 105 106 type config struct { 107 Devices []string 108 Capabilities []Capability 109 LoadKmods bool 110 LDCache string 111 LDConfig string 112 Requirements []string 113 OCIHookPath string 114 } 115 116 func (c *config) args() []string { 117 var args []string 118 119 if c.LoadKmods { 120 args = append(args, "--load-kmods") 121 } 122 if c.LDCache != "" { 123 args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache)) 124 } 125 args = append(args, 126 "configure", 127 ) 128 if len(c.Devices) > 0 { 129 args = append(args, fmt.Sprintf("--device=%s", strings.Join(c.Devices, ","))) 130 } 131 for _, c := range c.Capabilities { 132 args = append(args, fmt.Sprintf("--%s", c)) 133 } 134 if c.LDConfig != "" { 135 args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig)) 136 } 137 for _, r := range c.Requirements { 138 args = append(args, fmt.Sprintf("--require=%s", r)) 139 } 140 args = append(args, "--pid={{pid}}", "{{rootfs}}") 141 return args 142 } 
143 144 // Opts are options for configuring gpu support 145 type Opts func(*config) error 146 147 // WithDevices adds the provided device indexes to the container 148 func WithDevices(ids ...int) Opts { 149 return func(c *config) error { 150 for _, i := range ids { 151 c.Devices = append(c.Devices, strconv.Itoa(i)) 152 } 153 return nil 154 } 155 } 156 157 // WithDeviceUUIDs adds the specific device UUID to the container 158 func WithDeviceUUIDs(uuids ...string) Opts { 159 return func(c *config) error { 160 c.Devices = append(c.Devices, uuids...) 161 return nil 162 } 163 } 164 165 // WithAllDevices adds all gpus to the container 166 func WithAllDevices(c *config) error { 167 c.Devices = []string{"all"} 168 return nil 169 } 170 171 // WithAllCapabilities adds all capabilities to the container for the gpus 172 func WithAllCapabilities(c *config) error { 173 c.Capabilities = AllCaps() 174 return nil 175 } 176 177 // WithCapabilities adds the specified capabilities to the container for the gpus 178 func WithCapabilities(caps ...Capability) Opts { 179 return func(c *config) error { 180 c.Capabilities = append(c.Capabilities, caps...) 181 return nil 182 } 183 } 184 185 // WithRequiredCUDAVersion sets the required cuda version 186 func WithRequiredCUDAVersion(major, minor int) Opts { 187 return func(c *config) error { 188 c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor)) 189 return nil 190 } 191 } 192 193 // WithOCIHookPath sets the hook path for the binary 194 func WithOCIHookPath(path string) Opts { 195 return func(c *config) error { 196 c.OCIHookPath = path 197 return nil 198 } 199 } 200 201 // WithLookupOCIHookPath sets the hook path for the binary via a binary name 202 func WithLookupOCIHookPath(name string) Opts { 203 return func(c *config) error { 204 path, err := exec.LookPath(name) 205 if err != nil { 206 return err 207 } 208 c.OCIHookPath = path 209 return nil 210 } 211 }