github.com/lalkh/containerd@v1.4.3/contrib/nvidia/nvidia.go (about)

     1  /*
     2     Copyright The containerd Authors.
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package nvidia
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"os"
    23  	"os/exec"
    24  	"strconv"
    25  	"strings"
    26  
    27  	"github.com/containerd/containerd/containers"
    28  	"github.com/containerd/containerd/oci"
    29  	specs "github.com/opencontainers/runtime-spec/specs-go"
    30  )
    31  
// NvidiaCLI is the name of the Nvidia helper binary. It is a bare command
// name rather than a path: WithGPUs resolves it to a full path with
// exec.LookPath and fails if the binary cannot be found.
const NvidiaCLI = "nvidia-container-cli"
    34  
// Capability specifies a capability for the gpu inside the container.
// Each configured value is passed to the Nvidia helper as a "--<capability>"
// flag (see config.args).
//
// Detailed explanation of options can be found:
// https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities
type Capability string
    39  
// Capability values understood by this package. The string value of each
// constant is the flag name handed to the Nvidia helper, prefixed with "--".
const (
	// Compute is the "compute" capability (emitted as --compute).
	Compute Capability = "compute"
	// Compat32 is the "compat32" capability (emitted as --compat32).
	Compat32 Capability = "compat32"
	// Graphics is the "graphics" capability (emitted as --graphics).
	Graphics Capability = "graphics"
	// Utility is the "utility" capability (emitted as --utility).
	Utility Capability = "utility"
	// Video is the "video" capability (emitted as --video).
	Video Capability = "video"
	// Display is the "display" capability (emitted as --display).
	Display Capability = "display"
)
    54  
    55  // AllCaps returns the complete list of supported Nvidia capabilities.
    56  func AllCaps() []Capability {
    57  	return []Capability{
    58  		Compute,
    59  		Compat32,
    60  		Graphics,
    61  		Utility,
    62  		Video,
    63  		Display,
    64  	}
    65  }
    66  
    67  // WithGPUs adds NVIDIA gpu support to a container
    68  func WithGPUs(opts ...Opts) oci.SpecOpts {
    69  	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error {
    70  		c := &config{}
    71  		for _, o := range opts {
    72  			if err := o(c); err != nil {
    73  				return err
    74  			}
    75  		}
    76  		if c.OCIHookPath == "" {
    77  			path, err := exec.LookPath("containerd")
    78  			if err != nil {
    79  				return err
    80  			}
    81  			c.OCIHookPath = path
    82  		}
    83  		nvidiaPath, err := exec.LookPath(NvidiaCLI)
    84  		if err != nil {
    85  			return err
    86  		}
    87  		if s.Hooks == nil {
    88  			s.Hooks = &specs.Hooks{}
    89  		}
    90  		s.Hooks.Prestart = append(s.Hooks.Prestart, specs.Hook{
    91  			Path: c.OCIHookPath,
    92  			Args: append([]string{
    93  				"containerd",
    94  				"oci-hook",
    95  				"--",
    96  				nvidiaPath,
    97  				// ensures the required kernel modules are properly loaded
    98  				"--load-kmods",
    99  			}, c.args()...),
   100  			Env: os.Environ(),
   101  		})
   102  		return nil
   103  	}
   104  }
   105  
// config collects the gpu settings gathered from Opts before WithGPUs turns
// them into the hook command line (see args).
//
//   - Devices: device identifiers, joined with commas into one --device= flag;
//     omitted when empty.
//   - Capabilities: each entry becomes its own --<capability> flag.
//   - LoadKmods: when true, --load-kmods is emitted ahead of "configure".
//   - LDCache: when non-empty, emitted as --ldcache= ahead of "configure".
//   - LDConfig: when non-empty, emitted as --ldconfig= after "configure".
//   - Requirements: each entry becomes its own --require= flag.
//   - OCIHookPath: path of the binary used as the prestart hook; WithGPUs
//     defaults it to the "containerd" binary found on the PATH.
type config struct {
	Devices      []string
	Capabilities []Capability
	LoadKmods    bool
	LDCache      string
	LDConfig     string
	Requirements []string
	OCIHookPath  string
}
   115  
   116  func (c *config) args() []string {
   117  	var args []string
   118  
   119  	if c.LoadKmods {
   120  		args = append(args, "--load-kmods")
   121  	}
   122  	if c.LDCache != "" {
   123  		args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache))
   124  	}
   125  	args = append(args,
   126  		"configure",
   127  	)
   128  	if len(c.Devices) > 0 {
   129  		args = append(args, fmt.Sprintf("--device=%s", strings.Join(c.Devices, ",")))
   130  	}
   131  	for _, c := range c.Capabilities {
   132  		args = append(args, fmt.Sprintf("--%s", c))
   133  	}
   134  	if c.LDConfig != "" {
   135  		args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig))
   136  	}
   137  	for _, r := range c.Requirements {
   138  		args = append(args, fmt.Sprintf("--require=%s", r))
   139  	}
   140  	args = append(args, "--pid={{pid}}", "{{rootfs}}")
   141  	return args
   142  }
   143  
// Opts are options for configuring gpu support. Each option mutates the
// config that WithGPUs assembles; WithGPUs applies them in the order given
// and stops at the first one that returns a non-nil error.
type Opts func(*config) error
   146  
   147  // WithDevices adds the provided device indexes to the container
   148  func WithDevices(ids ...int) Opts {
   149  	return func(c *config) error {
   150  		for _, i := range ids {
   151  			c.Devices = append(c.Devices, strconv.Itoa(i))
   152  		}
   153  		return nil
   154  	}
   155  }
   156  
   157  // WithDeviceUUIDs adds the specific device UUID to the container
   158  func WithDeviceUUIDs(uuids ...string) Opts {
   159  	return func(c *config) error {
   160  		c.Devices = append(c.Devices, uuids...)
   161  		return nil
   162  	}
   163  }
   164  
   165  // WithAllDevices adds all gpus to the container
   166  func WithAllDevices(c *config) error {
   167  	c.Devices = []string{"all"}
   168  	return nil
   169  }
   170  
   171  // WithAllCapabilities adds all capabilities to the container for the gpus
   172  func WithAllCapabilities(c *config) error {
   173  	c.Capabilities = AllCaps()
   174  	return nil
   175  }
   176  
   177  // WithCapabilities adds the specified capabilities to the container for the gpus
   178  func WithCapabilities(caps ...Capability) Opts {
   179  	return func(c *config) error {
   180  		c.Capabilities = append(c.Capabilities, caps...)
   181  		return nil
   182  	}
   183  }
   184  
   185  // WithRequiredCUDAVersion sets the required cuda version
   186  func WithRequiredCUDAVersion(major, minor int) Opts {
   187  	return func(c *config) error {
   188  		c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor))
   189  		return nil
   190  	}
   191  }
   192  
   193  // WithOCIHookPath sets the hook path for the binary
   194  func WithOCIHookPath(path string) Opts {
   195  	return func(c *config) error {
   196  		c.OCIHookPath = path
   197  		return nil
   198  	}
   199  }
   200  
   201  // WithLookupOCIHookPath sets the hook path for the binary via a binary name
   202  func WithLookupOCIHookPath(name string) Opts {
   203  	return func(c *config) error {
   204  		path, err := exec.LookPath(name)
   205  		if err != nil {
   206  			return err
   207  		}
   208  		c.OCIHookPath = path
   209  		return nil
   210  	}
   211  }