github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/devices/nvproxy/nvproxy.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package nvproxy implements proxying for the Nvidia GPU Linux kernel driver:
    16  // https://github.com/NVIDIA/open-gpu-kernel-modules.
    17  //
    18  // Supported Nvidia GPUs: T4, L4, A100, A10G, V100 and H100.
    19  package nvproxy
    20  
    21  import (
    22  	"fmt"
    23  
    24  	"github.com/MerlinKodo/gvisor/pkg/abi/nvgpu"
    25  	"github.com/MerlinKodo/gvisor/pkg/context"
    26  	"github.com/MerlinKodo/gvisor/pkg/hostarch"
    27  	"github.com/MerlinKodo/gvisor/pkg/log"
    28  	"github.com/MerlinKodo/gvisor/pkg/marshal"
    29  	"github.com/MerlinKodo/gvisor/pkg/sentry/fsimpl/devtmpfs"
    30  	"github.com/MerlinKodo/gvisor/pkg/sentry/mm"
    31  	"github.com/MerlinKodo/gvisor/pkg/sentry/vfs"
    32  )
    33  
    34  // Register registers all devices implemented by this package in vfsObj.
    35  func Register(vfsObj *vfs.VirtualFilesystem, uvmDevMajor uint32) error {
    36  	// The kernel driver's interface is unstable, so only allow versions of the
    37  	// driver that are known to be supported.
    38  	version, err := hostDriverVersion()
    39  	if err != nil {
    40  		return fmt.Errorf("failed to get Nvidia driver version: %w", err)
    41  	}
    42  	switch version {
    43  	case
    44  		"525.60.13",
    45  		"525.105.17",
    46  		"525.125.06":
    47  		log.Infof("Nvidia driver version: %s", version)
    48  	default:
    49  		return fmt.Errorf("unsupported Nvidia driver version: %s", version)
    50  	}
    51  
    52  	nvp := &nvproxy{
    53  		objsLive: make(map[nvgpu.Handle]*object),
    54  	}
    55  	for minor := uint32(0); minor <= nvgpu.NV_CONTROL_DEVICE_MINOR; minor++ {
    56  		if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, &frontendDevice{
    57  			nvp:   nvp,
    58  			minor: minor,
    59  		}, &vfs.RegisterDeviceOptions{
    60  			GroupName: "nvidia-frontend",
    61  		}); err != nil {
    62  			return err
    63  		}
    64  	}
    65  	if err := vfsObj.RegisterDevice(vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, &uvmDevice{
    66  		nvp: nvp,
    67  	}, &vfs.RegisterDeviceOptions{
    68  		GroupName: "nvidia-uvm",
    69  	}); err != nil {
    70  		return err
    71  	}
    72  	return nil
    73  }
    74  
    75  // CreateDriverDevtmpfsFiles creates device special files in dev that should
    76  // always exist when this package is enabled. It does not create per-device
    77  // files in dev; see CreateIndexDevtmpfsFile.
    78  func CreateDriverDevtmpfsFiles(ctx context.Context, dev *devtmpfs.Accessor, uvmDevMajor uint32) error {
    79  	if err := dev.CreateDeviceFile(ctx, "nvidiactl", vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, nvgpu.NV_CONTROL_DEVICE_MINOR, 0666); err != nil {
    80  		return err
    81  	}
    82  	if err := dev.CreateDeviceFile(ctx, "nvidia-uvm", vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, 0666); err != nil {
    83  		return err
    84  	}
    85  	return nil
    86  }
    87  
    88  // CreateIndexDevtmpfsFile creates the device special file in dev for the
    89  // device with the given index.
    90  func CreateIndexDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor, minor uint32) error {
    91  	return dev.CreateDeviceFile(ctx, fmt.Sprintf("nvidia%d", minor), vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, 0666)
    92  }
    93  
// nvproxy is the state shared by the devices created in Register: every
// frontendDevice and the uvmDevice hold a pointer to the same instance.
//
// +stateify savable
type nvproxy struct {
	// objsMu is tagged nosave. NOTE(review): presumably it guards objsLive;
	// confirm against the code that reads and writes the map.
	objsMu   objsMutex `state:"nosave"`
	// objsLive maps an nvgpu.Handle to the object tracked for it.
	// NOTE(review): presumably only handles that have not yet been freed are
	// present; confirm against the allocation and free paths.
	objsLive map[nvgpu.Handle]*object
}
    99  
// object tracks an object allocated through the driver.
//
// +stateify savable
type object struct {
	// impl is set by init and is the receiver that Release delegates to.
	impl objectImpl
}
   106  
// init records impl as the implementation that o.Release will delegate to.
func (o *object) init(impl objectImpl) {
	o.impl = impl
}
   110  
// Release is called after the represented object is freed. It forwards ctx
// to o.impl.Release, so impl must have been set (see init) beforehand;
// calling a method on a nil interface value panics.
func (o *object) Release(ctx context.Context) {
	o.impl.Release(ctx)
}
   115  
// objectImpl is the interface held in object.impl. object.Release forwards
// to its Release method.
type objectImpl interface {
	// Release is invoked by object.Release with the same ctx.
	Release(ctx context.Context)
}
   119  
// osDescMem is an objectImpl tracking an OS descriptor.
//
// +stateify savable
type osDescMem struct {
	// object is embedded so that an osDescMem carries the generic object
	// state alongside its own.
	object
	// pinnedRanges are the ranges handed to mm.Unpin when Release is called.
	pinnedRanges []mm.PinnedRange
}
   127  
// Release implements objectImpl.Release. It logs an informational message
// through ctx and then unpins o.pinnedRanges via mm.Unpin.
func (o *osDescMem) Release(ctx context.Context) {
	ctx.Infof("nvproxy: unpinning pages for released OS descriptor")
	mm.Unpin(o.pinnedRanges)
}
   133  
// marshalPtr is a generic type constraint: it is satisfied only by the
// pointer type *T, and only when *T implements marshal.Marshallable.
type marshalPtr[T any] interface {
	*T
	marshal.Marshallable
}
   138  
   139  func addrFromP64(p nvgpu.P64) hostarch.Addr {
   140  	return hostarch.Addr(uintptr(uint64(p)))
   141  }