github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/devices/nvproxy/nvproxy.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package nvproxy implements proxying for the Nvidia GPU Linux kernel driver:
    16  // https://github.com/NVIDIA/open-gpu-kernel-modules.
    17  //
    18  // Supported Nvidia GPUs: T4, L4, A100, A10G and H100.
    19  package nvproxy
    20  
    21  import (
    22  	"fmt"
    23  
    24  	"github.com/metacubex/gvisor/pkg/abi/nvgpu"
    25  	"github.com/metacubex/gvisor/pkg/context"
    26  	"github.com/metacubex/gvisor/pkg/hostarch"
    27  	"github.com/metacubex/gvisor/pkg/log"
    28  	"github.com/metacubex/gvisor/pkg/marshal"
    29  	"github.com/metacubex/gvisor/pkg/sentry/mm"
    30  	"github.com/metacubex/gvisor/pkg/sentry/vfs"
    31  )
    32  
    33  // Register registers all devices implemented by this package in vfsObj.
    34  func Register(vfsObj *vfs.VirtualFilesystem, versionStr string, uvmDevMajor uint32) error {
    35  	// The kernel driver's interface is unstable, so only allow versions of the
    36  	// driver that are known to be supported.
    37  	log.Infof("NVIDIA driver version: %s", versionStr)
    38  	version, err := DriverVersionFrom(versionStr)
    39  	if err != nil {
    40  		return fmt.Errorf("failed to parse Nvidia driver version %s: %w", versionStr, err)
    41  	}
    42  	abiCons, ok := abis[version]
    43  	if !ok {
    44  		return fmt.Errorf("unsupported Nvidia driver version: %s", versionStr)
    45  	}
    46  	nvp := &nvproxy{
    47  		objsLive: make(map[nvgpu.Handle]*object),
    48  		abi:      abiCons.cons(),
    49  		version:  version,
    50  	}
    51  	for minor := uint32(0); minor <= nvgpu.NV_CONTROL_DEVICE_MINOR; minor++ {
    52  		if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, &frontendDevice{
    53  			nvp:   nvp,
    54  			minor: minor,
    55  		}, &vfs.RegisterDeviceOptions{
    56  			GroupName: "nvidia-frontend",
    57  		}); err != nil {
    58  			return err
    59  		}
    60  	}
    61  	if err := vfsObj.RegisterDevice(vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, &uvmDevice{
    62  		nvp: nvp,
    63  	}, &vfs.RegisterDeviceOptions{
    64  		GroupName: "nvidia-uvm",
    65  	}); err != nil {
    66  		return err
    67  	}
    68  	return nil
    69  }
    70  
// nvproxy is the state shared by all devices that Register installs:
// Register builds a single instance and hands the same pointer to every
// frontendDevice and to the uvmDevice.
//
// Fields:
//   - objsMu: NOTE(review): presumably guards objsLive; confirm at use sites.
//   - objsLive: map from nvgpu.Handle to *object; created empty by Register.
//   - abi: the driver ABI produced by the abis entry selected for version.
//   - version: the parsed driver version; the only field that is not tagged
//     state:"nosave".
//
// +stateify savable
type nvproxy struct {
	objsMu   objsMutex                `state:"nosave"`
	objsLive map[nvgpu.Handle]*object `state:"nosave"`
	abi      *driverABI               `state:"nosave"`
	version  DriverVersion
}
    78  
// object tracks an object allocated through the driver.
//
// impl is the objectImpl that Release forwards to; it is assigned by init.
//
// +stateify savable
type object struct {
	impl objectImpl
}
    85  
// init stores impl as the implementation that o.Release forwards to.
func (o *object) init(impl objectImpl) {
	o.impl = impl
}
    89  
// Release is called after the represented object is freed. It forwards ctx
// to the Release method of the stored objectImpl, so o.impl must be non-nil
// (calling a method on a nil interface value panics).
func (o *object) Release(ctx context.Context) {
	o.impl.Release(ctx)
}
    94  
// objectImpl is the interface held in object.impl; object.Release forwards
// to it.
type objectImpl interface {
	// Release is invoked by object.Release with the same ctx it received.
	Release(ctx context.Context)
}
    98  
// osDescMem is an objectImpl tracking an OS descriptor.
//
// pinnedRanges holds the mm.PinnedRange values that Release hands to
// mm.Unpin.
//
// +stateify savable
type osDescMem struct {
	object
	pinnedRanges []mm.PinnedRange
}
   106  
   107  // Release implements objectImpl.Release.
   108  func (o *osDescMem) Release(ctx context.Context) {
   109  	ctx.Infof("nvproxy: unpinning pages for released OS descriptor")
   110  	mm.Unpin(o.pinnedRanges)
   111  }
   112  
// marshalPtr is a type constraint satisfied only by the pointer type *T when
// that pointer type also implements marshal.Marshallable.
type marshalPtr[T any] interface {
	*T
	marshal.Marshallable
}
   117  
   118  func addrFromP64(p nvgpu.P64) hostarch.Addr {
   119  	return hostarch.Addr(uintptr(uint64(p)))
   120  }