gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/devices/nvproxy/nvproxy.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package nvproxy implements proxying for the Nvidia GPU Linux kernel driver:
    16  // https://github.com/NVIDIA/open-gpu-kernel-modules.
    17  //
    18  // Supported Nvidia GPUs: T4, L4, A100, A10G and H100.
    19  package nvproxy
    20  
    21  import (
    22  	"fmt"
    23  
    24  	"gvisor.dev/gvisor/pkg/abi/nvgpu"
    25  	"gvisor.dev/gvisor/pkg/hostarch"
    26  	"gvisor.dev/gvisor/pkg/log"
    27  	"gvisor.dev/gvisor/pkg/marshal"
    28  	"gvisor.dev/gvisor/pkg/sentry/vfs"
    29  )
    30  
    31  // Register registers all devices implemented by this package in vfsObj.
    32  func Register(vfsObj *vfs.VirtualFilesystem, versionStr string, uvmDevMajor uint32) error {
    33  	// The kernel driver's interface is unstable, so only allow versions of the
    34  	// driver that are known to be supported.
    35  	log.Infof("NVIDIA driver version: %s", versionStr)
    36  	version, err := DriverVersionFrom(versionStr)
    37  	if err != nil {
    38  		return fmt.Errorf("failed to parse Nvidia driver version %s: %w", versionStr, err)
    39  	}
    40  	abiCons, ok := abis[version]
    41  	if !ok {
    42  		return fmt.Errorf("unsupported Nvidia driver version: %s", versionStr)
    43  	}
    44  	nvp := &nvproxy{
    45  		abi:         abiCons.cons(),
    46  		version:     version,
    47  		frontendFDs: make(map[*frontendFD]struct{}),
    48  		clients:     make(map[nvgpu.Handle]*rootClient),
    49  		objsFreeSet: make(map[*object]struct{}),
    50  	}
    51  	for minor := uint32(0); minor <= nvgpu.NV_CONTROL_DEVICE_MINOR; minor++ {
    52  		if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, &frontendDevice{
    53  			nvp:   nvp,
    54  			minor: minor,
    55  		}, &vfs.RegisterDeviceOptions{
    56  			GroupName: "nvidia-frontend",
    57  		}); err != nil {
    58  			return err
    59  		}
    60  	}
    61  	if err := vfsObj.RegisterDevice(vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, &uvmDevice{
    62  		nvp: nvp,
    63  	}, &vfs.RegisterDeviceOptions{
    64  		GroupName: "nvidia-uvm",
    65  	}); err != nil {
    66  		return err
    67  	}
    68  	return nil
    69  }
    70  
// nvproxy holds the state shared by the devices that Register creates: the
// same instance is handed to every frontendDevice and to the uvmDevice.
//
// +stateify savable
type nvproxy struct {
	// abi is the value returned by the ABI constructor that Register looks up
	// for version; version is the driver version parsed from the string passed
	// to Register. abi is tagged nosave.
	abi     *driverABI `state:"nosave"`
	version DriverVersion

	// frontendFDs is a set of *frontendFD.
	// NOTE(review): presumably fdsMu guards frontendFDs, judging by the field
	// names and grouping -- confirm against the code that locks it.
	fdsMu       fdsMutex `state:"nosave"`
	frontendFDs map[*frontendFD]struct{}

	// See object.go.
	// Users should call nvproxy.objsLock/Unlock() rather than locking objsMu
	// directly.
	objsMu objsMutex `state:"nosave"`
	// These fields are protected by objsMu.
	// clients maps an nvgpu.Handle to a *rootClient. Of these fields, only
	// clients lacks the nosave tag.
	clients      map[nvgpu.Handle]*rootClient
	objsCleanup  []func()             `state:"nosave"`
	objsFreeList objectFreeList       `state:"nosave"`
	objsFreeSet  map[*object]struct{} `state:"nosave"`
}
    89  
// marshalPtr is a type constraint satisfied only by *T, and only when *T
// implements marshal.Marshallable. It lets generic code take T as a type
// parameter while requiring that a pointer to T is marshallable.
type marshalPtr[T any] interface {
	*T
	marshal.Marshallable
}
    94  
    95  func addrFromP64(p nvgpu.P64) hostarch.Addr {
    96  	return hostarch.Addr(uintptr(uint64(p)))
    97  }
    98  
// hasFrontendFDPtr is a type constraint satisfied by *T when *T meets
// marshalPtr[T] and additionally implements nvgpu.HasFrontendFD.
type hasFrontendFDPtr[T any] interface {
	marshalPtr[T]
	nvgpu.HasFrontendFD
}