gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/devices/nvproxy/nvproxy.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package nvproxy implements proxying for the Nvidia GPU Linux kernel driver: 16 // https://github.com/NVIDIA/open-gpu-kernel-modules. 17 // 18 // Supported Nvidia GPUs: T4, L4, A100, A10G and H100. 19 package nvproxy 20 21 import ( 22 "fmt" 23 24 "gvisor.dev/gvisor/pkg/abi/nvgpu" 25 "gvisor.dev/gvisor/pkg/hostarch" 26 "gvisor.dev/gvisor/pkg/log" 27 "gvisor.dev/gvisor/pkg/marshal" 28 "gvisor.dev/gvisor/pkg/sentry/vfs" 29 ) 30 31 // Register registers all devices implemented by this package in vfsObj. 32 func Register(vfsObj *vfs.VirtualFilesystem, versionStr string, uvmDevMajor uint32) error { 33 // The kernel driver's interface is unstable, so only allow versions of the 34 // driver that are known to be supported. 35 log.Infof("NVIDIA driver version: %s", versionStr) 36 version, err := DriverVersionFrom(versionStr) 37 if err != nil { 38 return fmt.Errorf("failed to parse Nvidia driver version %s: %w", versionStr, err) 39 } 40 abiCons, ok := abis[version] 41 if !ok { 42 return fmt.Errorf("unsupported Nvidia driver version: %s", versionStr) 43 } 44 nvp := &nvproxy{ 45 abi: abiCons.cons(), 46 version: version, 47 frontendFDs: make(map[*frontendFD]struct{}), 48 clients: make(map[nvgpu.Handle]*rootClient), 49 objsFreeSet: make(map[*object]struct{}), 50 } 51 for minor := uint32(0); minor <= nvgpu.NV_CONTROL_DEVICE_MINOR; minor++ { 52 if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, &frontendDevice{ 53 nvp: nvp, 54 minor: minor, 55 }, &vfs.RegisterDeviceOptions{ 56 GroupName: "nvidia-frontend", 57 }); err != nil { 58 return err 59 } 60 } 61 if err := vfsObj.RegisterDevice(vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, &uvmDevice{ 62 nvp: nvp, 63 }, &vfs.RegisterDeviceOptions{ 64 GroupName: "nvidia-uvm", 65 }); err != nil { 66 return err 67 } 68 return nil 69 } 70 71 // +stateify savable 72 type nvproxy struct { 73 abi *driverABI `state:"nosave"` 74 version DriverVersion 75 76 fdsMu fdsMutex `state:"nosave"` 77 frontendFDs map[*frontendFD]struct{} 78 79 // See object.go. 80 // Users should call nvproxy.objsLock/Unlock() rather than locking objsMu 81 // directly. 82 objsMu objsMutex `state:"nosave"` 83 // These fields are protected by objsMu. 84 clients map[nvgpu.Handle]*rootClient 85 objsCleanup []func() `state:"nosave"` 86 objsFreeList objectFreeList `state:"nosave"` 87 objsFreeSet map[*object]struct{} `state:"nosave"` 88 } 89 90 type marshalPtr[T any] interface { 91 *T 92 marshal.Marshallable 93 } 94 95 func addrFromP64(p nvgpu.P64) hostarch.Addr { 96 return hostarch.Addr(uintptr(uint64(p))) 97 } 98 99 type hasFrontendFDPtr[T any] interface { 100 marshalPtr[T] 101 nvgpu.HasFrontendFD 102 }