github.com/MerlinKodo/gvisor@v0.0.0-20231110090155-957f62ecf90e/pkg/sentry/devices/nvproxy/nvproxy.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package nvproxy implements proxying for the Nvidia GPU Linux kernel driver: 16 // https://github.com/NVIDIA/open-gpu-kernel-modules. 17 // 18 // Supported Nvidia GPUs: T4, L4, A100, A10G, V100 and H100. 19 package nvproxy 20 21 import ( 22 "fmt" 23 24 "github.com/MerlinKodo/gvisor/pkg/abi/nvgpu" 25 "github.com/MerlinKodo/gvisor/pkg/context" 26 "github.com/MerlinKodo/gvisor/pkg/hostarch" 27 "github.com/MerlinKodo/gvisor/pkg/log" 28 "github.com/MerlinKodo/gvisor/pkg/marshal" 29 "github.com/MerlinKodo/gvisor/pkg/sentry/fsimpl/devtmpfs" 30 "github.com/MerlinKodo/gvisor/pkg/sentry/mm" 31 "github.com/MerlinKodo/gvisor/pkg/sentry/vfs" 32 ) 33 34 // Register registers all devices implemented by this package in vfsObj. 35 func Register(vfsObj *vfs.VirtualFilesystem, uvmDevMajor uint32) error { 36 // The kernel driver's interface is unstable, so only allow versions of the 37 // driver that are known to be supported. 38 version, err := hostDriverVersion() 39 if err != nil { 40 return fmt.Errorf("failed to get Nvidia driver version: %w", err) 41 } 42 switch version { 43 case 44 "525.60.13", 45 "525.105.17", 46 "525.125.06": 47 log.Infof("Nvidia driver version: %s", version) 48 default: 49 return fmt.Errorf("unsupported Nvidia driver version: %s", version) 50 } 51 52 nvp := &nvproxy{ 53 objsLive: make(map[nvgpu.Handle]*object), 54 } 55 for minor := uint32(0); minor <= nvgpu.NV_CONTROL_DEVICE_MINOR; minor++ { 56 if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, &frontendDevice{ 57 nvp: nvp, 58 minor: minor, 59 }, &vfs.RegisterDeviceOptions{ 60 GroupName: "nvidia-frontend", 61 }); err != nil { 62 return err 63 } 64 } 65 if err := vfsObj.RegisterDevice(vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, &uvmDevice{ 66 nvp: nvp, 67 }, &vfs.RegisterDeviceOptions{ 68 GroupName: "nvidia-uvm", 69 }); err != nil { 70 return err 71 } 72 return nil 73 } 74 75 // CreateDriverDevtmpfsFiles creates device special files in dev that should 76 // always exist when this package is enabled. It does not create per-device 77 // files in dev; see CreateIndexDevtmpfsFile. 78 func CreateDriverDevtmpfsFiles(ctx context.Context, dev *devtmpfs.Accessor, uvmDevMajor uint32) error { 79 if err := dev.CreateDeviceFile(ctx, "nvidiactl", vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, nvgpu.NV_CONTROL_DEVICE_MINOR, 0666); err != nil { 80 return err 81 } 82 if err := dev.CreateDeviceFile(ctx, "nvidia-uvm", vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, 0666); err != nil { 83 return err 84 } 85 return nil 86 } 87 88 // CreateIndexDevtmpfsFile creates the device special file in dev for the 89 // device with the given index. 90 func CreateIndexDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor, minor uint32) error { 91 return dev.CreateDeviceFile(ctx, fmt.Sprintf("nvidia%d", minor), vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, 0666) 92 } 93 94 // +stateify savable 95 type nvproxy struct { 96 objsMu objsMutex `state:"nosave"` 97 objsLive map[nvgpu.Handle]*object 98 } 99 100 // object tracks an object allocated through the driver. 101 // 102 // +stateify savable 103 type object struct { 104 impl objectImpl 105 } 106 107 func (o *object) init(impl objectImpl) { 108 o.impl = impl 109 } 110 111 // Release is called after the represented object is freed. 112 func (o *object) Release(ctx context.Context) { 113 o.impl.Release(ctx) 114 } 115 116 type objectImpl interface { 117 Release(ctx context.Context) 118 } 119 120 // osDescMem is an objectImpl tracking an OS descriptor. 121 // 122 // +stateify savable 123 type osDescMem struct { 124 object 125 pinnedRanges []mm.PinnedRange 126 } 127 128 // Release implements objectImpl.Release. 129 func (o *osDescMem) Release(ctx context.Context) { 130 ctx.Infof("nvproxy: unpinning pages for released OS descriptor") 131 mm.Unpin(o.pinnedRanges) 132 } 133 134 type marshalPtr[T any] interface { 135 *T 136 marshal.Marshallable 137 } 138 139 func addrFromP64(p nvgpu.P64) hostarch.Addr { 140 return hostarch.Addr(uintptr(uint64(p))) 141 }