github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/pkg/sentry/devices/nvproxy/nvproxy.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package nvproxy implements proxying for the Nvidia GPU Linux kernel driver: 16 // https://github.com/NVIDIA/open-gpu-kernel-modules. 17 // 18 // Supported Nvidia GPUs: T4, L4, A100, A10G, V100 and H100. 19 package nvproxy 20 21 import ( 22 "fmt" 23 24 "github.com/ttpreport/gvisor-ligolo/pkg/abi/nvgpu" 25 "github.com/ttpreport/gvisor-ligolo/pkg/context" 26 "github.com/ttpreport/gvisor-ligolo/pkg/hostarch" 27 "github.com/ttpreport/gvisor-ligolo/pkg/log" 28 "github.com/ttpreport/gvisor-ligolo/pkg/marshal" 29 "github.com/ttpreport/gvisor-ligolo/pkg/sentry/fsimpl/devtmpfs" 30 "github.com/ttpreport/gvisor-ligolo/pkg/sentry/mm" 31 "github.com/ttpreport/gvisor-ligolo/pkg/sentry/vfs" 32 ) 33 34 // Register registers all devices implemented by this package in vfsObj. 35 func Register(vfsObj *vfs.VirtualFilesystem, uvmDevMajor uint32) error { 36 // The kernel driver's interface is unstable, so only allow versions of the 37 // driver that are known to be supported. 38 version, err := hostDriverVersion() 39 if err != nil { 40 return fmt.Errorf("failed to get Nvidia driver version: %w", err) 41 } 42 switch version { 43 case 44 "525.60.13", 45 "525.105.17": 46 log.Infof("Nvidia driver version: %s", version) 47 default: 48 return fmt.Errorf("unsupported Nvidia driver version: %s", version) 49 } 50 51 nvp := &nvproxy{ 52 objsLive: make(map[nvgpu.Handle]*object), 53 } 54 for minor := uint32(0); minor <= nvgpu.NV_CONTROL_DEVICE_MINOR; minor++ { 55 if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, &frontendDevice{ 56 nvp: nvp, 57 minor: minor, 58 }, &vfs.RegisterDeviceOptions{ 59 GroupName: "nvidia-frontend", 60 }); err != nil { 61 return err 62 } 63 } 64 if err := vfsObj.RegisterDevice(vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, &uvmDevice{ 65 nvp: nvp, 66 }, &vfs.RegisterDeviceOptions{ 67 GroupName: "nvidia-uvm", 68 }); err != nil { 69 return err 70 } 71 return nil 72 } 73 74 // CreateDriverDevtmpfsFiles creates device special files in dev that should 75 // always exist when this package is enabled. It does not create per-device 76 // files in dev; see CreateIndexDevtmpfsFile. 77 func CreateDriverDevtmpfsFiles(ctx context.Context, dev *devtmpfs.Accessor, uvmDevMajor uint32) error { 78 if err := dev.CreateDeviceFile(ctx, "nvidiactl", vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, nvgpu.NV_CONTROL_DEVICE_MINOR, 0666); err != nil { 79 return err 80 } 81 if err := dev.CreateDeviceFile(ctx, "nvidia-uvm", vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, 0666); err != nil { 82 return err 83 } 84 return nil 85 } 86 87 // CreateIndexDevtmpfsFile creates the device special file in dev for the 88 // device with the given index. 89 func CreateIndexDevtmpfsFile(ctx context.Context, dev *devtmpfs.Accessor, index uint32) error { 90 return dev.CreateDeviceFile(ctx, fmt.Sprintf("nvidia%d", index), vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, index, 0666) 91 } 92 93 // +stateify savable 94 type nvproxy struct { 95 objsMu objsMutex 96 objsLive map[nvgpu.Handle]*object 97 } 98 99 // object tracks an object allocated through the driver. 100 // 101 // +stateify savable 102 type object struct { 103 impl objectImpl 104 } 105 106 func (o *object) init(impl objectImpl) { 107 o.impl = impl 108 } 109 110 // Release is called after the represented object is freed. 111 func (o *object) Release(ctx context.Context) { 112 o.impl.Release(ctx) 113 } 114 115 type objectImpl interface { 116 Release(ctx context.Context) 117 } 118 119 // osDescMem is an objectImpl tracking an OS descriptor. 120 // 121 // +stateify savable 122 type osDescMem struct { 123 object 124 pinnedRanges []mm.PinnedRange 125 } 126 127 // Release implements objectImpl.Release. 128 func (o *osDescMem) Release(ctx context.Context) { 129 ctx.Infof("nvproxy: unpinning pages for released OS descriptor") 130 mm.Unpin(o.pinnedRanges) 131 } 132 133 type marshalPtr[T any] interface { 134 *T 135 marshal.Marshallable 136 } 137 138 func addrFromP64(p nvgpu.P64) hostarch.Addr { 139 return hostarch.Addr(uintptr(uint64(p))) 140 }