github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/devices/nvproxy/nvproxy.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package nvproxy implements proxying for the Nvidia GPU Linux kernel driver: 16 // https://github.com/NVIDIA/open-gpu-kernel-modules. 17 // 18 // Supported Nvidia GPUs: T4, L4, A100, A10G and H100. 19 package nvproxy 20 21 import ( 22 "fmt" 23 24 "github.com/metacubex/gvisor/pkg/abi/nvgpu" 25 "github.com/metacubex/gvisor/pkg/context" 26 "github.com/metacubex/gvisor/pkg/hostarch" 27 "github.com/metacubex/gvisor/pkg/log" 28 "github.com/metacubex/gvisor/pkg/marshal" 29 "github.com/metacubex/gvisor/pkg/sentry/mm" 30 "github.com/metacubex/gvisor/pkg/sentry/vfs" 31 ) 32 33 // Register registers all devices implemented by this package in vfsObj. 34 func Register(vfsObj *vfs.VirtualFilesystem, versionStr string, uvmDevMajor uint32) error { 35 // The kernel driver's interface is unstable, so only allow versions of the 36 // driver that are known to be supported. 37 log.Infof("NVIDIA driver version: %s", versionStr) 38 version, err := DriverVersionFrom(versionStr) 39 if err != nil { 40 return fmt.Errorf("failed to parse Nvidia driver version %s: %w", versionStr, err) 41 } 42 abiCons, ok := abis[version] 43 if !ok { 44 return fmt.Errorf("unsupported Nvidia driver version: %s", versionStr) 45 } 46 nvp := &nvproxy{ 47 objsLive: make(map[nvgpu.Handle]*object), 48 abi: abiCons.cons(), 49 version: version, 50 } 51 for minor := uint32(0); minor <= nvgpu.NV_CONTROL_DEVICE_MINOR; minor++ { 52 if err := vfsObj.RegisterDevice(vfs.CharDevice, nvgpu.NV_MAJOR_DEVICE_NUMBER, minor, &frontendDevice{ 53 nvp: nvp, 54 minor: minor, 55 }, &vfs.RegisterDeviceOptions{ 56 GroupName: "nvidia-frontend", 57 }); err != nil { 58 return err 59 } 60 } 61 if err := vfsObj.RegisterDevice(vfs.CharDevice, uvmDevMajor, nvgpu.NVIDIA_UVM_PRIMARY_MINOR_NUMBER, &uvmDevice{ 62 nvp: nvp, 63 }, &vfs.RegisterDeviceOptions{ 64 GroupName: "nvidia-uvm", 65 }); err != nil { 66 return err 67 } 68 return nil 69 } 70 71 // +stateify savable 72 type nvproxy struct { 73 objsMu objsMutex `state:"nosave"` 74 objsLive map[nvgpu.Handle]*object `state:"nosave"` 75 abi *driverABI `state:"nosave"` 76 version DriverVersion 77 } 78 79 // object tracks an object allocated through the driver. 80 // 81 // +stateify savable 82 type object struct { 83 impl objectImpl 84 } 85 86 func (o *object) init(impl objectImpl) { 87 o.impl = impl 88 } 89 90 // Release is called after the represented object is freed. 91 func (o *object) Release(ctx context.Context) { 92 o.impl.Release(ctx) 93 } 94 95 type objectImpl interface { 96 Release(ctx context.Context) 97 } 98 99 // osDescMem is an objectImpl tracking an OS descriptor. 100 // 101 // +stateify savable 102 type osDescMem struct { 103 object 104 pinnedRanges []mm.PinnedRange 105 } 106 107 // Release implements objectImpl.Release. 108 func (o *osDescMem) Release(ctx context.Context) { 109 ctx.Infof("nvproxy: unpinning pages for released OS descriptor") 110 mm.Unpin(o.pinnedRanges) 111 } 112 113 type marshalPtr[T any] interface { 114 *T 115 marshal.Marshallable 116 } 117 118 func addrFromP64(p nvgpu.P64) hostarch.Addr { 119 return hostarch.Addr(uintptr(uint64(p))) 120 }