gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/devices/nvproxy/object.go

gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/sentry/devices/nvproxy/object.go (about)

     1  // Copyright 2024 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nvproxy
    16  
    17  import (
    18  	"gvisor.dev/gvisor/pkg/abi/nvgpu"
    19  	"gvisor.dev/gvisor/pkg/context"
    20  	"gvisor.dev/gvisor/pkg/log"
    21  	"gvisor.dev/gvisor/pkg/marshal"
    22  	"gvisor.dev/gvisor/pkg/sentry/mm"
    23  )
    24  
// object tracks a driver object.
//
// The objectImpl types in this file (rmAllocObject, rootClient, osDescMem,
// osEvent, virtMem) embed object; Object() returns that embedded value so
// nvproxy can reach the shared bookkeeping below regardless of concrete type.
//
// +stateify savable
type object struct {
	// These fields are initialized by nvproxy.objAdd() and are immutable thereafter.
	nvp    *nvproxy
	client *rootClient // may be == impl
	class  nvgpu.ClassID
	handle nvgpu.Handle // in client.resources, and also nvp.clients if impl is rootClient
	impl   objectImpl

	// The driver tracks parent/child relationships and "arbitrary dependency"
	// relationships between objects separately; we treat parent/child
	// relationships as equivalent to other dependencies. These fields are
	// protected by nvp.objsMu.
	//
	// Both maps are allocated lazily by nvproxy.objDep() and may be nil.
	deps  map[*object]struct{} // objects that this object depends on
	rdeps map[*object]struct{} // objects that depend on this object
	// objectFreeEntry is embedded so that *object can be placed on
	// nvp.objsFreeList (see objFree and prependFreedLockedRecursive);
	// presumably it holds the list linkage — its definition is not in this
	// file.
	objectFreeEntry
}
    44  
// objectImpl is implemented by the concrete types that represent tracked
// driver objects. nvproxy.objAdd() stores the implementation in object.impl,
// and nvproxy.objFree() calls Release() on it when the object is freed.
type objectImpl interface {
	// Object returns the object embedded in this objectImpl.
	Object() *object

	// Release is called when the driver object represented by this objectImpl
	// is freed.
	//
	// Release must not call nvproxy.objFree(): the object being released is
	// still on the free list at this point, and objFree panics when invoked
	// with a non-empty free list.
	//
	// Preconditions: nvproxy.objsMu must be locked.
	Release(ctx context.Context)
}
    55  
// Object implements objectImpl.Object.
//
// Types that embed object get this method through promotion, so calling it on
// such a type yields a pointer to its embedded object.
func (o *object) Object() *object {
	return o
}
    60  
// objsLock locks nvp.objsMu. It should be paired with objsUnlock(), which
// additionally runs any cleanup functions queued by enqueueCleanup().
func (nvp *nvproxy) objsLock() {
	nvp.objsMu.Lock()
}
    64  
    65  func (nvp *nvproxy) objsUnlock() {
    66  	cleanup := nvp.objsCleanup
    67  	nvp.objsCleanup = nil
    68  	nvp.objsMu.Unlock()
    69  	for _, f := range cleanup {
    70  		f()
    71  	}
    72  }
    73  
    74  // objAdd records the allocation of a driver object with class c and handle h,
    75  // in the client with handle clientH, represented by oi. Each non-zero handle
    76  // in deps is a dependency of the created object, such that the freeing of any
    77  // of those objects also results in the freeing of the recorded object.
    78  func (nvp *nvproxy) objAdd(ctx context.Context, clientH, h nvgpu.Handle, c nvgpu.ClassID, oi objectImpl, deps ...nvgpu.Handle) {
    79  	if h.Val == 0 {
    80  		log.Traceback("nvproxy: new object (class %v) has invalid handle 0", c)
    81  		return
    82  	}
    83  	var client *rootClient
    84  	// The driver forced NV01_ROOT and NV01_ROOT_NON_PRIV to NV01_ROOT_CLIENT,
    85  	// so we only need to check for the latter.
    86  	if c == nvgpu.NV01_ROOT_CLIENT {
    87  		clientH = h
    88  		client = oi.(*rootClient)
    89  		if _, ok := nvp.clients[h]; ok {
    90  			ctx.Warningf("nvproxy: client handle %v already in use", h)
    91  		}
    92  		nvp.clients[h] = client
    93  	} else {
    94  		var ok bool
    95  		client, ok = nvp.clients[clientH]
    96  		if !ok {
    97  			log.Traceback("nvproxy: new object %v (class %v) has invalid client handle %v", h, c, clientH)
    98  			return
    99  		}
   100  	}
   101  	o := oi.Object()
   102  	o.nvp = nvp
   103  	o.client = client
   104  	o.class = c
   105  	o.handle = h
   106  	o.impl = oi
   107  	if _, ok := client.resources[h]; ok {
   108  		ctx.Warningf("nvproxy: handle %v:%v already in use", clientH, h)
   109  	}
   110  	client.resources[h] = o
   111  	for _, depH := range deps {
   112  		if depH.Val == 0 /* aka NV01_NULL_OBJECT */ {
   113  			continue
   114  		}
   115  		dep, ok := client.resources[depH]
   116  		if !ok {
   117  			log.Traceback("nvproxy: new object %v:%v (class %v) has invalid dependency handle %v", clientH, h, c, depH)
   118  			continue
   119  		}
   120  		nvp.objDep(o, dep)
   121  	}
   122  	if ctx.IsLogging(log.Debug) {
   123  		ctx.Debugf("nvproxy: added object %v:%v (class %v) with dependencies %v", clientH, h, c, deps)
   124  	}
   125  }
   126  
   127  // objAddDep records a dependency between the existing object with handle h1 on
   128  // the existing object with handle h2, such that the freeing of the object with
   129  // handle h2 results in the freeing of object h1. Both h1 and h2 are handles in
   130  // the client with handle clientH.
   131  func (nvp *nvproxy) objAddDep(clientH, h1, h2 nvgpu.Handle) {
   132  	if h1.Val == 0 || h2.Val == 0 {
   133  		return
   134  	}
   135  	client, ok := nvp.clients[clientH]
   136  	if !ok {
   137  		log.Traceback("nvproxy: invalid client handle %v", clientH)
   138  		return
   139  	}
   140  	o1, ok := client.resources[h1]
   141  	if !ok {
   142  		log.Traceback("nvproxy: invalid handle %v:%v", clientH, h1)
   143  		return
   144  	}
   145  	o2, ok := client.resources[h2]
   146  	if !ok {
   147  		log.Traceback("nvproxy: invalid handle %v:%v", clientH, h2)
   148  		return
   149  	}
   150  	nvp.objDep(o1, o2)
   151  }
   152  
   153  func (nvp *nvproxy) objDep(o1, o2 *object) {
   154  	if o1.deps == nil {
   155  		o1.deps = make(map[*object]struct{})
   156  	}
   157  	o1.deps[o2] = struct{}{}
   158  	if o2.rdeps == nil {
   159  		o2.rdeps = make(map[*object]struct{})
   160  	}
   161  	o2.rdeps[o1] = struct{}{}
   162  }
   163  
// objFree marks an object and its transitive dependents as freed.
//
// The object is identified by handle h within the client with handle clientH.
// An unknown client or object handle is logged and otherwise ignored. For
// each freed object, its objectImpl.Release() is called and it is removed
// from its client's resources (and from nvp.clients if it is a root client).
//
// objFree panics if it is entered while the free list is non-empty, which
// happens if an objectImpl.Release() implementation calls back into objFree.
//
// Preconditions: nvp.objsMu must be locked (objectImpl.Release() requires it
// and objFree does not take the lock itself).
//
// Compare
// src/nvidia/src/libraries/resserv/src/rs_server.c:serverFreeResourceTree().
func (nvp *nvproxy) objFree(ctx context.Context, clientH, h nvgpu.Handle) {
	// Check for recursive calls to objFree() (via objectImpl.Release()).
	// serverFreeResourceTree() permits this; we currently don't for
	// simplicity.
	if !nvp.objsFreeList.Empty() {
		panic("nvproxy.objFree called with non-empty free list (possible recursion?)")
	}

	client, ok := nvp.clients[clientH]
	if !ok {
		ctx.Warningf("nvproxy: freeing object handle %v with unknown client handle %v", h, clientH)
		return
	}
	o, ok := client.resources[h]
	if !ok {
		// When RS_COMPATABILITY_MODE is defined as true in the driver (as it
		// is in Linux), the driver permits NV_ESC_RM_FREE on nonexistent
		// handles as a no-op, and applications do this, so log at level INFO
		// rather than WARNING.
		ctx.Infof("nvproxy: freeing object with unknown handle %v:%v", clientH, h)
		return
	}
	// Build the free list: o plus everything that transitively depends on
	// it, ordered so that dependents come before what they depend on.
	nvp.prependFreedLockedRecursive(o)
	// Drain the list from the front. o2 stays on the list while Release()
	// runs, which is what makes the recursion check above fire.
	for !nvp.objsFreeList.Empty() {
		o2 := nvp.objsFreeList.Front()
		o2.impl.Release(ctx)
		// Unlink o2 from the objects it depended on so they no longer
		// list it as a dependent.
		for o3 := range o2.deps {
			delete(o3.rdeps, o2)
		}
		delete(o2.client.resources, o2.handle)
		if o2.class == nvgpu.NV01_ROOT_CLIENT {
			delete(nvp.clients, o2.handle)
		}
		nvp.objsFreeList.Remove(o2)
		delete(nvp.objsFreeSet, o2)
		if ctx.IsLogging(log.Debug) {
			ctx.Debugf("nvproxy: freed object %v:%v (class %v)", o2.client.handle, o2.handle, o2.class)
		}
	}
}
   208  
   209  func (nvp *nvproxy) prependFreedLockedRecursive(o *object) {
   210  	if _, ok := nvp.objsFreeSet[o]; ok {
   211  		// o is already on the free list; move it to the front so that it
   212  		// remains freed before our caller's o.
   213  		nvp.objsFreeList.Remove(o)
   214  	} else {
   215  		nvp.objsFreeSet[o] = struct{}{}
   216  	}
   217  	nvp.objsFreeList.PushFront(o)
   218  
   219  	// In the driver, freeing an object causes its children and dependents to
   220  	// be freed first; see
   221  	// src/nvidia/src/libraries/resserv/src/rs_server.c:serverFreeResourceTree()
   222  	// => clientUpdatePendingFreeList_IMPL(). Replicate this freeing order.
   223  	for o2 := range o.rdeps {
   224  		nvp.prependFreedLockedRecursive(o2)
   225  	}
   226  }
   227  
// enqueueCleanup enqueues a cleanup function that will run after nvp.objsMu is
// unlocked.
//
// Queued functions are run by objsUnlock(), in the order they were enqueued.
//
// NOTE(review): nvp.objsCleanup is read and reset by objsUnlock() before it
// releases nvp.objsMu, so callers presumably must hold nvp.objsMu — confirm.
func (nvp *nvproxy) enqueueCleanup(f func()) {
	nvp.objsCleanup = append(nvp.objsCleanup, f)
}
   233  
// capturedRmAllocParams is a snapshot of the arguments passed to
// captureRmAllocParams().
//
// +stateify savable
type capturedRmAllocParams struct {
	// fd is the frontendFD passed to captureRmAllocParams().
	fd *frontendFD
	// ioctlParams is a by-value copy of the NVOS64Parameters.
	ioctlParams nvgpu.NVOS64Parameters
	// rightsRequested is the access mask passed to captureRmAllocParams().
	rightsRequested nvgpu.RS_ACCESS_MASK
	// allocParams holds the marshalled bytes of the allocation parameters. It
	// is nil if no parameters were supplied or if their type does not
	// implement marshal.Marshallable.
	allocParams []byte
}
   241  
   242  func captureRmAllocParams[Params any](fd *frontendFD, ioctlParams *nvgpu.NVOS64Parameters, rightsRequested nvgpu.RS_ACCESS_MASK, allocParams *Params) capturedRmAllocParams {
   243  	var allocParamsBuf []byte
   244  	if allocParams != nil {
   245  		if allocParamsMarshal, ok := any(allocParams).(marshal.Marshallable); ok {
   246  			allocParamsBuf = make([]byte, allocParamsMarshal.SizeBytes())
   247  			allocParamsMarshal.MarshalBytes(allocParamsBuf)
   248  		} else {
   249  			log.Traceback("nvproxy: allocParams %T is not marshalable")
   250  		}
   251  	}
   252  	return capturedRmAllocParams{
   253  		fd:              fd,
   254  		ioctlParams:     *ioctlParams,
   255  		rightsRequested: rightsRequested,
   256  		allocParams:     allocParamsBuf,
   257  	}
   258  }
   259  
// rmAllocObject is an objectImpl tracking a driver object allocated by an
// invocation of NV_ESC_RM_ALLOC whose class is not represented by a more
// specific type.
//
// +stateify savable
type rmAllocObject struct {
	object

	// params is the snapshot of the allocation arguments taken by
	// newRmAllocObject() via captureRmAllocParams().
	params capturedRmAllocParams
}
   270  
   271  func newRmAllocObject[Params any](fd *frontendFD, ioctlParams *nvgpu.NVOS64Parameters, rightsRequested nvgpu.RS_ACCESS_MASK, allocParams *Params) *rmAllocObject {
   272  	return &rmAllocObject{
   273  		params: captureRmAllocParams(fd, ioctlParams, rightsRequested, allocParams),
   274  	}
   275  }
   276  
// Release implements objectImpl.Release.
//
// rmAllocObject has nothing to clean up beyond the generic bookkeeping that
// nvproxy.objFree() performs.
func (o *rmAllocObject) Release(ctx context.Context) {
	// no-op
}
   281  
// rootClient is an objectImpl tracking a NV01_ROOT_CLIENT.
//
// +stateify savable
type rootClient struct {
	object

	// These fields are protected by nvproxy.objsMu.
	//
	// resources maps handles to the objects registered in this client by
	// nvproxy.objAdd(). The client's own object is registered here too, under
	// the client's handle.
	resources map[nvgpu.Handle]*object

	// params is the snapshot of the allocation arguments taken by
	// newRootClient() via captureRmAllocParams().
	params capturedRmAllocParams
}
   293  
   294  func newRootClient(fd *frontendFD, ioctlParams *nvgpu.NVOS64Parameters, rightsRequested nvgpu.RS_ACCESS_MASK, allocParams *nvgpu.Handle) *rootClient {
   295  	return &rootClient{
   296  		resources: make(map[nvgpu.Handle]*object),
   297  		params:    captureRmAllocParams(fd, ioctlParams, rightsRequested, allocParams),
   298  	}
   299  }
   300  
   301  // Release implements objectImpl.Release.
   302  func (o *rootClient) Release(ctx context.Context) {
   303  	delete(o.params.fd.clients, o.handle)
   304  }
   305  
// osDescMem is an objectImpl tracking a NV01_MEMORY_SYSTEM_OS_DESCRIPTOR.
type osDescMem struct {
	object
	// pinnedRanges are the pinned memory ranges associated with this object;
	// Release() arranges for them to be unpinned with mm.Unpin().
	pinnedRanges []mm.PinnedRange
}
   311  
   312  // Release implements objectImpl.Release.
   313  func (o *osDescMem) Release(ctx context.Context) {
   314  	// Unpin pages (which takes MM locks) without holding nvproxy locks.
   315  	o.nvp.enqueueCleanup(func() {
   316  		mm.Unpin(o.pinnedRanges)
   317  		if ctx.IsLogging(log.Debug) {
   318  			total := uint64(0)
   319  			for _, pr := range o.pinnedRanges {
   320  				total += uint64(pr.Source.Length())
   321  			}
   322  			ctx.Debugf("nvproxy: unpinned %d bytes for released OS descriptor", total)
   323  		}
   324  	})
   325  }
   326  
// osEvent is an objectImpl tracking a NV01_EVENT_OS_EVENT.
//
// It carries no state beyond the embedded object.
type osEvent struct {
	object
}
   331  
// Release implements objectImpl.Release.
//
// osEvent has nothing to clean up beyond the generic bookkeeping that
// nvproxy.objFree() performs.
func (o *osEvent) Release(ctx context.Context) {
	// no-op
}
   336  
// virtMem is an objectImpl tracking a NV50_MEMORY_VIRTUAL.
//
// It carries no state beyond the embedded object.
type virtMem struct {
	object
}
   341  
// Release implements objectImpl.Release.
//
// virtMem has nothing to clean up beyond the generic bookkeeping that
// nvproxy.objFree() performs.
func (o *virtMem) Release(ctx context.Context) {
	// no-op
}