github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/cuda.go

     1  // Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.
     2  
     3  package runner
     4  
     5  // This file contains the data structures used by the CUDA package, both when the
     6  // platform is supported and when it is not
     7  
     8  import (
     9  	"context"
    10  	"fmt"
    11  	"os"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"time"
    17  
    18  	"github.com/davecgh/go-spew/spew"
    19  	"github.com/go-stack/stack"
    20  	"github.com/jjeffery/kv" // MIT License
    21  	"github.com/rs/xid"
    22  
    23  	"github.com/lthibault/jitterbug"
    24  
    25  	"github.com/mitchellh/copystructure"
    26  )
    27  
    28  type device struct {
    29  	UUID       string    `json:"uuid"`
    30  	Name       string    `json:"name"`
    31  	Temp       uint      `json:"temp"`
    32  	Powr       uint      `json:"powr"`
    33  	MemTot     uint64    `json:"memtot"`
    34  	MemUsed    uint64    `json:"memused"`
    35  	MemFree    uint64    `json:"memfree"`
    36  	EccFailure *kv.Error `json:"eccfailure"`
    37  }
    38  
    39  type cudaDevices struct {
    40  	Devices []device `json:"devices"`
    41  }
    42  
    43  // GPUTrack is used to track usage of GPU cards and any errors generated by the cards
    44  // at the hardware level
    45  //
    46  type GPUTrack struct {
    47  	UUID       string              // The UUID designation for the GPU being managed
    48  	Slots      uint                // The number of logical slots the GPU has, based on its size
    49  	Mem        uint64              // The amount of memory the GPU possesses
    50  	FreeSlots  uint                // The number of free logical slots the GPU has available
    51  	FreeMem    uint64              // The amount of free memory the GPU has
    52  	EccFailure *kv.Error           // Any ECC failure related error messages, nil if none were encountered
    53  	Tracking   map[string]struct{} // Used to validate allocations as they are released
    54  }
    55  
    56  type gpuTracker struct {
    57  	Allocs map[string]*GPUTrack
    58  	sync.Mutex
    59  }
    60  
    61  var (
    62  	// A map keyed on the nvidia device UUID containing information about cards and
    63  	// their occupancy by the go runner.
    64  	//
    65  	gpuAllocs gpuTracker
    66  
    67  	// UseGPU is used by specific types of testing to disable GPU tests when GPU cards
    68  	// are potentially present but need to be ignored.  This flag is not used during
    69  	// production to change behavior in any way
    70  	UseGPU *bool
    71  
    72  	// CudaInitErr records the result of the CUDA library initialization that would
    73  	// impact ongoing operation
    74  	CudaInitErr *kv.Error
    75  
    76  	// CudaInitWarnings records warnings and errors that are deemed not to be fatal
    77  	// to ongoing CUDA library usage but are of importance
    78  	CudaInitWarnings = []kv.Error{}
    79  
    80  	// CudaInTest is used to check if the running process is a go test process.  If so
    81  	// certain types of checking are disabled when running on very limited GPU
    82  	// hardware
    83  	CudaInTest = false
    84  )
    85  
    86  func init() {
    87  	temp := true
    88  	UseGPU = &temp
    89  
    90  	gpuDevices, err := getCUDAInfo()
    91  	if err != nil {
    92  		CudaInitErr = &err
    93  		CudaInitWarnings = append(CudaInitWarnings, err)
    94  		return
    95  	}
    96  
    97  	devs := os.Getenv("CUDA_VISIBLE_DEVICES")
    98  	if len(devs) == 0 {
    99  		devs = os.Getenv("NVIDIA_VISIBLE_DEVICES")
   100  	}
   101  
   102  	visDevices := strings.Split(devs, ",")
   103  
   104  	if devs == "all" {
   105  		visDevices = make([]string, 0, len(gpuDevices.Devices))
   106  		for _, device := range gpuDevices.Devices {
   107  			visDevices = append(visDevices, device.UUID)
   108  		}
   109  	}
   110  
   111  	gpuAllocs.Lock()
   112  	defer gpuAllocs.Unlock()
   113  	gpuAllocs.Allocs = make(map[string]*GPUTrack, len(visDevices))
   114  
   115  	// If the visDevices were specified use them to generate existing entries inside the device map.
   116  	// These entries will then get filled in later.
   117  	//
   118  	// Look to see if we have any index values in here, it really should be all UUID strings.
   119  	// Warn if we find some, but still continue.
   120  	warned := false
   121  	for _, id := range visDevices {
   122  		if len(id) == 0 {
   123  			continue
   124  		}
   125  		if i, err := strconv.Atoi(id); err == nil {
   126  			if !warned {
   127  				warned = true
   128  				CudaInitWarnings = append(CudaInitWarnings, kv.NewError("CUDA_VISIBLE_DEVICES should be using UUIDs not indexes").With("stack", stack.Trace().TrimRuntime()))
   129  			}
   130  			if i >= len(gpuDevices.Devices) {
   131  				CudaInitWarnings = append(CudaInitWarnings, kv.NewError("CUDA_VISIBLE_DEVICES contained an index past the known population of GPU cards").With("stack", stack.Trace().TrimRuntime()))
        				continue
   132  			}
   133  			gpuAllocs.Allocs[gpuDevices.Devices[i].UUID] = &GPUTrack{Tracking: map[string]struct{}{}}
   134  		} else {
   135  			gpuAllocs.Allocs[id] = &GPUTrack{Tracking: map[string]struct{}{}}
   136  		}
   137  	}
   138  
   139  	if len(gpuAllocs.Allocs) == 0 {
   140  		for _, dev := range gpuDevices.Devices {
   141  			gpuAllocs.Allocs[dev.UUID] = &GPUTrack{Tracking: map[string]struct{}{}}
   142  		}
   143  	}
   144  
   145  	// Scan the inventory, checking matches if they were specified in the visibility env var and then fill
   146  	// in real world data
   147  	//
   148  	for _, dev := range gpuDevices.Devices {
   149  		// Don't include devices that were not specified by CUDA_VISIBLE_DEVICES
   150  		if _, isPresent := gpuAllocs.Allocs[dev.UUID]; !isPresent {
   151  			fmt.Println("GPU Skipped", dev.UUID)
   152  			continue
   153  		}
   154  
   155  		track := &GPUTrack{
   156  			UUID:       dev.UUID,
   157  			Mem:        dev.MemFree,
   158  			EccFailure: dev.EccFailure,
   159  			Tracking:   map[string]struct{}{},
   160  		}
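        		// Assign a logical slot count based on the recognized card model, with larger
        		// cards being given more logical slots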
   161  		switch {
   162  		case strings.Contains(dev.Name, "GTX 1050"),
   163  			strings.Contains(dev.Name, "GTX 1060"):
   164  			track.Slots = 2
   165  		case strings.Contains(dev.Name, "GTX 1070"),
   166  			strings.Contains(dev.Name, "GTX 1080"):
   167  			track.Slots = 2
   168  		case strings.Contains(dev.Name, "TITAN X"):
   169  			track.Slots = 2
   170  		case strings.Contains(dev.Name, "RTX 2080 Ti"):
   171  			track.Slots = 2
   172  		case strings.Contains(dev.Name, "Tesla K80"):
   173  			track.Slots = 2
   174  		case strings.Contains(dev.Name, "Tesla P40"):
   175  			track.Slots = 4
   176  		case strings.Contains(dev.Name, "Tesla P100"):
   177  			track.Slots = 8
   178  		case strings.Contains(dev.Name, "Tesla V100"):
   179  			track.Slots = 16
   180  		default:
   181  			CudaInitWarnings = append(CudaInitWarnings, kv.NewError("unrecognized gpu device").With("gpu_name", dev.Name).With("gpu_uuid", dev.UUID).With("stack", stack.Trace().TrimRuntime()))
   182  		}
   183  		track.FreeSlots = track.Slots
   184  		track.FreeMem = track.Mem
   185  		gpuAllocs.Allocs[dev.UUID] = track
   186  	}
   187  }
   188  
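        // GetCUDAInfo returns the current inventory of CUDA devices seen by the runner,
        // delegating to the internal getCUDAInfo implementation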
   189  func GetCUDAInfo() (outDevs cudaDevices, err kv.Error) {
   190  	return getCUDAInfo()
   191  }
   192  
   193  // GPUInventory can be used to extract a copy of the current state of the GPU hardware seen within the
   194  // runner
   195  func GPUInventory() (gpus []GPUTrack, err kv.Error) {
   196  
   197  	gpus = []GPUTrack{}
   198  
   199  	gpuAllocs.Lock()
   200  	defer gpuAllocs.Unlock()
   201  
   202  	for _, alloc := range gpuAllocs.Allocs {
   203  		cpy, errGo := copystructure.Copy(*alloc)
   204  		if errGo != nil {
   205  			return nil, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
   206  		}
   207  		gpus = append(gpus, cpy.(GPUTrack))
   208  	}
   209  	return gpus, nil
   210  }
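        // sumFreeGPUMem is a minimal usage sketch for GPUInventory and is not part of the
        // original API; the function name is illustrative only.  It totals the free memory
        // reported for cards that have not recorded an ECC failure.
        func sumFreeGPUMem() (freeMem uint64, err kv.Error) {
        	gpus, err := GPUInventory()
        	if err != nil {
        		return 0, err
        	}
        	for _, gpu := range gpus {
        		// Skip cards that have been marked as failed
        		if gpu.EccFailure != nil {
        			continue
        		}
        		freeMem += gpu.FreeMem
        	}
        	return freeMem, nil
        }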
   211  
   212  // MonitorGPUs is started as a go routine once all of the devices have been initialized
   213  // in the tracking map.  It periodically checks the devices for ECC and other errors,
   214  // marking failed GPUs
   215  //
   216  func MonitorGPUs(ctx context.Context, statusC chan<- []string, errC chan<- kv.Error) {
   217  	// Take all of the warnings etc that were gathered during initialization and
   218  	// get them back to the error handling listener
   219  	for _, warn := range CudaInitWarnings {
   220  		select {
   221  		case errC <- warn:
   222  		case <-time.After(time.Second):
   223  			// last gasp attempt to output the error
   224  			fmt.Println(warn)
   225  		}
   226  	}
   227  
   228  	firstTime := true
   229  
   230  	t := jitterbug.New(time.Second*30, &jitterbug.Norm{Stdev: time.Second * 3})
   231  	defer t.Stop()
   232  
   233  	for {
   234  		select {
   235  		case <-t.C:
   236  			gpuDevices, err := getCUDAInfo()
   237  			if err != nil {
   238  				select {
   239  				case errC <- err:
   240  				default:
   241  					// last gasp attempt to output the error
   242  					fmt.Println(err)
   243  				}
   244  			}
   245  			// Look at all the GPUs we have in our hardware config
   246  			for _, dev := range gpuDevices.Devices {
   247  				if firstTime {
   248  					msg := []string{"gpu found", "name", dev.Name, "uuid", dev.UUID, "stack", stack.Trace().TrimRuntime().String()}
   249  					select {
   250  					case statusC <- msg:
   251  					case <-time.After(time.Second):
   252  						fmt.Println(msg)
   253  					}
   254  				}
   255  				if dev.EccFailure != nil {
   256  					gpuAllocs.Lock()
   257  					// Check to see if the hardware GPU had a failure
   258  					// and, if it is in the tracking table and does
   259  					// not yet have an error logged, log the error
   260  					// in the tracking table
   261  					if gpu, isPresent := gpuAllocs.Allocs[dev.UUID]; isPresent {
   262  						if gpu.EccFailure == nil {
   263  							gpu.EccFailure = dev.EccFailure
   264  							gpuAllocs.Allocs[gpu.UUID] = gpu
   265  						}
   266  					}
   267  					gpuAllocs.Unlock()
   268  					select {
   269  					case errC <- *dev.EccFailure:
   270  					default:
   271  						// last gasp attempt to output the error
   272  						fmt.Println(dev.EccFailure)
   273  					}
   274  				}
   275  			}
   276  			firstTime = false
   277  		case <-ctx.Done():
   278  			return
   279  		}
   280  	}
   281  }
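        // startGPUMonitor is an illustrative sketch, not part of the original file, showing how
        // MonitorGPUs is intended to be started as a go routine with channels draining its
        // status and error reports.  The channel buffer sizes are assumptions.
        func startGPUMonitor(ctx context.Context) {
        	statusC := make(chan []string, 8)
        	errC := make(chan kv.Error, 8)
        
        	go MonitorGPUs(ctx, statusC, errC)
        
        	go func() {
        		for {
        			select {
        			case msg := <-statusC:
        				fmt.Println(msg)
        			case err := <-errC:
        				fmt.Println(err)
        			case <-ctx.Done():
        				return
        			}
        		}
        	}()
        }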
   282  
   283  // GPUCount returns the number of allocatable GPU resources
   284  func GPUCount() (cnt int) {
   285  	gpuAllocs.Lock()
   286  	defer gpuAllocs.Unlock()
   287  
   288  	return len(gpuAllocs.Allocs)
   289  }
   290  
   291  // GPUSlots gets the free and total number of GPU capacity slots within
   292  // the machine
   293  //
   294  func GPUSlots() (cnt uint, freeCnt uint) {
   295  	gpuAllocs.Lock()
   296  	defer gpuAllocs.Unlock()
   297  
   298  	for _, alloc := range gpuAllocs.Allocs {
   299  		cnt += alloc.Slots
   300  		freeCnt += alloc.FreeSlots
   301  	}
   302  	return cnt, freeCnt
   303  }
   304  
   305  // LargestFreeGPUSlots gets the largest number of single device free GPU slots
   306  //
   307  func LargestFreeGPUSlots() (cnt uint) {
   308  	gpuAllocs.Lock()
   309  	defer gpuAllocs.Unlock()
   310  
   311  	for _, alloc := range gpuAllocs.Allocs {
   312  		if alloc.FreeSlots > cnt {
   313  			cnt = alloc.FreeSlots
   314  		}
   315  	}
   316  	return cnt
   317  }
   318  
   319  // TotalFreeGPUSlots gets the total number of free GPU slots across all devices
   320  //
   321  func TotalFreeGPUSlots() (cnt uint) {
   322  	gpuAllocs.Lock()
   323  	defer gpuAllocs.Unlock()
   324  
   325  	for _, alloc := range gpuAllocs.Allocs {
   326  		cnt += alloc.FreeSlots
   327  	}
   328  	return cnt
   329  }
   330  
   331  // LargestFreeGPUMem will obtain the largest amount of free GPU memory available
   332  // on any of the individual cards accessible to the runner
   333  func LargestFreeGPUMem() (freeMem uint64) {
   334  	gpuAllocs.Lock()
   335  	defer gpuAllocs.Unlock()
   336  
   337  	for _, alloc := range gpuAllocs.Allocs {
   338  		if alloc.Slots != 0 && alloc.FreeMem > freeMem {
   339  			freeMem = alloc.FreeMem
   340  		}
   341  	}
   342  	return freeMem
   343  }
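        // fitsSingleGPU is a hedged example, not part of the original API, showing how the
        // capacity helpers above can be combined to test whether a task needing the given
        // slots and memory could currently fit on a single card of this node.
        func fitsSingleGPU(wantSlots uint, wantMem uint64) bool {
        	return LargestFreeGPUSlots() >= wantSlots && LargestFreeGPUMem() >= wantMem
        }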
   344  
   345  // GPUAllocated is used to record the allocation/reservation of a GPU resource on behalf of a caller
   346  //
   347  type GPUAllocated struct {
   348  	tracking string            // Allocation tracking ID
   349  	uuid     string            // The device identifier this allocation was successful against
   350  	slots    uint              // The number of GPU slots given from the allocation
   351  	mem      uint64            // The amount of memory given to the allocation
   352  	Env      map[string]string // Any environment variables the device allocator wants the runner to use
   353  }
   354  
   355  // GPUAllocations records the set of allocations that together were granted to a caller.
   356  //
   357  type GPUAllocations []*GPUAllocated
   358  
   359  // AllocGPU selects the default allocation pool for GPUs and performs the allocation against it.
   360  //
   361  func AllocGPU(maxGPU uint, maxGPUMem uint64, unitsOfAllocation []uint, live bool) (alloc GPUAllocations, err kv.Error) {
   362  	return gpuAllocs.AllocGPU(maxGPU, maxGPUMem, unitsOfAllocation, live)
   363  }
   364  
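        // evens returns start followed by every even number up to and including end, for
        // example evens(2, 8) yields [2 4 6 8].  It is used to build the default units of
        // allocation when the caller does not supply any.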
   365  func evens(start int, end int) (result []int) {
   366  	result = []int{start}
   367  	inc := 1
   368  	for cur := start + 1; cur < end+1; cur += inc {
   369  		if cur%2 == 0 {
   370  			result = append(result, cur)
   371  			inc = 2
   372  		}
   373  	}
   374  	return result
   375  }
   376  
   377  // AllocGPU will attempt to find a free CUDA capable GPU from a supplied allocator pool
   378  // and assign it to the client.  It will on finding a device set the appropriate values
   379  // in the allocated return structure that the client can use to manage their resource
   380  // consumption to match the permitted limits.
   381  //
   382  // When allocations occur across multiple devices the units of allocation parameter
   383  // defines the granularity that the cards must conform to in terms of slots.
   384  //
   385  // Any allocation will take an entire card; we do not split cards across experiments
   386  //
   387  // This receiver uses a user supplied pool which allows for unit tests to be written that use a
   388  // custom pool
   389  //
   390  // The live parameter, if false, can be used to test whether the allocation would succeed
   391  // without actually performing it.  When live is false no allocation is returned, and err
   392  // will be nil if the allocation would have been successful.
   393  //
   394  func (allocator *gpuTracker) AllocGPU(maxGPU uint, maxGPUMem uint64, unitsOfAllocation []uint, live bool) (alloc GPUAllocations, err kv.Error) {
   395  
   396  	alloc = GPUAllocations{}
   397  
   398  	if maxGPU == 0 && maxGPUMem == 0 {
   399  		return alloc, nil
   400  	}
   401  
   402  	// Start with the smallest granularity of allocations permitted and try to find a fit for the total,
   403  	// then continue up through the granularities until we have exhausted the options
   404  
   405  	// Put the units of allocation into a searchable slice that will be sorted smallest first
   406  	units := make([]int, len(unitsOfAllocation))
   407  	for i, unit := range unitsOfAllocation {
   408  		units[i] = int(unit)
   409  	}
   410  	// If the caller failed to supply units of allocation generate a default set consisting
   411  	// of the even numbers between the minimum number of slots for a GPU and the
   412  	// upper limit
   413  	if len(units) == 0 {
   414  		units = evens(2, int(maxGPU+1)*2)
   415  	}
   416  
   417  	sort.Slice(units, func(i, j int) bool { return units[i] < units[j] })
   418  
   419  	// Start building logging style information to be used in the
   420  	// event of a real error
   421  	kvDetails := []interface{}{"maxGPU", maxGPU, "units", units}
   422  
   423  	// Now we lock after doing initialization of the functions own variables
   424  	allocator.Lock()
   425  	defer allocator.Unlock()
   426  
   427  	// Add a structure that will be used later to order our UUIDs
   428  	// by the number of free slots they have
   429  	type SlotsByUUID struct {
   430  		uuid      string
   431  		freeSlots uint
   432  	}
   433  	slotsByUUID := make([]SlotsByUUID, 0, len(allocator.Allocs))
   434  
   435  	// Take any cards whose slot capacity exactly matches one of the permitted
   436  	// units and use those, but exclude cards with
   437  	// ECC errors
   438  	usableAllocs := make(map[string]*GPUTrack, len(allocator.Allocs))
   439  	for k, v := range allocator.Allocs {
   440  		// Cannot use this card, it is broken
   441  		if v.EccFailure != nil {
   442  			continue
   443  		}
   444  		// Make sure the card's slot count is one of the unit values
   445  		// acceptable to the caller
   446  		pos := sort.SearchInts(units, int(v.Slots))
   447  		if pos < len(units) && int(v.Slots) == units[pos] {
   448  			usableAllocs[k] = v
   449  			slotsByUUID = append(slotsByUUID, SlotsByUUID{uuid: v.UUID, freeSlots: v.FreeSlots})
   450  		}
   451  	}
   452  
   453  	if len(slotsByUUID) == 0 {
   454  		kvDetails = append(kvDetails, []interface{}{"allocs", spew.Sdump(allocator.Allocs)}...)
   455  		return nil, kv.NewError("insufficient free GPUs").With(kvDetails...)
   456  	}
   457  
   458  	// Take the permitted cards and sort their UUIDs in order of the
   459  	// smallest number of free slots first
   460  	sort.Slice(slotsByUUID, func(i, j int) bool {
   461  		if slotsByUUID[i].freeSlots < slotsByUUID[j].freeSlots {
   462  			return true
   463  		}
   464  
   465  		if slotsByUUID[i].freeSlots > slotsByUUID[j].freeSlots {
   466  			return false
   467  		}
   468  
   469  		return slotsByUUID[i].uuid < slotsByUUID[j].uuid
   470  	})
   471  
   472  	kvDetails = append(kvDetails, []interface{}{"slots", slotsByUUID}...)
   473  
   474  	// Because we know the preferred allocation units we can simply start with the smallest quantity
   475  	// and, where possible, build up enough of the smaller items to meet the need, each such set
   476  	// becoming one candidate combination.
   477  	//
   478  	type reservation struct {
   479  		uuid  string
   480  		slots uint
   481  	}
   482  	type combination struct {
   483  		cards []reservation
   484  		waste int
   485  	}
   486  
   487  	combinations := []combination{}
   488  
   489  	// Go through building combinations that work and track the waste for each solution.
   490  	//
   491  	for i, uuid := range slotsByUUID {
   492  		slotsFound := usableAllocs[uuid.uuid].FreeSlots
   493  		cmd := combination{cards: []reservation{{uuid: uuid.uuid, slots: usableAllocs[uuid.uuid].FreeSlots}}}
   494  		func() {
   495  			if slotsFound < maxGPU && i < len(slotsByUUID) {
   496  				for _, nextUUID := range slotsByUUID[i+1:] {
   497  					slotsFound += usableAllocs[nextUUID.uuid].FreeSlots
   498  					cmd.cards = append(cmd.cards, reservation{uuid: nextUUID.uuid, slots: usableAllocs[nextUUID.uuid].FreeSlots})
   499  
   500  					// We have enough slots now, stop looking and go to the next largest starting point
   501  					if slotsFound >= maxGPU {
   502  						return
   503  					}
   504  				}
   505  			}
   506  		}()
   507  
   508  		// We have a combination that meets or exceeds our needs
   509  		if slotsFound >= maxGPU {
   510  			cmd.waste = int(slotsFound - maxGPU)
   511  			combinations = append(combinations, cmd)
   512  		}
   513  	}
   514  
   515  	if len(combinations) == 0 {
   516  		kvDetails = append(kvDetails, "stack", stack.Trace().TrimRuntime())
   517  		return nil, kv.NewError("insufficient GPU").With(kvDetails...)
   518  	}
   519  
   520  	// Sort the combinations by waste, get the least waste
   521  	//
   522  	sort.Slice(combinations, func(i, j int) bool { return combinations[i].waste < combinations[j].waste })
   523  
   524  	// Keep only the combinations that share the least waste in slots
   525  	minWaste := combinations[0].waste
   526  	for i, comb := range combinations {
   527  		if minWaste != comb.waste {
   528  			combinations = combinations[:i]
   529  			break
   530  		}
   531  	}
   532  
   533  	// Sort what is left over by the number of impacted cards
   534  	sort.Slice(combinations, func(i, j int) bool { return len(combinations[i].cards) < len(combinations[j].cards) })
   535  	kvDetails = append(kvDetails, []interface{}{"combinations", combinations}...)
   536  
   537  	// OK Now we simply take the first option if one was found
   538  	matched := combinations[0]
   539  
   540  	if len(matched.cards) == 0 {
   541  		kvDetails = append(kvDetails, "stack", stack.Trace().TrimRuntime())
   542  		return nil, kv.NewError("insufficient partitioned GPUs").With(kvDetails...)
   543  	}
   544  
   545  	// Got as far as knowing the allocation will work so check for the live flag
   546  	if !live {
   547  		return nil, nil
   548  	}
   549  
   550  	// Go through the chosen combination of cards and do the allocations
   551  	//
   552  	for _, found := range matched.cards {
   553  		slots := maxGPU
   554  		if slots > allocator.Allocs[found.uuid].FreeSlots {
   555  			slots = allocator.Allocs[found.uuid].FreeSlots
   556  		}
   557  
   558  		if maxGPUMem == 0 {
   559  			// If the user does not know take it all, burn it to the ground
   560  			slots = allocator.Allocs[found.uuid].FreeSlots
   561  			maxGPUMem = allocator.Allocs[found.uuid].FreeMem
   562  		}
   563  		allocator.Allocs[found.uuid].FreeSlots -= slots
   564  		allocator.Allocs[found.uuid].FreeMem -= maxGPUMem
   565  
   566  		tracking := xid.New().String()
   567  		alloc = append(alloc, &GPUAllocated{
   568  			tracking: tracking,
   569  			uuid:     found.uuid,
   570  			slots:    slots,
   571  			mem:      maxGPUMem,
   572  			Env: map[string]string{
   573  				"NVIDIA_VISIBLE_DEVICES": found.uuid,
   574  				"CUDA_VISIBLE_DEVICES":   found.uuid,
   575  			},
   576  		})
   577  
   578  		allocator.Allocs[found.uuid].Tracking[tracking] = struct{}{}
   579  	}
   580  
   581  	return alloc, nil
   582  }
   583  
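        // ReturnGPU hands a previous allocation back to the tracker, restoring the slots and
        // memory it consumed after validating that the allocation is still being tracked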
   584  func (allocator *gpuTracker) ReturnGPU(alloc *GPUAllocated) (err kv.Error) {
   585  
   586  	if alloc.slots == 0 {
   587  		return nil
   588  	}
   589  
   590  	allocator.Lock()
   591  	defer allocator.Unlock()
   592  
   593  	// Make sure that the allocation is still valid
   594  	if _, isPresent := allocator.Allocs[alloc.uuid]; !isPresent {
   595  		return kv.NewError("cuda device no longer in service").With("device", alloc.uuid).With("stack", stack.Trace().TrimRuntime())
   596  	}
   597  
   598  	if _, isPresent := allocator.Allocs[alloc.uuid].Tracking[alloc.tracking]; !isPresent {
   599  		return kv.NewError("invalid allocation").With("alloc_id", alloc.tracking).With("stack", stack.Trace().TrimRuntime())
   600  	}
   601  
   602  	delete(allocator.Allocs[alloc.uuid].Tracking, alloc.tracking)
   603  
   604  	// If valid pass back the resources that were consumed
   605  	allocator.Allocs[alloc.uuid].FreeSlots += alloc.slots
   606  	allocator.Allocs[alloc.uuid].FreeMem += alloc.mem
   607  
   608  	return nil
   609  }
   610  
   611  // ReturnGPU releases the GPU allocation passed in.  It will validate some of the allocation
   612  // details but relies on an honor system.
   613  //
   614  func ReturnGPU(alloc *GPUAllocated) (err kv.Error) {
   615  	return gpuAllocs.ReturnGPU(alloc)
   616  }
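        // allocateAndRelease is a minimal usage sketch, not part of the original file, walking
        // through the reserve, use and release cycle.  The slot count, memory size and units
        // of allocation below are illustrative assumptions only.
        func allocateAndRelease() (err kv.Error) {
        	// Ask for 2 slots and 8 GiB, allowing cards partitioned into 2 or 4 slot units
        	allocs, err := AllocGPU(2, 8*1024*1024*1024, []uint{2, 4}, true)
        	if err != nil {
        		return err
        	}
        
        	for _, alloc := range allocs {
        		// The Env map carries the CUDA_VISIBLE_DEVICES style settings for the experiment process
        		for k, v := range alloc.Env {
        			fmt.Println(k, v)
        		}
        	}
        
        	// Hand every reservation back once the work is complete
        	for _, alloc := range allocs {
        		if errGo := ReturnGPU(alloc); errGo != nil && err == nil {
        			err = errGo
        		}
        	}
        	return err
        }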