github.com/hernad/nomad@v1.6.112/nomad/structs/devices.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package structs
     5  
     6  // DeviceAccounter is used to account for device usage on a node. It can detect
     7  // when a node is oversubscribed and can be used for deciding what devices are
     8  // free
     9  type DeviceAccounter struct {
    10  	// Devices maps a device group to its device accounter instance
    11  	Devices map[DeviceIdTuple]*DeviceAccounterInstance
    12  }
    13  
    14  // DeviceAccounterInstance wraps a device and adds tracking to the instances of
    15  // the device to determine if they are free or not.
    16  type DeviceAccounterInstance struct {
    17  	// Device is the device being wrapped
    18  	Device *NodeDeviceResource
    19  
    20  	// Instances is a mapping of the device IDs to their usage.
    21  	// Only a value of 0 indicates that the instance is unused.
    22  	Instances map[string]int
    23  }
    24  
    25  // NewDeviceAccounter returns a new device accounter. The node is used to
    26  // populate the set of available devices based on what healthy device instances
    27  // exist on the node.
    28  func NewDeviceAccounter(n *Node) *DeviceAccounter {
    29  	numDevices := 0
    30  	var devices []*NodeDeviceResource
    31  
    32  	// COMPAT(0.11): Remove in 0.11
    33  	if n.NodeResources != nil {
    34  		numDevices = len(n.NodeResources.Devices)
    35  		devices = n.NodeResources.Devices
    36  	}
    37  
    38  	d := &DeviceAccounter{
    39  		Devices: make(map[DeviceIdTuple]*DeviceAccounterInstance, numDevices),
    40  	}
    41  
    42  	for _, dev := range devices {
    43  		id := *dev.ID()
    44  		d.Devices[id] = &DeviceAccounterInstance{
    45  			Device:    dev,
    46  			Instances: make(map[string]int, len(dev.Instances)),
    47  		}
    48  		for _, instance := range dev.Instances {
    49  			// Skip unhealthy devices as they aren't allocatable
    50  			if !instance.Healthy {
    51  				continue
    52  			}
    53  
    54  			d.Devices[id].Instances[instance.ID] = 0
    55  		}
    56  	}
    57  
    58  	return d
    59  }
    60  
    61  // AddAllocs takes a set of allocations and internally marks which devices are
    62  // used. If a device is used more than once by the set of passed allocations,
    63  // the collision will be returned as true.
    64  func (d *DeviceAccounter) AddAllocs(allocs []*Allocation) (collision bool) {
    65  	for _, a := range allocs {
    66  		// Filter any terminal allocation
    67  		if a.ClientTerminalStatus() {
    68  			continue
    69  		}
    70  
    71  		// COMPAT(0.11): Remove in 0.11
    72  		// If the alloc doesn't have the new style resources, it can't have
    73  		// devices
    74  		if a.AllocatedResources == nil {
    75  			continue
    76  		}
    77  
    78  		// Go through each task  resource
    79  		for _, tr := range a.AllocatedResources.Tasks {
    80  
    81  			// Go through each assigned device group
    82  			for _, device := range tr.Devices {
    83  				devID := device.ID()
    84  
    85  				// Go through each assigned device
    86  				for _, instanceID := range device.DeviceIDs {
    87  
    88  					// Mark that we are using the device. It may not be in the
    89  					// map if the device is no longer being fingerprinted, is
    90  					// unhealthy, etc.
    91  					if devInst, ok := d.Devices[*devID]; ok {
    92  						if i, ok := devInst.Instances[instanceID]; ok {
    93  							// Mark that the device is in use
    94  							devInst.Instances[instanceID]++
    95  
    96  							if i != 0 {
    97  								collision = true
    98  							}
    99  						}
   100  					}
   101  				}
   102  			}
   103  		}
   104  	}
   105  
   106  	return
   107  }
   108  
   109  // AddReserved marks the device instances in the passed device reservation as
   110  // used and returns if there is a collision.
   111  func (d *DeviceAccounter) AddReserved(res *AllocatedDeviceResource) (collision bool) {
   112  	// Lookup the device.
   113  	devInst, ok := d.Devices[*res.ID()]
   114  	if !ok {
   115  		return false
   116  	}
   117  
   118  	// For each reserved instance, mark it as used
   119  	for _, id := range res.DeviceIDs {
   120  		cur, ok := devInst.Instances[id]
   121  		if !ok {
   122  			continue
   123  		}
   124  
   125  		// It has already been used, so mark that there is a collision
   126  		if cur != 0 {
   127  			collision = true
   128  		}
   129  
   130  		devInst.Instances[id]++
   131  	}
   132  
   133  	return
   134  }
   135  
   136  // FreeCount returns the number of free device instances
   137  func (i *DeviceAccounterInstance) FreeCount() int {
   138  	count := 0
   139  	for _, c := range i.Instances {
   140  		if c == 0 {
   141  			count++
   142  		}
   143  	}
   144  	return count
   145  }