golang.org/x/build@v0.0.0-20240506185731-218518f32b70/internal/coordinator/pool/ledger.go (about)

     1  // Copyright 2020 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build linux || darwin
     6  
     7  package pool
     8  
     9  import (
    10  	"context"
    11  	"fmt"
    12  	"sort"
    13  	"sync"
    14  	"time"
    15  
    16  	"golang.org/x/build/internal/cloud"
    17  	"golang.org/x/build/internal/coordinator/pool/queue"
    18  )
    19  
    20  // entry contains the resource usage of an instance as well as
    21  // identifying information.
    22  type entry struct {
    23  	createdAt    time.Time
    24  	instanceID   string
    25  	instanceName string
    26  	instanceType string
    27  	vCPUCount    int64
    28  	quota        *queue.Item
    29  }
    30  
    31  // ledger contains a record of the instances and their resource
    32  // consumption. Before an instance is created, a call to the ledger
    33  // will ensure that there are available resources for the new instance.
    34  type ledger struct {
    35  	mu sync.RWMutex
    36  	// cpuQueue is the queue for on-demand vCPU VMs created on EC2.
    37  	cpuQueue *queue.Quota
    38  	// entries contains a mapping of instance name to entries for each instance
    39  	// that has resources allocated to it.
    40  	entries map[string]*entry
    41  	// types contains a mapping of instance type names to instance types for each
    42  	// ARM64 EC2 instance.
    43  	types map[string]*cloud.InstanceType
    44  }
    45  
    46  // newLedger creates a new ledger.
    47  func newLedger() *ledger {
    48  	l := &ledger{
    49  		entries:  make(map[string]*entry),
    50  		cpuQueue: queue.NewQuota(),
    51  		types:    make(map[string]*cloud.InstanceType),
    52  	}
    53  	return l
    54  }
    55  
    56  // ReserveResources attempts to reserve the resources required for an instance to be created.
    57  // It will attempt to reserve the resources that an instance type would require. This will
    58  // attempt to reserve the resources until the context deadline is reached.
    59  func (l *ledger) ReserveResources(ctx context.Context, instName, vmType string, si *queue.SchedItem) error {
    60  	instType, err := l.PrepareReservationRequest(instName, vmType)
    61  	if err != nil {
    62  		return err
    63  	}
    64  
    65  	// should never happen
    66  	if instType.CPU <= 0 {
    67  		return fmt.Errorf("invalid allocation requested: %d", instType.CPU)
    68  	}
    69  	item := l.cpuQueue.Enqueue(int(instType.CPU), si)
    70  	if err := item.Await(ctx); err != nil {
    71  		return err
    72  	}
    73  	l.mu.Lock()
    74  	defer l.mu.Unlock()
    75  
    76  	e, ok := l.entries[instName]
    77  	if ok {
    78  		e.vCPUCount = instType.CPU
    79  	} else {
    80  		l.entries[instName] = &entry{
    81  			instanceName: instName,
    82  			vCPUCount:    instType.CPU,
    83  			instanceType: instType.Type,
    84  			quota:        item,
    85  		}
    86  	}
    87  	return nil
    88  }
    89  
    90  // PrepareReservationRequest ensures all the preconditions necessary for a reservation request are
    91  // met. If the conditions are met then an instance type for the requested VM type is returned. If
    92  // not an error is returned.
    93  func (l *ledger) PrepareReservationRequest(instName, vmType string) (*cloud.InstanceType, error) {
    94  	l.mu.RLock()
    95  	defer l.mu.RUnlock()
    96  
    97  	instType, ok := l.types[vmType]
    98  	if !ok {
    99  		return nil, fmt.Errorf("unknown EC2 vm type: %s", vmType)
   100  	}
   101  	_, ok = l.entries[instName]
   102  	if ok {
   103  		return nil, fmt.Errorf("quota has already been allocated for %s of type %s", instName, vmType)
   104  	}
   105  	return instType, nil
   106  }
   107  
   108  // releaseResources deletes the entry associated with an instance. The resources associated with the
   109  // instance will also be released. An error is returned if the instance entry is not found.
   110  // Lock l.mu must be held by the caller.
   111  func (l *ledger) releaseResources(instName string) error {
   112  	e, ok := l.entries[instName]
   113  	if !ok {
   114  		return fmt.Errorf("instance not found for releasing quota: %s", instName)
   115  	}
   116  	e.quota.ReturnQuota()
   117  	return nil
   118  }
   119  
   120  // UpdateReservation updates the entry for an instance with the id value for that instance. If
   121  // an entry for the instance does not exist then an error will be returned. Another mechanism should
   122  // be used to manage untracked instances. Updating the reservation acts as a signal that the instance
   123  // has actually been created since the instance ID is known.
   124  func (l *ledger) UpdateReservation(instName, instID string) error {
   125  	l.mu.Lock()
   126  	defer l.mu.Unlock()
   127  
   128  	e, ok := l.entries[instName]
   129  	if !ok {
   130  		return fmt.Errorf("unable to update reservation: instance not found %s", instName)
   131  	}
   132  	e.createdAt = time.Now()
   133  	e.instanceID = instID
   134  	return nil
   135  }
   136  
   137  // Remove releases any reserved resources for an instance and deletes the associated entry.
   138  // An error is returned if and entry does not exist for the instance.
   139  func (l *ledger) Remove(instName string) error {
   140  	l.mu.Lock()
   141  	defer l.mu.Unlock()
   142  
   143  	if err := l.releaseResources(instName); err != nil {
   144  		return fmt.Errorf("unable to remove instance: %w", err)
   145  	}
   146  	delete(l.entries, instName)
   147  	return nil
   148  }
   149  
   150  // InstanceID retrieves the instance ID for an instance by looking up the instance name.
   151  // If an instance is not found, an empty string is returned.
   152  func (l *ledger) InstanceID(instName string) string {
   153  	l.mu.RLock()
   154  	defer l.mu.RUnlock()
   155  
   156  	e, ok := l.entries[instName]
   157  	if !ok {
   158  		return ""
   159  	}
   160  	return e.instanceID
   161  }
   162  
   163  // SetCPULimit sets the vCPU limit used to determine if a CPU allocation would
   164  // cross the threshold for available CPU for on-demand instances.
   165  func (l *ledger) SetCPULimit(numCPU int64) {
   166  	l.cpuQueue.UpdateLimit(int(numCPU))
   167  }
   168  
   169  // UpdateInstanceTypes updates the map of instance types used to map instance
   170  // type to the resources required for the instance.
   171  func (l *ledger) UpdateInstanceTypes(types []*cloud.InstanceType) {
   172  	l.mu.Lock()
   173  	defer l.mu.Unlock()
   174  
   175  	for _, it := range types {
   176  		l.types[it.Type] = it
   177  	}
   178  }
   179  
   180  // resources contains the current limit and usage of instance related resources.
   181  type resources struct {
   182  	// InstCount is the count of how many on-demand instances are tracked in the ledger.
   183  	InstCount int64
   184  	// CPUUsed is a count of the vCPU's for on-demand instances are currently allocated in the ledger.
   185  	CPUUsed int64
   186  	// CPULimit is the limit of how many vCPU's for on-demand instances can be allocated.
   187  	CPULimit int64
   188  }
   189  
   190  // Resources retrieves the resource usage and limits for instances in the
   191  // store.
   192  func (l *ledger) Resources() *resources {
   193  	l.mu.RLock()
   194  	defer l.mu.RUnlock()
   195  
   196  	usage := l.cpuQueue.Quotas()
   197  	return &resources{
   198  		InstCount: int64(len(l.entries)),
   199  		CPUUsed:   int64(usage.Used),
   200  		CPULimit:  int64(usage.Limit),
   201  	}
   202  }
   203  
   204  // ResourceTime give a ResourceTime entry for each active instance.
   205  // The resource time slice is storted by creation time.
   206  func (l *ledger) ResourceTime() []ResourceTime {
   207  	l.mu.RLock()
   208  	defer l.mu.RUnlock()
   209  
   210  	ret := make([]ResourceTime, 0, len(l.entries))
   211  	for name, data := range l.entries {
   212  		ret = append(ret, ResourceTime{
   213  			Name:     name,
   214  			Creation: data.createdAt,
   215  		})
   216  	}
   217  	sort.Sort(ByCreationTime(ret))
   218  	return ret
   219  }