golang.org/x/build@v0.0.0-20240506185731-218518f32b70/internal/coordinator/pool/ledger.go (about) 1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build linux || darwin 6 7 package pool 8 9 import ( 10 "context" 11 "fmt" 12 "sort" 13 "sync" 14 "time" 15 16 "golang.org/x/build/internal/cloud" 17 "golang.org/x/build/internal/coordinator/pool/queue" 18 ) 19 20 // entry contains the resource usage of an instance as well as 21 // identifying information. 22 type entry struct { 23 createdAt time.Time 24 instanceID string 25 instanceName string 26 instanceType string 27 vCPUCount int64 28 quota *queue.Item 29 } 30 31 // ledger contains a record of the instances and their resource 32 // consumption. Before an instance is created, a call to the ledger 33 // will ensure that there are available resources for the new instance. 34 type ledger struct { 35 mu sync.RWMutex 36 // cpuQueue is the queue for on-demand vCPU VMs created on EC2. 37 cpuQueue *queue.Quota 38 // entries contains a mapping of instance name to entries for each instance 39 // that has resources allocated to it. 40 entries map[string]*entry 41 // types contains a mapping of instance type names to instance types for each 42 // ARM64 EC2 instance. 43 types map[string]*cloud.InstanceType 44 } 45 46 // newLedger creates a new ledger. 47 func newLedger() *ledger { 48 l := &ledger{ 49 entries: make(map[string]*entry), 50 cpuQueue: queue.NewQuota(), 51 types: make(map[string]*cloud.InstanceType), 52 } 53 return l 54 } 55 56 // ReserveResources attempts to reserve the resources required for an instance to be created. 57 // It will attempt to reserve the resources that an instance type would require. This will 58 // attempt to reserve the resources until the context deadline is reached. 59 func (l *ledger) ReserveResources(ctx context.Context, instName, vmType string, si *queue.SchedItem) error { 60 instType, err := l.PrepareReservationRequest(instName, vmType) 61 if err != nil { 62 return err 63 } 64 65 // should never happen 66 if instType.CPU <= 0 { 67 return fmt.Errorf("invalid allocation requested: %d", instType.CPU) 68 } 69 item := l.cpuQueue.Enqueue(int(instType.CPU), si) 70 if err := item.Await(ctx); err != nil { 71 return err 72 } 73 l.mu.Lock() 74 defer l.mu.Unlock() 75 76 e, ok := l.entries[instName] 77 if ok { 78 e.vCPUCount = instType.CPU 79 } else { 80 l.entries[instName] = &entry{ 81 instanceName: instName, 82 vCPUCount: instType.CPU, 83 instanceType: instType.Type, 84 quota: item, 85 } 86 } 87 return nil 88 } 89 90 // PrepareReservationRequest ensures all the preconditions necessary for a reservation request are 91 // met. If the conditions are met then an instance type for the requested VM type is returned. If 92 // not an error is returned. 93 func (l *ledger) PrepareReservationRequest(instName, vmType string) (*cloud.InstanceType, error) { 94 l.mu.RLock() 95 defer l.mu.RUnlock() 96 97 instType, ok := l.types[vmType] 98 if !ok { 99 return nil, fmt.Errorf("unknown EC2 vm type: %s", vmType) 100 } 101 _, ok = l.entries[instName] 102 if ok { 103 return nil, fmt.Errorf("quota has already been allocated for %s of type %s", instName, vmType) 104 } 105 return instType, nil 106 } 107 108 // releaseResources deletes the entry associated with an instance. The resources associated with the 109 // instance will also be released. An error is returned if the instance entry is not found. 110 // Lock l.mu must be held by the caller. 111 func (l *ledger) releaseResources(instName string) error { 112 e, ok := l.entries[instName] 113 if !ok { 114 return fmt.Errorf("instance not found for releasing quota: %s", instName) 115 } 116 e.quota.ReturnQuota() 117 return nil 118 } 119 120 // UpdateReservation updates the entry for an instance with the id value for that instance. If 121 // an entry for the instance does not exist then an error will be returned. Another mechanism should 122 // be used to manage untracked instances. Updating the reservation acts as a signal that the instance 123 // has actually been created since the instance ID is known. 124 func (l *ledger) UpdateReservation(instName, instID string) error { 125 l.mu.Lock() 126 defer l.mu.Unlock() 127 128 e, ok := l.entries[instName] 129 if !ok { 130 return fmt.Errorf("unable to update reservation: instance not found %s", instName) 131 } 132 e.createdAt = time.Now() 133 e.instanceID = instID 134 return nil 135 } 136 137 // Remove releases any reserved resources for an instance and deletes the associated entry. 138 // An error is returned if and entry does not exist for the instance. 139 func (l *ledger) Remove(instName string) error { 140 l.mu.Lock() 141 defer l.mu.Unlock() 142 143 if err := l.releaseResources(instName); err != nil { 144 return fmt.Errorf("unable to remove instance: %w", err) 145 } 146 delete(l.entries, instName) 147 return nil 148 } 149 150 // InstanceID retrieves the instance ID for an instance by looking up the instance name. 151 // If an instance is not found, an empty string is returned. 152 func (l *ledger) InstanceID(instName string) string { 153 l.mu.RLock() 154 defer l.mu.RUnlock() 155 156 e, ok := l.entries[instName] 157 if !ok { 158 return "" 159 } 160 return e.instanceID 161 } 162 163 // SetCPULimit sets the vCPU limit used to determine if a CPU allocation would 164 // cross the threshold for available CPU for on-demand instances. 165 func (l *ledger) SetCPULimit(numCPU int64) { 166 l.cpuQueue.UpdateLimit(int(numCPU)) 167 } 168 169 // UpdateInstanceTypes updates the map of instance types used to map instance 170 // type to the resources required for the instance. 171 func (l *ledger) UpdateInstanceTypes(types []*cloud.InstanceType) { 172 l.mu.Lock() 173 defer l.mu.Unlock() 174 175 for _, it := range types { 176 l.types[it.Type] = it 177 } 178 } 179 180 // resources contains the current limit and usage of instance related resources. 181 type resources struct { 182 // InstCount is the count of how many on-demand instances are tracked in the ledger. 183 InstCount int64 184 // CPUUsed is a count of the vCPU's for on-demand instances are currently allocated in the ledger. 185 CPUUsed int64 186 // CPULimit is the limit of how many vCPU's for on-demand instances can be allocated. 187 CPULimit int64 188 } 189 190 // Resources retrieves the resource usage and limits for instances in the 191 // store. 192 func (l *ledger) Resources() *resources { 193 l.mu.RLock() 194 defer l.mu.RUnlock() 195 196 usage := l.cpuQueue.Quotas() 197 return &resources{ 198 InstCount: int64(len(l.entries)), 199 CPUUsed: int64(usage.Used), 200 CPULimit: int64(usage.Limit), 201 } 202 } 203 204 // ResourceTime give a ResourceTime entry for each active instance. 205 // The resource time slice is storted by creation time. 206 func (l *ledger) ResourceTime() []ResourceTime { 207 l.mu.RLock() 208 defer l.mu.RUnlock() 209 210 ret := make([]ResourceTime, 0, len(l.entries)) 211 for name, data := range l.entries { 212 ret = append(ret, ResourceTime{ 213 Name: name, 214 Creation: data.createdAt, 215 }) 216 } 217 sort.Sort(ByCreationTime(ret)) 218 return ret 219 }