github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/blocked_evals_stats.go (about) 1 package nomad 2 3 import ( 4 "time" 5 6 "github.com/hashicorp/nomad/nomad/structs" 7 ) 8 9 // BlockedStats returns all the stats about the blocked eval tracker. 10 type BlockedStats struct { 11 // TotalEscaped is the total number of blocked evaluations that have escaped 12 // computed node classes. 13 TotalEscaped int 14 15 // TotalBlocked is the total number of blocked evaluations. 16 TotalBlocked int 17 18 // TotalQuotaLimit is the total number of blocked evaluations that are due 19 // to the quota limit being reached. 20 TotalQuotaLimit int 21 22 // BlockedResources stores the amount of resources requested by blocked 23 // evaluations. 24 BlockedResources *BlockedResourcesStats 25 } 26 27 // classInDC is a coordinate of a specific class in a specific datacenter 28 type classInDC struct { 29 dc string 30 class string 31 } 32 33 // NewBlockedStats returns a new BlockedStats. 34 func NewBlockedStats() *BlockedStats { 35 return &BlockedStats{ 36 BlockedResources: NewBlockedResourcesStats(), 37 } 38 } 39 40 // Block updates the stats for the blocked eval tracker with the details of the 41 // evaluation being blocked. 42 func (b *BlockedStats) Block(eval *structs.Evaluation) { 43 b.TotalBlocked++ 44 resourceStats := generateResourceStats(eval) 45 b.BlockedResources = b.BlockedResources.Add(resourceStats) 46 } 47 48 // Unblock updates the stats for the blocked eval tracker with the details of the 49 // evaluation being unblocked. 50 func (b *BlockedStats) Unblock(eval *structs.Evaluation) { 51 b.TotalBlocked-- 52 resourceStats := generateResourceStats(eval) 53 b.BlockedResources = b.BlockedResources.Subtract(resourceStats) 54 } 55 56 // prune deletes any key zero metric values older than the cutoff. 
57 func (b *BlockedStats) prune(cutoff time.Time) { 58 shouldPrune := func(s BlockedResourcesSummary) bool { 59 return s.Timestamp.Before(cutoff) && s.IsZero() 60 } 61 62 for k, v := range b.BlockedResources.ByJob { 63 if shouldPrune(v) { 64 delete(b.BlockedResources.ByJob, k) 65 } 66 } 67 68 for k, v := range b.BlockedResources.ByClassInDC { 69 if shouldPrune(v) { 70 delete(b.BlockedResources.ByClassInDC, k) 71 } 72 } 73 } 74 75 // generateResourceStats returns a summary of the resources requested by the 76 // input evaluation. 77 func generateResourceStats(eval *structs.Evaluation) *BlockedResourcesStats { 78 dcs := make(map[string]struct{}) 79 classes := make(map[string]struct{}) 80 81 resources := BlockedResourcesSummary{ 82 Timestamp: time.Now().UTC(), 83 } 84 85 for _, allocMetrics := range eval.FailedTGAllocs { 86 for dc := range allocMetrics.NodesAvailable { 87 dcs[dc] = struct{}{} 88 } 89 for class := range allocMetrics.ClassExhausted { 90 classes[class] = struct{}{} 91 } 92 if len(allocMetrics.ClassExhausted) == 0 { 93 // some evaluations have no class 94 classes[""] = struct{}{} 95 } 96 for _, r := range allocMetrics.ResourcesExhausted { 97 resources.CPU += r.CPU 98 resources.MemoryMB += r.MemoryMB 99 } 100 } 101 102 byJob := make(map[structs.NamespacedID]BlockedResourcesSummary) 103 nsID := structs.NewNamespacedID(eval.JobID, eval.Namespace) 104 byJob[nsID] = resources 105 106 byClassInDC := make(map[classInDC]BlockedResourcesSummary) 107 for dc := range dcs { 108 for class := range classes { 109 k := classInDC{dc: dc, class: class} 110 byClassInDC[k] = resources 111 } 112 } 113 114 return &BlockedResourcesStats{ 115 ByJob: byJob, 116 ByClassInDC: byClassInDC, 117 } 118 } 119 120 // BlockedResourcesStats stores resources requested by blocked evaluations, 121 // tracked both by job and by node. 
122 type BlockedResourcesStats struct { 123 ByJob map[structs.NamespacedID]BlockedResourcesSummary 124 ByClassInDC map[classInDC]BlockedResourcesSummary 125 } 126 127 // NewBlockedResourcesStats returns a new BlockedResourcesStats. 128 func NewBlockedResourcesStats() *BlockedResourcesStats { 129 return &BlockedResourcesStats{ 130 ByJob: make(map[structs.NamespacedID]BlockedResourcesSummary), 131 ByClassInDC: make(map[classInDC]BlockedResourcesSummary), 132 } 133 } 134 135 // Copy returns a deep copy of the blocked resource stats. 136 func (b *BlockedResourcesStats) Copy() *BlockedResourcesStats { 137 result := NewBlockedResourcesStats() 138 139 for k, v := range b.ByJob { 140 result.ByJob[k] = v // value copy 141 } 142 143 for k, v := range b.ByClassInDC { 144 result.ByClassInDC[k] = v // value copy 145 } 146 147 return result 148 } 149 150 // Add returns a new BlockedResourcesStats with the values set to the current 151 // resource values plus the input. 152 func (b *BlockedResourcesStats) Add(a *BlockedResourcesStats) *BlockedResourcesStats { 153 result := b.Copy() 154 155 for k, v := range a.ByJob { 156 result.ByJob[k] = b.ByJob[k].Add(v) 157 } 158 159 for k, v := range a.ByClassInDC { 160 result.ByClassInDC[k] = b.ByClassInDC[k].Add(v) 161 } 162 163 return result 164 } 165 166 // Subtract returns a new BlockedResourcesStats with the values set to the 167 // current resource values minus the input. 168 func (b *BlockedResourcesStats) Subtract(a *BlockedResourcesStats) *BlockedResourcesStats { 169 result := b.Copy() 170 171 for k, v := range a.ByJob { 172 result.ByJob[k] = b.ByJob[k].Subtract(v) 173 } 174 175 for k, v := range a.ByClassInDC { 176 result.ByClassInDC[k] = b.ByClassInDC[k].Subtract(v) 177 } 178 179 return result 180 } 181 182 // BlockedResourcesSummary stores resource values for blocked evals. 
type BlockedResourcesSummary struct {
	// Timestamp is carried over from the operand of the most recent Add or
	// Subtract that produced this value.
	Timestamp time.Time

	// CPU is the accumulated CPU value.
	CPU int

	// MemoryMB is the accumulated memory value.
	MemoryMB int
}

// Add returns a new BlockedResourcesSummary with each resource set to the
// current value plus the input. The result takes its Timestamp from the
// input a, not from the receiver. The receiver is not modified.
func (b BlockedResourcesSummary) Add(a BlockedResourcesSummary) BlockedResourcesSummary {
	return BlockedResourcesSummary{
		Timestamp: a.Timestamp,
		CPU:       b.CPU + a.CPU,
		MemoryMB:  b.MemoryMB + a.MemoryMB,
	}
}

// Subtract returns a new BlockedResourcesSummary with each resource set to the
// current value minus the input. The result takes its Timestamp from the
// input a, not from the receiver. Values are not clamped, so the result may
// be negative. The receiver is not modified.
func (b BlockedResourcesSummary) Subtract(a BlockedResourcesSummary) BlockedResourcesSummary {
	return BlockedResourcesSummary{
		Timestamp: a.Timestamp,
		CPU:       b.CPU - a.CPU,
		MemoryMB:  b.MemoryMB - a.MemoryMB,
	}
}

// IsZero returns true if all resource values are zero. The Timestamp is not
// considered.
func (b BlockedResourcesSummary) IsZero() bool {
	return b.CPU == 0 && b.MemoryMB == 0
}