github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/blocked_evals_stats.go (about)

     1  package nomad
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/hashicorp/nomad/nomad/structs"
     7  )
     8  
     9  // BlockedStats returns all the stats about the blocked eval tracker.
    10  type BlockedStats struct {
    11  	// TotalEscaped is the total number of blocked evaluations that have escaped
    12  	// computed node classes.
    13  	TotalEscaped int
    14  
    15  	// TotalBlocked is the total number of blocked evaluations.
    16  	TotalBlocked int
    17  
    18  	// TotalQuotaLimit is the total number of blocked evaluations that are due
    19  	// to the quota limit being reached.
    20  	TotalQuotaLimit int
    21  
    22  	// BlockedResources stores the amount of resources requested by blocked
    23  	// evaluations.
    24  	BlockedResources *BlockedResourcesStats
    25  }
    26  
    27  // classInDC is a coordinate of a specific class in a specific datacenter
    28  type classInDC struct {
    29  	dc    string
    30  	class string
    31  }
    32  
    33  // NewBlockedStats returns a new BlockedStats.
    34  func NewBlockedStats() *BlockedStats {
    35  	return &BlockedStats{
    36  		BlockedResources: NewBlockedResourcesStats(),
    37  	}
    38  }
    39  
    40  // Block updates the stats for the blocked eval tracker with the details of the
    41  // evaluation being blocked.
    42  func (b *BlockedStats) Block(eval *structs.Evaluation) {
    43  	b.TotalBlocked++
    44  	resourceStats := generateResourceStats(eval)
    45  	b.BlockedResources = b.BlockedResources.Add(resourceStats)
    46  }
    47  
    48  // Unblock updates the stats for the blocked eval tracker with the details of the
    49  // evaluation being unblocked.
    50  func (b *BlockedStats) Unblock(eval *structs.Evaluation) {
    51  	b.TotalBlocked--
    52  	resourceStats := generateResourceStats(eval)
    53  	b.BlockedResources = b.BlockedResources.Subtract(resourceStats)
    54  }
    55  
    56  // prune deletes any key zero metric values older than the cutoff.
    57  func (b *BlockedStats) prune(cutoff time.Time) {
    58  	shouldPrune := func(s BlockedResourcesSummary) bool {
    59  		return s.Timestamp.Before(cutoff) && s.IsZero()
    60  	}
    61  
    62  	for k, v := range b.BlockedResources.ByJob {
    63  		if shouldPrune(v) {
    64  			delete(b.BlockedResources.ByJob, k)
    65  		}
    66  	}
    67  
    68  	for k, v := range b.BlockedResources.ByClassInDC {
    69  		if shouldPrune(v) {
    70  			delete(b.BlockedResources.ByClassInDC, k)
    71  		}
    72  	}
    73  }
    74  
    75  // generateResourceStats returns a summary of the resources requested by the
    76  // input evaluation.
    77  func generateResourceStats(eval *structs.Evaluation) *BlockedResourcesStats {
    78  	dcs := make(map[string]struct{})
    79  	classes := make(map[string]struct{})
    80  
    81  	resources := BlockedResourcesSummary{
    82  		Timestamp: time.Now().UTC(),
    83  	}
    84  
    85  	for _, allocMetrics := range eval.FailedTGAllocs {
    86  		for dc := range allocMetrics.NodesAvailable {
    87  			dcs[dc] = struct{}{}
    88  		}
    89  		for class := range allocMetrics.ClassExhausted {
    90  			classes[class] = struct{}{}
    91  		}
    92  		if len(allocMetrics.ClassExhausted) == 0 {
    93  			// some evaluations have no class
    94  			classes[""] = struct{}{}
    95  		}
    96  		for _, r := range allocMetrics.ResourcesExhausted {
    97  			resources.CPU += r.CPU
    98  			resources.MemoryMB += r.MemoryMB
    99  		}
   100  	}
   101  
   102  	byJob := make(map[structs.NamespacedID]BlockedResourcesSummary)
   103  	nsID := structs.NewNamespacedID(eval.JobID, eval.Namespace)
   104  	byJob[nsID] = resources
   105  
   106  	byClassInDC := make(map[classInDC]BlockedResourcesSummary)
   107  	for dc := range dcs {
   108  		for class := range classes {
   109  			k := classInDC{dc: dc, class: class}
   110  			byClassInDC[k] = resources
   111  		}
   112  	}
   113  
   114  	return &BlockedResourcesStats{
   115  		ByJob:       byJob,
   116  		ByClassInDC: byClassInDC,
   117  	}
   118  }
   119  
   120  // BlockedResourcesStats stores resources requested by blocked evaluations,
   121  // tracked both by job and by node.
   122  type BlockedResourcesStats struct {
   123  	ByJob       map[structs.NamespacedID]BlockedResourcesSummary
   124  	ByClassInDC map[classInDC]BlockedResourcesSummary
   125  }
   126  
   127  // NewBlockedResourcesStats returns a new BlockedResourcesStats.
   128  func NewBlockedResourcesStats() *BlockedResourcesStats {
   129  	return &BlockedResourcesStats{
   130  		ByJob:       make(map[structs.NamespacedID]BlockedResourcesSummary),
   131  		ByClassInDC: make(map[classInDC]BlockedResourcesSummary),
   132  	}
   133  }
   134  
   135  // Copy returns a deep copy of the blocked resource stats.
   136  func (b *BlockedResourcesStats) Copy() *BlockedResourcesStats {
   137  	result := NewBlockedResourcesStats()
   138  
   139  	for k, v := range b.ByJob {
   140  		result.ByJob[k] = v // value copy
   141  	}
   142  
   143  	for k, v := range b.ByClassInDC {
   144  		result.ByClassInDC[k] = v // value copy
   145  	}
   146  
   147  	return result
   148  }
   149  
   150  // Add returns a new BlockedResourcesStats with the values set to the current
   151  // resource values plus the input.
   152  func (b *BlockedResourcesStats) Add(a *BlockedResourcesStats) *BlockedResourcesStats {
   153  	result := b.Copy()
   154  
   155  	for k, v := range a.ByJob {
   156  		result.ByJob[k] = b.ByJob[k].Add(v)
   157  	}
   158  
   159  	for k, v := range a.ByClassInDC {
   160  		result.ByClassInDC[k] = b.ByClassInDC[k].Add(v)
   161  	}
   162  
   163  	return result
   164  }
   165  
   166  // Subtract returns a new BlockedResourcesStats with the values set to the
   167  // current resource values minus the input.
   168  func (b *BlockedResourcesStats) Subtract(a *BlockedResourcesStats) *BlockedResourcesStats {
   169  	result := b.Copy()
   170  
   171  	for k, v := range a.ByJob {
   172  		result.ByJob[k] = b.ByJob[k].Subtract(v)
   173  	}
   174  
   175  	for k, v := range a.ByClassInDC {
   176  		result.ByClassInDC[k] = b.ByClassInDC[k].Subtract(v)
   177  	}
   178  
   179  	return result
   180  }
   181  
   182  // BlockedResourcesSummary stores resource values for blocked evals.
   183  type BlockedResourcesSummary struct {
   184  	Timestamp time.Time
   185  	CPU       int
   186  	MemoryMB  int
   187  }
   188  
   189  // Add returns a new BlockedResourcesSummary with each resource set to the
   190  // current value plus the input.
   191  func (b BlockedResourcesSummary) Add(a BlockedResourcesSummary) BlockedResourcesSummary {
   192  	return BlockedResourcesSummary{
   193  		Timestamp: a.Timestamp,
   194  		CPU:       b.CPU + a.CPU,
   195  		MemoryMB:  b.MemoryMB + a.MemoryMB,
   196  	}
   197  }
   198  
   199  // Subtract returns a new BlockedResourcesSummary with each resource set to the
   200  // current value minus the input.
   201  func (b BlockedResourcesSummary) Subtract(a BlockedResourcesSummary) BlockedResourcesSummary {
   202  	return BlockedResourcesSummary{
   203  		Timestamp: a.Timestamp,
   204  		CPU:       b.CPU - a.CPU,
   205  		MemoryMB:  b.MemoryMB - a.MemoryMB,
   206  	}
   207  }
   208  
   209  // IsZero returns true if all resource values are zero.
   210  func (b BlockedResourcesSummary) IsZero() bool {
   211  	return b.CPU == 0 && b.MemoryMB == 0
   212  }