github.com/hernad/nomad@v1.6.112/nomad/blocked_evals_stats_test.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package nomad
     5  
     6  import (
     7  	"fmt"
     8  	"math/rand"
     9  	"reflect"
    10  	"testing"
    11  	"testing/quick"
    12  	"time"
    13  
    14  	"github.com/hernad/nomad/ci"
    15  	"github.com/hernad/nomad/nomad/mock"
    16  	"github.com/hernad/nomad/nomad/structs"
    17  	"github.com/stretchr/testify/require"
    18  )
    19  
    20  func now(year int) time.Time {
    21  	return time.Date(2000+year, 1, 2, 3, 4, 5, 6, time.UTC)
    22  }
    23  
    24  func TestBlockedResourceSummary_Add(t *testing.T) {
    25  	now1 := now(1)
    26  	now2 := now(2)
    27  	a := BlockedResourcesSummary{
    28  		Timestamp: now1,
    29  		CPU:       600,
    30  		MemoryMB:  256,
    31  	}
    32  
    33  	b := BlockedResourcesSummary{
    34  		Timestamp: now2,
    35  		CPU:       250,
    36  		MemoryMB:  128,
    37  	}
    38  
    39  	result := a.Add(b)
    40  
    41  	// a not modified
    42  	require.Equal(t, 600, a.CPU)
    43  	require.Equal(t, 256, a.MemoryMB)
    44  	require.Equal(t, now1, a.Timestamp)
    45  
    46  	// b not modified
    47  	require.Equal(t, 250, b.CPU)
    48  	require.Equal(t, 128, b.MemoryMB)
    49  	require.Equal(t, now2, b.Timestamp)
    50  
    51  	// result is a + b, using timestamp from b
    52  	require.Equal(t, 850, result.CPU)
    53  	require.Equal(t, 384, result.MemoryMB)
    54  	require.Equal(t, now2, result.Timestamp)
    55  }
    56  
    57  func TestBlockedResourceSummary_Add_nil(t *testing.T) {
    58  	now1 := now(1)
    59  	b := BlockedResourcesSummary{
    60  		Timestamp: now1,
    61  		CPU:       250,
    62  		MemoryMB:  128,
    63  	}
    64  
    65  	// zero + b == b
    66  	result := (BlockedResourcesSummary{}).Add(b)
    67  	require.Equal(t, now1, result.Timestamp)
    68  	require.Equal(t, 250, result.CPU)
    69  	require.Equal(t, 128, result.MemoryMB)
    70  }
    71  
    72  func TestBlockedResourceSummary_Subtract(t *testing.T) {
    73  	now1 := now(1)
    74  	now2 := now(2)
    75  	a := BlockedResourcesSummary{
    76  		Timestamp: now1,
    77  		CPU:       600,
    78  		MemoryMB:  256,
    79  	}
    80  
    81  	b := BlockedResourcesSummary{
    82  		Timestamp: now2,
    83  		CPU:       250,
    84  		MemoryMB:  120,
    85  	}
    86  
    87  	result := a.Subtract(b)
    88  
    89  	// a not modified
    90  	require.Equal(t, 600, a.CPU)
    91  	require.Equal(t, 256, a.MemoryMB)
    92  	require.Equal(t, now1, a.Timestamp)
    93  
    94  	// b not modified
    95  	require.Equal(t, 250, b.CPU)
    96  	require.Equal(t, 120, b.MemoryMB)
    97  	require.Equal(t, now2, b.Timestamp)
    98  
    99  	// result is a + b, using timestamp from b
   100  	require.Equal(t, 350, result.CPU)
   101  	require.Equal(t, 136, result.MemoryMB)
   102  	require.Equal(t, now2, result.Timestamp)
   103  }
   104  
   105  func TestBlockedResourceSummary_IsZero(t *testing.T) {
   106  	now1 := now(1)
   107  
   108  	// cpu and mem zero, timestamp is ignored
   109  	require.True(t, (&BlockedResourcesSummary{
   110  		Timestamp: now1,
   111  		CPU:       0,
   112  		MemoryMB:  0,
   113  	}).IsZero())
   114  
   115  	// cpu non-zero
   116  	require.False(t, (&BlockedResourcesSummary{
   117  		Timestamp: now1,
   118  		CPU:       1,
   119  		MemoryMB:  0,
   120  	}).IsZero())
   121  
   122  	// mem non-zero
   123  	require.False(t, (&BlockedResourcesSummary{
   124  		Timestamp: now1,
   125  		CPU:       0,
   126  		MemoryMB:  1,
   127  	}).IsZero())
   128  }
   129  
   130  func TestBlockedResourceStats_New(t *testing.T) {
   131  	a := NewBlockedResourcesStats()
   132  	require.NotNil(t, a.ByJob)
   133  	require.Empty(t, a.ByJob)
   134  	require.NotNil(t, a.ByClassInDC)
   135  	require.Empty(t, a.ByClassInDC)
   136  }
   137  
   138  var (
   139  	id1 = structs.NamespacedID{
   140  		ID:        "1",
   141  		Namespace: "one",
   142  	}
   143  
   144  	id2 = structs.NamespacedID{
   145  		ID:        "2",
   146  		Namespace: "two",
   147  	}
   148  
   149  	node1 = classInDC{
   150  		dc:    "dc1",
   151  		class: "alpha",
   152  	}
   153  
   154  	node2 = classInDC{
   155  		dc:    "dc1",
   156  		class: "beta",
   157  	}
   158  
   159  	node3 = classInDC{
   160  		dc:    "dc1",
   161  		class: "", // not set
   162  	}
   163  )
   164  
   165  func TestBlockedResourceStats_Copy(t *testing.T) {
   166  	now1 := now(1)
   167  	now2 := now(2)
   168  
   169  	a := NewBlockedResourcesStats()
   170  	a.ByJob = map[structs.NamespacedID]BlockedResourcesSummary{
   171  		id1: {
   172  			Timestamp: now1,
   173  			CPU:       100,
   174  			MemoryMB:  256,
   175  		},
   176  	}
   177  	a.ByClassInDC = map[classInDC]BlockedResourcesSummary{
   178  		node1: {
   179  			Timestamp: now1,
   180  			CPU:       300,
   181  			MemoryMB:  333,
   182  		},
   183  	}
   184  
   185  	c := a.Copy()
   186  	c.ByJob[id1] = BlockedResourcesSummary{
   187  		Timestamp: now2,
   188  		CPU:       888,
   189  		MemoryMB:  888,
   190  	}
   191  	c.ByClassInDC[node1] = BlockedResourcesSummary{
   192  		Timestamp: now2,
   193  		CPU:       999,
   194  		MemoryMB:  999,
   195  	}
   196  
   197  	// underlying data should have been deep copied
   198  	require.Equal(t, 100, a.ByJob[id1].CPU)
   199  	require.Equal(t, 300, a.ByClassInDC[node1].CPU)
   200  }
   201  
   202  func TestBlockedResourcesStats_Add(t *testing.T) {
   203  	a := NewBlockedResourcesStats()
   204  	a.ByJob = map[structs.NamespacedID]BlockedResourcesSummary{
   205  		id1: {Timestamp: now(1), CPU: 111, MemoryMB: 222},
   206  	}
   207  	a.ByClassInDC = map[classInDC]BlockedResourcesSummary{
   208  		node1: {Timestamp: now(2), CPU: 333, MemoryMB: 444},
   209  	}
   210  
   211  	b := NewBlockedResourcesStats()
   212  	b.ByJob = map[structs.NamespacedID]BlockedResourcesSummary{
   213  		id1: {Timestamp: now(3), CPU: 200, MemoryMB: 300},
   214  		id2: {Timestamp: now(4), CPU: 400, MemoryMB: 500},
   215  	}
   216  	b.ByClassInDC = map[classInDC]BlockedResourcesSummary{
   217  		node1: {Timestamp: now(5), CPU: 600, MemoryMB: 700},
   218  		node2: {Timestamp: now(6), CPU: 800, MemoryMB: 900},
   219  	}
   220  
   221  	t.Run("a add b", func(t *testing.T) {
   222  		result := a.Add(b)
   223  
   224  		require.Equal(t, map[structs.NamespacedID]BlockedResourcesSummary{
   225  			id1: {Timestamp: now(3), CPU: 311, MemoryMB: 522},
   226  			id2: {Timestamp: now(4), CPU: 400, MemoryMB: 500},
   227  		}, result.ByJob)
   228  
   229  		require.Equal(t, map[classInDC]BlockedResourcesSummary{
   230  			node1: {Timestamp: now(5), CPU: 933, MemoryMB: 1144},
   231  			node2: {Timestamp: now(6), CPU: 800, MemoryMB: 900},
   232  		}, result.ByClassInDC)
   233  	})
   234  
   235  	// make sure we handle zeros in both directions
   236  	// and timestamps originate from rhs
   237  	t.Run("b add a", func(t *testing.T) {
   238  		result := b.Add(a)
   239  		require.Equal(t, map[structs.NamespacedID]BlockedResourcesSummary{
   240  			id1: {Timestamp: now(1), CPU: 311, MemoryMB: 522},
   241  			id2: {Timestamp: now(4), CPU: 400, MemoryMB: 500},
   242  		}, result.ByJob)
   243  
   244  		require.Equal(t, map[classInDC]BlockedResourcesSummary{
   245  			node1: {Timestamp: now(2), CPU: 933, MemoryMB: 1144},
   246  			node2: {Timestamp: now(6), CPU: 800, MemoryMB: 900},
   247  		}, result.ByClassInDC)
   248  	})
   249  }
   250  
   251  func TestBlockedResourcesStats_Add_NoClass(t *testing.T) {
   252  	a := NewBlockedResourcesStats()
   253  	a.ByClassInDC = map[classInDC]BlockedResourcesSummary{
   254  		node3: {Timestamp: now(1), CPU: 111, MemoryMB: 1111},
   255  	}
   256  	result := a.Add(a)
   257  	require.Equal(t, map[classInDC]BlockedResourcesSummary{
   258  		node3: {Timestamp: now(1), CPU: 222, MemoryMB: 2222},
   259  	}, result.ByClassInDC)
   260  }
   261  
   262  func TestBlockedResourcesStats_Subtract(t *testing.T) {
   263  	a := NewBlockedResourcesStats()
   264  	a.ByJob = map[structs.NamespacedID]BlockedResourcesSummary{
   265  		id1: {Timestamp: now(1), CPU: 100, MemoryMB: 100},
   266  		id2: {Timestamp: now(2), CPU: 200, MemoryMB: 200},
   267  	}
   268  	a.ByClassInDC = map[classInDC]BlockedResourcesSummary{
   269  		node1: {Timestamp: now(3), CPU: 300, MemoryMB: 300},
   270  		node2: {Timestamp: now(4), CPU: 400, MemoryMB: 400},
   271  	}
   272  
   273  	b := NewBlockedResourcesStats()
   274  	b.ByJob = map[structs.NamespacedID]BlockedResourcesSummary{
   275  		id1: {Timestamp: now(5), CPU: 10, MemoryMB: 11},
   276  		id2: {Timestamp: now(6), CPU: 12, MemoryMB: 13},
   277  	}
   278  	b.ByClassInDC = map[classInDC]BlockedResourcesSummary{
   279  		node1: {Timestamp: now(7), CPU: 14, MemoryMB: 15},
   280  		node2: {Timestamp: now(8), CPU: 16, MemoryMB: 17},
   281  	}
   282  
   283  	result := a.Subtract(b)
   284  
   285  	// id1
   286  	require.Equal(t, now(5), result.ByJob[id1].Timestamp)
   287  	require.Equal(t, 90, result.ByJob[id1].CPU)
   288  	require.Equal(t, 89, result.ByJob[id1].MemoryMB)
   289  
   290  	// id2
   291  	require.Equal(t, now(6), result.ByJob[id2].Timestamp)
   292  	require.Equal(t, 188, result.ByJob[id2].CPU)
   293  	require.Equal(t, 187, result.ByJob[id2].MemoryMB)
   294  
   295  	// node1
   296  	require.Equal(t, now(7), result.ByClassInDC[node1].Timestamp)
   297  	require.Equal(t, 286, result.ByClassInDC[node1].CPU)
   298  	require.Equal(t, 285, result.ByClassInDC[node1].MemoryMB)
   299  
   300  	// node2
   301  	require.Equal(t, now(8), result.ByClassInDC[node2].Timestamp)
   302  	require.Equal(t, 384, result.ByClassInDC[node2].CPU)
   303  	require.Equal(t, 383, result.ByClassInDC[node2].MemoryMB)
   304  }
   305  
   306  // testBlockedEvalsRandomBlockedEval wraps an eval that is randomly generated.
   307  type testBlockedEvalsRandomBlockedEval struct {
   308  	eval *structs.Evaluation
   309  }
   310  
   311  // Generate returns a random eval.
   312  func (t testBlockedEvalsRandomBlockedEval) Generate(rand *rand.Rand, _ int) reflect.Value {
   313  	resourceTypes := []string{"cpu", "memory"}
   314  
   315  	// Start with a mock eval.
   316  	e := mock.BlockedEval()
   317  
   318  	// Get how many task groups, datacenters and node classes to generate.
   319  	// Add 1 to avoid 0.
   320  	jobCount := rand.Intn(3) + 1
   321  	tgCount := rand.Intn(10) + 1
   322  	dcCount := rand.Intn(3) + 1
   323  	nodeClassCount := rand.Intn(3) + 1
   324  
   325  	failedTGAllocs := map[string]*structs.AllocMetric{}
   326  
   327  	e.JobID = fmt.Sprintf("job-%d", jobCount)
   328  	for tg := 1; tg <= tgCount; tg++ {
   329  		tgName := fmt.Sprintf("group-%d", tg)
   330  
   331  		// Get which resource type to use for this task group.
   332  		// Nomad stops at the first dimension that is exhausted, so only 1 is
   333  		// added per task group.
   334  		i := rand.Int() % len(resourceTypes)
   335  		resourceType := resourceTypes[i]
   336  
   337  		failedTGAllocs[tgName] = &structs.AllocMetric{
   338  			DimensionExhausted: map[string]int{
   339  				resourceType: 1,
   340  			},
   341  			NodesAvailable: map[string]int{},
   342  			ClassExhausted: map[string]int{},
   343  		}
   344  
   345  		for dc := 1; dc <= dcCount; dc++ {
   346  			dcName := fmt.Sprintf("dc%d", dc)
   347  			failedTGAllocs[tgName].NodesAvailable[dcName] = 1
   348  		}
   349  
   350  		for nc := 1; nc <= nodeClassCount; nc++ {
   351  			nodeClassName := fmt.Sprintf("node-class-%d", nc)
   352  			failedTGAllocs[tgName].ClassExhausted[nodeClassName] = 1
   353  		}
   354  
   355  		// Generate resources for each task.
   356  		taskCount := rand.Intn(5) + 1
   357  		resourcesExhausted := map[string]*structs.Resources{}
   358  
   359  		for t := 1; t <= taskCount; t++ {
   360  			task := fmt.Sprintf("task-%d", t)
   361  			resourcesExhausted[task] = &structs.Resources{}
   362  
   363  			resourceAmount := rand.Intn(1000)
   364  			switch resourceType {
   365  			case "cpu":
   366  				resourcesExhausted[task].CPU = resourceAmount
   367  			case "memory":
   368  				resourcesExhausted[task].MemoryMB = resourceAmount
   369  			}
   370  		}
   371  		failedTGAllocs[tgName].ResourcesExhausted = resourcesExhausted
   372  	}
   373  	e.FailedTGAllocs = failedTGAllocs
   374  	t.eval = e
   375  	return reflect.ValueOf(t)
   376  }
   377  
   378  // clearTimestampFromBlockedResourceStats set timestamp metrics to zero to
   379  // avoid invalid comparisons.
   380  func clearTimestampFromBlockedResourceStats(b *BlockedResourcesStats) {
   381  	for k, v := range b.ByJob {
   382  		v.Timestamp = time.Time{}
   383  		b.ByJob[k] = v
   384  	}
   385  	for k, v := range b.ByClassInDC {
   386  		v.Timestamp = time.Time{}
   387  		b.ByClassInDC[k] = v
   388  	}
   389  }
   390  
   391  // TestBlockedEvalsStats_BlockedResources generates random evals and processes
   392  // them using the expected code paths and a manual check of the expeceted result.
   393  func TestBlockedEvalsStats_BlockedResources(t *testing.T) {
   394  	ci.Parallel(t)
   395  	blocked, _ := testBlockedEvals(t)
   396  
   397  	// evalHistory stores all evals generated during the test.
   398  	var evalHistory []*structs.Evaluation
   399  
   400  	// blockedEvals keeps track if evals are blocked or unblocked.
   401  	blockedEvals := map[string]bool{}
   402  
   403  	// blockAndUntrack processes the generated evals in order using a
   404  	// BlockedEvals instance.
   405  	blockAndUntrack := func(testEval testBlockedEvalsRandomBlockedEval, block bool, unblockIdx uint16) *BlockedResourcesStats {
   406  		if block || len(evalHistory) == 0 {
   407  			blocked.Block(testEval.eval)
   408  		} else {
   409  			i := int(unblockIdx) % len(evalHistory)
   410  			eval := evalHistory[i]
   411  			blocked.Untrack(eval.JobID, eval.Namespace)
   412  		}
   413  
   414  		// Remove zero stats from unblocked evals.
   415  		blocked.pruneStats(time.Now().UTC())
   416  
   417  		result := blocked.Stats().BlockedResources
   418  		clearTimestampFromBlockedResourceStats(result)
   419  		return result
   420  	}
   421  
   422  	// manualCount processes only the blocked evals and generate a
   423  	// BlockedResourcesStats result directly from the eval history.
   424  	manualCount := func(testEval testBlockedEvalsRandomBlockedEval, block bool, unblockIdx uint16) *BlockedResourcesStats {
   425  		if block || len(evalHistory) == 0 {
   426  			evalHistory = append(evalHistory, testEval.eval)
   427  
   428  			// Find and unblock evals for the same job.
   429  			for _, e := range evalHistory {
   430  				if e.Namespace == testEval.eval.Namespace && e.JobID == testEval.eval.JobID {
   431  					blockedEvals[e.ID] = false
   432  				}
   433  			}
   434  			blockedEvals[testEval.eval.ID] = true
   435  		} else {
   436  			i := int(unblockIdx) % len(evalHistory)
   437  			eval := evalHistory[i]
   438  
   439  			// Find and unlock all evals for this job.
   440  			for _, e := range evalHistory {
   441  				if e.Namespace == eval.Namespace && e.JobID == eval.JobID {
   442  					blockedEvals[e.ID] = false
   443  				}
   444  			}
   445  		}
   446  
   447  		result := NewBlockedResourcesStats()
   448  		for _, e := range evalHistory {
   449  			if !blockedEvals[e.ID] {
   450  				continue
   451  			}
   452  			result = result.Add(generateResourceStats(e))
   453  		}
   454  		clearTimestampFromBlockedResourceStats(result)
   455  		return result
   456  	}
   457  
   458  	err := quick.CheckEqual(blockAndUntrack, manualCount, nil)
   459  	if err != nil {
   460  		t.Error(err)
   461  	}
   462  }