github.com/banmanh482/nomad@v0.11.8/scheduler/rank.go

package scheduler

import (
	"fmt"
	"math"

	"github.com/hashicorp/nomad/nomad/structs"
)

const (
	// binPackingMaxFitScore is the maximum possible bin packing fitness score.
	// This is used to normalize the bin packing score to a value between 0 and 1.
	binPackingMaxFitScore = 18.0
)
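
// As a rough illustration (not part of the scheduler logic), a raw fitness
// score of 9.0 would normalize to half the maximum:
//
//	normalized := fitness / binPackingMaxFitScore // 9.0 / 18.0 = 0.5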

// RankedNode is used to provide a score and various ranking metadata
// along with a node when iterating. This state can be modified as
// various rank methods are applied.
type RankedNode struct {
	Node           *structs.Node
	FinalScore     float64
	Scores         []float64
	TaskResources  map[string]*structs.AllocatedTaskResources
	TaskLifecycles map[string]*structs.TaskLifecycleConfig
	AllocResources *structs.AllocatedSharedResources

	// Proposed is used to cache the proposed allocations on the
	// node. This can be shared between iterators that require it.
	Proposed []*structs.Allocation

	// PreemptedAllocs is used by the BinPackIterator to identify allocs
	// that should be preempted in order to make the placement.
	PreemptedAllocs []*structs.Allocation
}

func (r *RankedNode) GoString() string {
	return fmt.Sprintf("<Node: %s Score: %0.3f>", r.Node.ID, r.FinalScore)
}

func (r *RankedNode) ProposedAllocs(ctx Context) ([]*structs.Allocation, error) {
	if r.Proposed != nil {
		return r.Proposed, nil
	}

	p, err := ctx.ProposedAllocs(r.Node.ID)
	if err != nil {
		return nil, err
	}
	r.Proposed = p
	return p, nil
}

func (r *RankedNode) SetTaskResources(task *structs.Task,
	resource *structs.AllocatedTaskResources) {
	if r.TaskResources == nil {
		r.TaskResources = make(map[string]*structs.AllocatedTaskResources)
		r.TaskLifecycles = make(map[string]*structs.TaskLifecycleConfig)
	}
	r.TaskResources[task.Name] = resource
	r.TaskLifecycles[task.Name] = task.Lifecycle
}

// RankIterator is used to iteratively yield nodes along
// with ranking metadata. The iterators may manage some state for
// performance optimizations.
type RankIterator interface {
	// Next yields a ranked option or nil if exhausted.
	Next() *RankedNode

	// Reset is invoked when an allocation has been placed
	// to reset any stale state.
	Reset()
}
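
// A minimal sketch of how rank iterators are typically chained; the real
// stack is assembled elsewhere in the scheduler and the variable names here
// are illustrative:
//
//	feasible := NewFeasibleRankIterator(ctx, feasibleSource)
//	binPack := NewBinPackIterator(ctx, feasible, false, job.Priority, structs.SchedulerAlgorithmBinpack)
//	binPack.SetJob(job)
//	binPack.SetTaskGroup(tg)
//	normalized := NewScoreNormalizationIterator(ctx, binPack)
//	for option := normalized.Next(); option != nil; option = normalized.Next() {
//		// each option carries a FinalScore averaged over its component scores
//	}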

// FeasibleRankIterator is used to consume from a FeasibleIterator
// and return an unranked node with base ranking.
type FeasibleRankIterator struct {
	ctx    Context
	source FeasibleIterator
}

// NewFeasibleRankIterator is used to return a new FeasibleRankIterator
// from a FeasibleIterator source.
func NewFeasibleRankIterator(ctx Context, source FeasibleIterator) *FeasibleRankIterator {
	iter := &FeasibleRankIterator{
		ctx:    ctx,
		source: source,
	}
	return iter
}

func (iter *FeasibleRankIterator) Next() *RankedNode {
	option := iter.source.Next()
	if option == nil {
		return nil
	}
	ranked := &RankedNode{
		Node: option,
	}
	return ranked
}

func (iter *FeasibleRankIterator) Reset() {
	iter.source.Reset()
}

// StaticRankIterator is a RankIterator that returns a static set of results.
// This is largely only useful for testing.
type StaticRankIterator struct {
	ctx    Context
	nodes  []*RankedNode
	offset int
	seen   int
}

// NewStaticRankIterator returns a new static rank iterator over the given nodes.
func NewStaticRankIterator(ctx Context, nodes []*RankedNode) *StaticRankIterator {
	iter := &StaticRankIterator{
		ctx:   ctx,
		nodes: nodes,
	}
	return iter
}

func (iter *StaticRankIterator) Next() *RankedNode {
	// Check if exhausted. The iterator is only exhausted once every node has
	// been seen since the last Reset; otherwise the offset wraps around to
	// the beginning.
	n := len(iter.nodes)
	if iter.offset == n || iter.seen == n {
		if iter.seen != n {
			iter.offset = 0
		} else {
			return nil
		}
	}

	// Return the next offset
	offset := iter.offset
	iter.offset++
	iter.seen++
	return iter.nodes[offset]
}

func (iter *StaticRankIterator) Reset() {
	iter.seen = 0
}
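
// A minimal test-style sketch (the RankedNode values are assumed to be
// pre-built by the caller):
//
//	iter := NewStaticRankIterator(ctx, []*RankedNode{{Node: nodeA}, {Node: nodeB}})
//	first := iter.Next() // yields the nodeA entry
//	iter.Reset()         // iteration continues, wrapping until every node is seen again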

// BinPackIterator is a RankIterator that scores potential options
// based on a bin-packing algorithm.
type BinPackIterator struct {
	ctx       Context
	source    RankIterator
	evict     bool
	priority  int
	jobId     *structs.NamespacedID
	taskGroup *structs.TaskGroup
	scoreFit  func(*structs.Node, *structs.ComparableResources) float64
}

// NewBinPackIterator returns a BinPackIterator which tries to fit tasks,
// potentially evicting other tasks based on a given priority.
func NewBinPackIterator(ctx Context, source RankIterator, evict bool, priority int, algorithm structs.SchedulerAlgorithm) *BinPackIterator {
	scoreFn := structs.ScoreFitBinPack
	if algorithm == structs.SchedulerAlgorithmSpread {
		scoreFn = structs.ScoreFitSpread
	}

	iter := &BinPackIterator{
		ctx:      ctx,
		source:   source,
		evict:    evict,
		priority: priority,
		scoreFit: scoreFn,
	}
	iter.ctx.Logger().Named("binpack").Trace("NewBinPackIterator created", "algorithm", algorithm)
	return iter
}
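
// A minimal usage sketch (job and tg are assumed to come from the evaluation
// being scheduled):
//
//	iter := NewBinPackIterator(ctx, source, false, 0, structs.SchedulerAlgorithmSpread)
//	iter.SetJob(job)      // adopts the job's priority and namespaced ID
//	iter.SetTaskGroup(tg) // selects the task group whose resources are fit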

func (iter *BinPackIterator) SetJob(job *structs.Job) {
	iter.priority = job.Priority
	iter.jobId = job.NamespacedID()
}

func (iter *BinPackIterator) SetTaskGroup(taskGroup *structs.TaskGroup) {
	iter.taskGroup = taskGroup
}

func (iter *BinPackIterator) Next() *RankedNode {
OUTER:
	for {
		// Get the next potential option
		option := iter.source.Next()
		if option == nil {
			return nil
		}

		// Get the proposed allocations
		proposed, err := option.ProposedAllocs(iter.ctx)
		if err != nil {
			iter.ctx.Logger().Named("binpack").Error("failed retrieving proposed allocations", "error", err)
			continue
		}

		// Index the existing network usage
		netIdx := structs.NewNetworkIndex()
		netIdx.SetNode(option.Node)
		netIdx.AddAllocs(proposed)

		// Create a device allocator
		devAllocator := newDeviceAllocator(iter.ctx, option.Node)
		devAllocator.AddAllocs(proposed)

		// Track the affinities of the devices
		totalDeviceAffinityWeight := 0.0
		sumMatchingAffinities := 0.0

		// Assign the resources for each task
		total := &structs.AllocatedResources{
			Tasks: make(map[string]*structs.AllocatedTaskResources,
				len(iter.taskGroup.Tasks)),
			TaskLifecycles: make(map[string]*structs.TaskLifecycleConfig,
				len(iter.taskGroup.Tasks)),
			Shared: structs.AllocatedSharedResources{
				DiskMB: int64(iter.taskGroup.EphemeralDisk.SizeMB),
			},
		}

		var allocsToPreempt []*structs.Allocation

		// Initialize the preemptor with the node
		preemptor := NewPreemptor(iter.priority, iter.ctx, iter.jobId)
		preemptor.SetNode(option.Node)

		// Count the number of existing preemptions
		allPreemptions := iter.ctx.Plan().NodePreemptions
		var currentPreemptions []*structs.Allocation
		for _, allocs := range allPreemptions {
			currentPreemptions = append(currentPreemptions, allocs...)
		}
		preemptor.SetPreemptions(currentPreemptions)

		// Check if we need a task group network resource
		if len(iter.taskGroup.Networks) > 0 {
			ask := iter.taskGroup.Networks[0].Copy()
			offer, err := netIdx.AssignNetwork(ask)
			if offer == nil {
				// If eviction is not enabled, mark this node as exhausted and continue
				if !iter.evict {
					iter.ctx.Metrics().ExhaustedNode(option.Node,
						fmt.Sprintf("network: %s", err))
					netIdx.Release()
					continue OUTER
				}

				// Look for preemptible allocations to satisfy the network resource for this task
				preemptor.SetCandidates(proposed)

				netPreemptions := preemptor.PreemptForNetwork(ask, netIdx)
				if netPreemptions == nil {
					iter.ctx.Logger().Named("binpack").Debug("preemption not possible", "network_resource", ask)
					netIdx.Release()
					continue OUTER
				}
				allocsToPreempt = append(allocsToPreempt, netPreemptions...)

				// First subtract out preempted allocations
				proposed = structs.RemoveAllocs(proposed, netPreemptions)

				// Reset the network index and try the offer again
				netIdx.Release()
				netIdx = structs.NewNetworkIndex()
				netIdx.SetNode(option.Node)
				netIdx.AddAllocs(proposed)

				offer, err = netIdx.AssignNetwork(ask)
				if offer == nil {
					iter.ctx.Logger().Named("binpack").Debug("unexpected error, unable to create network offer after considering preemption", "error", err)
					netIdx.Release()
					continue OUTER
				}
			}

			// Reserve this to prevent another task from colliding
			netIdx.AddReserved(offer)

			// Update the network ask to the offer
			total.Shared.Networks = []*structs.NetworkResource{offer}
			option.AllocResources = &structs.AllocatedSharedResources{
				Networks: []*structs.NetworkResource{offer},
				DiskMB:   int64(iter.taskGroup.EphemeralDisk.SizeMB),
			}
		}

		for _, task := range iter.taskGroup.Tasks {
			// Allocate the resources
			taskResources := &structs.AllocatedTaskResources{
				Cpu: structs.AllocatedCpuResources{
					CpuShares: int64(task.Resources.CPU),
				},
				Memory: structs.AllocatedMemoryResources{
					MemoryMB: int64(task.Resources.MemoryMB),
				},
			}

			// Check if we need a network resource
			if len(task.Resources.Networks) > 0 {
				ask := task.Resources.Networks[0].Copy()
				offer, err := netIdx.AssignNetwork(ask)
				if offer == nil {
					// If eviction is not enabled, mark this node as exhausted and continue
					if !iter.evict {
						iter.ctx.Metrics().ExhaustedNode(option.Node,
							fmt.Sprintf("network: %s", err))
						netIdx.Release()
						continue OUTER
					}

					// Look for preemptible allocations to satisfy the network resource for this task
					preemptor.SetCandidates(proposed)

					netPreemptions := preemptor.PreemptForNetwork(ask, netIdx)
					if netPreemptions == nil {
						iter.ctx.Logger().Named("binpack").Debug("preemption not possible", "network_resource", ask)
						netIdx.Release()
						continue OUTER
					}
					allocsToPreempt = append(allocsToPreempt, netPreemptions...)

					// First subtract out preempted allocations
					proposed = structs.RemoveAllocs(proposed, netPreemptions)

					// Reset the network index and try the offer again
					netIdx.Release()
					netIdx = structs.NewNetworkIndex()
					netIdx.SetNode(option.Node)
					netIdx.AddAllocs(proposed)

					offer, err = netIdx.AssignNetwork(ask)
					if offer == nil {
						iter.ctx.Logger().Named("binpack").Debug("unexpected error, unable to create network offer after considering preemption", "error", err)
						netIdx.Release()
						continue OUTER
					}
				}

				// Reserve this to prevent another task from colliding
				netIdx.AddReserved(offer)

				// Update the network ask to the offer
				taskResources.Networks = []*structs.NetworkResource{offer}
			}

			// Check if we need to assign devices
			for _, req := range task.Resources.Devices {
				offer, sumAffinities, err := devAllocator.AssignDevice(req)
				if offer == nil {
					// If eviction is not enabled, mark this node as exhausted and continue
					if !iter.evict {
						iter.ctx.Metrics().ExhaustedNode(option.Node, fmt.Sprintf("devices: %s", err))
						continue OUTER
					}

					// Attempt preemption
					preemptor.SetCandidates(proposed)
					devicePreemptions := preemptor.PreemptForDevice(req, devAllocator)

					if devicePreemptions == nil {
						iter.ctx.Logger().Named("binpack").Debug("preemption not possible", "requested_device", req)
						netIdx.Release()
						continue OUTER
					}
					allocsToPreempt = append(allocsToPreempt, devicePreemptions...)

					// First subtract out preempted allocations
					proposed = structs.RemoveAllocs(proposed, allocsToPreempt)

					// Reset the device allocator with the new set of proposed allocs.
					// Assign rather than redeclare, so the retried offer comes from
					// the same allocator that is reserved against below.
					devAllocator = newDeviceAllocator(iter.ctx, option.Node)
					devAllocator.AddAllocs(proposed)

					// Try the offer again
					offer, sumAffinities, err = devAllocator.AssignDevice(req)
					if offer == nil {
						iter.ctx.Logger().Named("binpack").Debug("unexpected error, unable to create device offer after considering preemption", "error", err)
						continue OUTER
					}
				}

				// Store the resource
				devAllocator.AddReserved(offer)
				taskResources.Devices = append(taskResources.Devices, offer)

				// Add the scores
				if len(req.Affinities) != 0 {
					for _, a := range req.Affinities {
						totalDeviceAffinityWeight += math.Abs(float64(a.Weight))
					}
					sumMatchingAffinities += sumAffinities
				}
			}

			// Store the task resource
			option.SetTaskResources(task, taskResources)

			// Accumulate the total resource requirement
			total.Tasks[task.Name] = taskResources
			total.TaskLifecycles[task.Name] = task.Lifecycle
		}

		// Store the current set of running allocs before adding resources for the task group
		current := proposed

		// Add the resources we are trying to fit
		proposed = append(proposed, &structs.Allocation{AllocatedResources: total})

		// Check if these allocations fit; if they do not, simply skip this node
		fit, dim, util, _ := structs.AllocsFit(option.Node, proposed, netIdx, false)
		netIdx.Release()
		if !fit {
			// Skip the node if evictions are not enabled
			if !iter.evict {
				iter.ctx.Metrics().ExhaustedNode(option.Node, dim)
				continue
			}

			// If eviction is enabled and the node doesn't fit the alloc, check if
			// any allocs can be preempted

			// Initialize the preemptor with the candidate set
			preemptor.SetCandidates(current)

			preemptedAllocs := preemptor.PreemptForTaskGroup(total)
			allocsToPreempt = append(allocsToPreempt, preemptedAllocs...)

			// If we were unable to find preempted allocs to meet these requirements,
			// mark as exhausted and continue
			if len(preemptedAllocs) == 0 {
				iter.ctx.Metrics().ExhaustedNode(option.Node, dim)
				continue
			}
		}
		if len(allocsToPreempt) > 0 {
			option.PreemptedAllocs = allocsToPreempt
		}

		// Score the fit normally otherwise
		fitness := iter.scoreFit(option.Node, util)
		normalizedFit := fitness / binPackingMaxFitScore
		option.Scores = append(option.Scores, normalizedFit)
		iter.ctx.Metrics().ScoreNode(option.Node, "binpack", normalizedFit)

		// Score the device affinity
		if totalDeviceAffinityWeight != 0 {
			sumMatchingAffinities /= totalDeviceAffinityWeight
			option.Scores = append(option.Scores, sumMatchingAffinities)
			iter.ctx.Metrics().ScoreNode(option.Node, "devices", sumMatchingAffinities)
		}

		return option
	}
}

func (iter *BinPackIterator) Reset() {
	iter.source.Reset()
}

// JobAntiAffinityIterator is used to apply an anti-affinity to allocating
// alongside other allocations from this job. This is used to help distribute
// load across the cluster.
type JobAntiAffinityIterator struct {
	ctx          Context
	source       RankIterator
	jobID        string
	taskGroup    string
	desiredCount int
}

// NewJobAntiAffinityIterator is used to create a JobAntiAffinityIterator that
// applies a penalty for co-placement with allocs from this job.
func NewJobAntiAffinityIterator(ctx Context, source RankIterator, jobID string) *JobAntiAffinityIterator {
	iter := &JobAntiAffinityIterator{
		ctx:    ctx,
		source: source,
		jobID:  jobID,
	}
	return iter
}

func (iter *JobAntiAffinityIterator) SetJob(job *structs.Job) {
	iter.jobID = job.ID
}

func (iter *JobAntiAffinityIterator) SetTaskGroup(tg *structs.TaskGroup) {
	iter.taskGroup = tg.Name
	iter.desiredCount = tg.Count
}

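// Next applies a penalty proportional to the number of proposed allocations
// that collide with this job and task group. As a rough worked example
// (illustrative numbers only): with a desired count of 10 and 3 colliding
// allocs, the penalty is -(3+1)/10 = -0.4.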
func (iter *JobAntiAffinityIterator) Next() *RankedNode {
	for {
		option := iter.source.Next()
		if option == nil {
			return nil
		}

		// Get the proposed allocations
		proposed, err := option.ProposedAllocs(iter.ctx)
		if err != nil {
			iter.ctx.Logger().Named("job_anti_affinity").Error("failed retrieving proposed allocations", "error", err)
			continue
		}

		// Determine the number of collisions
		collisions := 0
		for _, alloc := range proposed {
			if alloc.JobID == iter.jobID && alloc.TaskGroup == iter.taskGroup {
				collisions++
			}
		}

		// Calculate the penalty based on the number of collisions
		// TODO(preetha): Figure out if batch jobs need a different scoring penalty where collisions matter less
		if collisions > 0 {
			scorePenalty := -1 * float64(collisions+1) / float64(iter.desiredCount)
			option.Scores = append(option.Scores, scorePenalty)
			iter.ctx.Metrics().ScoreNode(option.Node, "job-anti-affinity", scorePenalty)
		} else {
			iter.ctx.Metrics().ScoreNode(option.Node, "job-anti-affinity", 0)
		}
		return option
	}
}

func (iter *JobAntiAffinityIterator) Reset() {
	iter.source.Reset()
}

// NodeReschedulingPenaltyIterator is used to apply a penalty to
// a node that had a previous failed allocation for the same job.
// This is used when attempting to reschedule a failed alloc.
type NodeReschedulingPenaltyIterator struct {
	ctx          Context
	source       RankIterator
	penaltyNodes map[string]struct{}
}

// NewNodeReschedulingPenaltyIterator is used to create a NodeReschedulingPenaltyIterator
// that applies a scoring penalty for placement onto nodes in penaltyNodes.
func NewNodeReschedulingPenaltyIterator(ctx Context, source RankIterator) *NodeReschedulingPenaltyIterator {
	iter := &NodeReschedulingPenaltyIterator{
		ctx:    ctx,
		source: source,
	}
	return iter
}

func (iter *NodeReschedulingPenaltyIterator) SetPenaltyNodes(penaltyNodes map[string]struct{}) {
	iter.penaltyNodes = penaltyNodes
}
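
// A minimal sketch of seeding the penalty set; the node IDs are assumed to
// come from previously failed allocations of the job being rescheduled:
//
//	penaltyNodes := map[string]struct{}{failedAlloc.NodeID: {}}
//	iter.SetPenaltyNodes(penaltyNodes) // such nodes receive a -1 score component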

func (iter *NodeReschedulingPenaltyIterator) Next() *RankedNode {
	for {
		option := iter.source.Next()
		if option == nil {
			return nil
		}

		_, ok := iter.penaltyNodes[option.Node.ID]
		if ok {
			option.Scores = append(option.Scores, -1)
			iter.ctx.Metrics().ScoreNode(option.Node, "node-reschedule-penalty", -1)
		} else {
			iter.ctx.Metrics().ScoreNode(option.Node, "node-reschedule-penalty", 0)
		}
		return option
	}
}

func (iter *NodeReschedulingPenaltyIterator) Reset() {
	iter.penaltyNodes = make(map[string]struct{})
	iter.source.Reset()
}

// NodeAffinityIterator is used to resolve any affinity rules in the job or task group,
// and apply a weighted score to nodes if they match.
type NodeAffinityIterator struct {
	ctx           Context
	source        RankIterator
	jobAffinities []*structs.Affinity
	affinities    []*structs.Affinity
}

// NewNodeAffinityIterator is used to create a NodeAffinityIterator that
// applies a weighted score according to whether nodes match any
// affinities in the job or task group.
func NewNodeAffinityIterator(ctx Context, source RankIterator) *NodeAffinityIterator {
	return &NodeAffinityIterator{
		ctx:    ctx,
		source: source,
	}
}

func (iter *NodeAffinityIterator) SetJob(job *structs.Job) {
	iter.jobAffinities = job.Affinities
}

func (iter *NodeAffinityIterator) SetTaskGroup(tg *structs.TaskGroup) {
	// Merge job affinities
	if iter.jobAffinities != nil {
		iter.affinities = append(iter.affinities, iter.jobAffinities...)
	}

	// Merge task group affinities and task affinities
	if tg.Affinities != nil {
		iter.affinities = append(iter.affinities, tg.Affinities...)
	}
	for _, task := range tg.Tasks {
		if task.Affinities != nil {
			iter.affinities = append(iter.affinities, task.Affinities...)
		}
	}
}

func (iter *NodeAffinityIterator) Reset() {
	iter.source.Reset()
	// This method is called between each task group, so only reset the merged list
	iter.affinities = nil
}

func (iter *NodeAffinityIterator) hasAffinities() bool {
	return len(iter.affinities) > 0
}

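// Next scores a node by summing the weights of its matching affinities and
// normalizing by the sum of absolute weights. As a rough worked example
// (illustrative numbers only): affinities weighted 50 and -20, where only the
// -20 anti-affinity matches, yield a score of -20 / (50 + 20), roughly -0.29.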
func (iter *NodeAffinityIterator) Next() *RankedNode {
	option := iter.source.Next()
	if option == nil {
		return nil
	}
	if !iter.hasAffinities() {
		iter.ctx.Metrics().ScoreNode(option.Node, "node-affinity", 0)
		return option
	}
	// TODO(preetha): we should calculate normalized weights once and reuse them here
	sumWeight := 0.0
	for _, affinity := range iter.affinities {
		sumWeight += math.Abs(float64(affinity.Weight))
	}

	totalAffinityScore := 0.0
	for _, affinity := range iter.affinities {
		if matchesAffinity(iter.ctx, affinity, option.Node) {
			totalAffinityScore += float64(affinity.Weight)
		}
	}
	normScore := totalAffinityScore / sumWeight
	if totalAffinityScore != 0.0 {
		option.Scores = append(option.Scores, normScore)
		iter.ctx.Metrics().ScoreNode(option.Node, "node-affinity", normScore)
	}
	return option
}

func matchesAffinity(ctx Context, affinity *structs.Affinity, option *structs.Node) bool {
	// TODO(preetha): Add a step here that filters based on computed node class for potential speedup
	// Resolve the targets
	lVal, lOk := resolveTarget(affinity.LTarget, option)
	rVal, rOk := resolveTarget(affinity.RTarget, option)

	// Check if satisfied
	return checkAffinity(ctx, affinity.Operand, lVal, rVal, lOk, rOk)
}

// ScoreNormalizationIterator is used to combine the scores from prior
// iterators into one final score. The current implementation averages the
// scores together.
type ScoreNormalizationIterator struct {
	ctx    Context
	source RankIterator
}

// NewScoreNormalizationIterator is used to create a ScoreNormalizationIterator that
// averages scores from various iterators into a final score.
func NewScoreNormalizationIterator(ctx Context, source RankIterator) *ScoreNormalizationIterator {
	return &ScoreNormalizationIterator{
		ctx:    ctx,
		source: source,
	}
}

func (iter *ScoreNormalizationIterator) Reset() {
	iter.source.Reset()
}

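// Next averages all accumulated score components into FinalScore. As a rough
// worked example (illustrative numbers only): component scores of 0.5 from
// bin packing and -0.4 from job anti-affinity average to (0.5 - 0.4) / 2 = 0.05.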
func (iter *ScoreNormalizationIterator) Next() *RankedNode {
	option := iter.source.Next()
	if option == nil || len(option.Scores) == 0 {
		return option
	}
	numScorers := len(option.Scores)
	sum := 0.0
	for _, score := range option.Scores {
		sum += score
	}
	option.FinalScore = sum / float64(numScorers)
	// TODO(preetha): Turn map in allocmetrics into a heap of topK scores
	iter.ctx.Metrics().ScoreNode(option.Node, "normalized-score", option.FinalScore)
	return option
}