github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/scheduler/queue/user_queues.go (about)

     1  package queue
     2  
     3  import (
     4  	"math/rand"
     5  	"sort"
     6  	"time"
     7  
     8  	"github.com/cortexproject/cortex/pkg/util"
     9  )
    10  
    11  // querier holds information about a querier registered in the queue.
    12  type querier struct {
    13  	// Number of active connections.
    14  	connections int
    15  
    16  	// True if the querier notified it's gracefully shutting down.
    17  	shuttingDown bool
    18  
    19  	// When the last connection has been unregistered.
    20  	disconnectedAt time.Time
    21  }
    22  
    23  // This struct holds user queues for pending requests. It also keeps track of connected queriers,
    24  // and mapping between users and queriers.
    25  type queues struct {
    26  	userQueues map[string]*userQueue
    27  
    28  	// List of all users with queues, used for iteration when searching for next queue to handle.
    29  	// Users removed from the middle are replaced with "". To avoid skipping users during iteration, we only shrink
    30  	// this list when there are ""'s at the end of it.
    31  	users []string
    32  
    33  	maxUserQueueSize int
    34  
    35  	// How long to wait before removing a querier which has got disconnected
    36  	// but hasn't notified about a graceful shutdown.
    37  	forgetDelay time.Duration
    38  
    39  	// Tracks queriers registered to the queue.
    40  	queriers map[string]*querier
    41  
    42  	// Sorted list of querier names, used when creating per-user shard.
    43  	sortedQueriers []string
    44  }
    45  
    46  type userQueue struct {
    47  	ch chan Request
    48  
    49  	// If not nil, only these queriers can handle user requests. If nil, all queriers can.
    50  	// We set this to nil if number of available queriers <= maxQueriers.
    51  	queriers    map[string]struct{}
    52  	maxQueriers int
    53  
    54  	// Seed for shuffle sharding of queriers. This seed is based on userID only and is therefore consistent
    55  	// between different frontends.
    56  	seed int64
    57  
    58  	// Points back to 'users' field in queues. Enables quick cleanup.
    59  	index int
    60  }
    61  
    62  func newUserQueues(maxUserQueueSize int, forgetDelay time.Duration) *queues {
    63  	return &queues{
    64  		userQueues:       map[string]*userQueue{},
    65  		users:            nil,
    66  		maxUserQueueSize: maxUserQueueSize,
    67  		forgetDelay:      forgetDelay,
    68  		queriers:         map[string]*querier{},
    69  		sortedQueriers:   nil,
    70  	}
    71  }
    72  
    73  func (q *queues) len() int {
    74  	return len(q.userQueues)
    75  }
    76  
    77  func (q *queues) deleteQueue(userID string) {
    78  	uq := q.userQueues[userID]
    79  	if uq == nil {
    80  		return
    81  	}
    82  
    83  	delete(q.userQueues, userID)
    84  	q.users[uq.index] = ""
    85  
    86  	// Shrink users list size if possible. This is safe, and no users will be skipped during iteration.
    87  	for ix := len(q.users) - 1; ix >= 0 && q.users[ix] == ""; ix-- {
    88  		q.users = q.users[:ix]
    89  	}
    90  }
    91  
    92  // Returns existing or new queue for user.
    93  // MaxQueriers is used to compute which queriers should handle requests for this user.
    94  // If maxQueriers is <= 0, all queriers can handle this user's requests.
    95  // If maxQueriers has changed since the last call, queriers for this are recomputed.
    96  func (q *queues) getOrAddQueue(userID string, maxQueriers int) chan Request {
    97  	// Empty user is not allowed, as that would break our users list ("" is used for free spot).
    98  	if userID == "" {
    99  		return nil
   100  	}
   101  
   102  	if maxQueriers < 0 {
   103  		maxQueriers = 0
   104  	}
   105  
   106  	uq := q.userQueues[userID]
   107  
   108  	if uq == nil {
   109  		uq = &userQueue{
   110  			ch:    make(chan Request, q.maxUserQueueSize),
   111  			seed:  util.ShuffleShardSeed(userID, ""),
   112  			index: -1,
   113  		}
   114  		q.userQueues[userID] = uq
   115  
   116  		// Add user to the list of users... find first free spot, and put it there.
   117  		for ix, u := range q.users {
   118  			if u == "" {
   119  				uq.index = ix
   120  				q.users[ix] = userID
   121  				break
   122  			}
   123  		}
   124  
   125  		// ... or add to the end.
   126  		if uq.index < 0 {
   127  			uq.index = len(q.users)
   128  			q.users = append(q.users, userID)
   129  		}
   130  	}
   131  
   132  	if uq.maxQueriers != maxQueriers {
   133  		uq.maxQueriers = maxQueriers
   134  		uq.queriers = shuffleQueriersForUser(uq.seed, maxQueriers, q.sortedQueriers, nil)
   135  	}
   136  
   137  	return uq.ch
   138  }
   139  
   140  // Finds next queue for the querier. To support fair scheduling between users, client is expected
   141  // to pass last user index returned by this function as argument. Is there was no previous
   142  // last user index, use -1.
   143  func (q *queues) getNextQueueForQuerier(lastUserIndex int, querierID string) (chan Request, string, int) {
   144  	uid := lastUserIndex
   145  
   146  	for iters := 0; iters < len(q.users); iters++ {
   147  		uid = uid + 1
   148  
   149  		// Don't use "mod len(q.users)", as that could skip users at the beginning of the list
   150  		// for example when q.users has shrunk since last call.
   151  		if uid >= len(q.users) {
   152  			uid = 0
   153  		}
   154  
   155  		u := q.users[uid]
   156  		if u == "" {
   157  			continue
   158  		}
   159  
   160  		q := q.userQueues[u]
   161  
   162  		if q.queriers != nil {
   163  			if _, ok := q.queriers[querierID]; !ok {
   164  				// This querier is not handling the user.
   165  				continue
   166  			}
   167  		}
   168  
   169  		return q.ch, u, uid
   170  	}
   171  	return nil, "", uid
   172  }
   173  
   174  func (q *queues) addQuerierConnection(querierID string) {
   175  	info := q.queriers[querierID]
   176  	if info != nil {
   177  		info.connections++
   178  
   179  		// Reset in case the querier re-connected while it was in the forget waiting period.
   180  		info.shuttingDown = false
   181  		info.disconnectedAt = time.Time{}
   182  
   183  		return
   184  	}
   185  
   186  	// First connection from this querier.
   187  	q.queriers[querierID] = &querier{connections: 1}
   188  	q.sortedQueriers = append(q.sortedQueriers, querierID)
   189  	sort.Strings(q.sortedQueriers)
   190  
   191  	q.recomputeUserQueriers()
   192  }
   193  
   194  func (q *queues) removeQuerierConnection(querierID string, now time.Time) {
   195  	info := q.queriers[querierID]
   196  	if info == nil || info.connections <= 0 {
   197  		panic("unexpected number of connections for querier")
   198  	}
   199  
   200  	// Decrease the number of active connections.
   201  	info.connections--
   202  	if info.connections > 0 {
   203  		return
   204  	}
   205  
   206  	// There no more active connections. If the forget delay is configured then
   207  	// we can remove it only if querier has announced a graceful shutdown.
   208  	if info.shuttingDown || q.forgetDelay == 0 {
   209  		q.removeQuerier(querierID)
   210  		return
   211  	}
   212  
   213  	// No graceful shutdown has been notified yet, so we should track the current time
   214  	// so that we'll remove the querier as soon as we receive the graceful shutdown
   215  	// notification (if any) or once the threshold expires.
   216  	info.disconnectedAt = now
   217  }
   218  
   219  func (q *queues) removeQuerier(querierID string) {
   220  	delete(q.queriers, querierID)
   221  
   222  	ix := sort.SearchStrings(q.sortedQueriers, querierID)
   223  	if ix >= len(q.sortedQueriers) || q.sortedQueriers[ix] != querierID {
   224  		panic("incorrect state of sorted queriers")
   225  	}
   226  
   227  	q.sortedQueriers = append(q.sortedQueriers[:ix], q.sortedQueriers[ix+1:]...)
   228  
   229  	q.recomputeUserQueriers()
   230  }
   231  
   232  // notifyQuerierShutdown records that a querier has sent notification about a graceful shutdown.
   233  func (q *queues) notifyQuerierShutdown(querierID string) {
   234  	info := q.queriers[querierID]
   235  	if info == nil {
   236  		// The querier may have already been removed, so we just ignore it.
   237  		return
   238  	}
   239  
   240  	// If there are no more connections, we should remove the querier.
   241  	if info.connections == 0 {
   242  		q.removeQuerier(querierID)
   243  		return
   244  	}
   245  
   246  	// Otherwise we should annotate we received a graceful shutdown notification
   247  	// and the querier will be removed once all connections are unregistered.
   248  	info.shuttingDown = true
   249  }
   250  
   251  // forgetDisconnectedQueriers removes all disconnected queriers that have gone since at least
   252  // the forget delay. Returns the number of forgotten queriers.
   253  func (q *queues) forgetDisconnectedQueriers(now time.Time) int {
   254  	// Nothing to do if the forget delay is disabled.
   255  	if q.forgetDelay == 0 {
   256  		return 0
   257  	}
   258  
   259  	// Remove all queriers with no connections that have gone since at least the forget delay.
   260  	threshold := now.Add(-q.forgetDelay)
   261  	forgotten := 0
   262  
   263  	for querierID := range q.queriers {
   264  		if info := q.queriers[querierID]; info.connections == 0 && info.disconnectedAt.Before(threshold) {
   265  			q.removeQuerier(querierID)
   266  			forgotten++
   267  		}
   268  	}
   269  
   270  	return forgotten
   271  }
   272  
   273  func (q *queues) recomputeUserQueriers() {
   274  	scratchpad := make([]string, 0, len(q.sortedQueriers))
   275  
   276  	for _, uq := range q.userQueues {
   277  		uq.queriers = shuffleQueriersForUser(uq.seed, uq.maxQueriers, q.sortedQueriers, scratchpad)
   278  	}
   279  }
   280  
   281  // shuffleQueriersForUser returns nil if queriersToSelect is 0 or there are not enough queriers to select from.
   282  // In that case *all* queriers should be used.
   283  // Scratchpad is used for shuffling, to avoid new allocations. If nil, new slice is allocated.
   284  func shuffleQueriersForUser(userSeed int64, queriersToSelect int, allSortedQueriers []string, scratchpad []string) map[string]struct{} {
   285  	if queriersToSelect == 0 || len(allSortedQueriers) <= queriersToSelect {
   286  		return nil
   287  	}
   288  
   289  	result := make(map[string]struct{}, queriersToSelect)
   290  	rnd := rand.New(rand.NewSource(userSeed))
   291  
   292  	scratchpad = scratchpad[:0]
   293  	scratchpad = append(scratchpad, allSortedQueriers...)
   294  
   295  	last := len(scratchpad) - 1
   296  	for i := 0; i < queriersToSelect; i++ {
   297  		r := rnd.Intn(last + 1)
   298  		result[scratchpad[r]] = struct{}{}
   299  		// move selected item to the end, it won't be selected anymore.
   300  		scratchpad[r], scratchpad[last] = scratchpad[last], scratchpad[r]
   301  		last--
   302  	}
   303  
   304  	return result
   305  }