github.com/grafana/pyroscope@v1.18.0/pkg/scheduler/queue/user_queues.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/scheduler/queue/user_queues.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package queue
     7  
     8  import (
     9  	"math/rand"
    10  	"slices"
    11  	"sort"
    12  	"time"
    13  
    14  	"github.com/grafana/pyroscope/pkg/util"
    15  )
    16  
// querier holds information about a querier registered in the queue.
type querier struct {
	// Number of active connections. The querier is only removed once this
	// has dropped to zero.
	connections int

	// True if the querier notified it's gracefully shutting down. While set,
	// no further queues are handed out to this querier.
	shuttingDown bool

	// When the last connection has been unregistered. Only recorded when a forget
	// delay is configured and no graceful shutdown has been announced; reset to the
	// zero time when the querier connects again.
	disconnectedAt time.Time
}
    28  
// queues holds user queues for pending requests. It also keeps track of connected queriers,
// and mapping between users and queriers.
type queues struct {
	// Per-user queue state, keyed by user ID.
	userQueues map[string]*userQueue

	// List of all users with queues, used for iteration when searching for next queue to handle.
	// Users removed from the middle are replaced with "". To avoid skipping users during iteration, we only shrink
	// this list when there are ""'s at the end of it.
	users []string

	// Capacity of the request channel created for each new user queue.
	maxUserQueueSize int

	// How long to wait before removing a querier which has got disconnected
	// but hasn't notified about a graceful shutdown. When 0, such a querier is
	// removed as soon as its last connection is unregistered.
	forgetDelay time.Duration

	// Tracks queriers registered to the queue, keyed by querier ID.
	queriers map[string]*querier

	// Sorted list of querier names, used when creating per-user shard.
	// It is kept sorted so that a querier can be located by binary search on removal.
	sortedQueriers []string
}
    51  
// userQueue is the per-user state tracked by queues: the channel of pending requests
// plus the shuffle-sharding settings that decide which queriers may serve the user.
type userQueue struct {
	// Buffered channel of pending requests for this user.
	ch chan Request

	// If not nil, only these queriers can handle user requests. If nil, all queriers can.
	// We set this to nil if number of available queriers <= maxQueriers.
	// maxQueriers is the shard size requested for the user; 0 means all queriers.
	queriers    map[string]struct{}
	maxQueriers int

	// Seed for shuffle sharding of queriers. This seed is based on userID only and is therefore consistent
	// between different frontends.
	seed int64

	// Points back to 'users' field in queues (the position of this user in that list).
	// Enables quick cleanup.
	index int
}
    67  
    68  func newUserQueues(maxUserQueueSize int, forgetDelay time.Duration) *queues {
    69  	return &queues{
    70  		userQueues:       map[string]*userQueue{},
    71  		users:            nil,
    72  		maxUserQueueSize: maxUserQueueSize,
    73  		forgetDelay:      forgetDelay,
    74  		queriers:         map[string]*querier{},
    75  		sortedQueriers:   nil,
    76  	}
    77  }
    78  
// len returns the number of users that currently have a queue.
func (q *queues) len() int {
	return len(q.userQueues)
}
    82  
    83  func (q *queues) deleteQueue(userID string) {
    84  	uq := q.userQueues[userID]
    85  	if uq == nil {
    86  		return
    87  	}
    88  
    89  	delete(q.userQueues, userID)
    90  	q.users[uq.index] = ""
    91  
    92  	// Shrink users list size if possible. This is safe, and no users will be skipped during iteration.
    93  	for ix := len(q.users) - 1; ix >= 0 && q.users[ix] == ""; ix-- {
    94  		q.users = q.users[:ix]
    95  	}
    96  }
    97  
    98  // Returns existing or new queue for user.
    99  // MaxQueriers is used to compute which queriers should handle requests for this user.
   100  // If maxQueriers is <= 0, all queriers can handle this user's requests.
   101  // If maxQueriers has changed since the last call, queriers for this are recomputed.
   102  func (q *queues) getOrAddQueue(userID string, maxQueriers int) chan Request {
   103  	// Empty user is not allowed, as that would break our users list ("" is used for free spot).
   104  	if userID == "" {
   105  		return nil
   106  	}
   107  
   108  	if maxQueriers < 0 {
   109  		maxQueriers = 0
   110  	}
   111  
   112  	uq := q.userQueues[userID]
   113  
   114  	if uq == nil {
   115  		uq = &userQueue{
   116  			ch:    make(chan Request, q.maxUserQueueSize),
   117  			seed:  util.ShuffleShardSeed(userID, ""),
   118  			index: -1,
   119  		}
   120  		q.userQueues[userID] = uq
   121  
   122  		// Add user to the list of users... find first free spot, and put it there.
   123  		for ix, u := range q.users {
   124  			if u == "" {
   125  				uq.index = ix
   126  				q.users[ix] = userID
   127  				break
   128  			}
   129  		}
   130  
   131  		// ... or add to the end.
   132  		if uq.index < 0 {
   133  			uq.index = len(q.users)
   134  			q.users = append(q.users, userID)
   135  		}
   136  	}
   137  
   138  	if uq.maxQueriers != maxQueriers {
   139  		uq.maxQueriers = maxQueriers
   140  		uq.queriers = shuffleQueriersForUser(uq.seed, maxQueriers, q.sortedQueriers, nil)
   141  	}
   142  
   143  	return uq.ch
   144  }
   145  
   146  // Finds next queue for the querier. To support fair scheduling between users, client is expected
   147  // to pass last user index returned by this function as argument. Is there was no previous
   148  // last user index, use -1.
   149  func (q *queues) getNextQueueForQuerier(lastUserIndex int, querierID string) (chan Request, string, int) {
   150  	uid := lastUserIndex
   151  
   152  	// Ensure the querier is not shutting down. If the querier is shutting down, we shouldn't forward
   153  	// any more queries to it.
   154  	if info := q.queriers[querierID]; info == nil || info.shuttingDown {
   155  		return nil, "", uid
   156  	}
   157  
   158  	for iters := 0; iters < len(q.users); iters++ {
   159  		uid = uid + 1
   160  
   161  		// Don't use "mod len(q.users)", as that could skip users at the beginning of the list
   162  		// for example when q.users has shrunk since last call.
   163  		if uid >= len(q.users) {
   164  			uid = 0
   165  		}
   166  
   167  		u := q.users[uid]
   168  		if u == "" {
   169  			continue
   170  		}
   171  
   172  		q := q.userQueues[u]
   173  
   174  		if q.queriers != nil {
   175  			if _, ok := q.queriers[querierID]; !ok {
   176  				// This querier is not handling the user.
   177  				continue
   178  			}
   179  		}
   180  
   181  		return q.ch, u, uid
   182  	}
   183  	return nil, "", uid
   184  }
   185  
   186  func (q *queues) addQuerierConnection(querierID string) {
   187  	info := q.queriers[querierID]
   188  	if info != nil {
   189  		info.connections++
   190  
   191  		// Reset in case the querier re-connected while it was in the forget waiting period.
   192  		info.shuttingDown = false
   193  		info.disconnectedAt = time.Time{}
   194  
   195  		return
   196  	}
   197  
   198  	// First connection from this querier.
   199  	q.queriers[querierID] = &querier{connections: 1}
   200  	q.sortedQueriers = append(q.sortedQueriers, querierID)
   201  	slices.Sort(q.sortedQueriers)
   202  
   203  	q.recomputeUserQueriers()
   204  }
   205  
   206  func (q *queues) removeQuerierConnection(querierID string, now time.Time) {
   207  	info := q.queriers[querierID]
   208  	if info == nil || info.connections <= 0 {
   209  		panic("unexpected number of connections for querier")
   210  	}
   211  
   212  	// Decrease the number of active connections.
   213  	info.connections--
   214  	if info.connections > 0 {
   215  		return
   216  	}
   217  
   218  	// There no more active connections. If the forget delay is configured then
   219  	// we can remove it only if querier has announced a graceful shutdown.
   220  	if info.shuttingDown || q.forgetDelay == 0 {
   221  		q.removeQuerier(querierID)
   222  		return
   223  	}
   224  
   225  	// No graceful shutdown has been notified yet, so we should track the current time
   226  	// so that we'll remove the querier as soon as we receive the graceful shutdown
   227  	// notification (if any) or once the threshold expires.
   228  	info.disconnectedAt = now
   229  }
   230  
   231  func (q *queues) removeQuerier(querierID string) {
   232  	delete(q.queriers, querierID)
   233  
   234  	ix := sort.SearchStrings(q.sortedQueriers, querierID)
   235  	if ix >= len(q.sortedQueriers) || q.sortedQueriers[ix] != querierID {
   236  		panic("incorrect state of sorted queriers")
   237  	}
   238  
   239  	q.sortedQueriers = append(q.sortedQueriers[:ix], q.sortedQueriers[ix+1:]...)
   240  
   241  	q.recomputeUserQueriers()
   242  }
   243  
   244  // notifyQuerierShutdown records that a querier has sent notification about a graceful shutdown.
   245  func (q *queues) notifyQuerierShutdown(querierID string) {
   246  	info := q.queriers[querierID]
   247  	if info == nil {
   248  		// The querier may have already been removed, so we just ignore it.
   249  		return
   250  	}
   251  
   252  	// If there are no more connections, we should remove the querier.
   253  	if info.connections == 0 {
   254  		q.removeQuerier(querierID)
   255  		return
   256  	}
   257  
   258  	// Otherwise we should annotate we received a graceful shutdown notification
   259  	// and the querier will be removed once all connections are unregistered.
   260  	info.shuttingDown = true
   261  }
   262  
   263  // forgetDisconnectedQueriers removes all disconnected queriers that have gone since at least
   264  // the forget delay. Returns the number of forgotten queriers.
   265  func (q *queues) forgetDisconnectedQueriers(now time.Time) int {
   266  	// Nothing to do if the forget delay is disabled.
   267  	if q.forgetDelay == 0 {
   268  		return 0
   269  	}
   270  
   271  	// Remove all queriers with no connections that have gone since at least the forget delay.
   272  	threshold := now.Add(-q.forgetDelay)
   273  	forgotten := 0
   274  
   275  	for querierID := range q.queriers {
   276  		if info := q.queriers[querierID]; info.connections == 0 && info.disconnectedAt.Before(threshold) {
   277  			q.removeQuerier(querierID)
   278  			forgotten++
   279  		}
   280  	}
   281  
   282  	return forgotten
   283  }
   284  
   285  func (q *queues) recomputeUserQueriers() {
   286  	scratchpad := make([]string, 0, len(q.sortedQueriers))
   287  
   288  	for _, uq := range q.userQueues {
   289  		uq.queriers = shuffleQueriersForUser(uq.seed, uq.maxQueriers, q.sortedQueriers, scratchpad)
   290  	}
   291  }
   292  
   293  // shuffleQueriersForUser returns nil if queriersToSelect is 0 or there are not enough queriers to select from.
   294  // In that case *all* queriers should be used.
   295  // Scratchpad is used for shuffling, to avoid new allocations. If nil, new slice is allocated.
   296  func shuffleQueriersForUser(userSeed int64, queriersToSelect int, allSortedQueriers []string, scratchpad []string) map[string]struct{} {
   297  	if queriersToSelect == 0 || len(allSortedQueriers) <= queriersToSelect {
   298  		return nil
   299  	}
   300  
   301  	result := make(map[string]struct{}, queriersToSelect)
   302  	rnd := rand.New(rand.NewSource(userSeed))
   303  
   304  	scratchpad = scratchpad[:0]
   305  	scratchpad = append(scratchpad, allSortedQueriers...)
   306  
   307  	last := len(scratchpad) - 1
   308  	for i := 0; i < queriersToSelect; i++ {
   309  		r := rnd.Intn(last + 1)
   310  		result[scratchpad[r]] = struct{}{}
   311  		// move selected item to the end, it won't be selected anymore.
   312  		scratchpad[r], scratchpad[last] = scratchpad[last], scratchpad[r]
   313  		last--
   314  	}
   315  
   316  	return result
   317  }