github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/scheduler/queue/queue_test.go

package queue

import (
	"context"
	"fmt"
	"strconv"
	"sync"
	"testing"
	"time"

	"github.com/grafana/dskit/services"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

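// BenchmarkGetNextRequest pre-fills b.N request queues (numTenants tenants, a few
// registered queriers) and then measures how quickly GetNextRequestForQuerier
// can drain all outstanding requests.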
func BenchmarkGetNextRequest(b *testing.B) {
	const maxOutstandingPerTenant = 2
	const numTenants = 50
	const queriers = 5

	queues := make([]*RequestQueue, 0, b.N)

	for n := 0; n < b.N; n++ {
		queue := NewRequestQueue(maxOutstandingPerTenant, 0,
			prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
			prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
		)
		queues = append(queues, queue)

		for ix := 0; ix < queriers; ix++ {
			queue.RegisterQuerierConnection(fmt.Sprintf("querier-%d", ix))
		}

		for i := 0; i < maxOutstandingPerTenant; i++ {
			for j := 0; j < numTenants; j++ {
				userID := strconv.Itoa(j)

				err := queue.EnqueueRequest(userID, "request", 0, nil)
				if err != nil {
					b.Fatal(err)
				}
			}
		}
	}

	ctx := context.Background()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		idx := FirstUser()
		for j := 0; j < maxOutstandingPerTenant*numTenants; j++ {
			querier := ""
		b:
			// Find querier with at least one request to avoid blocking in getNextRequestForQuerier.
			for _, q := range queues[i].queues.userQueues {
				for qid := range q.queriers {
					querier = qid
					break b
				}
			}

			_, nidx, err := queues[i].GetNextRequestForQuerier(ctx, idx, querier)
			if err != nil {
				b.Fatal(err)
			}
			idx = nidx
		}
	}
}

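// BenchmarkQueueRequest pre-builds b.N queues and the per-tenant request payloads,
// then measures how quickly EnqueueRequest can fill the queues.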
func BenchmarkQueueRequest(b *testing.B) {
	const maxOutstandingPerTenant = 2
	const numTenants = 50
	const queriers = 5

	queues := make([]*RequestQueue, 0, b.N)
	users := make([]string, 0, numTenants)
	requests := make([]string, 0, numTenants)

	for n := 0; n < b.N; n++ {
		q := NewRequestQueue(maxOutstandingPerTenant, 0,
			prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
			prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}),
		)

		for ix := 0; ix < queriers; ix++ {
			q.RegisterQuerierConnection(fmt.Sprintf("querier-%d", ix))
		}

		queues = append(queues, q)

		for j := 0; j < numTenants; j++ {
			requests = append(requests, fmt.Sprintf("%d-%d", n, j))
			users = append(users, strconv.Itoa(j))
		}
	}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		for i := 0; i < maxOutstandingPerTenant; i++ {
			for j := 0; j < numTenants; j++ {
				err := queues[n].EnqueueRequest(users[j], requests[j], 0, nil)
				if err != nil {
					b.Fatal(err)
				}
			}
		}
	}
}

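// This test verifies that a request sharded to a querier which crashed (querier-1)
// is eventually handed to another querier (querier-2) once querier-1's forget delay
// has expired and the tenant is resharded.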
func TestRequestQueue_GetNextRequestForQuerier_ShouldGetRequestAfterReshardingBecauseQuerierHasBeenForgotten(t *testing.T) {
	const forgetDelay = 3 * time.Second

	queue := NewRequestQueue(1, forgetDelay,
		prometheus.NewGaugeVec(prometheus.GaugeOpts{}, []string{"user"}),
		prometheus.NewCounterVec(prometheus.CounterOpts{}, []string{"user"}))

	// Start the queue service.
	ctx := context.Background()
	require.NoError(t, services.StartAndAwaitRunning(ctx, queue))
	t.Cleanup(func() {
		require.NoError(t, services.StopAndAwaitTerminated(ctx, queue))
	})

	// Two queriers connect.
	queue.RegisterQuerierConnection("querier-1")
	queue.RegisterQuerierConnection("querier-2")

	// Querier-2 waits for a new request.
	querier2wg := sync.WaitGroup{}
	querier2wg.Add(1)
	go func() {
		defer querier2wg.Done()
		_, _, err := queue.GetNextRequestForQuerier(ctx, FirstUser(), "querier-2")
		require.NoError(t, err)
	}()

	// Querier-1 crashes (no graceful shutdown notification).
	queue.UnregisterQuerierConnection("querier-1")

	// Enqueue a request from a user which would be assigned to querier-1.
	// NOTE: the "user-1" hash falls in the querier-1 shard.
	require.NoError(t, queue.EnqueueRequest("user-1", "request", 1, nil))

	startTime := time.Now()
	querier2wg.Wait()
	waitTime := time.Since(startTime)

	// We expect querier-2 to get the request only after querier-1's forget delay has passed.
	assert.GreaterOrEqual(t, waitTime.Milliseconds(), forgetDelay.Milliseconds())
}

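// TestContextCond exercises contextCond, the sync.Cond wrapper used by the queue
// whose Wait can be unblocked either by Broadcast or by context cancellation.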
func TestContextCond(t *testing.T) {
	t.Run("wait until broadcast", func(t *testing.T) {
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{Cond: sync.NewCond(mtx)}

		doneWaiting := make(chan struct{})

		mtx.Lock()
		go func() {
			cond.Wait(context.Background())
			mtx.Unlock()
			close(doneWaiting)
		}()

		assertChanNotReceived(t, doneWaiting, 100*time.Millisecond, "cond.Wait returned, but it should not because we did not broadcast yet")

		cond.Broadcast()
		assertChanReceived(t, doneWaiting, 250*time.Millisecond, "cond.Wait did not return after broadcast")
	})

	t.Run("wait until context deadline", func(t *testing.T) {
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{Cond: sync.NewCond(mtx)}
		doneWaiting := make(chan struct{})

		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()

		mtx.Lock()
		go func() {
			cond.Wait(ctx)
			mtx.Unlock()
			close(doneWaiting)
		}()

		assertChanNotReceived(t, doneWaiting, 100*time.Millisecond, "cond.Wait returned, but it should not because we did not broadcast yet and didn't cancel the context")

		cancel()
		assertChanReceived(t, doneWaiting, 250*time.Millisecond, "cond.Wait did not return after cancelling the context")
	})

	t.Run("wait on already canceled context", func(t *testing.T) {
		// This test represents the racy real-world scenario: we don't know whether
		// Wait will actually start waiting before the broadcast triggered by the context cancellation.
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{Cond: sync.NewCond(mtx)}
		doneWaiting := make(chan struct{})

		alreadyCanceledContext, cancel := context.WithCancel(context.Background())
		cancel()

		mtx.Lock()
		go func() {
			cond.Wait(alreadyCanceledContext)
			mtx.Unlock()
			close(doneWaiting)
		}()

		assertChanReceived(t, doneWaiting, 250*time.Millisecond, "cond.Wait did not return after cancelling the context")
	})

	t.Run("wait on already canceled context, but it takes a while to wait", func(t *testing.T) {
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{
			Cond: sync.NewCond(mtx),
			testHookBeforeWaiting: func() {
				// This makes the waiting goroutine so slow that our Wait(ctx) will need to broadcast once it sees it waiting.
				time.Sleep(250 * time.Millisecond)
			},
		}
		doneWaiting := make(chan struct{})

		alreadyCanceledContext, cancel := context.WithCancel(context.Background())
		cancel()

		mtx.Lock()
		go func() {
			cond.Wait(alreadyCanceledContext)
			mtx.Unlock()
			close(doneWaiting)
		}()

		assertChanReceived(t, doneWaiting, time.Second, "cond.Wait did not return within a second on an already canceled context")
	})

	t.Run("lots of goroutines waiting at the same time, none of them misses its broadcast from cancel", func(t *testing.T) {
		t.Parallel()
		mtx := &sync.Mutex{}
		cond := contextCond{
			Cond: sync.NewCond(mtx),
			testHookBeforeWaiting: func() {
				// Wait just a little bit to let every goroutine be created.
				time.Sleep(time.Millisecond)
			},
		}
		const goroutines = 100

		doneWaiting := make(chan struct{}, goroutines)
		release := make(chan struct{})

		ctx, cancel := context.WithCancel(context.Background())
		cancel()

		for i := 0; i < goroutines; i++ {
			go func() {
				<-release

				mtx.Lock()
				cond.Wait(ctx)
				mtx.Unlock()

				doneWaiting <- struct{}{}
			}()
		}
		go func() {
			<-release
			cancel()
		}()

		close(release)

		assert.Eventually(t, func() bool {
			return len(doneWaiting) == goroutines
		}, time.Second, 10*time.Millisecond)
	})
}

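// assertChanReceived fails the test if nothing is received on c within the given timeout.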
func assertChanReceived(t *testing.T, c chan struct{}, timeout time.Duration, msg string) {
	t.Helper()

	select {
	case <-c:
	case <-time.After(timeout):
		t.Fatal(msg)
	}
}

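// assertChanNotReceived fails the test if anything is received on c before the wait period elapses.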
func assertChanNotReceived(t *testing.T, c chan struct{}, wait time.Duration, msg string, args ...interface{}) {
	t.Helper()

	select {
	case <-c:
		t.Fatalf(msg, args...)
	case <-time.After(wait):
		// OK!
	}
}