github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/engine/execution/ingestion/uploader/uploader_test.go (about)

     1  package uploader
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"runtime/debug"
     7  	"sync"
     8  	"testing"
     9  	"time"
    10  
    11  	"github.com/rs/zerolog"
    12  	"github.com/stretchr/testify/require"
    13  	"go.uber.org/atomic"
    14  
    15  	"github.com/onflow/flow-go/engine/execution"
    16  	"github.com/onflow/flow-go/engine/execution/state/unittest"
    17  	"github.com/onflow/flow-go/module/metrics"
    18  	testutils "github.com/onflow/flow-go/utils/unittest"
    19  	unittest2 "github.com/onflow/flow-go/utils/unittest"
    20  )
    21  
    22  func Test_AsyncUploader(t *testing.T) {
    23  
    24  	computationResult := unittest.ComputationResultFixture(
    25  		t,
    26  		testutils.IdentifierFixture(),
    27  		nil)
    28  
    29  	t.Run("uploads are run in parallel and emit metrics", func(t *testing.T) {
    30  		wgUploadStarted := sync.WaitGroup{}
    31  		wgUploadStarted.Add(3)
    32  
    33  		wgContinueUpload := sync.WaitGroup{}
    34  		wgContinueUpload.Add(1)
    35  
    36  		uploader := &DummyUploader{
    37  			f: func() error {
    38  				// this should be called 3 times
    39  				wgUploadStarted.Done()
    40  
    41  				wgContinueUpload.Wait()
    42  
    43  				return nil
    44  			},
    45  		}
    46  
    47  		metrics := &DummyCollector{}
    48  		async := NewAsyncUploader(uploader, 1*time.Nanosecond, 1, zerolog.Nop(), metrics)
    49  
    50  		err := async.Upload(computationResult)
    51  		require.NoError(t, err)
    52  
    53  		err = async.Upload(computationResult)
    54  		require.NoError(t, err)
    55  
    56  		err = async.Upload(computationResult)
    57  		require.NoError(t, err)
    58  
    59  		wgUploadStarted.Wait() // all three are in progress, check metrics
    60  
    61  		require.Equal(t, int64(3), metrics.Counter.Load())
    62  
    63  		wgContinueUpload.Done() //release all
    64  
    65  		// shut down component
    66  		<-async.Done()
    67  
    68  		require.Equal(t, int64(0), metrics.Counter.Load())
    69  		require.True(t, metrics.DurationTotal.Load() > 0, "duration should be nonzero")
    70  	})
    71  
    72  	t.Run("failed uploads are retried", func(t *testing.T) {
    73  
    74  		callCount := 0
    75  
    76  		wg := sync.WaitGroup{}
    77  		wg.Add(1)
    78  
    79  		uploader := &DummyUploader{
    80  			f: func() error {
    81  				// force an upload error to test that upload is retried 3 times
    82  				if callCount < 3 {
    83  					callCount++
    84  					return fmt.Errorf("artificial upload error")
    85  				}
    86  				wg.Done()
    87  				return nil
    88  			},
    89  		}
    90  
    91  		async := NewAsyncUploader(uploader, 1*time.Nanosecond, 5, zerolog.Nop(), &metrics.NoopCollector{})
    92  
    93  		err := async.Upload(computationResult)
    94  		require.NoError(t, err)
    95  
    96  		wg.Wait()
    97  
    98  		require.Equal(t, 3, callCount)
    99  	})
   100  
   101  	// This test shuts down the async uploader right after the upload has started. The upload has an error to force
   102  	// the retry mechanism to kick in (under normal circumstances). Since the component is shutting down, the retry
   103  	// should not kick in.
   104  	//
   105  	// sequence of events:
   106  	// 1. create async uploader and initiate upload with an error - to force retrying
   107  	// 2. shut down async uploader right after upload initiated (not completed)
   108  	// 3. assert that upload called only once even when trying to use retry mechanism
   109  	t.Run("stopping component stops retrying", func(t *testing.T) {
   110  		testutils.SkipUnless(t, testutils.TEST_FLAKY, "flaky")
   111  
   112  		callCount := 0
   113  		t.Log("test started grID:", string(bytes.Fields(debug.Stack())[1]))
   114  
   115  		// this wait group ensures that async uploader has a chance to start the upload before component is shut down
   116  		// otherwise, there's a race condition that can happen where the component can shut down before the async uploader
   117  		// has a chance to start the upload
   118  		wgUploadStarted := sync.WaitGroup{}
   119  		wgUploadStarted.Add(1)
   120  
   121  		// this wait group ensures that async uploader won't send an error (to test if retry will kick in) until
   122  		// the component has initiated shutting down (which should stop retry from working)
   123  		wgShutdownStarted := sync.WaitGroup{}
   124  		wgShutdownStarted.Add(1)
   125  		t.Log("added 1 to wait group grID:", string(bytes.Fields(debug.Stack())[1]))
   126  
   127  		uploader := &DummyUploader{
   128  			f: func() error {
   129  				t.Log("DummyUploader func() - about to call wgUploadStarted.Done() grID:", string(bytes.Fields(debug.Stack())[1]))
   130  				// signal to main goroutine that upload started, so it can initiate shutting down component
   131  				wgUploadStarted.Done()
   132  
   133  				t.Log("DummyUpload func() waiting for component shutdown to start grID:", string(bytes.Fields(debug.Stack())[1]))
   134  				wgShutdownStarted.Wait()
   135  				t.Log("DummyUploader func() component shutdown started, about to return error grID:", string(bytes.Fields(debug.Stack())[1]))
   136  
   137  				// force an upload error to test that upload is never retried (because component is shut down)
   138  				// normally, we would see retry mechanism kick in and the callCount would be > 1
   139  				// but since component has started shutting down, we expect callCount to be 1
   140  				// In summary, callCount SHOULD be called only once - but we want the test to TRY and call it more than once to prove that it
   141  				// was only called it once. If we changed it to 'callCount < 1' that wouldn't prove that the test tried to call it more than once
   142  				// and wouldn't prove that stopping the component stopped the retry mechanism.
   143  				if callCount < 5 {
   144  					t.Logf("DummyUploader func() incrementing callCount=%d grID: %s", callCount, string(bytes.Fields(debug.Stack())[1]))
   145  					callCount++
   146  					t.Logf("DummyUploader func() about to return error callCount=%d grID: %s", callCount, string(bytes.Fields(debug.Stack())[1]))
   147  					return fmt.Errorf("this should return only once")
   148  				}
   149  				return nil
   150  			},
   151  		}
   152  		t.Log("about to create NewAsyncUploader grID:", string(bytes.Fields(debug.Stack())[1]))
   153  		async := NewAsyncUploader(uploader, 1*time.Nanosecond, 5, zerolog.Nop(), &metrics.NoopCollector{})
   154  		t.Log("about to call async.Upload() grID:", string(bytes.Fields(debug.Stack())[1]))
   155  		err := async.Upload(computationResult) // doesn't matter what we upload
   156  		require.NoError(t, err)
   157  
   158  		// stop component and check that it's fully stopped
   159  		t.Log("about to close async uploader grID:", string(bytes.Fields(debug.Stack())[1]))
   160  
   161  		// wait until upload has started before shutting down the component
   162  		wgUploadStarted.Wait()
   163  
   164  		// stop component and check that it's fully stopped
   165  		t.Log("about to initiate shutdown grID: ", string(bytes.Fields(debug.Stack())[1]))
   166  		c := async.Done()
   167  		t.Log("about to notify upload() that shutdown started and can continue uploading grID:", string(bytes.Fields(debug.Stack())[1]))
   168  		wgShutdownStarted.Done()
   169  		t.Log("about to check async done channel is closed grID:", string(bytes.Fields(debug.Stack())[1]))
   170  		unittest2.RequireCloseBefore(t, c, 1*time.Second, "async uploader not closed in time")
   171  
   172  		t.Log("about to check if callCount is 1 grID:", string(bytes.Fields(debug.Stack())[1]))
   173  		require.Equal(t, 1, callCount)
   174  	})
   175  
   176  	t.Run("onComplete callback called if set", func(t *testing.T) {
   177  		var onCompleteCallbackCalled = false
   178  
   179  		wgUploadCalleded := sync.WaitGroup{}
   180  		wgUploadCalleded.Add(1)
   181  
   182  		uploader := &DummyUploader{
   183  			f: func() error {
   184  				wgUploadCalleded.Done()
   185  				return nil
   186  			},
   187  		}
   188  
   189  		async := NewAsyncUploader(uploader, 1*time.Nanosecond, 1, zerolog.Nop(), &DummyCollector{})
   190  		async.SetOnCompleteCallback(func(computationResult *execution.ComputationResult, err error) {
   191  			onCompleteCallbackCalled = true
   192  		})
   193  
   194  		err := async.Upload(computationResult)
   195  		require.NoError(t, err)
   196  
   197  		wgUploadCalleded.Wait()
   198  		<-async.Done()
   199  
   200  		require.True(t, onCompleteCallbackCalled)
   201  	})
   202  }
   203  
   204  // DummyUploader is an Uploader implementation with an Upload() callback
   205  type DummyUploader struct {
   206  	f func() error
   207  }
   208  
   209  func (d *DummyUploader) Upload(_ *execution.ComputationResult) error {
   210  	return d.f()
   211  }
   212  
   213  // FailingUploader mocks upload failure cases
   214  type FailingUploader struct {
   215  	failTimes int
   216  	callCount int
   217  }
   218  
   219  func (d *FailingUploader) Upload(_ *execution.ComputationResult) error {
   220  	defer func() {
   221  		d.callCount++
   222  	}()
   223  
   224  	if d.callCount <= d.failTimes {
   225  		return fmt.Errorf("an artificial error")
   226  	}
   227  
   228  	return nil
   229  }
   230  
   231  // DummyCollector is test uploader metrics implementation
   232  type DummyCollector struct {
   233  	metrics.NoopCollector
   234  	Counter       atomic.Int64
   235  	DurationTotal atomic.Int64
   236  }
   237  
   238  func (d *DummyCollector) ExecutionBlockDataUploadStarted() {
   239  	d.Counter.Inc()
   240  }
   241  
   242  func (d *DummyCollector) ExecutionBlockDataUploadFinished(dur time.Duration) {
   243  	d.Counter.Dec()
   244  	d.DurationTotal.Add(dur.Nanoseconds())
   245  }