github.com/matrixorigin/matrixone@v1.2.0/pkg/taskservice/task_runner_test.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package taskservice
    16  
    17  import (
    18  	"context"
    19  	"sync"
    20  	"sync/atomic"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/lni/goutils/leaktest"
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    27  	"github.com/matrixorigin/matrixone/pkg/logutil"
    28  	"github.com/matrixorigin/matrixone/pkg/pb/task"
    29  	"github.com/stretchr/testify/assert"
    30  	"github.com/stretchr/testify/require"
    31  	"go.uber.org/zap"
    32  )
    33  
    34  func TestRunTask(t *testing.T) {
    35  	runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) {
    36  		c := make(chan struct{})
    37  		r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error {
    38  			defer close(c)
    39  			return nil
    40  		})
    41  		mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t1"))
    42  		mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID})
    43  		<-c
    44  	}, WithRunnerParallelism(1),
    45  		WithRunnerFetchInterval(time.Millisecond))
    46  }
    47  
    48  func TestRunTasksInParallel(t *testing.T) {
    49  	runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) {
    50  		wg := &sync.WaitGroup{}
    51  		wg.Add(2)
    52  		r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error {
    53  			defer wg.Done()
    54  			time.Sleep(time.Millisecond * 200)
    55  			return nil
    56  		})
    57  		mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t1"))
    58  		mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t2"))
    59  		mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID, "t2": r.runnerID})
    60  		wg.Wait()
    61  	}, WithRunnerParallelism(2),
    62  		WithRunnerFetchInterval(time.Millisecond))
    63  }
    64  
    65  func TestTooMuchTasksWillBlockAndEventuallyCanBeExecuted(t *testing.T) {
    66  	runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) {
    67  		c := make(chan struct{})
    68  		continueC := make(chan struct{})
    69  		v := atomic.Uint32{}
    70  		wait := time.Millisecond * 200
    71  		r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error {
    72  			n := v.Add(1)
    73  			if n == 2 {
    74  				defer close(c) // second task close the chan
    75  			}
    76  			if n == 1 {
    77  				time.Sleep(wait) // block first task
    78  				<-continueC
    79  			}
    80  
    81  			return nil
    82  		})
    83  		mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t1"))
    84  		mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t2"))
    85  		mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID, "t2": r.runnerID})
    86  		select {
    87  		case <-c:
    88  			assert.Fail(t, "must block")
    89  		case <-time.After(wait):
    90  			assert.Equal(t, uint32(1), v.Load())
    91  			close(continueC) // second task can be run
    92  		}
    93  		<-c
    94  		assert.Equal(t, uint32(2), v.Load())
    95  	}, WithRunnerParallelism(1),
    96  		WithRunnerFetchInterval(time.Millisecond))
    97  }
    98  
    99  func TestHeartbeatWithRunningTask(t *testing.T) {
   100  	runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) {
   101  		c := make(chan struct{})
   102  		completeC := make(chan struct{})
   103  		n := atomic.Uint32{}
   104  		r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error {
   105  			if n.Add(1) == 2 {
   106  				close(c)
   107  			}
   108  			<-completeC
   109  			return nil
   110  		})
   111  		mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t1"))
   112  		mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t2"))
   113  		mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID, "t2": r.runnerID})
   114  		<-c
   115  		mustWaitTestTaskHasHeartbeat(t, store, 2)
   116  		close(completeC)
   117  	}, WithRunnerParallelism(2),
   118  		WithRunnerHeartbeatInterval(time.Millisecond),
   119  		WithRunnerFetchInterval(time.Millisecond))
   120  }
   121  
   122  func TestRunTaskWithRetry(t *testing.T) {
   123  	runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) {
   124  		c := make(chan struct{})
   125  		n := atomic.Uint32{}
   126  		r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error {
   127  			if n.Add(1) == 1 {
   128  				return moerr.NewInternalError(context.TODO(), "error")
   129  			}
   130  			close(c)
   131  			return nil
   132  		})
   133  		v := newTestAsyncTask("t1")
   134  		v.Metadata.Options.MaxRetryTimes = 1
   135  		mustAddTestAsyncTask(t, store, 1, v)
   136  		mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID})
   137  		<-c
   138  		assert.Equal(t, uint32(2), n.Load())
   139  	}, WithRunnerParallelism(2),
   140  		WithRunnerHeartbeatInterval(time.Millisecond),
   141  		WithRunnerFetchInterval(time.Millisecond))
   142  }
   143  
   144  func TestRunTaskWithDisableRetry(t *testing.T) {
   145  	runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) {
   146  		c := make(chan struct{})
   147  		n := atomic.Uint32{}
   148  		r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error {
   149  			close(c)
   150  			if n.Add(1) == 1 {
   151  				return moerr.NewInternalError(context.TODO(), "error")
   152  			}
   153  			return nil
   154  		})
   155  		v := newTestAsyncTask("t1")
   156  		v.Metadata.Options.MaxRetryTimes = 0
   157  		mustAddTestAsyncTask(t, store, 1, v)
   158  		mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID})
   159  		<-c
   160  		mustWaitTestTaskHasExecuteResult(t, store, 1)
   161  		v = mustGetTestAsyncTask(t, store, 1)[0]
   162  		assert.Equal(t, task.ResultCode_Failed, v.ExecuteResult.Code)
   163  	}, WithRunnerParallelism(2),
   164  		WithRunnerHeartbeatInterval(time.Millisecond),
   165  		WithRunnerFetchInterval(time.Millisecond))
   166  }
   167  
   168  func TestCancelRunningTask(t *testing.T) {
   169  	runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) {
   170  		cancelC := make(chan struct{})
   171  		r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error {
   172  			select {
   173  			case <-ctx.Done():
   174  			case cancelC <- struct{}{}:
   175  			}
   176  			return nil
   177  		})
   178  		v := newTestAsyncTask("t1")
   179  		v.Metadata.Options.MaxRetryTimes = 0
   180  		mustAddTestAsyncTask(t, store, 1, v)
   181  		mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID})
   182  		v = mustGetTestAsyncTask(t, store, 1)[0]
   183  		v.Epoch++
   184  		mustUpdateTestAsyncTask(t, store, 1, []task.AsyncTask{v})
   185  		<-cancelC
   186  		for v := mustGetTestAsyncTask(t, store, 1)[0]; v.Status != task.TaskStatus_Completed; v = mustGetTestAsyncTask(t, store, 1)[0] {
   187  			time.Sleep(10 * time.Millisecond)
   188  		}
   189  		r.runningTasks.RLock()
   190  		defer r.runningTasks.RUnlock()
   191  		assert.Equal(t, 0, len(r.runningTasks.m))
   192  	}, WithRunnerParallelism(2),
   193  		WithRunnerHeartbeatInterval(time.Millisecond),
   194  		WithRunnerFetchInterval(time.Millisecond))
   195  }
   196  
   197  func runTaskRunnerTest(t *testing.T,
   198  	testFunc func(r *taskRunner, s TaskService, store TaskStorage),
   199  	opts ...RunnerOption) {
   200  	defer leaktest.AfterTest(t)()
   201  	store := NewMemTaskStorage()
   202  	s := NewTaskService(runtime.DefaultRuntime(), store)
   203  	defer func() {
   204  		assert.NoError(t, s.Close())
   205  	}()
   206  
   207  	opts = append(opts, WithRunnerLogger(logutil.GetPanicLoggerWithLevel(zap.DebugLevel)))
   208  	r := NewTaskRunner("r1", s, func(string) bool {
   209  		return true
   210  	}, opts...)
   211  
   212  	require.NoError(t, r.Start())
   213  	defer func() {
   214  		require.NoError(t, r.Stop())
   215  	}()
   216  	testFunc(r.(*taskRunner), s, store)
   217  }
   218  
   219  func mustAllocTestTask(t *testing.T, s TaskService, store TaskStorage, alloc map[string]string) {
   220  	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
   221  	defer cancel()
   222  
   223  	tasks := mustGetTestAsyncTask(t, store, len(alloc), WithTaskStatusCond(task.TaskStatus_Created))
   224  	n := 0
   225  	for _, v := range tasks {
   226  		if runner, ok := alloc[v.Metadata.ID]; ok {
   227  			require.NoError(t, s.Allocate(ctx, v, runner))
   228  			n++
   229  		}
   230  	}
   231  	if n != len(alloc) {
   232  		require.Fail(t, "task not found")
   233  	}
   234  }
   235  
   236  func mustWaitTestTaskHasHeartbeat(t *testing.T, store TaskStorage, expectHasHeartbeatCount int) {
   237  	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
   238  	defer cancel()
   239  
   240  	for {
   241  		select {
   242  		case <-ctx.Done():
   243  			require.Fail(t, "wait heatbeat timeout")
   244  		default:
   245  			tasks := mustGetTestAsyncTask(t, store, expectHasHeartbeatCount,
   246  				WithTaskStatusCond(task.TaskStatus_Running))
   247  			n := 0
   248  			for _, v := range tasks {
   249  				if v.LastHeartbeat > 0 {
   250  					n++
   251  				}
   252  			}
   253  			if n == len(tasks) {
   254  				return
   255  			}
   256  		}
   257  	}
   258  }
   259  
   260  func mustWaitTestTaskHasExecuteResult(t *testing.T, store TaskStorage, expectCount int) {
   261  	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
   262  	defer cancel()
   263  
   264  	for {
   265  		select {
   266  		case <-ctx.Done():
   267  			require.Fail(t, "wait execute result timeout")
   268  		default:
   269  			tasks, err := store.QueryAsyncTask(ctx, WithTaskStatusCond(task.TaskStatus_Completed))
   270  			require.NoError(t, err)
   271  			if len(tasks) != expectCount {
   272  				break
   273  			}
   274  			n := 0
   275  			for _, v := range tasks {
   276  				if v.ExecuteResult != nil {
   277  					n++
   278  				}
   279  			}
   280  			if n == len(tasks) {
   281  				return
   282  			}
   283  		}
   284  	}
   285  }