github.com/matrixorigin/matrixone@v1.2.0/pkg/taskservice/task_runner_test.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package taskservice 16 17 import ( 18 "context" 19 "sync" 20 "sync/atomic" 21 "testing" 22 "time" 23 24 "github.com/lni/goutils/leaktest" 25 "github.com/matrixorigin/matrixone/pkg/common/moerr" 26 "github.com/matrixorigin/matrixone/pkg/common/runtime" 27 "github.com/matrixorigin/matrixone/pkg/logutil" 28 "github.com/matrixorigin/matrixone/pkg/pb/task" 29 "github.com/stretchr/testify/assert" 30 "github.com/stretchr/testify/require" 31 "go.uber.org/zap" 32 ) 33 34 func TestRunTask(t *testing.T) { 35 runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) { 36 c := make(chan struct{}) 37 r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error { 38 defer close(c) 39 return nil 40 }) 41 mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t1")) 42 mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID}) 43 <-c 44 }, WithRunnerParallelism(1), 45 WithRunnerFetchInterval(time.Millisecond)) 46 } 47 48 func TestRunTasksInParallel(t *testing.T) { 49 runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) { 50 wg := &sync.WaitGroup{} 51 wg.Add(2) 52 r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error { 53 defer wg.Done() 54 time.Sleep(time.Millisecond * 200) 55 return nil 56 }) 57 mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t1")) 58 mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t2")) 59 mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID, "t2": r.runnerID}) 60 wg.Wait() 61 }, WithRunnerParallelism(2), 62 WithRunnerFetchInterval(time.Millisecond)) 63 } 64 65 func TestTooMuchTasksWillBlockAndEventuallyCanBeExecuted(t *testing.T) { 66 runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) { 67 c := make(chan struct{}) 68 continueC := make(chan struct{}) 69 v := atomic.Uint32{} 70 wait := time.Millisecond * 200 71 r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error { 72 n := v.Add(1) 73 if n == 2 { 74 defer close(c) // second task close the chan 75 } 76 if n == 1 { 77 time.Sleep(wait) // block first task 78 <-continueC 79 } 80 81 return nil 82 }) 83 mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t1")) 84 mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t2")) 85 mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID, "t2": r.runnerID}) 86 select { 87 case <-c: 88 assert.Fail(t, "must block") 89 case <-time.After(wait): 90 assert.Equal(t, uint32(1), v.Load()) 91 close(continueC) // second task can be run 92 } 93 <-c 94 assert.Equal(t, uint32(2), v.Load()) 95 }, WithRunnerParallelism(1), 96 WithRunnerFetchInterval(time.Millisecond)) 97 } 98 99 func TestHeartbeatWithRunningTask(t *testing.T) { 100 runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) { 101 c := make(chan struct{}) 102 completeC := make(chan struct{}) 103 n := atomic.Uint32{} 104 r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error { 105 if n.Add(1) == 2 { 106 close(c) 107 } 108 <-completeC 109 return nil 110 }) 111 mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t1")) 112 mustAddTestAsyncTask(t, store, 1, newTestAsyncTask("t2")) 113 mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID, "t2": r.runnerID}) 114 <-c 115 mustWaitTestTaskHasHeartbeat(t, store, 2) 116 close(completeC) 117 }, WithRunnerParallelism(2), 118 WithRunnerHeartbeatInterval(time.Millisecond), 119 WithRunnerFetchInterval(time.Millisecond)) 120 } 121 122 func TestRunTaskWithRetry(t *testing.T) { 123 runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) { 124 c := make(chan struct{}) 125 n := atomic.Uint32{} 126 r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error { 127 if n.Add(1) == 1 { 128 return moerr.NewInternalError(context.TODO(), "error") 129 } 130 close(c) 131 return nil 132 }) 133 v := newTestAsyncTask("t1") 134 v.Metadata.Options.MaxRetryTimes = 1 135 mustAddTestAsyncTask(t, store, 1, v) 136 mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID}) 137 <-c 138 assert.Equal(t, uint32(2), n.Load()) 139 }, WithRunnerParallelism(2), 140 WithRunnerHeartbeatInterval(time.Millisecond), 141 WithRunnerFetchInterval(time.Millisecond)) 142 } 143 144 func TestRunTaskWithDisableRetry(t *testing.T) { 145 runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) { 146 c := make(chan struct{}) 147 n := atomic.Uint32{} 148 r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error { 149 close(c) 150 if n.Add(1) == 1 { 151 return moerr.NewInternalError(context.TODO(), "error") 152 } 153 return nil 154 }) 155 v := newTestAsyncTask("t1") 156 v.Metadata.Options.MaxRetryTimes = 0 157 mustAddTestAsyncTask(t, store, 1, v) 158 mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID}) 159 <-c 160 mustWaitTestTaskHasExecuteResult(t, store, 1) 161 v = mustGetTestAsyncTask(t, store, 1)[0] 162 assert.Equal(t, task.ResultCode_Failed, v.ExecuteResult.Code) 163 }, WithRunnerParallelism(2), 164 WithRunnerHeartbeatInterval(time.Millisecond), 165 WithRunnerFetchInterval(time.Millisecond)) 166 } 167 168 func TestCancelRunningTask(t *testing.T) { 169 runTaskRunnerTest(t, func(r *taskRunner, s TaskService, store TaskStorage) { 170 cancelC := make(chan struct{}) 171 r.RegisterExecutor(0, func(ctx context.Context, task task.Task) error { 172 select { 173 case <-ctx.Done(): 174 case cancelC <- struct{}{}: 175 } 176 return nil 177 }) 178 v := newTestAsyncTask("t1") 179 v.Metadata.Options.MaxRetryTimes = 0 180 mustAddTestAsyncTask(t, store, 1, v) 181 mustAllocTestTask(t, s, store, map[string]string{"t1": r.runnerID}) 182 v = mustGetTestAsyncTask(t, store, 1)[0] 183 v.Epoch++ 184 mustUpdateTestAsyncTask(t, store, 1, []task.AsyncTask{v}) 185 <-cancelC 186 for v := mustGetTestAsyncTask(t, store, 1)[0]; v.Status != task.TaskStatus_Completed; v = mustGetTestAsyncTask(t, store, 1)[0] { 187 time.Sleep(10 * time.Millisecond) 188 } 189 r.runningTasks.RLock() 190 defer r.runningTasks.RUnlock() 191 assert.Equal(t, 0, len(r.runningTasks.m)) 192 }, WithRunnerParallelism(2), 193 WithRunnerHeartbeatInterval(time.Millisecond), 194 WithRunnerFetchInterval(time.Millisecond)) 195 } 196 197 func runTaskRunnerTest(t *testing.T, 198 testFunc func(r *taskRunner, s TaskService, store TaskStorage), 199 opts ...RunnerOption) { 200 defer leaktest.AfterTest(t)() 201 store := NewMemTaskStorage() 202 s := NewTaskService(runtime.DefaultRuntime(), store) 203 defer func() { 204 assert.NoError(t, s.Close()) 205 }() 206 207 opts = append(opts, WithRunnerLogger(logutil.GetPanicLoggerWithLevel(zap.DebugLevel))) 208 r := NewTaskRunner("r1", s, func(string) bool { 209 return true 210 }, opts...) 211 212 require.NoError(t, r.Start()) 213 defer func() { 214 require.NoError(t, r.Stop()) 215 }() 216 testFunc(r.(*taskRunner), s, store) 217 } 218 219 func mustAllocTestTask(t *testing.T, s TaskService, store TaskStorage, alloc map[string]string) { 220 ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) 221 defer cancel() 222 223 tasks := mustGetTestAsyncTask(t, store, len(alloc), WithTaskStatusCond(task.TaskStatus_Created)) 224 n := 0 225 for _, v := range tasks { 226 if runner, ok := alloc[v.Metadata.ID]; ok { 227 require.NoError(t, s.Allocate(ctx, v, runner)) 228 n++ 229 } 230 } 231 if n != len(alloc) { 232 require.Fail(t, "task not found") 233 } 234 } 235 236 func mustWaitTestTaskHasHeartbeat(t *testing.T, store TaskStorage, expectHasHeartbeatCount int) { 237 ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) 238 defer cancel() 239 240 for { 241 select { 242 case <-ctx.Done(): 243 require.Fail(t, "wait heatbeat timeout") 244 default: 245 tasks := mustGetTestAsyncTask(t, store, expectHasHeartbeatCount, 246 WithTaskStatusCond(task.TaskStatus_Running)) 247 n := 0 248 for _, v := range tasks { 249 if v.LastHeartbeat > 0 { 250 n++ 251 } 252 } 253 if n == len(tasks) { 254 return 255 } 256 } 257 } 258 } 259 260 func mustWaitTestTaskHasExecuteResult(t *testing.T, store TaskStorage, expectCount int) { 261 ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) 262 defer cancel() 263 264 for { 265 select { 266 case <-ctx.Done(): 267 require.Fail(t, "wait execute result timeout") 268 default: 269 tasks, err := store.QueryAsyncTask(ctx, WithTaskStatusCond(task.TaskStatus_Completed)) 270 require.NoError(t, err) 271 if len(tasks) != expectCount { 272 break 273 } 274 n := 0 275 for _, v := range tasks { 276 if v.ExecuteResult != nil { 277 n++ 278 } 279 } 280 if n == len(tasks) { 281 return 282 } 283 } 284 } 285 }