github.com/matrixorigin/matrixone@v0.7.0/pkg/tests/service/task_test.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package service 16 17 import ( 18 "context" 19 "testing" 20 "time" 21 22 "github.com/lni/goutils/leaktest" 23 "github.com/matrixorigin/matrixone/pkg/pb/task" 24 "github.com/matrixorigin/matrixone/pkg/taskservice" 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 "go.uber.org/zap" 28 ) 29 30 func waitTaskScheduled(t *testing.T, ctx context.Context, taskService taskservice.TaskService) string { 31 i := 0 32 for { 33 select { 34 case <-ctx.Done(): 35 assert.FailNow(t, "task not allocated") 36 default: 37 t.Logf("iteration: %d", i) 38 tasks, err := taskService.QueryTask(context.TODO(), 39 taskservice.WithTaskIDDesc()) 40 require.NoError(t, err) 41 42 if len(tasks) != 0 && tasks[0].TaskRunner != "" { 43 t.Logf("task %d allocated on %s", tasks[0].ID, tasks[0].TaskRunner) 44 t.Logf("num task: %d", len(tasks)) 45 return tasks[0].TaskRunner 46 } 47 time.Sleep(300 * time.Millisecond) 48 i++ 49 } 50 } 51 } 52 53 func waitTaskRescheduled(t *testing.T, ctx context.Context, taskService taskservice.TaskService, uuid string) { 54 i := 0 55 for { 56 select { 57 case <-ctx.Done(): 58 assert.FailNow(t, "task not reallocated") 59 default: 60 t.Logf("iteration: %d", i) 61 tasks, err := taskService.QueryTask(context.TODO(), 62 taskservice.WithTaskIDDesc(), 63 taskservice.WithTaskStatusCond(taskservice.EQ, task.TaskStatus_Running)) 64 require.NoError(t, err) 65 if tasks[0].TaskRunner == uuid { 66 t.Logf("task %d is still on %s", tasks[0].ID, tasks[0].TaskRunner) 67 time.Sleep(1 * time.Second) 68 i++ 69 continue 70 } else { 71 t.Logf("task %d reallocated on %s", tasks[0].ID, tasks[0].TaskRunner) 72 return 73 } 74 } 75 } 76 } 77 78 func TestTaskServiceCanCreate(t *testing.T) { 79 defer leaktest.AfterTest(t)() 80 if testing.Short() { 81 t.Skip("skipping in short mode.") 82 return 83 } 84 85 // initialize cluster 86 c, err := NewCluster(t, DefaultOptions(). 87 WithCNServiceNum(1). 88 WithCNShardNum(1). 89 WithDNServiceNum(1). 90 WithDNShardNum(1). 91 WithLogServiceNum(3). 92 WithLogShardNum(1)) 93 require.NoError(t, err) 94 95 // close the cluster 96 defer func(c Cluster) { 97 require.NoError(t, c.Close()) 98 }(c) 99 // start the cluster 100 require.NoError(t, c.Start()) 101 102 t.Log("cluster log svcs length:", len(c.(*testCluster).log.svcs)) 103 104 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 105 defer cancel() 106 c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0) 107 c.WaitDNStoreTaskServiceCreatedIndexed(ctx, 0) 108 c.WaitLogStoreTaskServiceCreatedIndexed(ctx, 0) 109 c.WaitLogStoreTaskServiceCreatedIndexed(ctx, 1) 110 c.WaitLogStoreTaskServiceCreatedIndexed(ctx, 2) 111 } 112 113 func TestTaskSchedulerCanAllocateTask(t *testing.T) { 114 defer leaktest.AfterTest(t)() 115 if testing.Short() { 116 t.Skip("skipping in short mode.") 117 return 118 } 119 120 opt := DefaultOptions() 121 // initialize cluster 122 c, err := NewCluster(t, opt) 123 require.NoError(t, err) 124 125 // close the cluster 126 defer func(c Cluster) { 127 require.NoError(t, c.Close()) 128 }(c) 129 // start the cluster 130 require.NoError(t, c.Start()) 131 132 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 133 defer cancel() 134 135 c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0) 136 indexed, err := c.GetCNServiceIndexed(0) 137 require.NoError(t, err) 138 taskService, ok := indexed.GetTaskService() 139 require.True(t, ok) 140 141 i := 0 142 for { 143 select { 144 case <-ctx.Done(): 145 require.FailNow(t, "failed to query tasks") 146 default: 147 } 148 t.Logf("iter %d", i) 149 tasks, err := taskService.QueryTask(ctx) 150 require.NoError(t, err) 151 if len(tasks) == 0 { 152 time.Sleep(time.Second) 153 i++ 154 continue 155 } 156 require.Equal(t, 1, len(tasks)) 157 t.Logf("task status: %s", tasks[0].Status) 158 break 159 } 160 161 waitTaskScheduled(t, ctx, taskService) 162 } 163 164 func TestTaskSchedulerCanReallocateTask(t *testing.T) { 165 defer leaktest.AfterTest(t)() 166 if testing.Short() { 167 t.Skip("skipping in short mode.") 168 return 169 } 170 171 cnSvcNum := 2 172 opt := DefaultOptions(). 173 WithCNServiceNum(cnSvcNum) 174 175 // initialize cluster 176 c, err := NewCluster(t, opt) 177 require.NoError(t, err) 178 179 halt := make(chan bool) 180 taskExecutor := func(ctx context.Context, task task.Task) error { 181 t.Logf("task %d is running", task.ID) 182 select { 183 case <-ctx.Done(): 184 case <-halt: 185 } 186 return nil 187 } 188 189 defer func(c Cluster, halt chan bool) { 190 halt <- true 191 require.NoError(t, c.Close()) 192 close(halt) 193 }(c, halt) 194 // start the cluster 195 require.NoError(t, c.Start()) 196 197 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 198 defer cancel() 199 200 c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0) 201 c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 1) 202 cn1, err := c.GetCNServiceIndexed(0) 203 require.NoError(t, err) 204 205 cn2, err := c.GetCNServiceIndexed(1) 206 require.NoError(t, err) 207 cn1.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor) 208 cn2.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor) 209 210 taskService, ok := cn1.GetTaskService() 211 require.True(t, ok) 212 err = taskService.Create(context.TODO(), task.TaskMetadata{ID: "a", Executor: task.TaskCode_TestOnly}) 213 require.NoError(t, err) 214 215 tasks, err := taskService.QueryTask(ctx, 216 taskservice.WithTaskExecutorCond(taskservice.EQ, task.TaskCode_TestOnly)) 217 require.NoError(t, err) 218 require.Equal(t, 1, len(tasks)) 219 220 uuid1 := waitTaskScheduled(t, ctx, taskService) 221 222 err = c.CloseCNService(uuid1) 223 require.NoError(t, err) 224 225 if uuid1 == cn1.ID() { 226 taskService, ok = cn2.GetTaskService() 227 require.True(t, ok) 228 } 229 waitTaskRescheduled(t, ctx, taskService, uuid1) 230 } 231 232 func TestTaskRunner(t *testing.T) { 233 defer leaktest.AfterTest(t)() 234 if testing.Short() { 235 t.Skip("skipping in short mode.") 236 return 237 } 238 239 ch := make(chan int) 240 taskExecutor := func(ctx context.Context, task task.Task) error { 241 t.Logf("task %d is running", task.ID) 242 ch <- int(task.ID) 243 return nil 244 } 245 246 cnSvcNum := 1 247 opt := DefaultOptions(). 248 WithCNServiceNum(cnSvcNum) 249 250 // initialize cluster 251 c, err := NewCluster(t, opt.WithLogLevel(zap.DebugLevel)) 252 require.NoError(t, err) 253 254 // close the cluster 255 defer func(c Cluster) { 256 require.NoError(t, c.Close()) 257 }(c) 258 // start the cluster 259 require.NoError(t, c.Start()) 260 261 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 262 defer cancel() 263 264 c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0) 265 indexed, err := c.GetCNServiceIndexed(0) 266 require.NoError(t, err) 267 268 indexed.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor) 269 270 taskService, ok := indexed.GetTaskService() 271 require.True(t, ok) 272 273 err = taskService.Create(context.TODO(), task.TaskMetadata{ID: "a", Executor: task.TaskCode_TestOnly}) 274 require.NoError(t, err) 275 276 waitTaskScheduled(t, ctx, taskService) 277 278 select { 279 case <-ctx.Done(): 280 assert.FailNow(t, "task not running") 281 case i := <-ch: 282 t.Logf("task %d is completed", i) 283 } 284 } 285 286 func TestCronTask(t *testing.T) { 287 defer leaktest.AfterTest(t)() 288 if testing.Short() { 289 t.Skip("skipping in short mode.") 290 return 291 } 292 293 opt := DefaultOptions() 294 // initialize cluster 295 c, err := NewCluster(t, opt.WithLogLevel(zap.DebugLevel)) 296 require.NoError(t, err) 297 298 ch := make(chan int) 299 taskExecutor := func(ctx context.Context, task task.Task) error { 300 t.Logf("task %d is running", task.ID) 301 select { 302 case ch <- int(task.ID): 303 case <-ctx.Done(): 304 return nil 305 } 306 return nil 307 } 308 309 // close the cluster 310 defer func(c Cluster) { 311 require.NoError(t, c.Close()) 312 }(c) 313 // start the cluster 314 require.NoError(t, c.Start()) 315 316 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 317 defer cancel() 318 319 c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0) 320 indexed, err := c.GetCNServiceIndexed(0) 321 require.NoError(t, err) 322 323 indexed.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor) 324 325 taskService, ok := indexed.GetTaskService() 326 require.True(t, ok) 327 328 require.NoError(t, taskService.CreateCronTask(context.TODO(), 329 task.TaskMetadata{ 330 ID: "a", 331 Executor: task.TaskCode_TestOnly, 332 }, 333 "*/1 * * * * *", // every 1 second 334 )) 335 336 waitChannelFull(t, ctx, ch, 3) 337 } 338 339 func waitChannelFull(t *testing.T, ctx context.Context, ch chan int, expected int) { 340 i := 0 341 received := make([]int, 0, expected) 342 for { 343 select { 344 case <-ctx.Done(): 345 assert.FailNow(t, "cron task not repeated enough") 346 case c := <-ch: 347 received = append(received, c) 348 if len(received) == expected { 349 t.Logf("received %d numbers", expected) 350 return 351 } 352 default: 353 t.Logf("iteration: %d", i) 354 time.Sleep(time.Second) 355 i++ 356 } 357 } 358 }