github.com/matrixorigin/matrixone@v1.2.0/pkg/tests/service/task_test.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package service
    16  
    17  import (
    18  	"context"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/lni/goutils/leaktest"
    23  	"github.com/matrixorigin/matrixone/pkg/pb/task"
    24  	"github.com/matrixorigin/matrixone/pkg/taskservice"
    25  	"github.com/stretchr/testify/assert"
    26  	"github.com/stretchr/testify/require"
    27  	"go.uber.org/zap"
    28  )
    29  
    30  func waitTaskScheduled(t *testing.T, ctx context.Context, taskService taskservice.TaskService) string {
    31  	i := 0
    32  	for {
    33  		select {
    34  		case <-ctx.Done():
    35  			assert.FailNow(t, "task not allocated")
    36  		default:
    37  			t.Logf("iteration: %d", i)
    38  			tasks, err := taskService.QueryAsyncTask(context.TODO(),
    39  				taskservice.WithTaskIDDesc())
    40  			require.NoError(t, err)
    41  
    42  			if len(tasks) != 0 && tasks[0].TaskRunner != "" {
    43  				t.Logf("task %d allocated on %s", tasks[0].ID, tasks[0].TaskRunner)
    44  				t.Logf("num task: %d", len(tasks))
    45  				return tasks[0].TaskRunner
    46  			}
    47  			time.Sleep(300 * time.Millisecond)
    48  			i++
    49  		}
    50  	}
    51  }
    52  
    53  func waitTaskRescheduled(t *testing.T, ctx context.Context, taskService taskservice.TaskService, uuid string) {
    54  	i := 0
    55  	for {
    56  		select {
    57  		case <-ctx.Done():
    58  			assert.FailNow(t, "task not reallocated")
    59  		default:
    60  			t.Logf("iteration: %d", i)
    61  			tasks, err := taskService.QueryAsyncTask(context.TODO(),
    62  				taskservice.WithTaskIDDesc(),
    63  				taskservice.WithTaskStatusCond(task.TaskStatus_Running))
    64  			require.NoError(t, err)
    65  			if tasks[0].TaskRunner == uuid {
    66  				t.Logf("task %d is still on %s", tasks[0].ID, tasks[0].TaskRunner)
    67  				time.Sleep(1 * time.Second)
    68  				i++
    69  				continue
    70  			} else {
    71  				t.Logf("task %d reallocated on %s", tasks[0].ID, tasks[0].TaskRunner)
    72  				return
    73  			}
    74  		}
    75  	}
    76  }
    77  
    78  func TestTaskServiceCanCreate(t *testing.T) {
    79  	defer leaktest.AfterTest(t)()
    80  	if testing.Short() {
    81  		t.Skip("skipping in short mode.")
    82  		return
    83  	}
    84  	ctx := context.Background()
    85  
    86  	// initialize cluster
    87  	c, err := NewCluster(ctx, t, DefaultOptions().
    88  		WithCNServiceNum(1).
    89  		WithCNShardNum(1).
    90  		WithTNServiceNum(1).
    91  		WithTNShardNum(1).
    92  		WithLogServiceNum(3).
    93  		WithLogShardNum(1))
    94  	require.NoError(t, err)
    95  
    96  	// close the cluster
    97  	defer func(c Cluster) {
    98  		require.NoError(t, c.Close())
    99  	}(c)
   100  	// start the cluster
   101  	require.NoError(t, c.Start())
   102  
   103  	t.Log("cluster log svcs length:", len(c.(*testCluster).log.svcs))
   104  
   105  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   106  	defer cancel()
   107  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   108  	c.WaitTNStoreTaskServiceCreatedIndexed(ctx, 0)
   109  	c.WaitLogStoreTaskServiceCreatedIndexed(ctx, 0)
   110  	c.WaitLogStoreTaskServiceCreatedIndexed(ctx, 1)
   111  	c.WaitLogStoreTaskServiceCreatedIndexed(ctx, 2)
   112  }
   113  
   114  func TestTaskSchedulerCanAllocateTask(t *testing.T) {
   115  	defer leaktest.AfterTest(t)()
   116  	if testing.Short() {
   117  		t.Skip("skipping in short mode.")
   118  		return
   119  	}
   120  	ctx := context.Background()
   121  
   122  	opt := DefaultOptions()
   123  	// initialize cluster
   124  	c, err := NewCluster(ctx, t, opt)
   125  	require.NoError(t, err)
   126  
   127  	// close the cluster
   128  	defer func(c Cluster) {
   129  		require.NoError(t, c.Close())
   130  	}(c)
   131  	// start the cluster
   132  	require.NoError(t, c.Start())
   133  
   134  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   135  	defer cancel()
   136  
   137  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   138  	indexed, err := c.GetCNServiceIndexed(0)
   139  	require.NoError(t, err)
   140  	taskService, ok := indexed.GetTaskService()
   141  	require.True(t, ok)
   142  
   143  	i := 0
   144  	for {
   145  		select {
   146  		case <-ctx.Done():
   147  			require.FailNow(t, "failed to query tasks")
   148  		default:
   149  		}
   150  		t.Logf("iter %d", i)
   151  		tasks, err := taskService.QueryAsyncTask(ctx)
   152  		require.NoError(t, err)
   153  		if len(tasks) == 0 {
   154  			time.Sleep(time.Second)
   155  			i++
   156  			continue
   157  		}
   158  		require.Equal(t, 1, len(tasks))
   159  		t.Logf("task status: %s", tasks[0].Status)
   160  		break
   161  	}
   162  
   163  	waitTaskScheduled(t, ctx, taskService)
   164  }
   165  
   166  func TestTaskSchedulerCanReallocateTask(t *testing.T) {
   167  	defer leaktest.AfterTest(t)()
   168  	if testing.Short() {
   169  		t.Skip("skipping in short mode.")
   170  		return
   171  	}
   172  	ctx := context.Background()
   173  
   174  	cnSvcNum := 2
   175  	opt := DefaultOptions().
   176  		WithCNServiceNum(cnSvcNum)
   177  
   178  	// initialize cluster
   179  	c, err := NewCluster(ctx, t, opt)
   180  	require.NoError(t, err)
   181  
   182  	halt := make(chan bool)
   183  	taskExecutor := func(ctx context.Context, task task.Task) error {
   184  		t.Logf("task %d is running", task.GetID())
   185  		select {
   186  		case <-ctx.Done():
   187  		case <-halt:
   188  		}
   189  		return nil
   190  	}
   191  
   192  	defer func(c Cluster, halt chan bool) {
   193  		halt <- true
   194  		require.NoError(t, c.Close())
   195  		close(halt)
   196  	}(c, halt)
   197  	// start the cluster
   198  	require.NoError(t, c.Start())
   199  
   200  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   201  	defer cancel()
   202  
   203  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   204  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 1)
   205  	cn1, err := c.GetCNServiceIndexed(0)
   206  	require.NoError(t, err)
   207  
   208  	cn2, err := c.GetCNServiceIndexed(1)
   209  	require.NoError(t, err)
   210  	cn1.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor)
   211  	cn2.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor)
   212  
   213  	taskService, ok := cn1.GetTaskService()
   214  	require.True(t, ok)
   215  	err = taskService.CreateAsyncTask(context.TODO(), task.TaskMetadata{ID: "a", Executor: task.TaskCode_TestOnly})
   216  	require.NoError(t, err)
   217  
   218  	tasks, err := taskService.QueryAsyncTask(ctx,
   219  		taskservice.WithTaskExecutorCond(taskservice.EQ, task.TaskCode_TestOnly))
   220  	require.NoError(t, err)
   221  	require.Equal(t, 1, len(tasks))
   222  
   223  	uuid1 := waitTaskScheduled(t, ctx, taskService)
   224  
   225  	err = c.CloseCNService(uuid1)
   226  	require.NoError(t, err)
   227  
   228  	if uuid1 == cn1.ID() {
   229  		taskService, ok = cn2.GetTaskService()
   230  		require.True(t, ok)
   231  	}
   232  	waitTaskRescheduled(t, ctx, taskService, uuid1)
   233  }
   234  
   235  func TestTaskRunner(t *testing.T) {
   236  	defer leaktest.AfterTest(t)()
   237  	if testing.Short() {
   238  		t.Skip("skipping in short mode.")
   239  		return
   240  	}
   241  	ctx := context.Background()
   242  
   243  	ch := make(chan int)
   244  	taskExecutor := func(_ context.Context, task task.Task) error {
   245  		t.Logf("task %d is running", task.GetID())
   246  		ch <- int(task.GetID())
   247  		return nil
   248  	}
   249  
   250  	cnSvcNum := 1
   251  	opt := DefaultOptions().
   252  		WithCNServiceNum(cnSvcNum)
   253  
   254  	// initialize cluster
   255  	c, err := NewCluster(ctx, t, opt.WithLogLevel(zap.DebugLevel))
   256  	require.NoError(t, err)
   257  
   258  	// close the cluster
   259  	defer func(c Cluster) {
   260  		require.NoError(t, c.Close())
   261  	}(c)
   262  	// start the cluster
   263  	require.NoError(t, c.Start())
   264  
   265  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   266  	defer cancel()
   267  
   268  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   269  	indexed, err := c.GetCNServiceIndexed(0)
   270  	require.NoError(t, err)
   271  
   272  	indexed.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor)
   273  
   274  	taskService, ok := indexed.GetTaskService()
   275  	require.True(t, ok)
   276  
   277  	err = taskService.CreateAsyncTask(context.TODO(), task.TaskMetadata{ID: "a", Executor: task.TaskCode_TestOnly})
   278  	require.NoError(t, err)
   279  
   280  	waitTaskScheduled(t, ctx, taskService)
   281  
   282  	select {
   283  	case <-ctx.Done():
   284  		assert.FailNow(t, "task not running")
   285  	case i := <-ch:
   286  		t.Logf("task %d is completed", i)
   287  	}
   288  }
   289  
   290  func TestCronTask(t *testing.T) {
   291  	defer leaktest.AfterTest(t)()
   292  	if testing.Short() {
   293  		t.Skip("skipping in short mode.")
   294  		return
   295  	}
   296  	ctx := context.Background()
   297  
   298  	opt := DefaultOptions()
   299  	// initialize cluster
   300  	c, err := NewCluster(ctx, t, opt.WithLogLevel(zap.DebugLevel))
   301  	require.NoError(t, err)
   302  
   303  	ch := make(chan int)
   304  	taskExecutor := func(ctx context.Context, task task.Task) error {
   305  		t.Logf("task %d is running", task.GetID())
   306  		select {
   307  		case ch <- int(task.GetID()):
   308  		case <-ctx.Done():
   309  			return nil
   310  		}
   311  		return nil
   312  	}
   313  
   314  	// close the cluster
   315  	defer func(c Cluster) {
   316  		require.NoError(t, c.Close())
   317  	}(c)
   318  	// start the cluster
   319  	require.NoError(t, c.Start())
   320  
   321  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   322  	defer cancel()
   323  
   324  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   325  	indexed, err := c.GetCNServiceIndexed(0)
   326  	require.NoError(t, err)
   327  
   328  	indexed.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor)
   329  
   330  	taskService, ok := indexed.GetTaskService()
   331  	require.True(t, ok)
   332  
   333  	require.NoError(t, taskService.CreateCronTask(context.TODO(),
   334  		task.TaskMetadata{
   335  			ID:       "a",
   336  			Executor: task.TaskCode_TestOnly,
   337  		},
   338  		"*/1 * * * * *", // every 1 second
   339  	))
   340  
   341  	waitChannelFull(t, ctx, ch, 3)
   342  }
   343  
   344  func waitChannelFull(t *testing.T, ctx context.Context, ch chan int, expected int) {
   345  	i := 0
   346  	received := make([]int, 0, expected)
   347  	for {
   348  		select {
   349  		case <-ctx.Done():
   350  			assert.FailNow(t, "cron task not repeated enough")
   351  		case c := <-ch:
   352  			received = append(received, c)
   353  			if len(received) == expected {
   354  				t.Logf("received %d numbers", expected)
   355  				return
   356  			}
   357  		default:
   358  			t.Logf("iteration: %d", i)
   359  			time.Sleep(time.Second)
   360  			i++
   361  		}
   362  	}
   363  }