github.com/matrixorigin/matrixone@v0.7.0/pkg/tests/service/task_test.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package service
    16  
    17  import (
    18  	"context"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/lni/goutils/leaktest"
    23  	"github.com/matrixorigin/matrixone/pkg/pb/task"
    24  	"github.com/matrixorigin/matrixone/pkg/taskservice"
    25  	"github.com/stretchr/testify/assert"
    26  	"github.com/stretchr/testify/require"
    27  	"go.uber.org/zap"
    28  )
    29  
    30  func waitTaskScheduled(t *testing.T, ctx context.Context, taskService taskservice.TaskService) string {
    31  	i := 0
    32  	for {
    33  		select {
    34  		case <-ctx.Done():
    35  			assert.FailNow(t, "task not allocated")
    36  		default:
    37  			t.Logf("iteration: %d", i)
    38  			tasks, err := taskService.QueryTask(context.TODO(),
    39  				taskservice.WithTaskIDDesc())
    40  			require.NoError(t, err)
    41  
    42  			if len(tasks) != 0 && tasks[0].TaskRunner != "" {
    43  				t.Logf("task %d allocated on %s", tasks[0].ID, tasks[0].TaskRunner)
    44  				t.Logf("num task: %d", len(tasks))
    45  				return tasks[0].TaskRunner
    46  			}
    47  			time.Sleep(300 * time.Millisecond)
    48  			i++
    49  		}
    50  	}
    51  }
    52  
    53  func waitTaskRescheduled(t *testing.T, ctx context.Context, taskService taskservice.TaskService, uuid string) {
    54  	i := 0
    55  	for {
    56  		select {
    57  		case <-ctx.Done():
    58  			assert.FailNow(t, "task not reallocated")
    59  		default:
    60  			t.Logf("iteration: %d", i)
    61  			tasks, err := taskService.QueryTask(context.TODO(),
    62  				taskservice.WithTaskIDDesc(),
    63  				taskservice.WithTaskStatusCond(taskservice.EQ, task.TaskStatus_Running))
    64  			require.NoError(t, err)
    65  			if tasks[0].TaskRunner == uuid {
    66  				t.Logf("task %d is still on %s", tasks[0].ID, tasks[0].TaskRunner)
    67  				time.Sleep(1 * time.Second)
    68  				i++
    69  				continue
    70  			} else {
    71  				t.Logf("task %d reallocated on %s", tasks[0].ID, tasks[0].TaskRunner)
    72  				return
    73  			}
    74  		}
    75  	}
    76  }
    77  
    78  func TestTaskServiceCanCreate(t *testing.T) {
    79  	defer leaktest.AfterTest(t)()
    80  	if testing.Short() {
    81  		t.Skip("skipping in short mode.")
    82  		return
    83  	}
    84  
    85  	// initialize cluster
    86  	c, err := NewCluster(t, DefaultOptions().
    87  		WithCNServiceNum(1).
    88  		WithCNShardNum(1).
    89  		WithDNServiceNum(1).
    90  		WithDNShardNum(1).
    91  		WithLogServiceNum(3).
    92  		WithLogShardNum(1))
    93  	require.NoError(t, err)
    94  
    95  	// close the cluster
    96  	defer func(c Cluster) {
    97  		require.NoError(t, c.Close())
    98  	}(c)
    99  	// start the cluster
   100  	require.NoError(t, c.Start())
   101  
   102  	t.Log("cluster log svcs length:", len(c.(*testCluster).log.svcs))
   103  
   104  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   105  	defer cancel()
   106  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   107  	c.WaitDNStoreTaskServiceCreatedIndexed(ctx, 0)
   108  	c.WaitLogStoreTaskServiceCreatedIndexed(ctx, 0)
   109  	c.WaitLogStoreTaskServiceCreatedIndexed(ctx, 1)
   110  	c.WaitLogStoreTaskServiceCreatedIndexed(ctx, 2)
   111  }
   112  
   113  func TestTaskSchedulerCanAllocateTask(t *testing.T) {
   114  	defer leaktest.AfterTest(t)()
   115  	if testing.Short() {
   116  		t.Skip("skipping in short mode.")
   117  		return
   118  	}
   119  
   120  	opt := DefaultOptions()
   121  	// initialize cluster
   122  	c, err := NewCluster(t, opt)
   123  	require.NoError(t, err)
   124  
   125  	// close the cluster
   126  	defer func(c Cluster) {
   127  		require.NoError(t, c.Close())
   128  	}(c)
   129  	// start the cluster
   130  	require.NoError(t, c.Start())
   131  
   132  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   133  	defer cancel()
   134  
   135  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   136  	indexed, err := c.GetCNServiceIndexed(0)
   137  	require.NoError(t, err)
   138  	taskService, ok := indexed.GetTaskService()
   139  	require.True(t, ok)
   140  
   141  	i := 0
   142  	for {
   143  		select {
   144  		case <-ctx.Done():
   145  			require.FailNow(t, "failed to query tasks")
   146  		default:
   147  		}
   148  		t.Logf("iter %d", i)
   149  		tasks, err := taskService.QueryTask(ctx)
   150  		require.NoError(t, err)
   151  		if len(tasks) == 0 {
   152  			time.Sleep(time.Second)
   153  			i++
   154  			continue
   155  		}
   156  		require.Equal(t, 1, len(tasks))
   157  		t.Logf("task status: %s", tasks[0].Status)
   158  		break
   159  	}
   160  
   161  	waitTaskScheduled(t, ctx, taskService)
   162  }
   163  
   164  func TestTaskSchedulerCanReallocateTask(t *testing.T) {
   165  	defer leaktest.AfterTest(t)()
   166  	if testing.Short() {
   167  		t.Skip("skipping in short mode.")
   168  		return
   169  	}
   170  
   171  	cnSvcNum := 2
   172  	opt := DefaultOptions().
   173  		WithCNServiceNum(cnSvcNum)
   174  
   175  	// initialize cluster
   176  	c, err := NewCluster(t, opt)
   177  	require.NoError(t, err)
   178  
   179  	halt := make(chan bool)
   180  	taskExecutor := func(ctx context.Context, task task.Task) error {
   181  		t.Logf("task %d is running", task.ID)
   182  		select {
   183  		case <-ctx.Done():
   184  		case <-halt:
   185  		}
   186  		return nil
   187  	}
   188  
   189  	defer func(c Cluster, halt chan bool) {
   190  		halt <- true
   191  		require.NoError(t, c.Close())
   192  		close(halt)
   193  	}(c, halt)
   194  	// start the cluster
   195  	require.NoError(t, c.Start())
   196  
   197  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   198  	defer cancel()
   199  
   200  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   201  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 1)
   202  	cn1, err := c.GetCNServiceIndexed(0)
   203  	require.NoError(t, err)
   204  
   205  	cn2, err := c.GetCNServiceIndexed(1)
   206  	require.NoError(t, err)
   207  	cn1.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor)
   208  	cn2.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor)
   209  
   210  	taskService, ok := cn1.GetTaskService()
   211  	require.True(t, ok)
   212  	err = taskService.Create(context.TODO(), task.TaskMetadata{ID: "a", Executor: task.TaskCode_TestOnly})
   213  	require.NoError(t, err)
   214  
   215  	tasks, err := taskService.QueryTask(ctx,
   216  		taskservice.WithTaskExecutorCond(taskservice.EQ, task.TaskCode_TestOnly))
   217  	require.NoError(t, err)
   218  	require.Equal(t, 1, len(tasks))
   219  
   220  	uuid1 := waitTaskScheduled(t, ctx, taskService)
   221  
   222  	err = c.CloseCNService(uuid1)
   223  	require.NoError(t, err)
   224  
   225  	if uuid1 == cn1.ID() {
   226  		taskService, ok = cn2.GetTaskService()
   227  		require.True(t, ok)
   228  	}
   229  	waitTaskRescheduled(t, ctx, taskService, uuid1)
   230  }
   231  
   232  func TestTaskRunner(t *testing.T) {
   233  	defer leaktest.AfterTest(t)()
   234  	if testing.Short() {
   235  		t.Skip("skipping in short mode.")
   236  		return
   237  	}
   238  
   239  	ch := make(chan int)
   240  	taskExecutor := func(ctx context.Context, task task.Task) error {
   241  		t.Logf("task %d is running", task.ID)
   242  		ch <- int(task.ID)
   243  		return nil
   244  	}
   245  
   246  	cnSvcNum := 1
   247  	opt := DefaultOptions().
   248  		WithCNServiceNum(cnSvcNum)
   249  
   250  	// initialize cluster
   251  	c, err := NewCluster(t, opt.WithLogLevel(zap.DebugLevel))
   252  	require.NoError(t, err)
   253  
   254  	// close the cluster
   255  	defer func(c Cluster) {
   256  		require.NoError(t, c.Close())
   257  	}(c)
   258  	// start the cluster
   259  	require.NoError(t, c.Start())
   260  
   261  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   262  	defer cancel()
   263  
   264  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   265  	indexed, err := c.GetCNServiceIndexed(0)
   266  	require.NoError(t, err)
   267  
   268  	indexed.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor)
   269  
   270  	taskService, ok := indexed.GetTaskService()
   271  	require.True(t, ok)
   272  
   273  	err = taskService.Create(context.TODO(), task.TaskMetadata{ID: "a", Executor: task.TaskCode_TestOnly})
   274  	require.NoError(t, err)
   275  
   276  	waitTaskScheduled(t, ctx, taskService)
   277  
   278  	select {
   279  	case <-ctx.Done():
   280  		assert.FailNow(t, "task not running")
   281  	case i := <-ch:
   282  		t.Logf("task %d is completed", i)
   283  	}
   284  }
   285  
   286  func TestCronTask(t *testing.T) {
   287  	defer leaktest.AfterTest(t)()
   288  	if testing.Short() {
   289  		t.Skip("skipping in short mode.")
   290  		return
   291  	}
   292  
   293  	opt := DefaultOptions()
   294  	// initialize cluster
   295  	c, err := NewCluster(t, opt.WithLogLevel(zap.DebugLevel))
   296  	require.NoError(t, err)
   297  
   298  	ch := make(chan int)
   299  	taskExecutor := func(ctx context.Context, task task.Task) error {
   300  		t.Logf("task %d is running", task.ID)
   301  		select {
   302  		case ch <- int(task.ID):
   303  		case <-ctx.Done():
   304  			return nil
   305  		}
   306  		return nil
   307  	}
   308  
   309  	// close the cluster
   310  	defer func(c Cluster) {
   311  		require.NoError(t, c.Close())
   312  	}(c)
   313  	// start the cluster
   314  	require.NoError(t, c.Start())
   315  
   316  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   317  	defer cancel()
   318  
   319  	c.WaitCNStoreTaskServiceCreatedIndexed(ctx, 0)
   320  	indexed, err := c.GetCNServiceIndexed(0)
   321  	require.NoError(t, err)
   322  
   323  	indexed.GetTaskRunner().RegisterExecutor(task.TaskCode_TestOnly, taskExecutor)
   324  
   325  	taskService, ok := indexed.GetTaskService()
   326  	require.True(t, ok)
   327  
   328  	require.NoError(t, taskService.CreateCronTask(context.TODO(),
   329  		task.TaskMetadata{
   330  			ID:       "a",
   331  			Executor: task.TaskCode_TestOnly,
   332  		},
   333  		"*/1 * * * * *", // every 1 second
   334  	))
   335  
   336  	waitChannelFull(t, ctx, ch, 3)
   337  }
   338  
   339  func waitChannelFull(t *testing.T, ctx context.Context, ch chan int, expected int) {
   340  	i := 0
   341  	received := make([]int, 0, expected)
   342  	for {
   343  		select {
   344  		case <-ctx.Done():
   345  			assert.FailNow(t, "cron task not repeated enough")
   346  		case c := <-ch:
   347  			received = append(received, c)
   348  			if len(received) == expected {
   349  				t.Logf("received %d numbers", expected)
   350  				return
   351  			}
   352  		default:
   353  			t.Logf("iteration: %d", i)
   354  			time.Sleep(time.Second)
   355  			i++
   356  		}
   357  	}
   358  }