github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/executor/server_test.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package executor
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"io"
    20  	"net/http"
    21  	"strings"
    22  	"testing"
    23  	"time"
    24  
    25  	"github.com/phayes/freeport"
    26  	pb "github.com/pingcap/tiflow/engine/enginepb"
    27  	"github.com/pingcap/tiflow/engine/executor/server"
    28  	"github.com/pingcap/tiflow/engine/executor/worker"
    29  	frameModel "github.com/pingcap/tiflow/engine/framework/model"
    30  	"github.com/pingcap/tiflow/engine/framework/registry"
    31  	"github.com/pingcap/tiflow/engine/jobmaster/fakejob"
    32  	"github.com/pingcap/tiflow/engine/model"
    33  	"github.com/pingcap/tiflow/engine/pkg/client"
    34  	dcontext "github.com/pingcap/tiflow/engine/pkg/context"
    35  	"github.com/pingcap/tiflow/engine/pkg/deps"
    36  	pkgOrm "github.com/pingcap/tiflow/engine/pkg/orm"
    37  	"github.com/pingcap/tiflow/engine/pkg/rpcutil"
    38  	"github.com/pingcap/tiflow/engine/pkg/tenant"
    39  	"github.com/pingcap/tiflow/pkg/errors"
    40  	"github.com/pingcap/tiflow/pkg/httputil"
    41  	"github.com/pingcap/tiflow/pkg/logutil"
    42  	"github.com/pingcap/tiflow/pkg/uuid"
    43  	"github.com/stretchr/testify/require"
    44  	"golang.org/x/sync/errgroup"
    45  	"google.golang.org/grpc"
    46  	"google.golang.org/grpc/codes"
    47  	"google.golang.org/grpc/status"
    48  )
    49  
    50  func init() {
    51  	err := logutil.InitLogger(&logutil.Config{Level: "warn"})
    52  	if err != nil {
    53  		panic(err)
    54  	}
    55  }
    56  
    57  func TestStartTCPSrv(t *testing.T) {
    58  	t.Parallel()
    59  
    60  	cfg := GetDefaultExecutorConfig()
    61  	port, err := freeport.GetFreePort()
    62  	require.Nil(t, err)
    63  	addr := fmt.Sprintf("127.0.0.1:%d", port)
    64  	cfg.Addr = addr
    65  	s := NewServer(cfg)
    66  
    67  	s.grpcSrv = grpc.NewServer()
    68  	wg, ctx := errgroup.WithContext(context.Background())
    69  	err = s.startTCPService(ctx, wg)
    70  	require.Nil(t, err)
    71  
    72  	apiURL := fmt.Sprintf("http://127.0.0.1:%d", port)
    73  	testPprof(t, apiURL)
    74  
    75  	testPrometheusMetrics(t, apiURL)
    76  	s.Stop()
    77  }
    78  
    79  func testPprof(t *testing.T, addr string) {
    80  	urls := []string{
    81  		"/debug/pprof/",
    82  		"/debug/pprof/cmdline",
    83  		"/debug/pprof/symbol",
    84  		// enable these two apis will make ut slow
    85  		//"/debug/pprof/profile", http.MethodGet,
    86  		//"/debug/pprof/trace", http.MethodGet,
    87  		"/debug/pprof/threadcreate",
    88  		"/debug/pprof/allocs",
    89  		"/debug/pprof/block",
    90  		"/debug/pprof/goroutine?debug=1",
    91  		"/debug/pprof/mutex?debug=1",
    92  	}
    93  	ctx := context.Background()
    94  	cli, err := httputil.NewClient(nil)
    95  	require.NoError(t, err)
    96  	for _, uri := range urls {
    97  		resp, err := cli.Get(ctx, addr+uri)
    98  		require.Nil(t, err)
    99  		defer resp.Body.Close()
   100  		require.Equal(t, http.StatusOK, resp.StatusCode)
   101  		_, err = io.ReadAll(resp.Body)
   102  		require.Nil(t, err)
   103  	}
   104  }
   105  
   106  func testPrometheusMetrics(t *testing.T, addr string) {
   107  	urls := []string{
   108  		"/metrics",
   109  	}
   110  	ctx := context.Background()
   111  	cli, err := httputil.NewClient(nil)
   112  	require.NoError(t, err)
   113  	for _, uri := range urls {
   114  		resp, err := cli.Get(ctx, addr+uri)
   115  		require.Nil(t, err)
   116  		defer resp.Body.Close()
   117  		require.Equal(t, http.StatusOK, resp.StatusCode)
   118  		_, err = io.ReadAll(resp.Body)
   119  		require.Nil(t, err)
   120  	}
   121  }
   122  
   123  func TestCollectMetric(t *testing.T) {
   124  	wg, ctx := errgroup.WithContext(context.Background())
   125  	cfg := GetDefaultExecutorConfig()
   126  	port, err := freeport.GetFreePort()
   127  	require.Nil(t, err)
   128  	addr := fmt.Sprintf("127.0.0.1:%d", port)
   129  	cfg.Addr = addr
   130  	s := NewServer(cfg)
   131  	s.taskRunner = worker.NewTaskRunner(defaultRuntimeIncomingQueueLen, defaultRuntimeInitConcurrency)
   132  
   133  	s.grpcSrv = grpc.NewServer()
   134  	err = s.startTCPService(ctx, wg)
   135  	require.Nil(t, err)
   136  
   137  	wg.Go(func() error {
   138  		return s.collectMetricLoop(ctx, time.Millisecond*10)
   139  	})
   140  	apiURL := fmt.Sprintf("http://%s", addr)
   141  	testCustomedPrometheusMetrics(t, apiURL)
   142  	s.Stop()
   143  	wg.Wait()
   144  }
   145  
   146  func testCustomedPrometheusMetrics(t *testing.T, addr string) {
   147  	ctx := context.Background()
   148  	cli, err := httputil.NewClient(nil)
   149  	require.NoError(t, err)
   150  	require.Eventually(t, func() bool {
   151  		resp, err := cli.Get(ctx, addr+"/metrics")
   152  		require.Nil(t, err)
   153  		defer resp.Body.Close()
   154  		require.Equal(t, http.StatusOK, resp.StatusCode)
   155  		body, err := io.ReadAll(resp.Body)
   156  		require.Nil(t, err)
   157  		metric := string(body)
   158  		return strings.Contains(metric, "dataflow_executor_task_num")
   159  	}, time.Second, time.Millisecond*20)
   160  }
   161  
// registerExecutorReturnValue is one canned result for the mocked
// RegisterExecutor call. The mock returns err when it is non-nil and
// otherwise returns the Id of executor.
type registerExecutorReturnValue struct {
	executor *pb.Executor
	err      error
}
   166  
// mockRegisterMasterClient embeds client.ServerMasterClient and defines its
// own RegisterExecutor, which replies with values read from respChan.
type mockRegisterMasterClient struct {
	client.ServerMasterClient
	// respChan queues the results handed out by successive RegisterExecutor
	// calls, one value per call.
	respChan chan *registerExecutorReturnValue
}
   171  
   172  func newMockRegisterMasterClient(chanBufferSize int) *mockRegisterMasterClient {
   173  	return &mockRegisterMasterClient{
   174  		respChan: make(chan *registerExecutorReturnValue, chanBufferSize),
   175  	}
   176  }
   177  
   178  func (c *mockRegisterMasterClient) RegisterExecutor(
   179  	ctx context.Context, req *pb.RegisterExecutorRequest,
   180  ) (nodeID model.ExecutorID, err error) {
   181  	value := <-c.respChan
   182  	if value.err != nil {
   183  		return "", value.err
   184  	}
   185  	return model.ExecutorID(value.executor.Id), nil
   186  }
   187  
   188  func TestSelfRegister(t *testing.T) {
   189  	t.Parallel()
   190  
   191  	ctx := context.Background()
   192  	cfg := GetDefaultExecutorConfig()
   193  	port, err := freeport.GetFreePort()
   194  	require.Nil(t, err)
   195  	addr := fmt.Sprintf("127.0.0.1:%d", port)
   196  	cfg.AdvertiseAddr = addr
   197  	s := NewServer(cfg)
   198  	mockMasterClient := newMockRegisterMasterClient(10)
   199  	s.masterClient = mockMasterClient
   200  
   201  	mockMasterClient.respChan <- &registerExecutorReturnValue{
   202  		nil, errors.New("service unavailable"),
   203  	}
   204  	err = s.selfRegister(ctx)
   205  	require.Error(t, err, "service unavailable")
   206  
   207  	executorID := uuid.NewGenerator().NewString()
   208  	returnValues := []*registerExecutorReturnValue{
   209  		{
   210  			&pb.Executor{
   211  				Id: executorID,
   212  			}, nil,
   213  		},
   214  	}
   215  	for _, val := range returnValues {
   216  		mockMasterClient.respChan <- val
   217  	}
   218  	err = s.selfRegister(ctx)
   219  	require.NoError(t, err)
   220  	require.Equal(t, executorID, string(s.selfID))
   221  }
   222  
   223  func TestRPCCallBeforeInitialized(t *testing.T) {
   224  	svr := &Server{
   225  		metastores: server.NewMetastoreManager(),
   226  	}
   227  
   228  	_, err := svr.PreDispatchTask(context.Background(), &pb.PreDispatchTaskRequest{})
   229  	require.Error(t, err)
   230  	require.Equal(t, codes.Unavailable, status.Convert(err).Code())
   231  
   232  	_, err = svr.ConfirmDispatchTask(context.Background(), &pb.ConfirmDispatchTaskRequest{})
   233  	require.Error(t, err)
   234  	require.Equal(t, codes.Unavailable, status.Convert(err).Code())
   235  }
   236  
   237  func TestConvertMakeTaskError(t *testing.T) {
   238  	t.Parallel()
   239  
   240  	register := registry.NewRegistry()
   241  	ok := register.RegisterWorkerType(frameModel.FakeJobMaster,
   242  		registry.NewSimpleWorkerFactory(fakejob.NewFakeMaster))
   243  	require.True(t, ok)
   244  
   245  	testCases := []struct {
   246  		err         error
   247  		isRetryable bool
   248  	}{
   249  		{errors.ErrDeserializeConfig.GenWithStackByArgs(), false},
   250  		{errors.New("normal error"), true},
   251  	}
   252  
   253  	for _, tc := range testCases {
   254  		err := convertMakeTaskErrorToRPCError(register, tc.err, frameModel.FakeJobMaster)
   255  		require.Error(t, err)
   256  		errIn := rpcutil.FromGRPCError(err)
   257  		if tc.isRetryable {
   258  			require.True(t, errors.Is(errIn, errors.ErrCreateWorkerNonTerminate))
   259  		} else {
   260  			require.True(t, errors.Is(errIn, errors.ErrCreateWorkerTerminate))
   261  		}
   262  	}
   263  }
   264  
   265  func TestPrecheckMasterMeta(t *testing.T) {
   266  	t.Parallel()
   267  
   268  	register := registry.NewRegistry()
   269  	ok := register.RegisterWorkerType(frameModel.FakeJobMaster,
   270  		registry.NewSimpleWorkerFactory(fakejob.NewFakeMaster))
   271  	require.True(t, ok)
   272  
   273  	ormCli, err := pkgOrm.NewMockClient()
   274  	require.NoError(t, err)
   275  
   276  	masterID := "precheck-master-id"
   277  	dp := deps.NewDeps()
   278  	err = dp.Provide(func() pkgOrm.Client {
   279  		return ormCli
   280  	})
   281  	require.NoError(t, err)
   282  
   283  	ctx := dcontext.Background().WithDeps(dp)
   284  	masterMeta := &frameModel.MasterMeta{
   285  		ProjectID: tenant.TestProjectInfo.UniqueID(),
   286  		ID:        masterID,
   287  		Type:      frameModel.FakeJobMaster,
   288  		State:     frameModel.MasterStateUninit,
   289  	}
   290  	err = ormCli.UpsertJob(ctx, masterMeta)
   291  	require.NoError(t, err)
   292  
   293  	// normal master meta, no error message
   294  	err = precheckMasterMeta(ctx, register, masterID, frameModel.FakeJobMaster)
   295  	require.NoError(t, err)
   296  
   297  	// failover on retryable error
   298  	masterMeta.State = frameModel.MasterStateInit
   299  	masterMeta.ErrorMsg = "normal error"
   300  	err = ormCli.UpsertJob(ctx, masterMeta)
   301  	require.NoError(t, err)
   302  	err = precheckMasterMeta(ctx, register, masterID, frameModel.FakeJobMaster)
   303  	require.NoError(t, err)
   304  
   305  	// no retry on unretryable error
   306  	fakeJobErr := errors.ErrDeserializeConfig.GenWithStackByArgs()
   307  	masterMeta.ErrorMsg = fakeJobErr.Error()
   308  	err = ormCli.UpsertJob(ctx, masterMeta)
   309  	require.NoError(t, err)
   310  	err = precheckMasterMeta(ctx, register, masterID, frameModel.FakeJobMaster)
   311  	require.Error(t, err)
   312  	require.EqualError(t, err, fakeJobErr.Error())
   313  	err = convertMakeTaskErrorToRPCError(register, err, frameModel.FakeJobMaster)
   314  	errIn := rpcutil.FromGRPCError(err)
   315  	require.True(t, errors.Is(errIn, errors.ErrCreateWorkerTerminate))
   316  }