github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/executor/server_test.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package executor 15 16 import ( 17 "context" 18 "fmt" 19 "io" 20 "net/http" 21 "strings" 22 "testing" 23 "time" 24 25 "github.com/phayes/freeport" 26 pb "github.com/pingcap/tiflow/engine/enginepb" 27 "github.com/pingcap/tiflow/engine/executor/server" 28 "github.com/pingcap/tiflow/engine/executor/worker" 29 frameModel "github.com/pingcap/tiflow/engine/framework/model" 30 "github.com/pingcap/tiflow/engine/framework/registry" 31 "github.com/pingcap/tiflow/engine/jobmaster/fakejob" 32 "github.com/pingcap/tiflow/engine/model" 33 "github.com/pingcap/tiflow/engine/pkg/client" 34 dcontext "github.com/pingcap/tiflow/engine/pkg/context" 35 "github.com/pingcap/tiflow/engine/pkg/deps" 36 pkgOrm "github.com/pingcap/tiflow/engine/pkg/orm" 37 "github.com/pingcap/tiflow/engine/pkg/rpcutil" 38 "github.com/pingcap/tiflow/engine/pkg/tenant" 39 "github.com/pingcap/tiflow/pkg/errors" 40 "github.com/pingcap/tiflow/pkg/httputil" 41 "github.com/pingcap/tiflow/pkg/logutil" 42 "github.com/pingcap/tiflow/pkg/uuid" 43 "github.com/stretchr/testify/require" 44 "golang.org/x/sync/errgroup" 45 "google.golang.org/grpc" 46 "google.golang.org/grpc/codes" 47 "google.golang.org/grpc/status" 48 ) 49 50 func init() { 51 err := logutil.InitLogger(&logutil.Config{Level: "warn"}) 52 if err != nil { 53 panic(err) 54 } 55 } 56 57 func TestStartTCPSrv(t *testing.T) { 58 t.Parallel() 59 60 cfg := GetDefaultExecutorConfig() 61 port, err := freeport.GetFreePort() 62 require.Nil(t, err) 63 addr := fmt.Sprintf("127.0.0.1:%d", port) 64 cfg.Addr = addr 65 s := NewServer(cfg) 66 67 s.grpcSrv = grpc.NewServer() 68 wg, ctx := errgroup.WithContext(context.Background()) 69 err = s.startTCPService(ctx, wg) 70 require.Nil(t, err) 71 72 apiURL := fmt.Sprintf("http://127.0.0.1:%d", port) 73 testPprof(t, apiURL) 74 75 testPrometheusMetrics(t, apiURL) 76 s.Stop() 77 } 78 79 func testPprof(t *testing.T, addr string) { 80 urls := []string{ 81 "/debug/pprof/", 82 "/debug/pprof/cmdline", 83 "/debug/pprof/symbol", 84 // enable these two apis will make ut slow 85 //"/debug/pprof/profile", http.MethodGet, 86 //"/debug/pprof/trace", http.MethodGet, 87 "/debug/pprof/threadcreate", 88 "/debug/pprof/allocs", 89 "/debug/pprof/block", 90 "/debug/pprof/goroutine?debug=1", 91 "/debug/pprof/mutex?debug=1", 92 } 93 ctx := context.Background() 94 cli, err := httputil.NewClient(nil) 95 require.NoError(t, err) 96 for _, uri := range urls { 97 resp, err := cli.Get(ctx, addr+uri) 98 require.Nil(t, err) 99 defer resp.Body.Close() 100 require.Equal(t, http.StatusOK, resp.StatusCode) 101 _, err = io.ReadAll(resp.Body) 102 require.Nil(t, err) 103 } 104 } 105 106 func testPrometheusMetrics(t *testing.T, addr string) { 107 urls := []string{ 108 "/metrics", 109 } 110 ctx := context.Background() 111 cli, err := httputil.NewClient(nil) 112 require.NoError(t, err) 113 for _, uri := range urls { 114 resp, err := cli.Get(ctx, addr+uri) 115 require.Nil(t, err) 116 defer resp.Body.Close() 117 require.Equal(t, http.StatusOK, resp.StatusCode) 118 _, err = io.ReadAll(resp.Body) 119 require.Nil(t, err) 120 } 121 } 122 123 func TestCollectMetric(t *testing.T) { 124 wg, ctx := errgroup.WithContext(context.Background()) 125 cfg := GetDefaultExecutorConfig() 126 port, err := freeport.GetFreePort() 127 require.Nil(t, err) 128 addr := fmt.Sprintf("127.0.0.1:%d", port) 129 cfg.Addr = addr 130 s := NewServer(cfg) 131 s.taskRunner = worker.NewTaskRunner(defaultRuntimeIncomingQueueLen, defaultRuntimeInitConcurrency) 132 133 s.grpcSrv = grpc.NewServer() 134 err = s.startTCPService(ctx, wg) 135 require.Nil(t, err) 136 137 wg.Go(func() error { 138 return s.collectMetricLoop(ctx, time.Millisecond*10) 139 }) 140 apiURL := fmt.Sprintf("http://%s", addr) 141 testCustomedPrometheusMetrics(t, apiURL) 142 s.Stop() 143 wg.Wait() 144 } 145 146 func testCustomedPrometheusMetrics(t *testing.T, addr string) { 147 ctx := context.Background() 148 cli, err := httputil.NewClient(nil) 149 require.NoError(t, err) 150 require.Eventually(t, func() bool { 151 resp, err := cli.Get(ctx, addr+"/metrics") 152 require.Nil(t, err) 153 defer resp.Body.Close() 154 require.Equal(t, http.StatusOK, resp.StatusCode) 155 body, err := io.ReadAll(resp.Body) 156 require.Nil(t, err) 157 metric := string(body) 158 return strings.Contains(metric, "dataflow_executor_task_num") 159 }, time.Second, time.Millisecond*20) 160 } 161 162 type registerExecutorReturnValue struct { 163 executor *pb.Executor 164 err error 165 } 166 167 type mockRegisterMasterClient struct { 168 client.ServerMasterClient 169 respChan chan *registerExecutorReturnValue 170 } 171 172 func newMockRegisterMasterClient(chanBufferSize int) *mockRegisterMasterClient { 173 return &mockRegisterMasterClient{ 174 respChan: make(chan *registerExecutorReturnValue, chanBufferSize), 175 } 176 } 177 178 func (c *mockRegisterMasterClient) RegisterExecutor( 179 ctx context.Context, req *pb.RegisterExecutorRequest, 180 ) (nodeID model.ExecutorID, err error) { 181 value := <-c.respChan 182 if value.err != nil { 183 return "", value.err 184 } 185 return model.ExecutorID(value.executor.Id), nil 186 } 187 188 func TestSelfRegister(t *testing.T) { 189 t.Parallel() 190 191 ctx := context.Background() 192 cfg := GetDefaultExecutorConfig() 193 port, err := freeport.GetFreePort() 194 require.Nil(t, err) 195 addr := fmt.Sprintf("127.0.0.1:%d", port) 196 cfg.AdvertiseAddr = addr 197 s := NewServer(cfg) 198 mockMasterClient := newMockRegisterMasterClient(10) 199 s.masterClient = mockMasterClient 200 201 mockMasterClient.respChan <- ®isterExecutorReturnValue{ 202 nil, errors.New("service unavailable"), 203 } 204 err = s.selfRegister(ctx) 205 require.Error(t, err, "service unavailable") 206 207 executorID := uuid.NewGenerator().NewString() 208 returnValues := []*registerExecutorReturnValue{ 209 { 210 &pb.Executor{ 211 Id: executorID, 212 }, nil, 213 }, 214 } 215 for _, val := range returnValues { 216 mockMasterClient.respChan <- val 217 } 218 err = s.selfRegister(ctx) 219 require.NoError(t, err) 220 require.Equal(t, executorID, string(s.selfID)) 221 } 222 223 func TestRPCCallBeforeInitialized(t *testing.T) { 224 svr := &Server{ 225 metastores: server.NewMetastoreManager(), 226 } 227 228 _, err := svr.PreDispatchTask(context.Background(), &pb.PreDispatchTaskRequest{}) 229 require.Error(t, err) 230 require.Equal(t, codes.Unavailable, status.Convert(err).Code()) 231 232 _, err = svr.ConfirmDispatchTask(context.Background(), &pb.ConfirmDispatchTaskRequest{}) 233 require.Error(t, err) 234 require.Equal(t, codes.Unavailable, status.Convert(err).Code()) 235 } 236 237 func TestConvertMakeTaskError(t *testing.T) { 238 t.Parallel() 239 240 register := registry.NewRegistry() 241 ok := register.RegisterWorkerType(frameModel.FakeJobMaster, 242 registry.NewSimpleWorkerFactory(fakejob.NewFakeMaster)) 243 require.True(t, ok) 244 245 testCases := []struct { 246 err error 247 isRetryable bool 248 }{ 249 {errors.ErrDeserializeConfig.GenWithStackByArgs(), false}, 250 {errors.New("normal error"), true}, 251 } 252 253 for _, tc := range testCases { 254 err := convertMakeTaskErrorToRPCError(register, tc.err, frameModel.FakeJobMaster) 255 require.Error(t, err) 256 errIn := rpcutil.FromGRPCError(err) 257 if tc.isRetryable { 258 require.True(t, errors.Is(errIn, errors.ErrCreateWorkerNonTerminate)) 259 } else { 260 require.True(t, errors.Is(errIn, errors.ErrCreateWorkerTerminate)) 261 } 262 } 263 } 264 265 func TestPrecheckMasterMeta(t *testing.T) { 266 t.Parallel() 267 268 register := registry.NewRegistry() 269 ok := register.RegisterWorkerType(frameModel.FakeJobMaster, 270 registry.NewSimpleWorkerFactory(fakejob.NewFakeMaster)) 271 require.True(t, ok) 272 273 ormCli, err := pkgOrm.NewMockClient() 274 require.NoError(t, err) 275 276 masterID := "precheck-master-id" 277 dp := deps.NewDeps() 278 err = dp.Provide(func() pkgOrm.Client { 279 return ormCli 280 }) 281 require.NoError(t, err) 282 283 ctx := dcontext.Background().WithDeps(dp) 284 masterMeta := &frameModel.MasterMeta{ 285 ProjectID: tenant.TestProjectInfo.UniqueID(), 286 ID: masterID, 287 Type: frameModel.FakeJobMaster, 288 State: frameModel.MasterStateUninit, 289 } 290 err = ormCli.UpsertJob(ctx, masterMeta) 291 require.NoError(t, err) 292 293 // normal master meta, no error message 294 err = precheckMasterMeta(ctx, register, masterID, frameModel.FakeJobMaster) 295 require.NoError(t, err) 296 297 // failover on retryable error 298 masterMeta.State = frameModel.MasterStateInit 299 masterMeta.ErrorMsg = "normal error" 300 err = ormCli.UpsertJob(ctx, masterMeta) 301 require.NoError(t, err) 302 err = precheckMasterMeta(ctx, register, masterID, frameModel.FakeJobMaster) 303 require.NoError(t, err) 304 305 // no retry on unretryable error 306 fakeJobErr := errors.ErrDeserializeConfig.GenWithStackByArgs() 307 masterMeta.ErrorMsg = fakeJobErr.Error() 308 err = ormCli.UpsertJob(ctx, masterMeta) 309 require.NoError(t, err) 310 err = precheckMasterMeta(ctx, register, masterID, frameModel.FakeJobMaster) 311 require.Error(t, err) 312 require.EqualError(t, err, fakeJobErr.Error()) 313 err = convertMakeTaskErrorToRPCError(register, err, frameModel.FakeJobMaster) 314 errIn := rpcutil.FromGRPCError(err) 315 require.True(t, errors.Is(errIn, errors.ErrCreateWorkerTerminate)) 316 }