github.com/matrixorigin/matrixone@v0.7.0/pkg/logservice/hakeeper_client.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package logservice 16 17 import ( 18 "context" 19 "fmt" 20 "math/rand" 21 "sync" 22 23 "go.uber.org/zap" 24 25 "github.com/matrixorigin/matrixone/pkg/common/moerr" 26 "github.com/matrixorigin/matrixone/pkg/common/morpc" 27 "github.com/matrixorigin/matrixone/pkg/hakeeper" 28 "github.com/matrixorigin/matrixone/pkg/logutil" 29 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 30 "github.com/matrixorigin/matrixone/pkg/util/trace" 31 ) 32 33 type basicHAKeeperClient interface { 34 // Close closes the hakeeper client. 35 Close() error 36 // AllocateID allocate a globally unique ID 37 AllocateID(ctx context.Context) (uint64, error) 38 // GetClusterDetails queries the HAKeeper and return CN and DN nodes that are 39 // known to the HAKeeper. 40 GetClusterDetails(ctx context.Context) (pb.ClusterDetails, error) 41 // GetClusterState queries the cluster state 42 GetClusterState(ctx context.Context) (pb.CheckerState, error) 43 } 44 45 // CNHAKeeperClient is the HAKeeper client used by a CN store. 46 type CNHAKeeperClient interface { 47 basicHAKeeperClient 48 // SendCNHeartbeat sends the specified heartbeat message to the HAKeeper. 49 SendCNHeartbeat(ctx context.Context, hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) 50 } 51 52 // DNHAKeeperClient is the HAKeeper client used by a DN store. 53 type DNHAKeeperClient interface { 54 basicHAKeeperClient 55 // SendDNHeartbeat sends the specified heartbeat message to the HAKeeper. The 56 // returned CommandBatch contains Schedule Commands to be executed by the local 57 // DN store. 58 SendDNHeartbeat(ctx context.Context, hb pb.DNStoreHeartbeat) (pb.CommandBatch, error) 59 } 60 61 // LogHAKeeperClient is the HAKeeper client used by a Log store. 62 type LogHAKeeperClient interface { 63 basicHAKeeperClient 64 // SendLogHeartbeat sends the specified heartbeat message to the HAKeeper. The 65 // returned CommandBatch contains Schedule Commands to be executed by the local 66 // Log store. 67 SendLogHeartbeat(ctx context.Context, hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) 68 } 69 70 // TODO: HAKeeper discovery to be implemented 71 72 var _ CNHAKeeperClient = (*managedHAKeeperClient)(nil) 73 var _ DNHAKeeperClient = (*managedHAKeeperClient)(nil) 74 var _ LogHAKeeperClient = (*managedHAKeeperClient)(nil) 75 76 // NewCNHAKeeperClient creates a HAKeeper client to be used by a CN node. 77 // 78 // NB: caller could specify options for morpc.Client via ctx. 79 func NewCNHAKeeperClient(ctx context.Context, 80 cfg HAKeeperClientConfig) (CNHAKeeperClient, error) { 81 if err := cfg.Validate(); err != nil { 82 return nil, err 83 } 84 return newManagedHAKeeperClient(ctx, cfg) 85 } 86 87 // NewDNHAKeeperClient creates a HAKeeper client to be used by a DN node. 88 // 89 // NB: caller could specify options for morpc.Client via ctx. 90 func NewDNHAKeeperClient(ctx context.Context, 91 cfg HAKeeperClientConfig) (DNHAKeeperClient, error) { 92 if err := cfg.Validate(); err != nil { 93 return nil, err 94 } 95 return newManagedHAKeeperClient(ctx, cfg) 96 } 97 98 // NewLogHAKeeperClient creates a HAKeeper client to be used by a Log Service node. 99 // 100 // NB: caller could specify options for morpc.Client via ctx. 101 func NewLogHAKeeperClient(ctx context.Context, 102 cfg HAKeeperClientConfig) (LogHAKeeperClient, error) { 103 if err := cfg.Validate(); err != nil { 104 return nil, err 105 } 106 return newManagedHAKeeperClient(ctx, cfg) 107 } 108 109 func newManagedHAKeeperClient(ctx context.Context, 110 cfg HAKeeperClientConfig) (*managedHAKeeperClient, error) { 111 c, err := newHAKeeperClient(ctx, cfg) 112 if err != nil { 113 return nil, err 114 } 115 116 mc := &managedHAKeeperClient{ 117 cfg: cfg, 118 backendOptions: GetBackendOptions(ctx), 119 clientOptions: GetClientOptions(ctx), 120 } 121 mc.mu.client = c 122 return mc, nil 123 } 124 125 type managedHAKeeperClient struct { 126 cfg HAKeeperClientConfig 127 128 // Method `prepareClient` may update moprc.Client. 129 // So we need to keep options for morpc.Client. 130 backendOptions []morpc.BackendOption 131 clientOptions []morpc.ClientOption 132 133 mu struct { 134 sync.RWMutex 135 nextID uint64 136 lastID uint64 137 client *hakeeperClient 138 } 139 } 140 141 func (c *managedHAKeeperClient) Close() error { 142 c.mu.Lock() 143 defer c.mu.Unlock() 144 if c.mu.client == nil { 145 return nil 146 } 147 return c.mu.client.close() 148 } 149 150 func (c *managedHAKeeperClient) GetClusterDetails(ctx context.Context) (pb.ClusterDetails, error) { 151 for { 152 if err := c.prepareClient(ctx); err != nil { 153 return pb.ClusterDetails{}, err 154 } 155 cd, err := c.getClient().getClusterDetails(ctx) 156 if err != nil { 157 c.resetClient() 158 } 159 if c.isRetryableError(err) { 160 continue 161 } 162 return cd, err 163 } 164 } 165 166 func (c *managedHAKeeperClient) GetClusterState(ctx context.Context) (pb.CheckerState, error) { 167 for { 168 if err := c.prepareClient(ctx); err != nil { 169 return pb.CheckerState{}, err 170 } 171 s, err := c.getClient().getClusterState(ctx) 172 if err != nil { 173 c.resetClient() 174 } 175 if c.isRetryableError(err) { 176 continue 177 } 178 return s, err 179 } 180 } 181 182 func (c *managedHAKeeperClient) AllocateID(ctx context.Context) (uint64, error) { 183 c.mu.Lock() 184 if c.mu.nextID != c.mu.lastID { 185 v := c.mu.nextID 186 c.mu.nextID++ 187 c.mu.Unlock() 188 return v, nil 189 } 190 191 for { 192 if err := c.prepareClientLocked(ctx); err != nil { 193 return 0, err 194 } 195 firstID, err := c.mu.client.sendCNAllocateID(ctx, c.cfg.AllocateIDBatch) 196 if err != nil { 197 c.resetClientLocked() 198 } 199 if c.isRetryableError(err) { 200 continue 201 } 202 203 c.mu.nextID = firstID + 1 204 c.mu.lastID = firstID + c.cfg.AllocateIDBatch - 1 205 c.mu.Unlock() 206 return firstID, err 207 } 208 } 209 210 func (c *managedHAKeeperClient) SendCNHeartbeat(ctx context.Context, 211 hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) { 212 for { 213 if err := c.prepareClient(ctx); err != nil { 214 return pb.CommandBatch{}, err 215 } 216 result, err := c.getClient().sendCNHeartbeat(ctx, hb) 217 if err != nil { 218 c.resetClient() 219 } 220 if c.isRetryableError(err) { 221 continue 222 } 223 return result, err 224 } 225 } 226 227 func (c *managedHAKeeperClient) SendDNHeartbeat(ctx context.Context, 228 hb pb.DNStoreHeartbeat) (pb.CommandBatch, error) { 229 for { 230 if err := c.prepareClient(ctx); err != nil { 231 return pb.CommandBatch{}, err 232 } 233 cb, err := c.getClient().sendDNHeartbeat(ctx, hb) 234 if err != nil { 235 c.resetClient() 236 } 237 if c.isRetryableError(err) { 238 continue 239 } 240 return cb, err 241 } 242 } 243 244 func (c *managedHAKeeperClient) SendLogHeartbeat(ctx context.Context, 245 hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) { 246 for { 247 if err := c.prepareClient(ctx); err != nil { 248 return pb.CommandBatch{}, err 249 } 250 cb, err := c.getClient().sendLogHeartbeat(ctx, hb) 251 if err != nil { 252 c.resetClient() 253 } 254 if c.isRetryableError(err) { 255 continue 256 } 257 return cb, err 258 } 259 } 260 261 func (c *managedHAKeeperClient) isRetryableError(err error) bool { 262 return moerr.IsMoErrCode(err, moerr.ErrNoHAKeeper) 263 } 264 265 func (c *managedHAKeeperClient) resetClient() { 266 c.mu.Lock() 267 defer c.mu.Unlock() 268 c.resetClientLocked() 269 } 270 271 func (c *managedHAKeeperClient) prepareClient(ctx context.Context) error { 272 c.mu.Lock() 273 defer c.mu.Unlock() 274 return c.prepareClientLocked(ctx) 275 } 276 277 func (c *managedHAKeeperClient) resetClientLocked() { 278 if c.mu.client != nil { 279 cc := c.mu.client 280 c.mu.client = nil 281 if err := cc.close(); err != nil { 282 logutil.Error("failed to close client", zap.Error(err)) 283 } 284 } 285 } 286 287 func (c *managedHAKeeperClient) prepareClientLocked(ctx context.Context) error { 288 if c.mu.client != nil { 289 return nil 290 } 291 292 // we must use the recoreded options for morpc.Client 293 ctx = SetBackendOptions(ctx, c.backendOptions...) 294 ctx = SetClientOptions(ctx, c.clientOptions...) 295 296 cc, err := newHAKeeperClient(ctx, c.cfg) 297 if err != nil { 298 return err 299 } 300 c.mu.client = cc 301 return nil 302 } 303 304 type hakeeperClient struct { 305 cfg HAKeeperClientConfig 306 client morpc.RPCClient 307 addr string 308 pool *sync.Pool 309 respPool *sync.Pool 310 } 311 312 func newHAKeeperClient(ctx context.Context, 313 cfg HAKeeperClientConfig) (*hakeeperClient, error) { 314 client, err := connectToHAKeeper(ctx, cfg.ServiceAddresses, cfg) 315 if client != nil && err == nil { 316 return client, nil 317 } 318 if len(cfg.DiscoveryAddress) > 0 { 319 return connectByReverseProxy(ctx, cfg.DiscoveryAddress, cfg) 320 } 321 if err != nil { 322 return nil, err 323 } 324 return nil, moerr.NewNoHAKeeper(ctx) 325 } 326 327 func connectByReverseProxy(ctx context.Context, 328 discoveryAddress string, cfg HAKeeperClientConfig) (*hakeeperClient, error) { 329 si, ok, err := GetShardInfo(discoveryAddress, hakeeper.DefaultHAKeeperShardID) 330 if err != nil { 331 return nil, err 332 } 333 if !ok { 334 return nil, nil 335 } 336 addresses := make([]string, 0) 337 leaderAddress, ok := si.Replicas[si.ReplicaID] 338 if ok { 339 addresses = append(addresses, leaderAddress) 340 } 341 for replicaID, address := range si.Replicas { 342 if replicaID != si.ReplicaID { 343 addresses = append(addresses, address) 344 } 345 } 346 return connectToHAKeeper(ctx, addresses, cfg) 347 } 348 349 func connectToHAKeeper(ctx context.Context, 350 targets []string, cfg HAKeeperClientConfig) (*hakeeperClient, error) { 351 if len(targets) == 0 { 352 return nil, nil 353 } 354 355 pool := &sync.Pool{} 356 pool.New = func() interface{} { 357 return &RPCRequest{pool: pool} 358 } 359 respPool := &sync.Pool{} 360 respPool.New = func() interface{} { 361 return &RPCResponse{pool: respPool} 362 } 363 c := &hakeeperClient{ 364 cfg: cfg, 365 pool: pool, 366 respPool: respPool, 367 } 368 var e error 369 addresses := append([]string{}, targets...) 370 rand.Shuffle(len(addresses), func(i, j int) { 371 addresses[i], addresses[j] = addresses[j], addresses[i] 372 }) 373 for _, addr := range addresses { 374 cc, err := getRPCClient(ctx, addr, c.respPool, defaultMaxMessageSize, cfg.EnableCompress, "connectToHAKeeper") 375 if err != nil { 376 e = err 377 continue 378 } 379 c.addr = addr 380 c.client = cc 381 isHAKeeper, err := c.checkIsHAKeeper(ctx) 382 logutil.Info(fmt.Sprintf("isHAKeeper: %t, err: %v", isHAKeeper, err)) 383 if err == nil && isHAKeeper { 384 return c, nil 385 } else if err != nil { 386 e = err 387 } 388 if err := cc.Close(); err != nil { 389 logutil.Error("failed to close the client", zap.Error(err)) 390 } 391 } 392 if e == nil { 393 // didn't encounter any error 394 return nil, moerr.NewNoHAKeeper(ctx) 395 } 396 return nil, e 397 } 398 399 func (c *hakeeperClient) close() error { 400 if c == nil { 401 panic("!!!") 402 } 403 404 if c.client != nil { 405 return c.client.Close() 406 } 407 return nil 408 } 409 410 func (c *hakeeperClient) getClusterDetails(ctx context.Context) (pb.ClusterDetails, error) { 411 req := pb.Request{ 412 Method: pb.GET_CLUSTER_DETAILS, 413 } 414 resp, err := c.request(ctx, req) 415 if err != nil { 416 return pb.ClusterDetails{}, err 417 } 418 return *resp.ClusterDetails, nil 419 } 420 421 func (c *hakeeperClient) getClusterState(ctx context.Context) (pb.CheckerState, error) { 422 req := pb.Request{ 423 Method: pb.GET_CLUSTER_STATE, 424 } 425 resp, err := c.request(ctx, req) 426 if err != nil { 427 return pb.CheckerState{}, err 428 } 429 return *resp.CheckerState, nil 430 } 431 432 func (c *hakeeperClient) sendCNHeartbeat(ctx context.Context, hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) { 433 req := pb.Request{ 434 Method: pb.CN_HEARTBEAT, 435 CNHeartbeat: &hb, 436 } 437 return c.sendHeartbeat(ctx, req) 438 } 439 440 func (c *hakeeperClient) sendCNAllocateID(ctx context.Context, batch uint64) (uint64, error) { 441 req := pb.Request{ 442 Method: pb.CN_ALLOCATE_ID, 443 CNAllocateID: &pb.CNAllocateID{Batch: batch}, 444 } 445 resp, err := c.request(ctx, req) 446 if err != nil { 447 return 0, err 448 } 449 return resp.AllocateID.FirstID, nil 450 } 451 452 func (c *hakeeperClient) sendDNHeartbeat(ctx context.Context, 453 hb pb.DNStoreHeartbeat) (pb.CommandBatch, error) { 454 req := pb.Request{ 455 Method: pb.DN_HEARTBEAT, 456 DNHeartbeat: &hb, 457 } 458 return c.sendHeartbeat(ctx, req) 459 } 460 461 func (c *hakeeperClient) sendLogHeartbeat(ctx context.Context, 462 hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) { 463 req := pb.Request{ 464 Method: pb.LOG_HEARTBEAT, 465 LogHeartbeat: &hb, 466 } 467 cb, err := c.sendHeartbeat(ctx, req) 468 if err != nil { 469 return pb.CommandBatch{}, err 470 } 471 for _, cmd := range cb.Commands { 472 logutil.Info("hakeeper client received cmd", zap.String("cmd", cmd.LogString())) 473 } 474 return cb, nil 475 } 476 477 func (c *hakeeperClient) sendHeartbeat(ctx context.Context, 478 req pb.Request) (pb.CommandBatch, error) { 479 resp, err := c.request(ctx, req) 480 if err != nil { 481 return pb.CommandBatch{}, err 482 } 483 if resp.CommandBatch == nil { 484 return pb.CommandBatch{}, nil 485 } 486 return *resp.CommandBatch, nil 487 } 488 489 func (c *hakeeperClient) checkIsHAKeeper(ctx context.Context) (bool, error) { 490 req := pb.Request{ 491 Method: pb.CHECK_HAKEEPER, 492 } 493 resp, err := c.request(ctx, req) 494 if err != nil { 495 return false, err 496 } 497 return resp.IsHAKeeper, nil 498 } 499 500 func (c *hakeeperClient) request(ctx context.Context, req pb.Request) (pb.Response, error) { 501 if c == nil { 502 return pb.Response{}, moerr.NewNoHAKeeper(ctx) 503 } 504 ctx, span := trace.Debug(ctx, "hakeeperClient.request") 505 defer span.End() 506 r := c.pool.Get().(*RPCRequest) 507 r.Request = req 508 future, err := c.client.Send(ctx, c.addr, r) 509 if err != nil { 510 return pb.Response{}, err 511 } 512 defer future.Close() 513 msg, err := future.Get() 514 if err != nil { 515 return pb.Response{}, err 516 } 517 response, ok := msg.(*RPCResponse) 518 if !ok { 519 panic("unexpected response type") 520 } 521 resp := response.Response 522 defer response.Release() 523 err = toError(ctx, response.Response) 524 if err != nil { 525 return pb.Response{}, err 526 } 527 return resp, nil 528 } 529 530 func (c *managedHAKeeperClient) getClient() *hakeeperClient { 531 c.mu.RLock() 532 defer c.mu.RUnlock() 533 return c.mu.client 534 }