github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/client.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package logservice 16 17 import ( 18 "context" 19 "math/rand" 20 "sync" 21 "time" 22 23 "go.uber.org/zap" 24 25 "github.com/cockroachdb/errors" 26 "github.com/lni/dragonboat/v4" 27 "github.com/matrixorigin/matrixone/pkg/common/moerr" 28 "github.com/matrixorigin/matrixone/pkg/common/morpc" 29 "github.com/matrixorigin/matrixone/pkg/common/mpool" 30 "github.com/matrixorigin/matrixone/pkg/logutil" 31 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 32 "github.com/matrixorigin/matrixone/pkg/util/trace" 33 ) 34 35 const ( 36 defaultWriteSocketSize = 64 * 1024 37 ) 38 39 // IsTempError returns a boolean value indicating whether the specified error 40 // is a temp error that worth to be retried, e.g. timeouts, temp network 41 // issues. Non-temp error caused by program logics rather than some external 42 // factors. 43 func IsTempError(err error) bool { 44 return isTempError(err) 45 } 46 47 type ClientFactory func() (Client, error) 48 49 // Client is the Log Service Client interface exposed to the DN. 50 type Client interface { 51 // Close closes the client. 52 Close() error 53 // Config returns the specified configuration when creating the client. 54 Config() ClientConfig 55 // GetLogRecord returns a new LogRecord instance with its Data field enough 56 // to hold payloadLength bytes of payload. The layout of the Data field is 57 // 4 bytes of record type (pb.UserEntryUpdate) + 8 bytes TN replica ID + 58 // payloadLength bytes of actual payload. 59 GetLogRecord(payloadLength int) pb.LogRecord 60 // Append appends the specified LogRecord into the Log Service. On success, the 61 // assigned Lsn will be returned. For the specified LogRecord, only its Data 62 // field is used with all other fields ignored by Append(). Once returned, the 63 // pb.LogRecord can be reused. 64 Append(ctx context.Context, rec pb.LogRecord) (Lsn, error) 65 // Read reads the Log Service from the specified Lsn position until the 66 // returned LogRecord set reaches the specified maxSize in bytes. The returned 67 // Lsn indicates the next Lsn to use to resume the read, or it means 68 // everything available has been read when it equals to the specified Lsn. 69 // The returned pb.LogRecord records will have their Lsn and Type fields set, 70 // the Lsn field is the Lsn assigned to the record while the Type field tells 71 // whether the record is an internal record generated by the Log Service itself 72 // or appended by the user. 73 Read(ctx context.Context, firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) 74 // Truncate truncates the Log Service log at the specified Lsn with Lsn 75 // itself included. This allows the Log Service to free up storage capacities 76 // for future appends, all future reads must start after the specified Lsn 77 // position. 78 Truncate(ctx context.Context, lsn Lsn) error 79 // GetTruncatedLsn returns the largest Lsn value that has been specified for 80 // truncation. 81 GetTruncatedLsn(ctx context.Context) (Lsn, error) 82 // GetTSOTimestamp requests a total of count unique timestamps from the TSO and 83 // return the first assigned such timestamp, that is TSO timestamps 84 // [returned value, returned value + count] will be owned by the caller. 85 GetTSOTimestamp(ctx context.Context, count uint64) (uint64, error) 86 } 87 88 type managedClient struct { 89 cfg ClientConfig 90 client *client 91 } 92 93 var _ Client = (*managedClient)(nil) 94 95 // NewClient creates a Log Service client. Each returned client can be used 96 // to synchronously issue requests to the Log Service. To send multiple requests 97 // to the Log Service in parallel, multiple clients should be created and used 98 // to do so. 99 func NewClient(ctx context.Context, cfg ClientConfig) (Client, error) { 100 if err := cfg.Validate(); err != nil { 101 return nil, err 102 } 103 client, err := newClient(ctx, cfg) 104 if err != nil { 105 return nil, err 106 } 107 return &managedClient{cfg: cfg, client: client}, nil 108 } 109 110 func (c *managedClient) Close() error { 111 if c.client != nil { 112 return c.client.close() 113 } 114 return nil 115 } 116 117 func (c *managedClient) Config() ClientConfig { 118 return c.cfg 119 } 120 121 func (c *managedClient) GetLogRecord(payloadLength int) pb.LogRecord { 122 data := make([]byte, headerSize+8+payloadLength) 123 binaryEnc.PutUint32(data, uint32(pb.UserEntryUpdate)) 124 binaryEnc.PutUint64(data[headerSize:], c.cfg.TNReplicaID) 125 return pb.LogRecord{Data: data} 126 } 127 128 func (c *managedClient) Append(ctx context.Context, rec pb.LogRecord) (Lsn, error) { 129 for { 130 if err := c.prepareClient(ctx); err != nil { 131 return 0, err 132 } 133 v, err := c.client.append(ctx, rec) 134 if err != nil { 135 c.resetClient() 136 } 137 if c.isRetryableError(err) { 138 continue 139 } 140 return v, err 141 } 142 } 143 144 func (c *managedClient) Read(ctx context.Context, 145 firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) { 146 for { 147 if err := c.prepareClient(ctx); err != nil { 148 return nil, 0, err 149 } 150 recs, v, err := c.client.read(ctx, firstLsn, maxSize) 151 if err != nil { 152 c.resetClient() 153 } 154 if c.isRetryableError(err) { 155 continue 156 } 157 return recs, v, err 158 } 159 } 160 161 func (c *managedClient) Truncate(ctx context.Context, lsn Lsn) error { 162 for { 163 if err := c.prepareClient(ctx); err != nil { 164 return err 165 } 166 err := c.client.truncate(ctx, lsn) 167 if err != nil { 168 c.resetClient() 169 } 170 if c.isRetryableError(err) { 171 continue 172 } 173 return err 174 } 175 } 176 177 func (c *managedClient) GetTruncatedLsn(ctx context.Context) (Lsn, error) { 178 for { 179 if err := c.prepareClient(ctx); err != nil { 180 return 0, err 181 } 182 v, err := c.client.getTruncatedLsn(ctx) 183 if err != nil { 184 c.resetClient() 185 } 186 if c.isRetryableError(err) { 187 continue 188 } 189 return v, err 190 } 191 } 192 193 func (c *managedClient) GetTSOTimestamp(ctx context.Context, count uint64) (uint64, error) { 194 for { 195 if err := c.prepareClient(ctx); err != nil { 196 return 0, err 197 } 198 v, err := c.client.getTSOTimestamp(ctx, count) 199 if err != nil { 200 c.resetClient() 201 } 202 if c.isRetryableError(err) { 203 continue 204 } 205 return v, err 206 } 207 } 208 209 func (c *managedClient) isRetryableError(err error) bool { 210 /* 211 old code, obviously strange 212 if errors.Is(err, dragonboat.ErrTimeout) { 213 return false 214 } 215 return errors.Is(err, dragonboat.ErrShardNotFound) 216 */ 217 218 // Dragonboat error leaked here 219 if errors.Is(err, dragonboat.ErrShardNotFound) { 220 return true 221 } 222 return moerr.IsMoErrCode(err, moerr.ErrDragonboatShardNotFound) 223 } 224 225 func (c *managedClient) resetClient() { 226 if c.client != nil { 227 cc := c.client 228 c.client = nil 229 if err := cc.close(); err != nil { 230 logutil.Error("failed to close client", zap.Error(err)) 231 } 232 } 233 } 234 235 func (c *managedClient) prepareClient(ctx context.Context) error { 236 if c.client != nil { 237 return nil 238 } 239 cc, err := newClient(ctx, c.cfg) 240 if err != nil { 241 return err 242 } 243 c.client = cc 244 return nil 245 } 246 247 type client struct { 248 cfg ClientConfig 249 client morpc.RPCClient 250 addr string 251 pool *sync.Pool 252 respPool *sync.Pool 253 } 254 255 func newClient(ctx context.Context, cfg ClientConfig) (*client, error) { 256 var c *client 257 var err error 258 // If the discovery address is configured, we used it first. 259 if len(cfg.DiscoveryAddress) > 0 { 260 c, err = connectToLogServiceByReverseProxy(ctx, cfg.DiscoveryAddress, cfg) 261 if c != nil && err == nil { 262 return c, nil 263 } 264 } else if len(cfg.ServiceAddresses) > 0 { 265 c, err = connectToLogService(ctx, cfg.ServiceAddresses, cfg) 266 if c != nil && err == nil { 267 return c, nil 268 } 269 } 270 if err != nil { 271 return nil, err 272 } 273 return nil, moerr.NewLogServiceNotReady(ctx) 274 } 275 276 func connectToLogServiceByReverseProxy(ctx context.Context, 277 discoveryAddress string, cfg ClientConfig) (*client, error) { 278 si, ok, err := GetShardInfo(discoveryAddress, cfg.LogShardID) 279 if err != nil { 280 return nil, err 281 } 282 if !ok { 283 return nil, moerr.NewLogServiceNotReady(ctx) 284 } 285 addresses := make([]string, 0) 286 leaderAddress, ok := si.Replicas[si.ReplicaID] 287 if ok { 288 addresses = append(addresses, leaderAddress) 289 } 290 for replicaID, address := range si.Replicas { 291 if replicaID != si.ReplicaID { 292 addresses = append(addresses, address) 293 } 294 } 295 return connectToLogService(ctx, addresses, cfg) 296 } 297 298 func connectToLogService(ctx context.Context, 299 targets []string, cfg ClientConfig) (*client, error) { 300 if len(targets) == 0 { 301 return nil, nil 302 } 303 304 pool := &sync.Pool{} 305 pool.New = func() interface{} { 306 return &RPCRequest{pool: pool} 307 } 308 respPool := &sync.Pool{} 309 respPool.New = func() interface{} { 310 return &RPCResponse{pool: respPool} 311 } 312 c := &client{ 313 cfg: cfg, 314 pool: pool, 315 respPool: respPool, 316 } 317 var e error 318 addresses := append([]string{}, targets...) 319 rand.Shuffle(len(cfg.ServiceAddresses), func(i, j int) { 320 addresses[i], addresses[j] = addresses[j], addresses[i] 321 }) 322 for _, addr := range addresses { 323 cc, err := getRPCClient( 324 ctx, 325 addr, 326 c.respPool, 327 c.cfg.MaxMessageSize, 328 cfg.EnableCompress, 329 0, 330 cfg.Tag, 331 ) 332 if err != nil { 333 e = err 334 continue 335 } 336 c.addr = addr 337 c.client = cc 338 if cfg.ReadOnly { 339 if err := c.connectReadOnly(ctx); err == nil { 340 return c, nil 341 } else { 342 if err := c.close(); err != nil { 343 logutil.Error("failed to close the client", zap.Error(err)) 344 } 345 e = err 346 } 347 } else { 348 // TODO: add a test to check whether it works when there is no truncated 349 // LSN known to the logservice. 350 if err := c.connectReadWrite(ctx); err == nil { 351 return c, nil 352 } else { 353 if err := c.close(); err != nil { 354 logutil.Error("failed to close the client", zap.Error(err)) 355 } 356 e = err 357 } 358 } 359 } 360 return nil, e 361 } 362 363 func (c *client) close() error { 364 return c.client.Close() 365 } 366 367 func (c *client) append(ctx context.Context, rec pb.LogRecord) (Lsn, error) { 368 if c.readOnly() { 369 return 0, moerr.NewInvalidInput(ctx, "incompatible client") 370 } 371 // TODO: check piggybacked hint on whether we are connected to the leader node 372 return c.doAppend(ctx, rec) 373 } 374 375 func (c *client) read(ctx context.Context, 376 firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) { 377 return c.doRead(ctx, firstLsn, maxSize) 378 } 379 380 func (c *client) truncate(ctx context.Context, lsn Lsn) error { 381 if c.readOnly() { 382 return moerr.NewInvalidInput(ctx, "incompatible client") 383 } 384 return c.doTruncate(ctx, lsn) 385 } 386 387 func (c *client) getTruncatedLsn(ctx context.Context) (Lsn, error) { 388 return c.doGetTruncatedLsn(ctx) 389 } 390 391 func (c *client) getTSOTimestamp(ctx context.Context, count uint64) (uint64, error) { 392 return c.tsoRequest(ctx, count) 393 } 394 395 func (c *client) readOnly() bool { 396 return c.cfg.ReadOnly 397 } 398 399 func (c *client) connectReadWrite(ctx context.Context) error { 400 if c.readOnly() { 401 panic(moerr.NewInvalidInput(ctx, "incompatible client")) 402 } 403 return c.connect(ctx, pb.CONNECT) 404 } 405 406 func (c *client) connectReadOnly(ctx context.Context) error { 407 return c.connect(ctx, pb.CONNECT_RO) 408 } 409 410 func (c *client) request(ctx context.Context, 411 mt pb.MethodType, payload []byte, lsn Lsn, 412 maxSize uint64) (pb.Response, []pb.LogRecord, error) { 413 ctx, span := trace.Debug(ctx, "client.request") 414 defer span.End() 415 req := pb.Request{ 416 Method: mt, 417 LogRequest: pb.LogRequest{ 418 ShardID: c.cfg.LogShardID, 419 TNID: c.cfg.TNReplicaID, 420 Lsn: lsn, 421 MaxSize: maxSize, 422 }, 423 } 424 r := c.pool.Get().(*RPCRequest) 425 defer r.Release() 426 r.Request = req 427 r.payload = payload 428 future, err := c.client.Send(ctx, c.addr, r) 429 if err != nil { 430 return pb.Response{}, nil, err 431 } 432 defer future.Close() 433 msg, err := future.Get() 434 if err != nil { 435 return pb.Response{}, nil, err 436 } 437 response, ok := msg.(*RPCResponse) 438 if !ok { 439 panic("unexpected response type") 440 } 441 resp := response.Response 442 defer response.Release() 443 var recs pb.LogRecordResponse 444 if len(response.payload) > 0 { 445 MustUnmarshal(&recs, response.payload) 446 } 447 err = toError(ctx, response.Response) 448 if err != nil { 449 return pb.Response{}, nil, err 450 } 451 return resp, recs.Records, nil 452 } 453 454 func (c *client) tsoRequest(ctx context.Context, count uint64) (uint64, error) { 455 ctx, span := trace.Debug(ctx, "client.tsoRequest") 456 defer span.End() 457 req := pb.Request{ 458 Method: pb.TSO_UPDATE, 459 TsoRequest: &pb.TsoRequest{ 460 Count: count, 461 }, 462 } 463 r := c.pool.Get().(*RPCRequest) 464 r.Request = req 465 future, err := c.client.Send(ctx, c.addr, r) 466 if err != nil { 467 return 0, err 468 } 469 defer future.Close() 470 msg, err := future.Get() 471 if err != nil { 472 return 0, err 473 } 474 response, ok := msg.(*RPCResponse) 475 if !ok { 476 panic("unexpected response type") 477 } 478 resp := response.Response 479 defer response.Release() 480 err = toError(ctx, response.Response) 481 if err != nil { 482 return 0, err 483 } 484 return resp.TsoResponse.Value, nil 485 } 486 487 func (c *client) connect(ctx context.Context, mt pb.MethodType) error { 488 _, _, err := c.request(ctx, mt, nil, 0, 0) 489 return err 490 } 491 492 func (c *client) doAppend(ctx context.Context, rec pb.LogRecord) (Lsn, error) { 493 resp, _, err := c.request(ctx, pb.APPEND, rec.Data, 0, 0) 494 if err != nil { 495 return 0, err 496 } 497 return resp.LogResponse.Lsn, nil 498 } 499 500 func (c *client) doRead(ctx context.Context, 501 firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) { 502 resp, recs, err := c.request(ctx, pb.READ, nil, firstLsn, maxSize) 503 if err != nil { 504 return nil, 0, err 505 } 506 return recs, resp.LogResponse.LastLsn, nil 507 } 508 509 func (c *client) doTruncate(ctx context.Context, lsn Lsn) error { 510 _, _, err := c.request(ctx, pb.TRUNCATE, nil, lsn, 0) 511 return err 512 } 513 514 func (c *client) doGetTruncatedLsn(ctx context.Context) (Lsn, error) { 515 resp, _, err := c.request(ctx, pb.GET_TRUNCATE, nil, 0, 0) 516 if err != nil { 517 return 0, err 518 } 519 return resp.LogResponse.Lsn, nil 520 } 521 522 func getRPCClient( 523 ctx context.Context, 524 target string, 525 pool *sync.Pool, 526 maxMessageSize int, 527 enableCompress bool, 528 readTimeout time.Duration, 529 tag ...string) (morpc.RPCClient, error) { 530 mf := func() morpc.Message { 531 return pool.Get().(*RPCResponse) 532 } 533 534 // construct morpc.BackendOption 535 backendOpts := []morpc.BackendOption{ 536 morpc.WithBackendConnectTimeout(time.Second), 537 morpc.WithBackendHasPayloadResponse(), 538 morpc.WithBackendLogger(logutil.GetGlobalLogger().Named("hakeeper-client-backend")), 539 morpc.WithBackendReadTimeout(readTimeout), 540 } 541 backendOpts = append(backendOpts, GetBackendOptions(ctx)...) 542 543 // construct morpc.ClientOption 544 clientOpts := []morpc.ClientOption{ 545 morpc.WithClientInitBackends([]string{target}, []int{1}), 546 morpc.WithClientMaxBackendPerHost(1), 547 morpc.WithClientLogger(logutil.GetGlobalLogger()), 548 } 549 clientOpts = append(clientOpts, GetClientOptions(ctx)...) 550 551 var codecOpts []morpc.CodecOption 552 codecOpts = append(codecOpts, 553 morpc.WithCodecPayloadCopyBufferSize(defaultWriteSocketSize), 554 morpc.WithCodecEnableChecksum(), 555 morpc.WithCodecMaxBodySize(maxMessageSize)) 556 if enableCompress { 557 mp, err := mpool.NewMPool("log_rpc_client", 0, mpool.NoFixed) 558 if err != nil { 559 return nil, err 560 } 561 codecOpts = append(codecOpts, morpc.WithCodecEnableCompress(mp)) 562 } 563 564 // we set connection timeout to a constant value so if ctx's deadline is much 565 // larger, then we can ensure that all specified potential nodes have a chance 566 // to be attempted 567 codec := morpc.NewMessageCodec(mf, codecOpts...) 568 bf := morpc.NewGoettyBasedBackendFactory(codec, backendOpts...) 569 return morpc.NewClient("logservice-client", bf, clientOpts...) 570 }