github.com/matrixorigin/matrixone@v0.7.0/pkg/logservice/client.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package logservice 16 17 import ( 18 "context" 19 "fmt" 20 "math/rand" 21 "sync" 22 "time" 23 24 "go.uber.org/zap" 25 26 "github.com/cockroachdb/errors" 27 "github.com/lni/dragonboat/v4" 28 "github.com/matrixorigin/matrixone/pkg/common/moerr" 29 "github.com/matrixorigin/matrixone/pkg/common/morpc" 30 "github.com/matrixorigin/matrixone/pkg/common/mpool" 31 "github.com/matrixorigin/matrixone/pkg/logutil" 32 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 33 "github.com/matrixorigin/matrixone/pkg/util/trace" 34 ) 35 36 const ( 37 defaultWriteSocketSize = 64 * 1024 38 ) 39 40 // IsTempError returns a boolean value indicating whether the specified error 41 // is a temp error that worth to be retried, e.g. timeouts, temp network 42 // issues. Non-temp error caused by program logics rather than some external 43 // factors. 44 func IsTempError(err error) bool { 45 return isTempError(err) 46 } 47 48 type ClientFactory func() (Client, error) 49 50 // Client is the Log Service Client interface exposed to the DN. 51 type Client interface { 52 // Close closes the client. 53 Close() error 54 // Config returns the specified configuration when creating the client. 55 Config() ClientConfig 56 // GetLogRecord returns a new LogRecord instance with its Data field enough 57 // to hold payloadLength bytes of payload. The layout of the Data field is 58 // 4 bytes of record type (pb.UserEntryUpdate) + 8 bytes DN replica ID + 59 // payloadLength bytes of actual payload. 60 GetLogRecord(payloadLength int) pb.LogRecord 61 // Append appends the specified LogRecord into the Log Service. On success, the 62 // assigned Lsn will be returned. For the specified LogRecord, only its Data 63 // field is used with all other fields ignored by Append(). Once returned, the 64 // pb.LogRecord can be reused. 65 Append(ctx context.Context, rec pb.LogRecord) (Lsn, error) 66 // Read reads the Log Service from the specified Lsn position until the 67 // returned LogRecord set reaches the specified maxSize in bytes. The returned 68 // Lsn indicates the next Lsn to use to resume the read, or it means 69 // everything available has been read when it equals to the specified Lsn. 70 // The returned pb.LogRecord records will have their Lsn and Type fields set, 71 // the Lsn field is the Lsn assigned to the record while the Type field tells 72 // whether the record is an internal record generated by the Log Service itself 73 // or appended by the user. 74 Read(ctx context.Context, firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) 75 // Truncate truncates the Log Service log at the specified Lsn with Lsn 76 // itself included. This allows the Log Service to free up storage capacities 77 // for future appends, all future reads must start after the specified Lsn 78 // position. 79 Truncate(ctx context.Context, lsn Lsn) error 80 // GetTruncatedLsn returns the largest Lsn value that has been specified for 81 // truncation. 82 GetTruncatedLsn(ctx context.Context) (Lsn, error) 83 // GetTSOTimestamp requests a total of count unique timestamps from the TSO and 84 // return the first assigned such timestamp, that is TSO timestamps 85 // [returned value, returned value + count] will be owned by the caller. 86 GetTSOTimestamp(ctx context.Context, count uint64) (uint64, error) 87 } 88 89 type managedClient struct { 90 cfg ClientConfig 91 client *client 92 } 93 94 var _ Client = (*managedClient)(nil) 95 96 // NewClient creates a Log Service client. Each returned client can be used 97 // to synchronously issue requests to the Log Service. To send multiple requests 98 // to the Log Service in parallel, multiple clients should be created and used 99 // to do so. 100 func NewClient(ctx context.Context, cfg ClientConfig) (Client, error) { 101 if err := cfg.Validate(); err != nil { 102 return nil, err 103 } 104 client, err := newClient(ctx, cfg) 105 if err != nil { 106 return nil, err 107 } 108 return &managedClient{cfg: cfg, client: client}, nil 109 } 110 111 func (c *managedClient) Close() error { 112 if c.client != nil { 113 return c.client.close() 114 } 115 return nil 116 } 117 118 func (c *managedClient) Config() ClientConfig { 119 return c.cfg 120 } 121 122 func (c *managedClient) GetLogRecord(payloadLength int) pb.LogRecord { 123 data := make([]byte, headerSize+8+payloadLength) 124 binaryEnc.PutUint32(data, uint32(pb.UserEntryUpdate)) 125 binaryEnc.PutUint64(data[headerSize:], c.cfg.DNReplicaID) 126 return pb.LogRecord{Data: data} 127 } 128 129 func (c *managedClient) Append(ctx context.Context, rec pb.LogRecord) (Lsn, error) { 130 for { 131 if err := c.prepareClient(ctx); err != nil { 132 return 0, err 133 } 134 v, err := c.client.append(ctx, rec) 135 if err != nil { 136 c.resetClient() 137 } 138 if c.isRetryableError(err) { 139 continue 140 } 141 return v, err 142 } 143 } 144 145 func (c *managedClient) Read(ctx context.Context, 146 firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) { 147 for { 148 if err := c.prepareClient(ctx); err != nil { 149 return nil, 0, err 150 } 151 recs, v, err := c.client.read(ctx, firstLsn, maxSize) 152 if err != nil { 153 c.resetClient() 154 } 155 if c.isRetryableError(err) { 156 continue 157 } 158 return recs, v, err 159 } 160 } 161 162 func (c *managedClient) Truncate(ctx context.Context, lsn Lsn) error { 163 for { 164 if err := c.prepareClient(ctx); err != nil { 165 return err 166 } 167 err := c.client.truncate(ctx, lsn) 168 if err != nil { 169 c.resetClient() 170 } 171 if c.isRetryableError(err) { 172 continue 173 } 174 return err 175 } 176 } 177 178 func (c *managedClient) GetTruncatedLsn(ctx context.Context) (Lsn, error) { 179 for { 180 if err := c.prepareClient(ctx); err != nil { 181 return 0, err 182 } 183 v, err := c.client.getTruncatedLsn(ctx) 184 if err != nil { 185 c.resetClient() 186 } 187 if c.isRetryableError(err) { 188 continue 189 } 190 return v, err 191 } 192 } 193 194 func (c *managedClient) GetTSOTimestamp(ctx context.Context, count uint64) (uint64, error) { 195 for { 196 if err := c.prepareClient(ctx); err != nil { 197 return 0, err 198 } 199 v, err := c.client.getTSOTimestamp(ctx, count) 200 if err != nil { 201 c.resetClient() 202 } 203 if c.isRetryableError(err) { 204 continue 205 } 206 return v, err 207 } 208 } 209 210 func (c *managedClient) isRetryableError(err error) bool { 211 /* 212 old code, obviously strange 213 if errors.Is(err, dragonboat.ErrTimeout) { 214 return false 215 } 216 return errors.Is(err, dragonboat.ErrShardNotFound) 217 */ 218 219 // Dragonboat error leaked here 220 if errors.Is(err, dragonboat.ErrShardNotFound) { 221 return true 222 } 223 return moerr.IsMoErrCode(err, moerr.ErrDragonboatShardNotFound) 224 } 225 226 func (c *managedClient) resetClient() { 227 if c.client != nil { 228 cc := c.client 229 c.client = nil 230 if err := cc.close(); err != nil { 231 logutil.Error("failed to close client", zap.Error(err)) 232 } 233 } 234 } 235 236 func (c *managedClient) prepareClient(ctx context.Context) error { 237 if c.client != nil { 238 return nil 239 } 240 cc, err := newClient(ctx, c.cfg) 241 if err != nil { 242 return err 243 } 244 c.client = cc 245 return nil 246 } 247 248 type client struct { 249 cfg ClientConfig 250 client morpc.RPCClient 251 addr string 252 pool *sync.Pool 253 respPool *sync.Pool 254 } 255 256 func newClient(ctx context.Context, cfg ClientConfig) (*client, error) { 257 client, err := connectToLogService(ctx, cfg.ServiceAddresses, cfg) 258 if client != nil && err == nil { 259 return client, nil 260 } 261 if len(cfg.DiscoveryAddress) > 0 { 262 return connectToLogServiceByReverseProxy(ctx, cfg.DiscoveryAddress, cfg) 263 } 264 if err != nil { 265 return nil, err 266 } 267 return nil, moerr.NewLogServiceNotReady(ctx) 268 } 269 270 func connectToLogServiceByReverseProxy(ctx context.Context, 271 discoveryAddress string, cfg ClientConfig) (*client, error) { 272 si, ok, err := GetShardInfo(discoveryAddress, cfg.LogShardID) 273 if err != nil { 274 return nil, err 275 } 276 if !ok { 277 return nil, moerr.NewLogServiceNotReady(ctx) 278 } 279 addresses := make([]string, 0) 280 leaderAddress, ok := si.Replicas[si.ReplicaID] 281 if ok { 282 addresses = append(addresses, leaderAddress) 283 } 284 for replicaID, address := range si.Replicas { 285 if replicaID != si.ReplicaID { 286 addresses = append(addresses, address) 287 } 288 } 289 return connectToLogService(ctx, addresses, cfg) 290 } 291 292 func connectToLogService(ctx context.Context, 293 targets []string, cfg ClientConfig) (*client, error) { 294 if len(targets) == 0 { 295 return nil, nil 296 } 297 298 pool := &sync.Pool{} 299 pool.New = func() interface{} { 300 return &RPCRequest{pool: pool} 301 } 302 respPool := &sync.Pool{} 303 respPool.New = func() interface{} { 304 return &RPCResponse{pool: respPool} 305 } 306 c := &client{ 307 cfg: cfg, 308 pool: pool, 309 respPool: respPool, 310 } 311 var e error 312 addresses := append([]string{}, targets...) 313 rand.Shuffle(len(cfg.ServiceAddresses), func(i, j int) { 314 addresses[i], addresses[j] = addresses[j], addresses[i] 315 }) 316 for _, addr := range addresses { 317 cc, err := getRPCClient(ctx, addr, c.respPool, c.cfg.MaxMessageSize, cfg.EnableCompress, cfg.Tag) 318 if err != nil { 319 e = err 320 continue 321 } 322 c.addr = addr 323 c.client = cc 324 if cfg.ReadOnly { 325 if err := c.connectReadOnly(ctx); err == nil { 326 return c, nil 327 } else { 328 if err := c.close(); err != nil { 329 logutil.Error("failed to close the client", zap.Error(err)) 330 } 331 e = err 332 } 333 } else { 334 // TODO: add a test to check whether it works when there is no truncated 335 // LSN known to the logservice. 336 if err := c.connectReadWrite(ctx); err == nil { 337 return c, nil 338 } else { 339 if err := c.close(); err != nil { 340 logutil.Error("failed to close the client", zap.Error(err)) 341 } 342 e = err 343 } 344 } 345 } 346 return nil, e 347 } 348 349 func (c *client) close() error { 350 return c.client.Close() 351 } 352 353 func (c *client) append(ctx context.Context, rec pb.LogRecord) (Lsn, error) { 354 if c.readOnly() { 355 return 0, moerr.NewInvalidInput(ctx, "incompatible client") 356 } 357 // TODO: check piggybacked hint on whether we are connected to the leader node 358 return c.doAppend(ctx, rec) 359 } 360 361 func (c *client) read(ctx context.Context, 362 firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) { 363 return c.doRead(ctx, firstLsn, maxSize) 364 } 365 366 func (c *client) truncate(ctx context.Context, lsn Lsn) error { 367 if c.readOnly() { 368 return moerr.NewInvalidInput(ctx, "incompatible client") 369 } 370 return c.doTruncate(ctx, lsn) 371 } 372 373 func (c *client) getTruncatedLsn(ctx context.Context) (Lsn, error) { 374 return c.doGetTruncatedLsn(ctx) 375 } 376 377 func (c *client) getTSOTimestamp(ctx context.Context, count uint64) (uint64, error) { 378 return c.tsoRequest(ctx, count) 379 } 380 381 func (c *client) readOnly() bool { 382 return c.cfg.ReadOnly 383 } 384 385 func (c *client) connectReadWrite(ctx context.Context) error { 386 if c.readOnly() { 387 panic(moerr.NewInvalidInput(ctx, "incompatible client")) 388 } 389 return c.connect(ctx, pb.CONNECT) 390 } 391 392 func (c *client) connectReadOnly(ctx context.Context) error { 393 return c.connect(ctx, pb.CONNECT_RO) 394 } 395 396 func (c *client) request(ctx context.Context, 397 mt pb.MethodType, payload []byte, lsn Lsn, 398 maxSize uint64) (pb.Response, []pb.LogRecord, error) { 399 ctx, span := trace.Debug(ctx, "client.request") 400 defer span.End() 401 req := pb.Request{ 402 Method: mt, 403 LogRequest: pb.LogRequest{ 404 ShardID: c.cfg.LogShardID, 405 DNID: c.cfg.DNReplicaID, 406 Lsn: lsn, 407 MaxSize: maxSize, 408 }, 409 } 410 r := c.pool.Get().(*RPCRequest) 411 defer r.Release() 412 r.Request = req 413 r.payload = payload 414 future, err := c.client.Send(ctx, c.addr, r) 415 if err != nil { 416 return pb.Response{}, nil, err 417 } 418 defer future.Close() 419 msg, err := future.Get() 420 if err != nil { 421 return pb.Response{}, nil, err 422 } 423 response, ok := msg.(*RPCResponse) 424 if !ok { 425 panic("unexpected response type") 426 } 427 resp := response.Response 428 defer response.Release() 429 var recs pb.LogRecordResponse 430 if len(response.payload) > 0 { 431 MustUnmarshal(&recs, response.payload) 432 } 433 err = toError(ctx, response.Response) 434 if err != nil { 435 return pb.Response{}, nil, err 436 } 437 return resp, recs.Records, nil 438 } 439 440 func (c *client) tsoRequest(ctx context.Context, count uint64) (uint64, error) { 441 ctx, span := trace.Debug(ctx, "client.tsoRequest") 442 defer span.End() 443 req := pb.Request{ 444 Method: pb.TSO_UPDATE, 445 TsoRequest: &pb.TsoRequest{ 446 Count: count, 447 }, 448 } 449 r := c.pool.Get().(*RPCRequest) 450 r.Request = req 451 future, err := c.client.Send(ctx, c.addr, r) 452 if err != nil { 453 return 0, err 454 } 455 defer future.Close() 456 msg, err := future.Get() 457 if err != nil { 458 return 0, err 459 } 460 response, ok := msg.(*RPCResponse) 461 if !ok { 462 panic("unexpected response type") 463 } 464 resp := response.Response 465 defer response.Release() 466 err = toError(ctx, response.Response) 467 if err != nil { 468 return 0, err 469 } 470 return resp.TsoResponse.Value, nil 471 } 472 473 func (c *client) connect(ctx context.Context, mt pb.MethodType) error { 474 _, _, err := c.request(ctx, mt, nil, 0, 0) 475 return err 476 } 477 478 func (c *client) doAppend(ctx context.Context, rec pb.LogRecord) (Lsn, error) { 479 resp, _, err := c.request(ctx, pb.APPEND, rec.Data, 0, 0) 480 if err != nil { 481 return 0, err 482 } 483 return resp.LogResponse.Lsn, nil 484 } 485 486 func (c *client) doRead(ctx context.Context, 487 firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) { 488 resp, recs, err := c.request(ctx, pb.READ, nil, firstLsn, maxSize) 489 if err != nil { 490 return nil, 0, err 491 } 492 return recs, resp.LogResponse.LastLsn, nil 493 } 494 495 func (c *client) doTruncate(ctx context.Context, lsn Lsn) error { 496 _, _, err := c.request(ctx, pb.TRUNCATE, nil, lsn, 0) 497 return err 498 } 499 500 func (c *client) doGetTruncatedLsn(ctx context.Context) (Lsn, error) { 501 resp, _, err := c.request(ctx, pb.GET_TRUNCATE, nil, 0, 0) 502 if err != nil { 503 return 0, err 504 } 505 return resp.LogResponse.Lsn, nil 506 } 507 508 func getRPCClient( 509 ctx context.Context, 510 target string, 511 pool *sync.Pool, 512 maxMessageSize int, 513 enableCompress bool, 514 tag ...string) (morpc.RPCClient, error) { 515 mf := func() morpc.Message { 516 return pool.Get().(*RPCResponse) 517 } 518 519 // construct morpc.BackendOption 520 backendOpts := []morpc.BackendOption{ 521 morpc.WithBackendConnectTimeout(time.Second), 522 morpc.WithBackendHasPayloadResponse(), 523 morpc.WithBackendLogger(logutil.GetGlobalLogger().Named("hakeeper-client-backend")), 524 } 525 backendOpts = append(backendOpts, GetBackendOptions(ctx)...) 526 527 // construct morpc.ClientOption 528 clientOpts := []morpc.ClientOption{ 529 morpc.WithClientInitBackends([]string{target}, []int{1}), 530 morpc.WithClientMaxBackendPerHost(1), 531 morpc.WithClientTag(fmt.Sprintf("hakeeper-client(%s)", tag)), 532 morpc.WithClientLogger(logutil.GetGlobalLogger()), 533 } 534 clientOpts = append(clientOpts, GetClientOptions(ctx)...) 535 536 var codecOpts []morpc.CodecOption 537 codecOpts = append(codecOpts, 538 morpc.WithCodecPayloadCopyBufferSize(defaultWriteSocketSize), 539 morpc.WithCodecEnableChecksum(), 540 morpc.WithCodecMaxBodySize(maxMessageSize)) 541 if enableCompress { 542 mp, err := mpool.NewMPool("log_rpc_client", 0, mpool.NoFixed) 543 if err != nil { 544 return nil, err 545 } 546 codecOpts = append(codecOpts, morpc.WithCodecEnableCompress(mp)) 547 } 548 549 // we set connection timeout to a constant value so if ctx's deadline is much 550 // larger, then we can ensure that all specified potential nodes have a chance 551 // to be attempted 552 codec := morpc.NewMessageCodec(mf, codecOpts...) 553 bf := morpc.NewGoettyBasedBackendFactory(codec, backendOpts...) 554 return morpc.NewClient(bf, clientOpts...) 555 }