github.com/matrixorigin/matrixone@v1.2.0/pkg/common/morpc/server.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package morpc 16 17 import ( 18 "context" 19 "fmt" 20 "sync" 21 "time" 22 23 "github.com/fagongzi/goetty/v2" 24 "github.com/matrixorigin/matrixone/pkg/common/moerr" 25 "github.com/matrixorigin/matrixone/pkg/common/stopper" 26 "github.com/matrixorigin/matrixone/pkg/logutil" 27 "go.uber.org/zap" 28 ) 29 30 // WithServerLogger set rpc server logger 31 func WithServerLogger(logger *zap.Logger) ServerOption { 32 return func(rs *server) { 33 rs.logger = logger 34 } 35 } 36 37 // WithServerSessionBufferSize set the buffer size of the write response chan. 38 // Default is 16. 39 func WithServerSessionBufferSize(size int) ServerOption { 40 return func(s *server) { 41 s.options.bufferSize = size 42 } 43 } 44 45 // WithServerWriteFilter set write filter func. Input ready to send Messages, output 46 // is really need to be send Messages. 47 func WithServerWriteFilter(filter func(Message) bool) ServerOption { 48 return func(s *server) { 49 s.options.filter = filter 50 } 51 } 52 53 // WithServerGoettyOptions set write filter func. Input ready to send Messages, output 54 // is really need to be send Messages. 55 func WithServerGoettyOptions(options ...goetty.Option) ServerOption { 56 return func(s *server) { 57 s.options.goettyOptions = options 58 } 59 } 60 61 // WithServerBatchSendSize set the maximum number of messages to be sent together 62 // at each batch. Default is 8. 63 func WithServerBatchSendSize(size int) ServerOption { 64 return func(s *server) { 65 s.options.batchSendSize = size 66 } 67 } 68 69 // WithServerDisableAutoCancelContext disable automatic cancel messaging for the context. 70 // The server will receive RPC messages from the client, each message comes with a Context, 71 // and morpc will call the handler to process it, and when the handler returns, the Context 72 // will be auto cancel the context. But in some scenarios, the handler is asynchronous, 73 // so morpc can't directly cancel the context after the handler returns, otherwise many strange 74 // problems will occur. 75 func WithServerDisableAutoCancelContext() ServerOption { 76 return func(s *server) { 77 s.options.disableAutoCancelContext = true 78 } 79 } 80 81 type server struct { 82 name string 83 metrics *serverMetrics 84 address string 85 logger *zap.Logger 86 codec Codec 87 application goetty.NetApplication 88 stopper *stopper.Stopper 89 handler func(ctx context.Context, request RPCMessage, sequence uint64, cs ClientSession) error 90 sessions *sync.Map // session-id => *clientSession 91 options struct { 92 goettyOptions []goetty.Option 93 bufferSize int 94 batchSendSize int 95 filter func(Message) bool 96 disableAutoCancelContext bool 97 } 98 pool struct { 99 futures *sync.Pool 100 } 101 } 102 103 // NewRPCServer create rpc server with options. After the rpc server starts, one link corresponds to two 104 // goroutines, one read and one write. All messages to be written are first written to a buffer chan and 105 // sent to the client by the write goroutine. 106 func NewRPCServer( 107 name, address string, 108 codec Codec, 109 options ...ServerOption) (RPCServer, error) { 110 s := &server{ 111 name: name, 112 metrics: newServerMetrics(name), 113 address: address, 114 codec: codec, 115 stopper: stopper.NewStopper(name), 116 sessions: &sync.Map{}, 117 } 118 for _, opt := range options { 119 opt(s) 120 } 121 s.adjust() 122 123 s.options.goettyOptions = append(s.options.goettyOptions, 124 goetty.WithSessionCodec(codec), 125 goetty.WithSessionLogger(s.logger)) 126 127 app, err := goetty.NewApplication( 128 s.address, 129 s.onMessage, 130 goetty.WithAppLogger(s.logger), 131 goetty.WithAppSessionOptions(s.options.goettyOptions...), 132 ) 133 if err != nil { 134 s.logger.Error("create rpc server failed", 135 zap.Error(err)) 136 return nil, err 137 } 138 s.application = app 139 s.pool.futures = &sync.Pool{ 140 New: func() interface{} { 141 return newFuture(s.releaseFuture) 142 }, 143 } 144 if err := s.stopper.RunTask(s.closeDisconnectedSession); err != nil { 145 panic(err) 146 } 147 return s, nil 148 } 149 150 func (s *server) Start() error { 151 err := s.application.Start() 152 if err != nil { 153 s.logger.Fatal("start rpc server failed", 154 zap.Error(err)) 155 return err 156 } 157 return nil 158 } 159 160 func (s *server) Close() error { 161 s.stopper.Stop() 162 err := s.application.Stop() 163 if err != nil { 164 s.logger.Error("stop rpc server failed", 165 zap.Error(err)) 166 } 167 168 return err 169 } 170 171 func (s *server) RegisterRequestHandler(handler func( 172 ctx context.Context, 173 request RPCMessage, 174 sequence uint64, 175 cs ClientSession) error) { 176 s.handler = handler 177 } 178 179 func (s *server) adjust() { 180 s.logger = logutil.Adjust(s.logger).With(zap.String("name", s.name)) 181 if s.options.batchSendSize == 0 { 182 s.options.batchSendSize = 8 183 } 184 if s.options.bufferSize == 0 { 185 s.options.bufferSize = 16 186 } 187 if s.options.filter == nil { 188 s.options.filter = func(messages Message) bool { 189 return true 190 } 191 } 192 } 193 194 func (s *server) onMessage(rs goetty.IOSession, value any, sequence uint64) error { 195 s.metrics.receiveCounter.Inc() 196 197 cs, err := s.getSession(rs) 198 if err != nil { 199 return err 200 } 201 request := value.(RPCMessage) 202 s.metrics.inputBytesCounter.Add(float64(request.Message.Size())) 203 if ce := s.logger.Check(zap.DebugLevel, "received request"); ce != nil { 204 ce.Write(zap.Uint64("sequence", sequence), 205 zap.String("client", rs.RemoteAddress()), 206 zap.Uint64("request-id", request.Message.GetID()), 207 zap.String("request", request.Message.DebugString())) 208 } 209 210 // Can't be sure that the Context is properly consumed if disableAutoCancelContext is set to 211 // true. So we use the pessimistic wait for the context to time out automatically be canceled 212 // behavior here, which may cause some resources to be released more slowly. 213 // FIXME: Use the CancelFunc pass to let the handler decide to cancel itself 214 if !s.options.disableAutoCancelContext && request.Cancel != nil { 215 defer request.Cancel() 216 } 217 // get requestID here to avoid data race, because the request maybe released in handler 218 requestID := request.Message.GetID() 219 220 if request.stream && 221 !cs.validateStreamRequest(requestID, request.streamSequence) { 222 s.logger.Error("failed to handle stream request", 223 zap.Uint32("last-sequence", cs.receivedStreamSequences[requestID]), 224 zap.Uint32("current-sequence", request.streamSequence), 225 zap.String("client", rs.RemoteAddress())) 226 cs.cancelWrite() 227 return moerr.NewStreamClosedNoCtx() 228 } 229 230 // handle internal message 231 if request.internal { 232 if m, ok := request.Message.(*flagOnlyMessage); ok { 233 switch m.flag { 234 case flagPing: 235 sendAt := time.Now() 236 n := len(cs.c) 237 err := cs.WriteRPCMessage(RPCMessage{ 238 Ctx: request.Ctx, 239 internal: true, 240 Message: &flagOnlyMessage{ 241 flag: flagPong, 242 id: m.id, 243 }, 244 }) 245 if err != nil { 246 failedAt := time.Now() 247 s.logger.Error("handle ping failed", 248 zap.Time("sendAt", sendAt), 249 zap.Time("failedAt", failedAt), 250 zap.Int("queue-size", n), 251 zap.Error(err)) 252 } 253 return nil 254 default: 255 panic(fmt.Sprintf("invalid internal message, flag %d", m.flag)) 256 } 257 } 258 } 259 260 if err := s.handler(request.Ctx, request, sequence, cs); err != nil { 261 s.logger.Error("handle request failed", 262 zap.Uint64("sequence", sequence), 263 zap.String("client", rs.RemoteAddress()), 264 zap.Error(err)) 265 cs.cancelWrite() 266 return err 267 } 268 269 if ce := s.logger.Check(zap.DebugLevel, "handle request completed"); ce != nil { 270 ce.Write(zap.Uint64("sequence", sequence), 271 zap.String("client", rs.RemoteAddress()), 272 zap.Uint64("request-id", requestID)) 273 } 274 return nil 275 } 276 277 func (s *server) startWriteLoop(cs *clientSession) error { 278 return s.stopper.RunTask(func(ctx context.Context) { 279 defer s.closeClientSession(cs) 280 281 responses := make([]*Future, 0, s.options.batchSendSize) 282 needClose := make([]*Future, 0, s.options.batchSendSize) 283 fetch := func() { 284 defer func() { 285 cs.metrics.sendingQueueSizeGauge.Set(float64(len(cs.c))) 286 }() 287 288 for i := 0; i < len(responses); i++ { 289 responses[i] = nil 290 } 291 for i := 0; i < len(needClose); i++ { 292 needClose[i] = nil 293 } 294 responses = responses[:0] 295 needClose = needClose[:0] 296 297 for i := 0; i < s.options.batchSendSize; i++ { 298 if len(responses) == 0 { 299 select { 300 case <-ctx.Done(): 301 responses = nil 302 return 303 case <-cs.ctx.Done(): 304 responses = nil 305 return 306 case resp, ok := <-cs.c: 307 if ok { 308 responses = append(responses, resp) 309 } 310 } 311 } else { 312 select { 313 case <-ctx.Done(): 314 return 315 case <-cs.ctx.Done(): 316 return 317 case resp, ok := <-cs.c: 318 if ok { 319 responses = append(responses, resp) 320 } 321 default: 322 return 323 } 324 } 325 } 326 } 327 328 for { 329 select { 330 case <-ctx.Done(): 331 return 332 case <-cs.ctx.Done(): 333 return 334 default: 335 fetch() 336 337 if len(responses) > 0 { 338 s.metrics.sendingBatchSizeGauge.Set(float64(len(responses))) 339 340 start := time.Now() 341 342 var fields []zap.Field 343 ce := s.logger.Check(zap.DebugLevel, "write responses") 344 if ce != nil { 345 fields = append(fields, zap.String("client", cs.conn.RemoteAddress())) 346 } 347 348 written := responses[:0] 349 timeout := time.Duration(0) 350 for _, f := range responses { 351 s.metrics.writeLatencyDurationHistogram.Observe(start.Sub(f.send.createAt).Seconds()) 352 if f.oneWay { 353 needClose = append(needClose, f) 354 } 355 356 if !s.options.filter(f.send.Message) { 357 f.messageSent(messageSkipped) 358 continue 359 } 360 361 if f.send.Timeout() { 362 f.messageSent(f.send.Ctx.Err()) 363 continue 364 } 365 366 v, err := f.send.GetTimeoutFromContext() 367 if err != nil { 368 f.messageSent(err) 369 continue 370 } 371 372 timeout += v 373 // Record the information of some responses in advance, because after flush, 374 // these responses will be released, thus avoiding causing data race. 375 if ce != nil { 376 fields = append(fields, zap.Uint64("request-id", 377 f.send.Message.GetID())) 378 fields = append(fields, zap.String("response", 379 f.send.Message.DebugString())) 380 } 381 if err := cs.conn.Write(f.send, goetty.WriteOptions{}); err != nil { 382 s.logger.Error("write response failed", 383 zap.Uint64("request-id", f.send.Message.GetID()), 384 zap.Error(err)) 385 f.messageSent(err) 386 return 387 } 388 written = append(written, f) 389 } 390 391 if len(written) > 0 { 392 s.metrics.outputBytesCounter.Add(float64(cs.conn.OutBuf().Readable())) 393 err := cs.conn.Flush(timeout) 394 if err != nil { 395 if ce != nil { 396 fields = append(fields, zap.Error(err)) 397 } 398 for _, f := range responses { 399 if s.options.filter(f.send.Message) { 400 id := f.getSendMessageID() 401 s.logger.Error("write response failed", 402 zap.Uint64("request-id", id), 403 zap.Error(err)) 404 f.messageSent(err) 405 } 406 } 407 } 408 if ce != nil { 409 ce.Write(fields...) 410 } 411 if err != nil { 412 return 413 } 414 } 415 416 for _, f := range written { 417 f.messageSent(nil) 418 } 419 for _, f := range needClose { 420 f.Close() 421 } 422 423 s.metrics.writeDurationHistogram.Observe(time.Since(start).Seconds()) 424 } 425 } 426 } 427 }) 428 } 429 430 func (s *server) closeClientSession(cs *clientSession) { 431 s.sessions.Delete(cs.conn.ID()) 432 s.metrics.sessionSizeGauge.Set(float64(s.getSessionCount())) 433 if err := cs.Close(); err != nil { 434 s.logger.Error("close client session failed", 435 zap.Error(err)) 436 } 437 } 438 439 func (s *server) getSession(rs goetty.IOSession) (*clientSession, error) { 440 if v, ok := s.sessions.Load(rs.ID()); ok { 441 return v.(*clientSession), nil 442 } 443 444 cs := newClientSession(s.metrics, rs, s.codec, s.newFuture) 445 v, loaded := s.sessions.LoadOrStore(rs.ID(), cs) 446 if loaded { 447 close(cs.c) 448 return v.(*clientSession), nil 449 } 450 451 s.metrics.sessionSizeGauge.Set(float64(s.getSessionCount())) 452 rs.Ref() 453 if err := s.startWriteLoop(cs); err != nil { 454 s.closeClientSession(cs) 455 return nil, err 456 } 457 return cs, nil 458 } 459 460 func (s *server) releaseFuture(f *Future) { 461 f.reset() 462 s.pool.futures.Put(f) 463 } 464 465 func (s *server) newFuture() *Future { 466 return s.pool.futures.Get().(*Future) 467 } 468 469 func (s *server) closeDisconnectedSession(ctx context.Context) { 470 // TODO(fagongzi): modify goetty to support connection event 471 timer := time.NewTicker(time.Second * 10) 472 defer timer.Stop() 473 for { 474 select { 475 case <-ctx.Done(): 476 return 477 case <-timer.C: 478 s.sessions.Range(func(key, value any) bool { 479 id := key.(uint64) 480 rs, err := s.application.GetSession(id) 481 if err == nil && rs == nil { 482 s.closeClientSession(value.(*clientSession)) 483 } 484 return true 485 }) 486 } 487 } 488 } 489 490 func (s *server) getSessionCount() int { 491 n := 0 492 s.sessions.Range(func(key, value any) bool { 493 n++ 494 return true 495 }) 496 return n 497 } 498 499 type clientSession struct { 500 metrics *serverMetrics 501 codec Codec 502 conn goetty.IOSession 503 c chan *Future 504 newFutureFunc func() *Future 505 // streaming id -> last received sequence, no concurrent, access in io goroutine 506 receivedStreamSequences map[uint64]uint32 507 // streaming id -> last sent sequence, multi-stream access in multi-goroutines if 508 // the tcp connection is shared. But no concurrent in one stream. 509 sentStreamSequences sync.Map 510 cancel context.CancelFunc 511 ctx context.Context 512 checkTimeoutCacheOnce sync.Once 513 closedC chan struct{} 514 mu struct { 515 sync.RWMutex 516 closed bool 517 caches map[uint64]cacheWithContext 518 } 519 } 520 521 func newClientSession( 522 metrics *serverMetrics, 523 conn goetty.IOSession, 524 codec Codec, 525 newFutureFunc func() *Future) *clientSession { 526 ctx, cancel := context.WithCancel(context.Background()) 527 cs := &clientSession{ 528 metrics: metrics, 529 closedC: make(chan struct{}), 530 codec: codec, 531 c: make(chan *Future, 1024), 532 receivedStreamSequences: make(map[uint64]uint32), 533 conn: conn, 534 ctx: ctx, 535 cancel: cancel, 536 newFutureFunc: newFutureFunc, 537 } 538 cs.mu.caches = make(map[uint64]cacheWithContext) 539 return cs 540 } 541 542 func (cs *clientSession) RemoteAddress() string { 543 return cs.conn.RemoteAddress() 544 } 545 546 func (cs *clientSession) Close() error { 547 cs.mu.Lock() 548 defer cs.mu.Unlock() 549 if cs.mu.closed { 550 return nil 551 } 552 close(cs.closedC) 553 cs.cleanSend() 554 close(cs.c) 555 cs.mu.closed = true 556 for _, c := range cs.mu.caches { 557 c.cache.Close() 558 } 559 cs.mu.caches = nil 560 cs.cancelWrite() 561 return cs.conn.Close() 562 } 563 564 func (cs *clientSession) cleanSend() { 565 for { 566 select { 567 case f, ok := <-cs.c: 568 if !ok { 569 return 570 } 571 f.messageSent(backendClosed) 572 default: 573 return 574 } 575 } 576 } 577 578 func (cs *clientSession) WriteRPCMessage(msg RPCMessage) error { 579 f, err := cs.send(msg) 580 if err != nil { 581 return err 582 } 583 defer f.Close() 584 585 // stream only wait send completed 586 return f.waitSendCompleted() 587 } 588 589 func (cs *clientSession) Write( 590 ctx context.Context, 591 response Message) error { 592 if ctx == nil { 593 panic("Write nil context") 594 } 595 return cs.WriteRPCMessage(RPCMessage{ 596 Ctx: ctx, 597 Message: response, 598 }) 599 } 600 601 func (cs *clientSession) AsyncWrite(response Message) error { 602 _, err := cs.send(RPCMessage{ 603 Ctx: context.Background(), 604 Message: response, 605 oneWay: true, 606 }) 607 return err 608 } 609 610 func (cs *clientSession) send(msg RPCMessage) (*Future, error) { 611 cs.metrics.sendCounter.Inc() 612 613 response := msg.Message 614 if err := cs.codec.Valid(response); err != nil { 615 return nil, err 616 } 617 618 cs.mu.RLock() 619 defer cs.mu.RUnlock() 620 621 if cs.mu.closed { 622 return nil, moerr.NewClientClosedNoCtx() 623 } 624 625 id := response.GetID() 626 if v, ok := cs.sentStreamSequences.Load(id); ok { 627 seq := v.(uint32) + 1 628 cs.sentStreamSequences.Store(id, seq) 629 msg.stream = true 630 msg.streamSequence = seq 631 } 632 633 f := cs.newFutureFunc() 634 f.init(msg) 635 if !f.oneWay { 636 f.ref() 637 } 638 cs.c <- f 639 cs.metrics.sendingQueueSizeGauge.Set(float64(len(cs.c))) 640 return f, nil 641 } 642 643 func (cs *clientSession) startCheckCacheTimeout() { 644 cs.checkTimeoutCacheOnce.Do(cs.checkCacheTimeout) 645 } 646 647 func (cs *clientSession) checkCacheTimeout() { 648 go func() { 649 timer := time.NewTimer(time.Second) 650 defer timer.Stop() 651 for { 652 select { 653 case <-cs.closedC: 654 return 655 case <-timer.C: 656 cs.mu.Lock() 657 for k, c := range cs.mu.caches { 658 if c.closeIfTimeout() { 659 delete(cs.mu.caches, k) 660 } 661 } 662 cs.mu.Unlock() 663 timer.Reset(time.Second) 664 } 665 } 666 }() 667 } 668 669 func (cs *clientSession) cancelWrite() { 670 cs.cancel() 671 } 672 673 func (cs *clientSession) validateStreamRequest( 674 id uint64, 675 sequence uint32) bool { 676 expectSequence := cs.receivedStreamSequences[id] + 1 677 if sequence != expectSequence { 678 return false 679 } 680 cs.receivedStreamSequences[id] = sequence 681 if sequence == 1 { 682 cs.sentStreamSequences.Store(id, uint32(0)) 683 } 684 return true 685 } 686 687 func (cs *clientSession) CreateCache( 688 ctx context.Context, 689 cacheID uint64) (MessageCache, error) { 690 cs.mu.Lock() 691 defer cs.mu.Unlock() 692 693 if cs.mu.closed { 694 return nil, moerr.NewClientClosedNoCtx() 695 } 696 697 v, ok := cs.mu.caches[cacheID] 698 if !ok { 699 v = cacheWithContext{ctx: ctx, cache: newCache()} 700 cs.mu.caches[cacheID] = v 701 cs.startCheckCacheTimeout() 702 } 703 return v.cache, nil 704 } 705 706 func (cs *clientSession) DeleteCache(cacheID uint64) { 707 cs.mu.Lock() 708 defer cs.mu.Unlock() 709 710 if cs.mu.closed { 711 return 712 } 713 if c, ok := cs.mu.caches[cacheID]; ok { 714 c.cache.Close() 715 delete(cs.mu.caches, cacheID) 716 } 717 } 718 719 func (cs *clientSession) GetCache(cacheID uint64) (MessageCache, error) { 720 cs.mu.RLock() 721 defer cs.mu.RUnlock() 722 723 if cs.mu.closed { 724 return nil, moerr.NewClientClosedNoCtx() 725 } 726 727 if c, ok := cs.mu.caches[cacheID]; ok { 728 return c.cache, nil 729 } 730 return nil, nil 731 } 732 733 type cacheWithContext struct { 734 ctx context.Context 735 cache MessageCache 736 } 737 738 func (c cacheWithContext) closeIfTimeout() bool { 739 select { 740 case <-c.ctx.Done(): 741 return true 742 default: 743 return false 744 } 745 }