github.com/matrixorigin/matrixone@v1.2.0/pkg/proxy/tunnel.go

// Copyright 2021 - 2023 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package proxy

import (
	"context"
	"encoding/binary"
	"errors"
	"io"
	"net"
	"sync"
	"sync/atomic"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/log"
	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/frontend"
	"github.com/matrixorigin/matrixone/pkg/util/errutil"
	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
	"go.uber.org/zap"
)

const (
	// The default transfer timeout is 10s.
	defaultTransferTimeout = time.Second * 10

	connClientName = "client"
	connServerName = "server"

	pipeClientToServer = "c2s"
	pipeServerToClient = "s2c"

	minSequenceID = 0
	maxSequenceID = 255
)

var (
	// errPipeClosed indicates that the pipe has been closed.
	errPipeClosed = moerr.NewInternalErrorNoCtx("pipe has been closed")
)

type tunnelOption func(*tunnel)

func withRebalancer(r *rebalancer) tunnelOption {
	return func(t *tunnel) {
		t.rebalancer = r
	}
}

func withRebalancePolicy(policy RebalancePolicy) tunnelOption {
	return func(t *tunnel) {
		t.rebalancePolicy = policy
	}
}

type transferType int

const (
	transferByRebalance transferType = 0
	transferByScaling   transferType = 1
)

// tunnel is used to forward client messages to the CN server.
type tunnel struct {
	ctx       context.Context
	ctxCancel context.CancelFunc
	logger    *log.MOLogger
	// errC is a channel that carries the tunnel error.
	errC chan error
	// cc is the client connection which this tunnel holds.
	cc ClientConn
	// reqC is the event request channel. Events may happen in the tunnel data
	// flow and need to be handled in the client connection.
	reqC chan IEvent
	// respC is the event response channel.
	respC chan []byte
	// closeOnce ensures that the close function closes the tunnel only once.
	closeOnce sync.Once
	// counterSet counts the events in proxy.
	counterSet *counterSet
	// the global rebalancer.
	rebalancer *rebalancer
	// rebalancePolicy controls how proactively the connection transfer is done.
	rebalancePolicy RebalancePolicy

	transferType transferType

	// transferIntent indicates that a transfer of this tunnel to another
	// server was attempted but was not safe to perform. Setting it to true
	// makes the transfer more proactive.
	// It only works if RebalancePolicy is "active".
	transferIntent atomic.Bool

	mu struct {
		sync.Mutex
		// started indicates that the tunnel has started.
		started bool
		// inTransfer means a transfer of server connection is in progress.
		inTransfer bool

		// clientConn is the connection between client and proxy.
		clientConn *MySQLConn
		// serverConn is the connection between server and proxy.
		serverConn *MySQLConn
		// There are two pipes in a tunnel: client to server and server to client,
		// which control the data flow.
		// csp is a pipe from client to server.
		csp *pipe
		// scp is a pipe from server to client.
		scp *pipe
	}
}

// newTunnel creates a tunnel.
func newTunnel(ctx context.Context, logger *log.MOLogger, cs *counterSet, opts ...tunnelOption) *tunnel {
	ctx, cancel := context.WithCancel(ctx)
	t := &tunnel{
		ctx:       ctx,
		ctxCancel: cancel,
		logger:    logger,
		errC:      make(chan error, 1),
		// We need to handle events synchronously, so this channel has no buffer.
		reqC: make(chan IEvent),
		// The response channel should have a buffer, because it is handled in the
		// same for-select with reqC.
		respC: make(chan []byte, 10),
		// set the counter set.
		counterSet: cs,
	}
	for _, opt := range opts {
		opt(t)
	}
	return t
}

// run starts the tunnel, making the data between client and server flow through it.
func (t *tunnel) run(cc ClientConn, sc ServerConn) error {
	digThrough := func() error {
		t.mu.Lock()
		defer t.mu.Unlock()

		if t.ctx.Err() != nil {
			return t.ctx.Err()
		}
		t.cc = cc
		t.logger = t.logger.With(zap.Uint32("conn ID", cc.ConnID()))
		t.mu.clientConn = newMySQLConn(connClientName, cc.RawConn(), 0, t.reqC, t.respC, cc.ConnID())
		t.mu.serverConn = newMySQLConn(connServerName, sc.RawConn(), 0, t.reqC, t.respC, sc.ConnID())

		// Create the pipes from client to server and server to client.
		t.mu.csp = t.newPipe(pipeClientToServer, t.mu.clientConn, t.mu.serverConn)
		t.mu.scp = t.newPipe(pipeServerToClient, t.mu.serverConn, t.mu.clientConn)

		return nil
	}

	if err := digThrough(); err != nil {
		return moerr.NewInternalErrorNoCtx("set up tunnel failed: %v", err)
	}
	if err := t.kickoff(); err != nil {
		return moerr.NewInternalErrorNoCtx("kickoff pipe failed: %v", err)
	}

	func() {
		t.mu.Lock()
		defer t.mu.Unlock()
		t.mu.started = true
	}()

	return nil
}

// getPipes returns the pipes.
func (t *tunnel) getPipes() (*pipe, *pipe) {
	t.mu.Lock()
	defer t.mu.Unlock()
	return t.mu.csp, t.mu.scp
}

// getConns returns the client connection and server connection.
func (t *tunnel) getConns() (*MySQLConn, *MySQLConn) {
	t.mu.Lock()
	defer t.mu.Unlock()
	return t.mu.clientConn, t.mu.serverConn
}

// setError tries to set the tunnel error if there is no error yet.
func (t *tunnel) setError(err error) {
	select {
	case t.errC <- err:
		_ = t.Close()
	default:
	}
}

// kickoff starts up the tunnel.
func (t *tunnel) kickoff() error {
	csp, scp := t.getPipes()
	go func() {
		if err := csp.kickoff(t.ctx, scp); err != nil {
			v2.ProxyClientDisconnectCounter.Inc()
			t.setError(withCode(err, codeClientDisconnect))
		}
	}()
	go func() {
		if err := scp.kickoff(t.ctx, csp); err != nil {
			v2.ProxyServerDisconnectCounter.Inc()
			t.setError(withCode(err, codeServerDisconnect))
		}
	}()
	if err := csp.waitReady(t.ctx); err != nil {
		return err
	}
	if err := scp.waitReady(t.ctx); err != nil {
		return err
	}
	return nil
}
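
// A rough usage sketch of the tunnel lifecycle. The variables ctx, logger, cs,
// cc and sc are placeholders here; how the ClientConn and ServerConn pair is
// established is outside this file:
//
//	t := newTunnel(ctx, logger, cs)
//	if err := t.run(cc, sc); err != nil {
//		return err
//	}
//	select {
//	case err := <-t.errC: // either side disconnected or a pipe failed
//		_ = t.Close()
//		return err
//	case <-ctx.Done():
//		return t.Close()
//	}
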
// replaceServerConn replaces the CN server connection.
func (t *tunnel) replaceServerConn(newServerConn *MySQLConn, sync bool) {
	t.mu.Lock()
	defer t.mu.Unlock()
	_ = t.mu.serverConn.Close()
	t.mu.serverConn = newServerConn

	if sync {
		t.mu.csp.dst = t.mu.serverConn
		t.mu.scp.src = t.mu.serverConn
	} else {
		t.mu.csp = t.newPipe(pipeClientToServer, t.mu.clientConn, t.mu.serverConn)
		t.mu.scp = t.newPipe(pipeServerToClient, t.mu.serverConn, t.mu.clientConn)
	}
}

// canStartTransfer checks whether the transfer can be started.
func (t *tunnel) canStartTransfer(sync bool) bool {
	t.mu.Lock()
	defer t.mu.Unlock()

	// The tunnel has not started.
	if !t.mu.started {
		return false
	}

	// Another transfer is already in progress.
	if t.mu.inTransfer {
		return false
	}

	csp, scp := t.mu.csp, t.mu.scp
	csp.mu.Lock()
	scp.mu.Lock()
	defer csp.mu.Unlock()
	defer scp.mu.Unlock()

	// The last message must be from server to client.
	if scp.mu.lastCmdTime.Before(csp.mu.lastCmdTime) {
		t.logger.Info("reason: client packet is after server packet")
		return false
	}

	// We are now in a transaction.
	if !scp.safeToTransferLocked() {
		t.logger.Info("reason: txn status is true")
		return false
	}

	// Mark the tunnel as in transfer and pause the pipes directly.
	t.mu.inTransfer = true
	if !sync {
		csp.mu.paused = true
		scp.mu.paused = true
	}

	return true
}

func (t *tunnel) setTransferIntent(i bool) {
	if t.rebalancePolicy == RebalancePolicyPassive &&
		t.getTransferType() == transferByRebalance {
		return
	}
	t.logger.Info("set tunnel transfer intent", zap.Bool("value", i))
	t.transferIntent.Store(i)
	if i {
		v2.ProxyConnectionsTransferIntentGauge.Inc()
	} else {
		v2.ProxyConnectionsTransferIntentGauge.Dec()
	}
}

func (t *tunnel) finishTransfer(start time.Time) {
	t.setTransferIntent(false)
	t.setTransferType(transferByRebalance)
	t.mu.Lock()
	defer t.mu.Unlock()
	t.mu.inTransfer = false
	resume := func(p *pipe) {
		p.mu.Lock()
		defer p.mu.Unlock()
		p.mu.paused = false
	}
	resume(t.mu.scp)
	resume(t.mu.csp)

	t.logger.Info("transfer end")
	duration := time.Since(start)
	if duration > time.Second {
		t.logger.Info("slow transfer for tunnel",
			zap.Duration("transfer duration", duration),
		)
	}
	v2.ProxyTransferDurationHistogram.Observe(time.Since(start).Seconds())
}

func (t *tunnel) doReplaceConnection(ctx context.Context, sync bool) error {
	newConn, err := t.getNewServerConn(ctx)
	if err != nil {
		t.logger.Error("failed to get a new connection", zap.Error(err))
		return err
	}
	t.replaceServerConn(newConn, sync)
	t.counterSet.connMigrationSuccess.Add(1)
	t.logger.Info("transfer to a new CN server",
		zap.String("addr", newConn.RemoteAddr().String()))
	return nil
}
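
// In short, the helpers above back two transfer paths:
//
//   - transfer: driven from outside the pipes (rebalancing or scaling). It
//     pauses both pipes, replaces the server connection with freshly built
//     pipes (sync=false), then kicks the pipes off again.
//   - transferSync: driven from inside the server-to-client pipe once a
//     pending transfer intent becomes safe, i.e. the session has left its
//     transaction. The running pipes are kept and only their src/dst
//     endpoints are swapped (sync=true).
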
// transfer transfers the serverConn of the tunnel to a new one.
func (t *tunnel) transfer(ctx context.Context) error {
	t.counterSet.connMigrationRequested.Add(1)
	// Must check if it is safe to start the transfer.
	if ok := t.canStartTransfer(false); !ok {
		t.logger.Info("cannot start transfer safely")
		t.setTransferIntent(true)
		t.counterSet.connMigrationCannotStart.Add(1)
		return moerr.GetOkExpectedNotSafeToStartTransfer()
	}

	start := time.Now()
	defer t.finishTransfer(start)
	t.logger.Info("transfer begin")

	ctx, cancel := context.WithTimeout(ctx, defaultTransferTimeout)
	defer cancel()

	csp, scp := t.getPipes()
	// Pause pipes before the transfer.
	if err := csp.pause(ctx); err != nil {
		v2.ProxyTransferFailCounter.Inc()
		return err
	}
	if err := scp.pause(ctx); err != nil {
		v2.ProxyTransferFailCounter.Inc()
		return err
	}
	if err := t.doReplaceConnection(ctx, false); err != nil {
		v2.ProxyTransferFailCounter.Inc()
		t.logger.Error("failed to replace connection", zap.Error(err))
	}
	// Restart the pipes even if an error happened in the last step.
	if err := t.kickoff(); err != nil {
		t.logger.Error("failed to kickoff tunnel", zap.Error(err))
		_ = t.Close()
	} else {
		v2.ProxyTransferSuccessCounter.Inc()
	}
	return nil
}

func (t *tunnel) transferSync(ctx context.Context) error {
	// Must check if it is safe to start the transfer.
	if ok := t.canStartTransfer(true); !ok {
		return moerr.GetOkExpectedNotSafeToStartTransfer()
	}
	start := time.Now()
	defer t.finishTransfer(start)
	t.logger.Info("transfer begin")
	ctx, cancel := context.WithTimeout(ctx, defaultTransferTimeout)
	defer cancel()
	if err := t.doReplaceConnection(ctx, true); err != nil {
		v2.ProxyTransferFailCounter.Inc()
		return err
	}
	v2.ProxyTransferSuccessCounter.Inc()
	return nil
}

// getNewServerConn selects a new CN server, connects to it and then
// returns the new connection.
func (t *tunnel) getNewServerConn(ctx context.Context) (*MySQLConn, error) {
	if ctx.Err() != nil {
		return nil, ctx.Err()
	}
	newConn, err := t.cc.BuildConnWithServer(t.mu.serverConn.RemoteAddr().String())
	if err != nil {
		return nil, err
	}
	return newMySQLConn(connServerName, newConn.RawConn(), 0, t.reqC, t.respC, newConn.ConnID()), nil
}

func (t *tunnel) getTransferType() transferType {
	return t.transferType
}

func (t *tunnel) setTransferType(typ transferType) {
	t.transferType = typ
}

// Close closes the tunnel.
func (t *tunnel) Close() error {
	t.closeOnce.Do(func() {
		if t.ctxCancel != nil {
			t.ctxCancel()
		}
		// Close the event channels.
		close(t.reqC)
		close(t.respC)

		cc, sc := t.getConns()
		if cc != nil {
			_ = cc.Close()
		}
		if sc != nil {
			_ = sc.Close()
		}
	})
	return nil
}
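
// A tunnel owns two pipes, one per direction, each copying MySQL packets
// between the wrapped connections:
//
//	client --(clientConn)--> csp --(serverConn)--> CN server
//	client <--(clientConn)-- scp <--(serverConn)-- CN server
//
// Only the server-to-client pipe tracks the transaction status of the session,
// since that is the direction in which the OK/EOF packets carrying the status
// flags arrive.
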
// pipe must be created through newPipe.
type pipe struct {
	name   string
	logger *log.MOLogger

	// source connection and destination connection wrapped
	// by a message buffer.
	src *MySQLConn
	dst *MySQLConn

	// this value does not need to be protected by the mutex as it is read
	// and written in a single goroutine.
	transferred bool

	mu struct {
		sync.Mutex
		// cond is used to control the pause of the pipe.
		cond *sync.Cond
		// closed indicates that the pipe is closed.
		closed bool
		// started indicates that the pipe has started.
		started bool
		// inPreRecv indicates that the pipe is in the preRecv phase.
		inPreRecv bool
		// paused indicates that the pipe is paused to do transfer.
		paused bool
		// inTxn indicates whether the session is in a txn. It only
		// matters for the server end.
		inTxn bool
		// Track last cmd time and whether we are in a transaction.
		lastCmdTime time.Time
	}

	// tun is the tunnel that the pipe belongs to.
	tun *tunnel

	wg sync.WaitGroup

	testHelper struct {
		beforeSend func()
	}
}

// newPipe creates a pipe.
func (t *tunnel) newPipe(name string, src, dst *MySQLConn) *pipe {
	p := &pipe{
		name:   name,
		logger: t.logger.With(zap.String("pipe-direction", name)),
		src:    src,
		dst:    dst,
		tun:    t,
	}
	p.mu.cond = sync.NewCond(&p.mu)
	return p
}

// kickoff starts up the pipe so that data flows through it.
func (p *pipe) kickoff(ctx context.Context, peer *pipe) (e error) {
	start := func() (bool, error) {
		p.mu.Lock()
		defer p.mu.Unlock()
		if p.mu.closed {
			return false, errPipeClosed
		}
		if p.mu.started {
			return true, nil
		}
		p.mu.started = true
		p.mu.cond.Broadcast()
		return false, nil
	}
	finish := func() {
		p.mu.Lock()
		defer p.mu.Unlock()
		if e != nil {
			p.mu.closed = true
		}
		p.mu.started = false
		p.mu.cond.Broadcast()
	}
	var lastSeq int16 = -1
	var rotated bool
	prepareNextMessage := func() (terminate bool, err error) {
		if terminate := func() bool {
			p.mu.Lock()
			defer p.mu.Unlock()
			// pipe is paused to begin a connection transfer.
			if p.mu.paused {
				return true
			}
			p.mu.inPreRecv = true
			return false
		}(); terminate {
			return true, nil
		}
		_, re := p.src.preRecv()
		p.mu.Lock()
		defer p.mu.Unlock()
		p.mu.inPreRecv = false

		var netErr net.Error
		if p.mu.paused && re == nil {
			return true, nil
		} else if p.mu.paused && errors.As(re, &netErr) && netErr.Timeout() {
			// The preRecv was cut off by setting the connection deadline to a time in the past.
			return true, nil
		} else if re != nil {
			if errors.Is(re, io.EOF) {
				return false, re
			}
			return false, moerr.NewInternalError(errutil.ContextWithNoReport(ctx, true),
				"preRecv message: %s, name %s", re.Error(), p.name)
		}
		// set txn status and cmd time within the mutex together.
		// only the server->client pipe needs to set the txn status.
		if p.name == pipeServerToClient {
			var currSeq int16
			buf := p.src.readAvailBuf()

			// issue#16042
			if len(buf) > 3 {
				currSeq = int16(buf[3])
			}

			// The last sequence ID is 255 and the current sequence ID is 0, so the
			// sequence ID has rotated, in which case we do NOT allow the migration
			// to happen.
			if currSeq == minSequenceID && lastSeq == maxSequenceID {
				rotated = true
			}

			// the server has started a new response, so reset rotated.
			if rotated && currSeq != minSequenceID && currSeq < lastSeq {
				rotated = false
			}

			p.mu.inTxn = checkTxnStatus(buf)
			if !p.mu.inTxn && p.tun.transferIntent.Load() && !rotated {
				peer.wg.Add(1)
				p.transferred = true
			} else {
				p.transferred = false
			}
			if len(buf) > 3 {
				lastSeq = int16(buf[3])
			}
		}
		p.mu.lastCmdTime = time.Now()
		return false, nil
	}
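
	// Example of the rotation guard above: a large response is split into
	// packets whose sequence IDs run 0, 1, ..., 254, 255, 0, 1, ... Once the
	// wrap from 255 to 0 has been seen, rotated stays true, so even if the
	// status flags say the session is not in a transaction, the transfer is
	// postponed until the server starts a new response; otherwise the server
	// connection could be swapped in the middle of a multi-packet response.
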
	started, err := start()
	// If this pipe has already started, return nil directly.
	if started {
		return nil
	}
	if err != nil {
		return err
	}
	defer finish()

	for ctx.Err() == nil {
		if p.name == pipeServerToClient && p.transferred {
			if err := p.handleTransferIntent(ctx, &peer.wg); err != nil {
				p.logger.Error("failed to transfer connection", zap.Error(err))
			}
		}
		if terminate, err := prepareNextMessage(); err != nil || terminate {
			return err
		}
		if p.testHelper.beforeSend != nil {
			p.testHelper.beforeSend()
		}
		// If the server is in transfer, we wait here until the transfer is finished.
		p.wg.Wait()

		if err = p.src.sendTo(p.dst); err != nil {
			return moerr.NewInternalErrorNoCtx("send message error: %v", err)
		}
	}
	return ctx.Err()
}

func (p *pipe) handleTransferIntent(ctx context.Context, wg *sync.WaitGroup) error {
	// If the session is not in a txn and the transfer intent is true, transfer it synchronously.
	if p.tun != nil && p.safeToTransfer() {
		err := p.tun.transferSync(ctx)
		wg.Done()
		return err
	}
	return nil
}

// waitReady waits until the pipe starts up.
func (p *pipe) waitReady(ctx context.Context) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	for !p.mu.started {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if p.mu.closed {
			return errPipeClosed
		}
		p.mu.cond.Wait()
	}
	return nil
}

// pause sets paused to true and waits for the pipe to finish, then
// sets paused to false again. While paused, the pipe stops so that the
// server connection can be transferred to a new one, after which the
// pipe is started again.
func (p *pipe) pause(ctx context.Context) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.mu.closed {
		return errPipeClosed
	}
	defer func() {
		if p.mu.paused {
			p.mu.paused = false
			// Recover the deadline time.
			_ = p.src.SetReadDeadline(time.Time{})
		}
	}()

	for p.mu.started {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		p.mu.paused = true
		// If the net connection is waiting for messages, we force it to terminate by
		// setting the deadline.
		if p.mu.inPreRecv {
			if err := p.src.SetReadDeadline(time.Unix(1, 0)); err != nil {
				return err
			}
		}
		p.mu.cond.Wait()
	}
	return nil
}

// safeToTransfer indicates whether it is safe to transfer the session.
// NB: the pipe MUST be the server-to-client pipe.
func (p *pipe) safeToTransfer() bool {
	p.mu.Lock()
	defer p.mu.Unlock()
	return !p.mu.inTxn
}

func (p *pipe) safeToTransferLocked() bool {
	return !p.mu.inTxn
}

// txnStatus returns whether the session is within a transaction.
// We first consider it to be true and clear it only if all of the
// following hold:
//  1. SERVER_STATUS_IN_TRANS is not set
//  2. SERVER_QUERY_WAS_SLOW and SERVER_STATUS_NO_GOOD_INDEX_USED are both set
func txnStatus(status uint16) bool {
	// assume it is in txn by priority.
	v := true
	if status&frontend.SERVER_QUERY_WAS_SLOW != 0 &&
		status&frontend.SERVER_STATUS_NO_GOOD_INDEX_USED != 0 &&
		status&frontend.SERVER_STATUS_IN_TRANS == 0 {
		v = false
	}
	return v
}
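
// For example, a status word that has both SERVER_QUERY_WAS_SLOW and
// SERVER_STATUS_NO_GOOD_INDEX_USED set while SERVER_STATUS_IN_TRANS is clear
// makes txnStatus return false, i.e. the response is considered safe for a
// transfer; any other combination keeps the conservative answer true:
//
//	txnStatus(frontend.SERVER_QUERY_WAS_SLOW | frontend.SERVER_STATUS_NO_GOOD_INDEX_USED) // false
//	txnStatus(frontend.SERVER_STATUS_IN_TRANS)                                            // true
//	txnStatus(0)                                                                          // true
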
// handleOKPacket handles the OK packet from the server to update the txn state.
// After the 4-byte packet header, an OK packet carries a 0x00 marker byte, a
// length-encoded affected-rows count, a length-encoded last-insert-id and then
// the 2-byte status flags we are after.
func handleOKPacket(msg []byte) bool {
	var mp *frontend.MysqlProtocolImpl
	// the sequence ID should be 1 for an OK packet.
	if msg[3] != 1 {
		return txnStatus(0)
	}
	pos := 5
	_, pos, ok := mp.ReadIntLenEnc(msg, pos)
	if !ok {
		return txnStatus(0)
	}
	_, pos, ok = mp.ReadIntLenEnc(msg, pos)
	if !ok {
		return txnStatus(0)
	}
	if len(msg[pos:]) < 2 {
		return txnStatus(0)
	}
	status := binary.LittleEndian.Uint16(msg[pos:])
	return txnStatus(status)
}

// handleEOFPacket handles the EOF packet from the server to update the txn state.
// Here the status flags sit at a fixed offset: 4-byte header, 0xFE marker and
// a 2-byte warning count, so the flags start at byte 7.
func handleEOFPacket(msg []byte) bool {
	if len(msg) < 9 {
		return txnStatus(0)
	}
	return txnStatus(binary.LittleEndian.Uint16(msg[7:]))
}

func checkTxnStatus(msg []byte) bool {
	inTxn := true
	// For the server->client pipe, we get the transaction status from the
	// OK and EOF packets, which is used in connection transfer. If the session
	// is in a transaction, a transfer should not start.
	if isOKPacket(msg) {
		inTxn = handleOKPacket(msg)
	} else if isEOFPacket(msg) {
		inTxn = handleEOFPacket(msg)
	}
	return inTxn
}