github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/tunnel.go (about) 1 /* 2 * Copyright (c) 2015, Psiphon Inc. 3 * All rights reserved. 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package psiphon 21 22 import ( 23 "bytes" 24 "context" 25 "crypto/rand" 26 "encoding/base64" 27 "encoding/json" 28 std_errors "errors" 29 "fmt" 30 "io" 31 "io/ioutil" 32 "net" 33 "net/http" 34 "sync" 35 "sync/atomic" 36 "time" 37 38 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common" 39 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/crypto/ssh" 40 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors" 41 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/obfuscator" 42 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters" 43 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng" 44 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol" 45 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/quic" 46 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/refraction" 47 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/tactics" 48 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/transferstats" 49 ) 50 51 // Tunneler specifies the interface required by components that use tunnels. 52 type Tunneler interface { 53 54 // Dial creates a tunneled connection. 55 // 56 // When split tunnel mode is enabled, the connection may be untunneled, 57 // depending on GeoIP classification of the destination. 58 // 59 // downstreamConn is an optional parameter which specifies a connection to be 60 // explicitly closed when the Dialed connection is closed. For instance, this 61 // is used to close downstreamConn App<->LocalProxy connections when the related 62 // LocalProxy<->SshPortForward connections close. 63 Dial(remoteAddr string, downstreamConn net.Conn) (conn net.Conn, err error) 64 65 DirectDial(remoteAddr string) (conn net.Conn, err error) 66 67 SignalComponentFailure() 68 } 69 70 // TunnelOwner specifies the interface required by Tunnel to notify its 71 // owner when it has failed. The owner may, as in the case of the Controller, 72 // remove the tunnel from its list of active tunnels. 73 type TunnelOwner interface { 74 SignalSeededNewSLOK() 75 SignalTunnelFailure(tunnel *Tunnel) 76 } 77 78 // Tunnel is a connection to a Psiphon server. An established 79 // tunnel includes a network connection to the specified server 80 // and an SSH session built on top of that transport. 81 type Tunnel struct { 82 mutex *sync.Mutex 83 config *Config 84 isActivated bool 85 isDiscarded bool 86 isClosed bool 87 dialParams *DialParameters 88 livenessTestMetrics *livenessTestMetrics 89 extraFailureAction func() 90 serverContext *ServerContext 91 monitoringStartTime time.Time 92 conn *common.BurstMonitoredConn 93 sshClient *ssh.Client 94 sshServerRequests <-chan *ssh.Request 95 operateWaitGroup *sync.WaitGroup 96 operateCtx context.Context 97 stopOperate context.CancelFunc 98 signalPortForwardFailure chan struct{} 99 totalPortForwardFailures int 100 adjustedEstablishStartTime time.Time 101 establishDuration time.Duration 102 establishedTime time.Time 103 handledSSHKeepAliveFailure int32 104 inFlightConnectedRequestSignal chan struct{} 105 } 106 107 // getCustomParameters helpers wrap the verbose function call chain required 108 // to get a current snapshot of the parameters.Parameters customized with the 109 // dial parameters associated with a tunnel. 110 111 func (tunnel *Tunnel) getCustomParameters() parameters.ParametersAccessor { 112 return getCustomParameters(tunnel.config, tunnel.dialParams) 113 } 114 115 func getCustomParameters( 116 config *Config, dialParams *DialParameters) parameters.ParametersAccessor { 117 return config.GetParameters().GetCustom(dialParams.NetworkLatencyMultiplier) 118 } 119 120 // ConnectTunnel first makes a network transport connection to the 121 // Psiphon server and then establishes an SSH client session on top of 122 // that transport. The SSH server is authenticated using the public 123 // key in the server entry. 124 // Depending on the server's capabilities, the connection may use 125 // plain SSH over TCP, obfuscated SSH over TCP, or obfuscated SSH over 126 // HTTP (meek protocol). 127 // When requiredProtocol is not blank, that protocol is used. Otherwise, 128 // the a random supported protocol is used. 129 // 130 // Call Activate on a connected tunnel to complete its establishment 131 // before using. 132 // 133 // Tunnel establishment is split into two phases: connection, and 134 // activation. The Controller will run many ConnectTunnel calls 135 // concurrently and then, to avoid unnecessary overhead from making 136 // handshake requests and starting operateTunnel from tunnels which 137 // may be discarded, call Activate on connected tunnels sequentially 138 // as necessary. 139 func ConnectTunnel( 140 ctx context.Context, 141 config *Config, 142 adjustedEstablishStartTime time.Time, 143 dialParams *DialParameters) (*Tunnel, error) { 144 145 // Build transport layers and establish SSH connection. Note that 146 // dialConn and monitoredConn are the same network connection. 147 dialResult, err := dialTunnel(ctx, config, dialParams) 148 if err != nil { 149 return nil, errors.Trace(err) 150 } 151 152 // The tunnel is now connected 153 return &Tunnel{ 154 mutex: new(sync.Mutex), 155 config: config, 156 dialParams: dialParams, 157 livenessTestMetrics: dialResult.livenessTestMetrics, 158 extraFailureAction: dialResult.extraFailureAction, 159 monitoringStartTime: dialResult.monitoringStartTime, 160 conn: dialResult.monitoredConn, 161 sshClient: dialResult.sshClient, 162 sshServerRequests: dialResult.sshRequests, 163 // A buffer allows at least one signal to be sent even when the receiver is 164 // not listening. Senders should not block. 165 signalPortForwardFailure: make(chan struct{}, 1), 166 adjustedEstablishStartTime: adjustedEstablishStartTime, 167 }, nil 168 } 169 170 // Activate completes the tunnel establishment, performing the handshake 171 // request and starting operateTunnel, the worker that monitors the tunnel 172 // and handles periodic management. 173 func (tunnel *Tunnel) Activate( 174 ctx context.Context, tunnelOwner TunnelOwner) (retErr error) { 175 176 // Ensure that, unless the base context is cancelled, any replayed dial 177 // parameters are cleared, no longer to be retried, if the tunnel fails to 178 // activate. 179 activationSucceeded := false 180 baseCtx := ctx 181 defer func() { 182 if !activationSucceeded && baseCtx.Err() != context.Canceled { 183 tunnel.dialParams.Failed(tunnel.config) 184 if tunnel.extraFailureAction != nil { 185 tunnel.extraFailureAction() 186 } 187 if retErr != nil { 188 _ = RecordFailedTunnelStat( 189 tunnel.config, 190 tunnel.dialParams, 191 tunnel.livenessTestMetrics, 192 -1, 193 -1, 194 retErr) 195 } 196 } 197 }() 198 199 // Create a new Psiphon API server context for this tunnel. This includes 200 // performing a handshake request. If the handshake fails, this activation 201 // fails. 202 var serverContext *ServerContext 203 if !tunnel.config.DisableApi { 204 NoticeInfo( 205 "starting server context for %s", 206 tunnel.dialParams.ServerEntry.GetDiagnosticID()) 207 208 // Call NewServerContext in a goroutine, as it blocks on a network operation, 209 // the handshake request, and would block shutdown. If the shutdown signal is 210 // received, close the tunnel, which will interrupt the handshake request 211 // that may be blocking NewServerContext. 212 // 213 // Timeout after PsiphonApiServerTimeoutSeconds. NewServerContext may not 214 // return if the tunnel network connection is unstable during the handshake 215 // request. At this point, there is no operateTunnel monitor that will detect 216 // this condition with SSH keep alives. 217 218 timeout := tunnel.getCustomParameters().Duration( 219 parameters.PsiphonAPIRequestTimeout) 220 221 if timeout > 0 { 222 var cancelFunc context.CancelFunc 223 ctx, cancelFunc = context.WithTimeout(ctx, timeout) 224 defer cancelFunc() 225 } 226 227 type newServerContextResult struct { 228 serverContext *ServerContext 229 err error 230 } 231 232 resultChannel := make(chan newServerContextResult) 233 234 go func() { 235 serverContext, err := NewServerContext(tunnel) 236 resultChannel <- newServerContextResult{ 237 serverContext: serverContext, 238 err: err, 239 } 240 }() 241 242 var result newServerContextResult 243 244 select { 245 case result = <-resultChannel: 246 case <-ctx.Done(): 247 result.err = ctx.Err() 248 // Interrupt the goroutine 249 tunnel.Close(true) 250 <-resultChannel 251 } 252 253 if result.err != nil { 254 return errors.Trace(result.err) 255 } 256 257 serverContext = result.serverContext 258 } 259 260 // The activation succeeded. 261 activationSucceeded = true 262 263 tunnel.mutex.Lock() 264 265 // It may happen that the tunnel gets closed while Activate is running. 266 // In this case, abort here, to ensure that the operateTunnel goroutine 267 // will not be launched after Close is called. 268 if tunnel.isClosed { 269 return errors.TraceNew("tunnel is closed") 270 } 271 272 tunnel.isActivated = true 273 tunnel.serverContext = serverContext 274 275 // establishDuration is the elapsed time between the controller starting tunnel 276 // establishment and this tunnel being established. The reported value represents 277 // how long the user waited between starting the client and having a usable tunnel; 278 // or how long between the client detecting an unexpected tunnel disconnect and 279 // completing automatic reestablishment. 280 // 281 // This time period may include time spent unsuccessfully connecting to other 282 // servers. Time spent waiting for network connectivity is excluded. 283 tunnel.establishDuration = time.Since(tunnel.adjustedEstablishStartTime) 284 tunnel.establishedTime = time.Now() 285 286 // Use the Background context instead of the controller run context, as tunnels 287 // are terminated when the controller calls tunnel.Close. 288 tunnel.operateCtx, tunnel.stopOperate = context.WithCancel(context.Background()) 289 tunnel.operateWaitGroup = new(sync.WaitGroup) 290 291 // Spawn the operateTunnel goroutine, which monitors the tunnel and handles periodic 292 // stats updates. 293 tunnel.operateWaitGroup.Add(1) 294 go tunnel.operateTunnel(tunnelOwner) 295 296 tunnel.mutex.Unlock() 297 298 return nil 299 } 300 301 // Close stops operating the tunnel and closes the underlying connection. 302 // Supports multiple and/or concurrent calls to Close(). 303 // When isDiscarded is set, operateTunnel will not attempt to send final 304 // status requests. 305 func (tunnel *Tunnel) Close(isDiscarded bool) { 306 307 tunnel.mutex.Lock() 308 tunnel.isDiscarded = isDiscarded 309 isActivated := tunnel.isActivated 310 isClosed := tunnel.isClosed 311 tunnel.isClosed = true 312 tunnel.mutex.Unlock() 313 314 if !isClosed { 315 316 // Signal operateTunnel to stop before closing the tunnel -- this 317 // allows a final status request to be made in the case of an orderly 318 // shutdown. 319 // A timer is set, so if operateTunnel takes too long to stop, the 320 // tunnel is closed, which will interrupt any slow final status request. 321 322 if isActivated { 323 timeout := tunnel.getCustomParameters().Duration( 324 parameters.TunnelOperateShutdownTimeout) 325 afterFunc := time.AfterFunc( 326 timeout, 327 func() { tunnel.conn.Close() }) 328 tunnel.stopOperate() 329 tunnel.operateWaitGroup.Wait() 330 afterFunc.Stop() 331 } 332 333 tunnel.sshClient.Close() 334 // tunnel.conn.Close() may get called multiple times, which is allowed. 335 tunnel.conn.Close() 336 337 err := tunnel.sshClient.Wait() 338 if err != nil { 339 NoticeWarning("close tunnel ssh error: %s", err) 340 } 341 } 342 343 // Log burst metrics now that the BurstMonitoredConn is closed. 344 // Metrics will be empty when burst monitoring is disabled. 345 if !isDiscarded && isActivated { 346 burstMetrics := tunnel.conn.GetMetrics(tunnel.monitoringStartTime) 347 if len(burstMetrics) > 0 { 348 NoticeBursts( 349 tunnel.dialParams.ServerEntry.GetDiagnosticID(), 350 burstMetrics) 351 } 352 } 353 } 354 355 // SetInFlightConnectedRequest checks if a connected request can begin and 356 // sets the channel used to signal that the request is complete. 357 // 358 // The caller must not initiate a connected request when 359 // SetInFlightConnectedRequest returns false. When SetInFlightConnectedRequest 360 // returns true, the caller must call SetInFlightConnectedRequest(nil) when 361 // the connected request completes. 362 func (tunnel *Tunnel) SetInFlightConnectedRequest(requestSignal chan struct{}) bool { 363 tunnel.mutex.Lock() 364 defer tunnel.mutex.Unlock() 365 366 // If already closing, don't start a connected request: the 367 // TunnelOperateShutdownTimeout period may be nearly expired. 368 if tunnel.isClosed { 369 return false 370 } 371 372 if requestSignal == nil { 373 // Not already in-flight (not expected) 374 if tunnel.inFlightConnectedRequestSignal == nil { 375 return false 376 } 377 } else { 378 // Already in-flight (not expected) 379 if tunnel.inFlightConnectedRequestSignal != nil { 380 return false 381 } 382 } 383 384 tunnel.inFlightConnectedRequestSignal = requestSignal 385 386 return true 387 } 388 389 // AwaitInFlightConnectedRequest waits for the signal that any in-flight 390 // connected request is complete. 391 // 392 // AwaitInFlightConnectedRequest may block until the connected request is 393 // aborted by terminating the tunnel. 394 func (tunnel *Tunnel) AwaitInFlightConnectedRequest() { 395 tunnel.mutex.Lock() 396 requestSignal := tunnel.inFlightConnectedRequestSignal 397 tunnel.mutex.Unlock() 398 399 if requestSignal != nil { 400 <-requestSignal 401 } 402 } 403 404 // IsActivated returns the tunnel's activated flag. 405 func (tunnel *Tunnel) IsActivated() bool { 406 tunnel.mutex.Lock() 407 defer tunnel.mutex.Unlock() 408 return tunnel.isActivated 409 } 410 411 // IsDiscarded returns the tunnel's discarded flag. 412 func (tunnel *Tunnel) IsDiscarded() bool { 413 tunnel.mutex.Lock() 414 defer tunnel.mutex.Unlock() 415 return tunnel.isDiscarded 416 } 417 418 // SendAPIRequest sends an API request as an SSH request through the tunnel. 419 // This function blocks awaiting a response. Only one request may be in-flight 420 // at once; a concurrent SendAPIRequest will block until an active request 421 // receives its response (or the SSH connection is terminated). 422 func (tunnel *Tunnel) SendAPIRequest( 423 name string, requestPayload []byte) ([]byte, error) { 424 425 ok, responsePayload, err := tunnel.sshClient.Conn.SendRequest( 426 name, true, requestPayload) 427 428 if err != nil { 429 return nil, errors.Trace(err) 430 } 431 432 if !ok { 433 return nil, errors.TraceNew("API request rejected") 434 } 435 436 return responsePayload, nil 437 } 438 439 // DialTCPChannel establishes a TCP port forward connection through the 440 // tunnel. 441 // 442 // When split tunnel mode is enabled, and unless alwaysTunneled is set, the 443 // server may reject the port forward and indicate that the client is to make 444 // direct, untunneled connection. In this case, the bool return value is true 445 // and net.Conn and error are nil. 446 // 447 // downstreamConn is an optional parameter which specifies a connection to be 448 // explicitly closed when the dialed connection is closed. 449 func (tunnel *Tunnel) DialTCPChannel( 450 remoteAddr string, 451 alwaysTunneled bool, 452 downstreamConn net.Conn) (net.Conn, bool, error) { 453 454 channelType := "direct-tcpip" 455 if alwaysTunneled && tunnel.config.IsSplitTunnelEnabled() { 456 // This channel type is only necessary in split tunnel mode. 457 channelType = protocol.TCP_PORT_FORWARD_NO_SPLIT_TUNNEL_TYPE 458 } 459 460 channel, err := tunnel.dialChannel(channelType, remoteAddr) 461 if err != nil { 462 if isSplitTunnelRejectReason(err) { 463 return nil, true, nil 464 } 465 return nil, false, errors.Trace(err) 466 } 467 468 netConn, ok := channel.(net.Conn) 469 if !ok { 470 return nil, false, errors.Tracef("unexpected channel type: %T", channel) 471 } 472 473 conn := &TunneledConn{ 474 Conn: netConn, 475 tunnel: tunnel, 476 downstreamConn: downstreamConn} 477 478 return tunnel.wrapWithTransferStats(conn), false, nil 479 } 480 481 func (tunnel *Tunnel) DialPacketTunnelChannel() (net.Conn, error) { 482 483 channel, err := tunnel.dialChannel(protocol.PACKET_TUNNEL_CHANNEL_TYPE, "") 484 if err != nil { 485 return nil, errors.Trace(err) 486 } 487 488 sshChannel, ok := channel.(ssh.Channel) 489 if !ok { 490 return nil, errors.Tracef("unexpected channel type: %T", channel) 491 } 492 493 NoticeInfo("DialPacketTunnelChannel: established channel") 494 495 conn := newChannelConn(sshChannel) 496 497 // wrapWithTransferStats will track bytes transferred for the 498 // packet tunnel. It will count packet overhead (TCP/UDP/IP headers). 499 // 500 // Since the data in the channel is not HTTP or TLS, no domain bytes 501 // counting is expected. 502 // 503 // transferstats are also used to determine that there's been recent 504 // activity and skip periodic SSH keep alives; see Tunnel.operateTunnel. 505 506 return tunnel.wrapWithTransferStats(conn), nil 507 } 508 509 func (tunnel *Tunnel) dialChannel(channelType, remoteAddr string) (interface{}, error) { 510 511 if !tunnel.IsActivated() { 512 return nil, errors.TraceNew("tunnel is not activated") 513 } 514 515 // Note: there is no dial context since SSH port forward dials cannot 516 // be interrupted directly. Closing the tunnel will interrupt the dials. 517 // A timeout is set to unblock this function, but the goroutine may 518 // not exit until the tunnel is closed. 519 520 type channelDialResult struct { 521 channel interface{} 522 err error 523 } 524 525 // Use a buffer of 1 as there are two senders and only one guaranteed receive. 526 527 results := make(chan *channelDialResult, 1) 528 529 afterFunc := time.AfterFunc( 530 tunnel.getCustomParameters().Duration( 531 parameters.TunnelPortForwardDialTimeout), 532 func() { 533 results <- &channelDialResult{ 534 nil, errors.Tracef("channel dial timeout: %s", channelType)} 535 }) 536 defer afterFunc.Stop() 537 538 go func() { 539 result := new(channelDialResult) 540 switch channelType { 541 542 case "direct-tcpip", protocol.TCP_PORT_FORWARD_NO_SPLIT_TUNNEL_TYPE: 543 // The protocol.TCP_PORT_FORWARD_NO_SPLIT_TUNNEL_TYPE is the same as 544 // "direct-tcpip", except split tunnel channel rejections are disallowed 545 // even when split tunnel mode is enabled. 546 result.channel, result.err = 547 tunnel.sshClient.Dial(channelType, remoteAddr) 548 549 default: 550 var sshRequests <-chan *ssh.Request 551 result.channel, sshRequests, result.err = 552 tunnel.sshClient.OpenChannel(channelType, nil) 553 if result.err == nil { 554 go ssh.DiscardRequests(sshRequests) 555 } 556 } 557 if result.err != nil { 558 result.err = errors.Trace(result.err) 559 } 560 results <- result 561 }() 562 563 result := <-results 564 565 if result.err != nil { 566 if !isSplitTunnelRejectReason(result.err) { 567 select { 568 case tunnel.signalPortForwardFailure <- struct{}{}: 569 default: 570 } 571 } 572 return nil, errors.Trace(result.err) 573 } 574 575 return result.channel, nil 576 } 577 578 func isSplitTunnelRejectReason(err error) bool { 579 580 var openChannelErr *ssh.OpenChannelError 581 if std_errors.As(err, &openChannelErr) { 582 return openChannelErr.Reason == 583 ssh.RejectionReason(protocol.CHANNEL_REJECT_REASON_SPLIT_TUNNEL) 584 } 585 586 return false 587 } 588 589 func (tunnel *Tunnel) wrapWithTransferStats(conn net.Conn) net.Conn { 590 591 // Tunnel does not have a serverContext when DisableApi is set. We still use 592 // transferstats.Conn to count bytes transferred for monitoring tunnel 593 // quality. 594 var regexps *transferstats.Regexps 595 if tunnel.serverContext != nil { 596 regexps = tunnel.serverContext.StatsRegexps() 597 } 598 599 return transferstats.NewConn( 600 conn, tunnel.dialParams.ServerEntry.IpAddress, regexps) 601 } 602 603 // SignalComponentFailure notifies the tunnel that an associated component has failed. 604 // This will terminate the tunnel. 605 func (tunnel *Tunnel) SignalComponentFailure() { 606 NoticeWarning("tunnel received component failure signal") 607 tunnel.Close(false) 608 } 609 610 // TunneledConn implements net.Conn and wraps a port forward connection. 611 // It is used to hook into Read and Write to observe I/O errors and 612 // report these errors back to the tunnel monitor as port forward failures. 613 // TunneledConn optionally tracks a peer connection to be explicitly closed 614 // when the TunneledConn is closed. 615 type TunneledConn struct { 616 net.Conn 617 tunnel *Tunnel 618 downstreamConn net.Conn 619 } 620 621 func (conn *TunneledConn) Read(buffer []byte) (n int, err error) { 622 n, err = conn.Conn.Read(buffer) 623 if err != nil && err != io.EOF { 624 // Report new failure. Won't block; assumes the receiver 625 // has a sufficient buffer for the threshold number of reports. 626 // TODO: conditional on type of error or error message? 627 select { 628 case conn.tunnel.signalPortForwardFailure <- struct{}{}: 629 default: 630 } 631 } 632 return 633 } 634 635 func (conn *TunneledConn) Write(buffer []byte) (n int, err error) { 636 n, err = conn.Conn.Write(buffer) 637 if err != nil && err != io.EOF { 638 // Same as TunneledConn.Read() 639 select { 640 case conn.tunnel.signalPortForwardFailure <- struct{}{}: 641 default: 642 } 643 } 644 return 645 } 646 647 func (conn *TunneledConn) Close() error { 648 if conn.downstreamConn != nil { 649 conn.downstreamConn.Close() 650 } 651 return conn.Conn.Close() 652 } 653 654 type dialResult struct { 655 dialConn net.Conn 656 monitoringStartTime time.Time 657 monitoredConn *common.BurstMonitoredConn 658 sshClient *ssh.Client 659 sshRequests <-chan *ssh.Request 660 livenessTestMetrics *livenessTestMetrics 661 extraFailureAction func() 662 } 663 664 // dialTunnel is a helper that builds the transport layers and establishes the 665 // SSH connection. When additional dial configuration is used, dial metrics 666 // are recorded and returned. 667 func dialTunnel( 668 ctx context.Context, 669 config *Config, 670 dialParams *DialParameters) (_ *dialResult, retErr error) { 671 672 // Return immediately when overall context is canceled or timed-out. This 673 // avoids notice noise. 674 err := ctx.Err() 675 if err != nil { 676 return nil, errors.Trace(err) 677 } 678 679 p := getCustomParameters(config, dialParams) 680 timeout := p.Duration(parameters.TunnelConnectTimeout) 681 rateLimits := p.RateLimits(parameters.TunnelRateLimits) 682 obfuscatedSSHMinPadding := p.Int(parameters.ObfuscatedSSHMinPadding) 683 obfuscatedSSHMaxPadding := p.Int(parameters.ObfuscatedSSHMaxPadding) 684 livenessTestMinUpstreamBytes := p.Int(parameters.LivenessTestMinUpstreamBytes) 685 livenessTestMaxUpstreamBytes := p.Int(parameters.LivenessTestMaxUpstreamBytes) 686 livenessTestMinDownstreamBytes := p.Int(parameters.LivenessTestMinDownstreamBytes) 687 livenessTestMaxDownstreamBytes := p.Int(parameters.LivenessTestMaxDownstreamBytes) 688 burstUpstreamTargetBytes := int64(p.Int(parameters.ClientBurstUpstreamTargetBytes)) 689 burstUpstreamDeadline := p.Duration(parameters.ClientBurstUpstreamDeadline) 690 burstDownstreamTargetBytes := int64(p.Int(parameters.ClientBurstDownstreamTargetBytes)) 691 burstDownstreamDeadline := p.Duration(parameters.ClientBurstDownstreamDeadline) 692 p.Close() 693 694 // Ensure that, unless the base context is cancelled, any replayed dial 695 // parameters are cleared, no longer to be retried, if the tunnel fails to 696 // connect. 697 // 698 // Limitation: dials that fail to connect due to the server being in a 699 // load-limiting state are not distinguished and excepted from this 700 // logic. 701 dialSucceeded := false 702 baseCtx := ctx 703 var failedTunnelLivenessTestMetrics *livenessTestMetrics 704 var extraFailureAction func() 705 defer func() { 706 if !dialSucceeded && baseCtx.Err() != context.Canceled { 707 dialParams.Failed(config) 708 if extraFailureAction != nil { 709 extraFailureAction() 710 } 711 if retErr != nil { 712 _ = RecordFailedTunnelStat( 713 config, 714 dialParams, 715 failedTunnelLivenessTestMetrics, 716 -1, 717 -1, 718 retErr) 719 } 720 } 721 }() 722 723 var cancelFunc context.CancelFunc 724 ctx, cancelFunc = context.WithTimeout(ctx, timeout) 725 defer cancelFunc() 726 727 // DialDuration is the elapsed time for both successful and failed tunnel 728 // dials. For successful tunnels, it includes any the network protocol 729 // handshake(s), obfuscation protocol handshake(s), SSH handshake, and 730 // liveness test, when performed. 731 // 732 // Note: ensure DialDuration is set before calling any function which logs 733 // dial_duration. 734 735 startDialTime := time.Now() 736 defer func() { 737 dialParams.DialDuration = time.Since(startDialTime) 738 }() 739 740 // Note: dialParams.MeekResolvedIPAddress isn't set until the dial begins, 741 // so it will always be blank in NoticeConnectingServer. 742 743 NoticeConnectingServer(dialParams) 744 745 // Create the base transport: meek or direct connection 746 747 var dialConn net.Conn 748 749 if protocol.TunnelProtocolUsesMeek(dialParams.TunnelProtocol) { 750 751 dialConn, err = DialMeek( 752 ctx, 753 dialParams.GetMeekConfig(), 754 dialParams.GetDialConfig()) 755 if err != nil { 756 return nil, errors.Trace(err) 757 } 758 759 } else if protocol.TunnelProtocolUsesQUIC(dialParams.TunnelProtocol) { 760 761 packetConn, remoteAddr, err := NewUDPConn( 762 ctx, 763 dialParams.DirectDialAddress, 764 dialParams.GetDialConfig()) 765 if err != nil { 766 return nil, errors.Trace(err) 767 } 768 769 dialConn, err = quic.Dial( 770 ctx, 771 packetConn, 772 remoteAddr, 773 dialParams.QUICDialSNIAddress, 774 dialParams.QUICVersion, 775 dialParams.QUICClientHelloSeed, 776 dialParams.ServerEntry.SshObfuscatedKey, 777 dialParams.ObfuscatedQUICPaddingSeed, 778 dialParams.QUICDisablePathMTUDiscovery) 779 if err != nil { 780 return nil, errors.Trace(err) 781 } 782 783 } else if protocol.TunnelProtocolUsesTapDance(dialParams.TunnelProtocol) { 784 785 dialConn, err = refraction.DialTapDance( 786 ctx, 787 config.EmitRefractionNetworkingLogs, 788 config.GetPsiphonDataDirectory(), 789 NewNetDialer(dialParams.GetDialConfig()), 790 dialParams.DirectDialAddress) 791 if err != nil { 792 return nil, errors.Trace(err) 793 } 794 795 } else if protocol.TunnelProtocolUsesConjure(dialParams.TunnelProtocol) { 796 797 // Specify a cache key with a scope that ensures that: 798 // 799 // (a) cached registrations aren't used across different networks, as a 800 // registration requires the client's public IP to match the value at time 801 // of registration; 802 // 803 // (b) cached registrations are associated with specific Psiphon server 804 // candidates, to ensure that replay will use the same phantom IP(s). 805 // 806 // This scheme allows for reuse of cached registrations on network A when a 807 // client roams from network A to network B and back to network A. 808 // 809 // Using the network ID as a proxy for client public IP address is a 810 // heurisitic: it's possible that a clients public IP address changes 811 // without the network ID changing, and it's not guaranteed that the client 812 // will be assigned the original public IP on network A; so there's some 813 // chance the registration cannot be reused. 814 815 diagnosticID := dialParams.ServerEntry.GetDiagnosticID() 816 817 cacheKey := dialParams.NetworkID + "-" + diagnosticID 818 819 conjureConfig := &refraction.ConjureConfig{ 820 RegistrationCacheTTL: dialParams.ConjureCachedRegistrationTTL, 821 RegistrationCacheKey: cacheKey, 822 Transport: dialParams.ConjureTransport, 823 DiagnosticID: diagnosticID, 824 Logger: NoticeCommonLogger(), 825 } 826 827 // Set extraFailureAction, which is invoked whenever the tunnel fails (i.e., 828 // where RecordFailedTunnelStat is invoked). The action will remove any 829 // cached registration. When refraction.DialConjure succeeds, the underlying 830 // registration is cached. After refraction.DialConjure returns, it no 831 // longer modifies the cached state of that registration, assuming that it 832 // remains valid and effective. However adversarial impact on a given 833 // phantom IP may not become evident until after the initial TCP connection 834 // establishment and handshake performed by refraction.DialConjure. For 835 // example, it may be that the phantom dial is targeted for severe 836 // throttling which begins or is only evident later in the flow. Scheduling 837 // a call to DeleteCachedConjureRegistration allows us to invalidate the 838 // cached registration for a tunnel that fails later in its lifecycle. 839 // 840 // Note that extraFailureAction will retain a reference to conjureConfig for 841 // the lifetime of the tunnel. 842 extraFailureAction = func() { 843 refraction.DeleteCachedConjureRegistration(conjureConfig) 844 } 845 846 if dialParams.ConjureAPIRegistration { 847 848 // Use MeekConn to domain front Conjure API registration. 849 // 850 // ConjureAPIRegistrarFrontingSpecs are applied via 851 // dialParams.GetMeekConfig, and will be subject to replay. 852 // 853 // Since DialMeek will create a TLS connection immediately, and a cached 854 // registration may be used, we will delay initializing the MeekConn-based 855 // RoundTripper until we know it's needed. This is implemented by passing 856 // in a RoundTripper that establishes a MeekConn when RoundTrip is called. 857 // 858 // In refraction.dial we configure 0 retries for API registration requests, 859 // assuming it's better to let another Psiphon candidate retry, with new 860 // domaing fronting parameters. As such, we expect only one round trip call 861 // per NewHTTPRoundTripper, so, in practise, there's no performance penalty 862 // from establishing a new MeekConn per round trip. 863 // 864 // Performing the full DialMeek/RoundTrip operation here allows us to call 865 // MeekConn.Close and ensure all resources are immediately cleaned up. 866 roundTrip := func(request *http.Request) (*http.Response, error) { 867 868 conn, err := DialMeek( 869 ctx, dialParams.GetMeekConfig(), dialParams.GetDialConfig()) 870 if err != nil { 871 return nil, errors.Trace(err) 872 } 873 defer conn.Close() 874 875 response, err := conn.RoundTrip(request) 876 if err != nil { 877 return nil, errors.Trace(err) 878 } 879 880 // Read the response into a buffer and close the response 881 // body, ensuring that MeekConn.Close closes all idle connections. 882 // 883 // Alternatively, we could Clone the request to set 884 // http.Request.Close and avoid keeping any idle connection 885 // open after the response body is read by gotapdance. Since 886 // the response body is small and since gotapdance does not 887 // stream the response body, we're taking this approach which 888 // ensures cleanup. 889 890 body, err := ioutil.ReadAll(response.Body) 891 _ = response.Body.Close() 892 if err != nil { 893 return nil, errors.Trace(err) 894 } 895 response.Body = io.NopCloser(bytes.NewReader(body)) 896 897 return response, nil 898 } 899 900 conjureConfig.APIRegistrarHTTPClient = &http.Client{ 901 Transport: common.NewHTTPRoundTripper(roundTrip), 902 } 903 904 conjureConfig.APIRegistrarBidirectionalURL = 905 dialParams.ConjureAPIRegistrarBidirectionalURL 906 conjureConfig.APIRegistrarDelay = dialParams.ConjureAPIRegistrarDelay 907 908 } else if dialParams.ConjureDecoyRegistration { 909 910 // The Conjure "phantom" connection is compatible with fragmentation, but 911 // the decoy registrar connection, like Tapdance, is not, so force it off. 912 // Any tunnel fragmentation metrics will refer to the "phantom" connection 913 // only. 914 conjureConfig.DecoyRegistrarDialer = NewNetDialer( 915 dialParams.GetDialConfig().WithoutFragmentor()) 916 conjureConfig.DecoyRegistrarWidth = dialParams.ConjureDecoyRegistrarWidth 917 conjureConfig.DecoyRegistrarDelay = dialParams.ConjureDecoyRegistrarDelay 918 } 919 920 dialConn, err = refraction.DialConjure( 921 ctx, 922 config.EmitRefractionNetworkingLogs, 923 config.GetPsiphonDataDirectory(), 924 NewNetDialer(dialParams.GetDialConfig()), 925 dialParams.DirectDialAddress, 926 conjureConfig) 927 if err != nil { 928 return nil, errors.Trace(err) 929 } 930 931 } else { 932 933 dialConn, err = DialTCP( 934 ctx, 935 dialParams.DirectDialAddress, 936 dialParams.GetDialConfig()) 937 if err != nil { 938 return nil, errors.Trace(err) 939 } 940 } 941 942 // Some conns report additional metrics. fragmentor.Conns report 943 // fragmentor configs. 944 if metricsSource, ok := dialConn.(common.MetricsSource); ok { 945 dialParams.DialConnMetrics = metricsSource 946 } 947 948 if noticeMetricsSource, ok := dialConn.(common.NoticeMetricsSource); ok { 949 dialParams.DialConnNoticeMetrics = noticeMetricsSource 950 } 951 952 // If dialConn is not a Closer, tunnel failure detection may be slower 953 if _, ok := dialConn.(common.Closer); !ok { 954 NoticeWarning("tunnel.dialTunnel: dialConn is not a Closer") 955 } 956 957 cleanupConn := dialConn 958 defer func() { 959 // Cleanup on error 960 if cleanupConn != nil { 961 cleanupConn.Close() 962 } 963 }() 964 965 monitoringStartTime := time.Now() 966 monitoredConn := common.NewBurstMonitoredConn( 967 dialConn, 968 false, 969 burstUpstreamTargetBytes, burstUpstreamDeadline, 970 burstDownstreamTargetBytes, burstDownstreamDeadline) 971 972 // Apply throttling (if configured) 973 throttledConn := common.NewThrottledConn( 974 monitoredConn, 975 rateLimits) 976 977 // Add obfuscated SSH layer 978 var sshConn net.Conn = throttledConn 979 if protocol.TunnelProtocolUsesObfuscatedSSH(dialParams.TunnelProtocol) { 980 obfuscatedSSHConn, err := obfuscator.NewClientObfuscatedSSHConn( 981 throttledConn, 982 dialParams.ServerEntry.SshObfuscatedKey, 983 dialParams.ObfuscatorPaddingSeed, 984 &obfuscatedSSHMinPadding, 985 &obfuscatedSSHMaxPadding) 986 if err != nil { 987 return nil, errors.Trace(err) 988 } 989 sshConn = obfuscatedSSHConn 990 dialParams.ObfuscatedSSHConnMetrics = obfuscatedSSHConn 991 } 992 993 // Now establish the SSH session over the conn transport 994 expectedPublicKey, err := base64.StdEncoding.DecodeString( 995 dialParams.ServerEntry.SshHostKey) 996 if err != nil { 997 return nil, errors.Trace(err) 998 } 999 sshCertChecker := &ssh.CertChecker{ 1000 IsHostAuthority: func(auth ssh.PublicKey, address string) bool { 1001 // Psiphon servers do not currently use SSH certificates. This CertChecker 1002 // code path may still be hit if a client attempts to connect using an 1003 // obsolete server entry. 1004 return false 1005 }, 1006 HostKeyFallback: func(addr string, remote net.Addr, publicKey ssh.PublicKey) error { 1007 if !bytes.Equal(expectedPublicKey, publicKey.Marshal()) { 1008 return errors.TraceNew("unexpected host public key") 1009 } 1010 return nil 1011 }, 1012 } 1013 1014 sshPasswordPayload := &protocol.SSHPasswordPayload{ 1015 SessionId: config.SessionID, 1016 SshPassword: dialParams.ServerEntry.SshPassword, 1017 ClientCapabilities: []string{protocol.CLIENT_CAPABILITY_SERVER_REQUESTS}, 1018 } 1019 1020 payload, err := json.Marshal(sshPasswordPayload) 1021 if err != nil { 1022 return nil, errors.Trace(err) 1023 } 1024 1025 sshClientConfig := &ssh.ClientConfig{ 1026 User: dialParams.ServerEntry.SshUsername, 1027 Auth: []ssh.AuthMethod{ 1028 ssh.Password(string(payload)), 1029 }, 1030 HostKeyCallback: sshCertChecker.CheckHostKey, 1031 ClientVersion: dialParams.SSHClientVersion, 1032 } 1033 1034 sshClientConfig.KEXPRNGSeed = dialParams.SSHKEXSeed 1035 1036 if protocol.TunnelProtocolUsesObfuscatedSSH(dialParams.TunnelProtocol) { 1037 if config.ObfuscatedSSHAlgorithms != nil { 1038 sshClientConfig.KeyExchanges = []string{config.ObfuscatedSSHAlgorithms[0]} 1039 sshClientConfig.Ciphers = []string{config.ObfuscatedSSHAlgorithms[1]} 1040 sshClientConfig.MACs = []string{config.ObfuscatedSSHAlgorithms[2]} 1041 sshClientConfig.HostKeyAlgorithms = []string{config.ObfuscatedSSHAlgorithms[3]} 1042 1043 } else { 1044 // With Encrypt-then-MAC hash algorithms, packet length is 1045 // transmitted in plaintext, which aids in traffic analysis. 1046 // 1047 // TUNNEL_PROTOCOL_SSH is excepted since its KEX appears in plaintext, 1048 // and the protocol is intended to look like SSH on the wire. 1049 sshClientConfig.NoEncryptThenMACHash = true 1050 } 1051 } else { 1052 // For TUNNEL_PROTOCOL_SSH only, the server is expected to randomize 1053 // its KEX; setting PeerKEXPRNGSeed will ensure successful negotiation 1054 // betweem two randomized KEXes. 1055 if dialParams.ServerEntry.SshObfuscatedKey != "" { 1056 sshClientConfig.PeerKEXPRNGSeed, err = protocol.DeriveSSHServerKEXPRNGSeed( 1057 dialParams.ServerEntry.SshObfuscatedKey) 1058 if err != nil { 1059 return nil, errors.Trace(err) 1060 } 1061 } 1062 } 1063 1064 // The ssh session establishment (via ssh.NewClientConn) is wrapped 1065 // in a timeout to ensure it won't hang. We've encountered firewalls 1066 // that allow the TCP handshake to complete but then send a RST to the 1067 // server-side and nothing to the client-side, and if that happens 1068 // while ssh.NewClientConn is reading, it may wait forever. The timeout 1069 // closes the conn, which interrupts it. 1070 // Note: TCP handshake timeouts are provided by TCPConn, and session 1071 // timeouts *after* ssh establishment are provided by the ssh keep alive 1072 // in operate tunnel. 1073 1074 type sshNewClientResult struct { 1075 sshClient *ssh.Client 1076 sshRequests <-chan *ssh.Request 1077 livenessTestMetrics *livenessTestMetrics 1078 err error 1079 } 1080 1081 resultChannel := make(chan sshNewClientResult) 1082 1083 // Call NewClientConn in a goroutine, as it blocks on SSH handshake network 1084 // operations, and would block canceling or shutdown. If the parent context 1085 // is canceled, close the net.Conn underlying SSH, which will interrupt the 1086 // SSH handshake that may be blocking NewClientConn. 1087 1088 go func() { 1089 // The following is adapted from ssh.Dial(), here using a custom conn 1090 // The sshAddress is passed through to host key verification callbacks; we don't use it. 1091 sshAddress := "" 1092 sshClientConn, sshChannels, sshRequests, err := ssh.NewClientConn( 1093 sshConn, sshAddress, sshClientConfig) 1094 1095 var sshClient *ssh.Client 1096 var metrics *livenessTestMetrics 1097 1098 if err == nil { 1099 1100 // sshRequests is handled by operateTunnel. 1101 // ssh.NewClient also expects to handle the sshRequests 1102 // value from ssh.NewClientConn and will spawn a goroutine 1103 // to handle the <-chan *ssh.Request, so we must provide 1104 // a closed channel to ensure that goroutine halts instead 1105 // of hanging on a nil channel. 1106 noRequests := make(chan *ssh.Request) 1107 close(noRequests) 1108 1109 sshClient = ssh.NewClient(sshClientConn, sshChannels, noRequests) 1110 1111 if livenessTestMaxUpstreamBytes > 0 || livenessTestMaxDownstreamBytes > 0 { 1112 1113 // When configured, perform a liveness test which sends and 1114 // receives bytes through the tunnel to ensure the tunnel had 1115 // not been blocked upon or shortly after connecting. This 1116 // test is performed concurrently for each establishment 1117 // candidate before selecting a successful tunnel. 1118 // 1119 // Note that the liveness test is subject to the 1120 // TunnelConnectTimeout, which should be adjusted 1121 // accordinging. 1122 1123 metrics, err = performLivenessTest( 1124 sshClient, 1125 livenessTestMinUpstreamBytes, livenessTestMaxUpstreamBytes, 1126 livenessTestMinDownstreamBytes, livenessTestMaxDownstreamBytes, 1127 dialParams.LivenessTestSeed) 1128 1129 // Skip notice when cancelling. 1130 if baseCtx.Err() == nil { 1131 NoticeLivenessTest( 1132 dialParams.ServerEntry.GetDiagnosticID(), metrics, err == nil) 1133 } 1134 } 1135 } 1136 1137 resultChannel <- sshNewClientResult{sshClient, sshRequests, metrics, err} 1138 }() 1139 1140 var result sshNewClientResult 1141 1142 select { 1143 case result = <-resultChannel: 1144 case <-ctx.Done(): 1145 1146 // Interrupt the goroutine and capture its error context to 1147 // distinguish point of failure. 1148 err := ctx.Err() 1149 sshConn.Close() 1150 result = <-resultChannel 1151 if result.err != nil { 1152 result.err = fmt.Errorf("%s: %s", err, result.err) 1153 } else { 1154 result.err = err 1155 } 1156 } 1157 1158 if result.err != nil { 1159 failedTunnelLivenessTestMetrics = result.livenessTestMetrics 1160 return nil, errors.Trace(result.err) 1161 } 1162 1163 dialSucceeded = true 1164 1165 NoticeConnectedServer(dialParams) 1166 1167 cleanupConn = nil 1168 1169 // Invoke DNS cache extension (if enabled in the resolver) now that the 1170 // tunnel is connected and the Psiphon server is authenticated. This 1171 // demonstrates that any domain name resolved to an endpoint that is or 1172 // is forwarded to the expected Psiphon server. 1173 // 1174 // Limitation: DNS cache extension is not implemented for Refraction 1175 // Networking protocols. iOS VPN, the primary use case for DNS cache 1176 // extension, does not enable Refraction Networking. 1177 if protocol.TunnelProtocolUsesFrontedMeek(dialParams.TunnelProtocol) { 1178 resolver := config.GetResolver() 1179 if resolver != nil { 1180 resolver.VerifyCacheExtension(dialParams.MeekFrontingDialAddress) 1181 } 1182 } 1183 1184 // When configured to do so, hold-off on activating this tunnel. This allows 1185 // some extra time for slower but less resource intensive protocols to 1186 // establish tunnels. By holding off post-connect, the client has this 1187 // established tunnel ready to activate in case other protocols fail to 1188 // establish. This hold-off phase continues to consume one connection worker. 1189 // 1190 // The network latency multiplier is not applied to HoldOffTunnelDuration, 1191 // as the goal is to apply a consistent hold-off range across all tunnel 1192 // candidates; and this avoids scaling up any delay users experience. 1193 // 1194 // The hold-off is applied regardless of whether this is the first tunnel 1195 // in a session or a reconnection, even to a server affinity candidate, 1196 // so that the advantage for other protocols persists. 1197 1198 if dialParams.HoldOffTunnelDuration > 0 { 1199 NoticeHoldOffTunnel(dialParams.ServerEntry.GetDiagnosticID(), dialParams.HoldOffTunnelDuration) 1200 common.SleepWithContext(ctx, dialParams.HoldOffTunnelDuration) 1201 } 1202 1203 // Note: dialConn may be used to close the underlying network connection 1204 // but should not be used to perform I/O as that would interfere with SSH 1205 // (and also bypasses throttling). 1206 1207 return &dialResult{ 1208 dialConn: dialConn, 1209 monitoringStartTime: monitoringStartTime, 1210 monitoredConn: monitoredConn, 1211 sshClient: result.sshClient, 1212 sshRequests: result.sshRequests, 1213 livenessTestMetrics: result.livenessTestMetrics, 1214 extraFailureAction: extraFailureAction, 1215 }, 1216 nil 1217 } 1218 1219 // Fields are exported for JSON encoding in NoticeLivenessTest. 1220 type livenessTestMetrics struct { 1221 Duration string 1222 UpstreamBytes int 1223 SentUpstreamBytes int 1224 DownstreamBytes int 1225 ReceivedDownstreamBytes int 1226 } 1227 1228 func performLivenessTest( 1229 sshClient *ssh.Client, 1230 minUpstreamBytes, maxUpstreamBytes int, 1231 minDownstreamBytes, maxDownstreamBytes int, 1232 livenessTestPRNGSeed *prng.Seed) (*livenessTestMetrics, error) { 1233 1234 metrics := new(livenessTestMetrics) 1235 1236 defer func(startTime time.Time) { 1237 metrics.Duration = time.Since(startTime).String() 1238 }(time.Now()) 1239 1240 PRNG := prng.NewPRNGWithSeed(livenessTestPRNGSeed) 1241 1242 metrics.UpstreamBytes = PRNG.Range(minUpstreamBytes, maxUpstreamBytes) 1243 metrics.DownstreamBytes = PRNG.Range(minDownstreamBytes, maxDownstreamBytes) 1244 1245 request := &protocol.RandomStreamRequest{ 1246 UpstreamBytes: metrics.UpstreamBytes, 1247 DownstreamBytes: metrics.DownstreamBytes, 1248 } 1249 1250 extraData, err := json.Marshal(request) 1251 if err != nil { 1252 return metrics, errors.Trace(err) 1253 } 1254 1255 channel, requests, err := sshClient.OpenChannel( 1256 protocol.RANDOM_STREAM_CHANNEL_TYPE, extraData) 1257 if err != nil { 1258 return metrics, errors.Trace(err) 1259 } 1260 defer channel.Close() 1261 1262 go ssh.DiscardRequests(requests) 1263 1264 sent := 0 1265 received := 0 1266 upstream := new(sync.WaitGroup) 1267 var errUpstream, errDownstream error 1268 1269 if metrics.UpstreamBytes > 0 { 1270 1271 // Process streams concurrently to minimize elapsed time. This also 1272 // avoids a unidirectional flow burst early in the tunnel lifecycle. 1273 1274 upstream.Add(1) 1275 go func() { 1276 defer upstream.Done() 1277 1278 // In consideration of memory-constrained environments, use modest-sized copy 1279 // buffers since many tunnel establishment workers may run the liveness test 1280 // concurrently. 1281 var buffer [4096]byte 1282 1283 n, err := common.CopyNBuffer(channel, rand.Reader, int64(metrics.UpstreamBytes), buffer[:]) 1284 sent = int(n) 1285 if err != nil { 1286 errUpstream = errors.Trace(err) 1287 } 1288 }() 1289 } 1290 1291 if metrics.DownstreamBytes > 0 { 1292 var buffer [4096]byte 1293 n, err := common.CopyNBuffer(ioutil.Discard, channel, int64(metrics.DownstreamBytes), buffer[:]) 1294 received = int(n) 1295 if err != nil { 1296 errDownstream = errors.Trace(err) 1297 } 1298 } 1299 1300 upstream.Wait() 1301 metrics.SentUpstreamBytes = sent 1302 metrics.ReceivedDownstreamBytes = received 1303 1304 if errUpstream != nil { 1305 return metrics, errUpstream 1306 } else if errDownstream != nil { 1307 return metrics, errDownstream 1308 } 1309 1310 return metrics, nil 1311 } 1312 1313 // operateTunnel monitors the health of the tunnel and performs 1314 // periodic work. 1315 // 1316 // BytesTransferred and TotalBytesTransferred notices are emitted 1317 // for live reporting and diagnostics reporting, respectively. 1318 // 1319 // Status requests are sent to the Psiphon API to report bytes 1320 // transferred. 1321 // 1322 // Periodic SSH keep alive packets are sent to ensure the underlying 1323 // TCP connection isn't terminated by NAT, or other network 1324 // interference -- or test if it has been terminated while the device 1325 // has been asleep. When a keep alive times out, the tunnel is 1326 // considered failed. 1327 // 1328 // An immediate SSH keep alive "probe" is sent to test the tunnel and 1329 // server responsiveness when a port forward failure is detected: a 1330 // failed dial or failed read/write. This keep alive has a shorter 1331 // timeout. 1332 // 1333 // Note that port forward failures may be due to non-failure conditions. 1334 // For example, when the user inputs an invalid domain name and 1335 // resolution is done by the ssh server; or trying to connect to a 1336 // non-white-listed port; and the error message in these cases is not 1337 // distinguishable from a a true server error (a common error message, 1338 // "ssh: rejected: administratively prohibited (open failed)", may be 1339 // returned for these cases but also if the server has run out of 1340 // ephemeral ports, for example). 1341 // 1342 // SSH keep alives are not sent when the tunnel has been recently 1343 // active (not only does tunnel activity obviate the necessity of a keep 1344 // alive, testing has shown that keep alives may time out for "busy" 1345 // tunnels, especially over meek protocol and other high latency 1346 // conditions). 1347 // 1348 // "Recently active" is defined has having received payload bytes. Sent 1349 // bytes are not considered as testing has shown bytes may appear to 1350 // send when certain NAT devices have interfered with the tunnel, while 1351 // no bytes are received. In a pathological case, with DNS implemented 1352 // as tunneled UDP, a browser may wait excessively for a domain name to 1353 // resolve, while no new port forward is attempted which would otherwise 1354 // result in a tunnel failure detection. 1355 // 1356 // TODO: change "recently active" to include having received any 1357 // SSH protocol messages from the server, not just user payload? 1358 func (tunnel *Tunnel) operateTunnel(tunnelOwner TunnelOwner) { 1359 defer tunnel.operateWaitGroup.Done() 1360 1361 now := time.Now() 1362 lastBytesReceivedTime := now 1363 lastTotalBytesTransferedTime := now 1364 totalSent := int64(0) 1365 totalReceived := int64(0) 1366 setDialParamsSucceeded := false 1367 1368 noticeBytesTransferredTicker := time.NewTicker(1 * time.Second) 1369 defer noticeBytesTransferredTicker.Stop() 1370 1371 // The next status request and ssh keep alive times are picked at random, 1372 // from a range, to make the resulting traffic less fingerprintable, 1373 // Note: not using Tickers since these are not fixed time periods. 1374 nextStatusRequestPeriod := func() time.Duration { 1375 p := tunnel.getCustomParameters() 1376 return prng.Period( 1377 p.Duration(parameters.PsiphonAPIStatusRequestPeriodMin), 1378 p.Duration(parameters.PsiphonAPIStatusRequestPeriodMax)) 1379 } 1380 1381 statsTimer := time.NewTimer(nextStatusRequestPeriod()) 1382 defer statsTimer.Stop() 1383 1384 // Schedule an almost-immediate status request to deliver any unreported 1385 // persistent stats. 1386 unreported := CountUnreportedPersistentStats() 1387 if unreported > 0 { 1388 NoticeInfo("Unreported persistent stats: %d", unreported) 1389 p := tunnel.getCustomParameters() 1390 statsTimer.Reset( 1391 prng.Period( 1392 p.Duration(parameters.PsiphonAPIStatusRequestShortPeriodMin), 1393 p.Duration(parameters.PsiphonAPIStatusRequestShortPeriodMax))) 1394 } 1395 1396 nextSshKeepAlivePeriod := func() time.Duration { 1397 p := tunnel.getCustomParameters() 1398 return prng.Period( 1399 p.Duration(parameters.SSHKeepAlivePeriodMin), 1400 p.Duration(parameters.SSHKeepAlivePeriodMax)) 1401 } 1402 1403 // TODO: don't initialize timer when config.DisablePeriodicSshKeepAlive is set 1404 sshKeepAliveTimer := time.NewTimer(nextSshKeepAlivePeriod()) 1405 if tunnel.config.DisablePeriodicSshKeepAlive { 1406 sshKeepAliveTimer.Stop() 1407 } else { 1408 defer sshKeepAliveTimer.Stop() 1409 } 1410 1411 // Perform network requests in separate goroutines so as not to block 1412 // other operations. 1413 requestsWaitGroup := new(sync.WaitGroup) 1414 1415 requestsWaitGroup.Add(1) 1416 signalStatusRequest := make(chan struct{}) 1417 go func() { 1418 defer requestsWaitGroup.Done() 1419 for range signalStatusRequest { 1420 sendStats(tunnel) 1421 } 1422 }() 1423 1424 requestsWaitGroup.Add(1) 1425 signalPeriodicSshKeepAlive := make(chan time.Duration) 1426 sshKeepAliveError := make(chan error, 1) 1427 go func() { 1428 defer requestsWaitGroup.Done() 1429 isFirstPeriodicKeepAlive := true 1430 for timeout := range signalPeriodicSshKeepAlive { 1431 bytesUp := atomic.LoadInt64(&totalSent) 1432 bytesDown := atomic.LoadInt64(&totalReceived) 1433 err := tunnel.sendSshKeepAlive( 1434 isFirstPeriodicKeepAlive, false, timeout, bytesUp, bytesDown) 1435 if err != nil { 1436 select { 1437 case sshKeepAliveError <- err: 1438 default: 1439 } 1440 } 1441 isFirstPeriodicKeepAlive = false 1442 } 1443 }() 1444 1445 // Probe-type SSH keep alives have a distinct send worker and may be sent 1446 // concurrently, to ensure a long period keep alive timeout doesn't delay 1447 // failed tunnel detection. 1448 1449 requestsWaitGroup.Add(1) 1450 signalProbeSshKeepAlive := make(chan time.Duration) 1451 go func() { 1452 defer requestsWaitGroup.Done() 1453 for timeout := range signalProbeSshKeepAlive { 1454 bytesUp := atomic.LoadInt64(&totalSent) 1455 bytesDown := atomic.LoadInt64(&totalReceived) 1456 err := tunnel.sendSshKeepAlive( 1457 false, true, timeout, bytesUp, bytesDown) 1458 if err != nil { 1459 select { 1460 case sshKeepAliveError <- err: 1461 default: 1462 } 1463 } 1464 } 1465 }() 1466 1467 shutdown := false 1468 var err error 1469 for !shutdown && err == nil { 1470 select { 1471 case <-noticeBytesTransferredTicker.C: 1472 sent, received := transferstats.ReportRecentBytesTransferredForServer( 1473 tunnel.dialParams.ServerEntry.IpAddress) 1474 1475 if received > 0 { 1476 lastBytesReceivedTime = time.Now() 1477 } 1478 1479 bytesUp := atomic.AddInt64(&totalSent, sent) 1480 bytesDown := atomic.AddInt64(&totalReceived, received) 1481 1482 p := tunnel.getCustomParameters() 1483 noticePeriod := p.Duration(parameters.TotalBytesTransferredNoticePeriod) 1484 doEmitMemoryMetrics := p.Bool(parameters.TotalBytesTransferredEmitMemoryMetrics) 1485 replayTargetUpstreamBytes := p.Int(parameters.ReplayTargetUpstreamBytes) 1486 replayTargetDownstreamBytes := p.Int(parameters.ReplayTargetDownstreamBytes) 1487 replayTargetTunnelDuration := p.Duration(parameters.ReplayTargetTunnelDuration) 1488 1489 if lastTotalBytesTransferedTime.Add(noticePeriod).Before(time.Now()) { 1490 NoticeTotalBytesTransferred( 1491 tunnel.dialParams.ServerEntry.GetDiagnosticID(), bytesUp, bytesDown) 1492 if doEmitMemoryMetrics { 1493 emitMemoryMetrics() 1494 } 1495 lastTotalBytesTransferedTime = time.Now() 1496 } 1497 1498 // Only emit the frequent BytesTransferred notice when tunnel is not idle. 1499 if tunnel.config.EmitBytesTransferred && (sent > 0 || received > 0) { 1500 NoticeBytesTransferred( 1501 tunnel.dialParams.ServerEntry.GetDiagnosticID(), sent, received) 1502 } 1503 1504 // Once the tunnel has connected, activated, successfully transmitted the 1505 // targeted number of bytes, and been up for the targeted duration 1506 // (measured from the end of establishment), store its dial parameters for 1507 // subsequent replay. 1508 // 1509 // Even when target bytes and duration are all 0, the tunnel must remain up 1510 // for at least 1 second due to use of noticeBytesTransferredTicker; for 1511 // the same reason the granularity of ReplayTargetTunnelDuration is 1512 // seconds. 1513 if !setDialParamsSucceeded && 1514 bytesUp >= int64(replayTargetUpstreamBytes) && 1515 bytesDown >= int64(replayTargetDownstreamBytes) && 1516 time.Since(tunnel.establishedTime) >= replayTargetTunnelDuration { 1517 1518 tunnel.dialParams.Succeeded() 1519 setDialParamsSucceeded = true 1520 } 1521 1522 case <-statsTimer.C: 1523 select { 1524 case signalStatusRequest <- struct{}{}: 1525 default: 1526 } 1527 statsTimer.Reset(nextStatusRequestPeriod()) 1528 1529 case <-sshKeepAliveTimer.C: 1530 p := tunnel.getCustomParameters() 1531 inactivePeriod := p.Duration(parameters.SSHKeepAlivePeriodicInactivePeriod) 1532 if lastBytesReceivedTime.Add(inactivePeriod).Before(time.Now()) { 1533 timeout := p.Duration(parameters.SSHKeepAlivePeriodicTimeout) 1534 select { 1535 case signalPeriodicSshKeepAlive <- timeout: 1536 default: 1537 } 1538 } 1539 sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod()) 1540 1541 case <-tunnel.signalPortForwardFailure: 1542 // Note: no mutex on portForwardFailureTotal; only referenced here 1543 tunnel.totalPortForwardFailures++ 1544 NoticeInfo("port forward failures for %s: %d", 1545 tunnel.dialParams.ServerEntry.GetDiagnosticID(), 1546 tunnel.totalPortForwardFailures) 1547 1548 // If the underlying Conn has closed (meek and other plugin protocols may 1549 // close themselves in certain error conditions), the tunnel has certainly 1550 // failed. Otherwise, probe with an SSH keep alive. 1551 // 1552 // TODO: the IsClosed case omits the failed tunnel logging and reset 1553 // actions performed by sendSshKeepAlive. Should self-closing protocols 1554 // perform these actions themselves? 1555 1556 if tunnel.conn.IsClosed() { 1557 err = errors.TraceNew("underlying conn is closed") 1558 } else { 1559 p := tunnel.getCustomParameters() 1560 inactivePeriod := p.Duration(parameters.SSHKeepAliveProbeInactivePeriod) 1561 if lastBytesReceivedTime.Add(inactivePeriod).Before(time.Now()) { 1562 timeout := p.Duration(parameters.SSHKeepAliveProbeTimeout) 1563 select { 1564 case signalProbeSshKeepAlive <- timeout: 1565 default: 1566 } 1567 } 1568 if !tunnel.config.DisablePeriodicSshKeepAlive { 1569 sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod()) 1570 } 1571 1572 } 1573 1574 case err = <-sshKeepAliveError: 1575 1576 case serverRequest := <-tunnel.sshServerRequests: 1577 if serverRequest != nil { 1578 err := HandleServerRequest(tunnelOwner, tunnel, serverRequest.Type, serverRequest.Payload) 1579 if err == nil { 1580 serverRequest.Reply(true, nil) 1581 } else { 1582 NoticeWarning("HandleServerRequest for %s failed: %s", serverRequest.Type, err) 1583 serverRequest.Reply(false, nil) 1584 1585 } 1586 } 1587 1588 case <-tunnel.operateCtx.Done(): 1589 shutdown = true 1590 } 1591 } 1592 1593 close(signalPeriodicSshKeepAlive) 1594 close(signalProbeSshKeepAlive) 1595 close(signalStatusRequest) 1596 requestsWaitGroup.Wait() 1597 1598 // Capture bytes transferred since the last noticeBytesTransferredTicker tick 1599 sent, received := transferstats.ReportRecentBytesTransferredForServer( 1600 tunnel.dialParams.ServerEntry.IpAddress) 1601 bytesUp := atomic.AddInt64(&totalSent, sent) 1602 bytesDown := atomic.AddInt64(&totalReceived, received) 1603 1604 // Always emit a final NoticeTotalBytesTransferred 1605 NoticeTotalBytesTransferred( 1606 tunnel.dialParams.ServerEntry.GetDiagnosticID(), bytesUp, bytesDown) 1607 1608 if err == nil { 1609 1610 NoticeInfo("shutdown operate tunnel") 1611 1612 // This commanded shutdown case is initiated by Tunnel.Close, which will 1613 // wait up to parameters.TunnelOperateShutdownTimeout to allow the following 1614 // requests to complete. 1615 1616 // Send a final status request in order to report any outstanding persistent 1617 // stats and domain bytes transferred as soon as possible. 1618 1619 sendStats(tunnel) 1620 1621 // The controller connectedReporter may have initiated a connected request 1622 // concurrent to this commanded shutdown. SetInFlightConnectedRequest 1623 // ensures that a connected request doesn't start after the commanded 1624 // shutdown. AwaitInFlightConnectedRequest blocks until any in flight 1625 // request completes or is aborted after TunnelOperateShutdownTimeout. 1626 // 1627 // As any connected request is performed by a concurrent goroutine, 1628 // sendStats is called first and AwaitInFlightConnectedRequest second. 1629 1630 tunnel.AwaitInFlightConnectedRequest() 1631 1632 } else { 1633 1634 NoticeWarning("operate tunnel error for %s: %s", 1635 tunnel.dialParams.ServerEntry.GetDiagnosticID(), err) 1636 1637 tunnelOwner.SignalTunnelFailure(tunnel) 1638 } 1639 } 1640 1641 // sendSshKeepAlive is a helper which sends a keepalive@openssh.com request 1642 // on the specified SSH connections and returns true of the request succeeds 1643 // within a specified timeout. If the request fails, the associated conn is 1644 // closed, which will terminate the associated tunnel. 1645 func (tunnel *Tunnel) sendSshKeepAlive( 1646 isFirstPeriodicKeepAlive bool, 1647 isProbeKeepAlive bool, 1648 timeout time.Duration, 1649 bytesUp int64, 1650 bytesDown int64) error { 1651 1652 p := tunnel.getCustomParameters() 1653 1654 // Random padding to frustrate fingerprinting. 1655 request := prng.Padding( 1656 p.Int(parameters.SSHKeepAlivePaddingMinBytes), 1657 p.Int(parameters.SSHKeepAlivePaddingMaxBytes)) 1658 1659 speedTestSample := isFirstPeriodicKeepAlive 1660 if !speedTestSample { 1661 speedTestSample = p.WeightedCoinFlip( 1662 parameters.SSHKeepAliveSpeedTestSampleProbability) 1663 } 1664 1665 networkConnectivityPollPeriod := p.Duration( 1666 parameters.SSHKeepAliveNetworkConnectivityPollingPeriod) 1667 1668 resetOnFailure := p.WeightedCoinFlip( 1669 parameters.SSHKeepAliveResetOnFailureProbability) 1670 1671 p.Close() 1672 1673 // Note: there is no request context since SSH requests cannot be interrupted 1674 // directly. Closing the tunnel will interrupt the request. A timeout is set 1675 // to unblock this function, but the goroutine may not exit until the tunnel 1676 // is closed. 1677 1678 // Use a buffer of 1 as there are two senders and only one guaranteed receive. 1679 errChannel := make(chan error, 1) 1680 1681 afterFunc := time.AfterFunc(timeout, func() { 1682 errChannel <- errors.TraceNew("timed out") 1683 }) 1684 defer afterFunc.Stop() 1685 1686 go func() { 1687 1688 startTime := time.Now() 1689 1690 // Note: reading a reply is important for last-received-time tunnel 1691 // duration calculation. 1692 requestOk, response, err := tunnel.sshClient.SendRequest( 1693 "keepalive@openssh.com", true, request) 1694 1695 elapsedTime := time.Since(startTime) 1696 1697 errChannel <- err 1698 1699 success := (err == nil && requestOk) 1700 1701 if success && isProbeKeepAlive { 1702 NoticeInfo("Probe SSH keep-alive RTT: %s", elapsedTime) 1703 } 1704 1705 // Record the keep alive round trip as a speed test sample. The first 1706 // periodic keep alive is always recorded, as many tunnels are short-lived 1707 // and we want to ensure that some data is gathered. Subsequent keep alives 1708 // are recorded with some configurable probability, which, considering that 1709 // only the last SpeedTestMaxSampleCount samples are retained, enables 1710 // tuning the sampling frequency. 1711 1712 if success && speedTestSample { 1713 1714 err = tactics.AddSpeedTestSample( 1715 tunnel.config.GetParameters(), 1716 GetTacticsStorer(tunnel.config), 1717 tunnel.config.GetNetworkID(), 1718 tunnel.dialParams.ServerEntry.Region, 1719 tunnel.dialParams.TunnelProtocol, 1720 elapsedTime, 1721 request, 1722 response) 1723 if err != nil { 1724 NoticeWarning("AddSpeedTestSample failed: %s", errors.Trace(err)) 1725 } 1726 } 1727 }() 1728 1729 // While awaiting the response, poll the network connectivity state. If there 1730 // is network connectivity, on the same network, for the entire duration of 1731 // the keep alive request and the request fails, record a failed tunnel 1732 // event. 1733 // 1734 // The network connectivity heuristic is intended to reduce the number of 1735 // failed tunnels reported due to routine situations such as varying mobile 1736 // network conditions. The polling may produce false positives if the network 1737 // goes down and up between polling periods, or changes to a new network and 1738 // back to the previous network between polling periods. 1739 // 1740 // For platforms that don't provide a NetworkConnectivityChecker, it is 1741 // assumed that there is network connectivity. 1742 // 1743 // The approximate number of tunneled bytes successfully sent and received is 1744 // recorded in the failed tunnel event as a quality indicator. 1745 1746 ticker := time.NewTicker(networkConnectivityPollPeriod) 1747 defer ticker.Stop() 1748 continuousNetworkConnectivity := true 1749 networkID := tunnel.config.GetNetworkID() 1750 1751 var err error 1752 loop: 1753 for { 1754 select { 1755 case err = <-errChannel: 1756 break loop 1757 case <-ticker.C: 1758 connectivityChecker := tunnel.config.NetworkConnectivityChecker 1759 if (connectivityChecker != nil && 1760 connectivityChecker.HasNetworkConnectivity() != 1) || 1761 (networkID != tunnel.config.GetNetworkID()) { 1762 1763 continuousNetworkConnectivity = false 1764 } 1765 } 1766 } 1767 1768 err = errors.Trace(err) 1769 1770 if err != nil { 1771 tunnel.sshClient.Close() 1772 tunnel.conn.Close() 1773 1774 // Don't perform log or reset actions when the keep alive may have been 1775 // interrupted due to shutdown. 1776 1777 isShutdown := false 1778 select { 1779 case <-tunnel.operateCtx.Done(): 1780 isShutdown = true 1781 default: 1782 } 1783 1784 // Ensure that at most one of the two SSH keep alive workers (periodic and 1785 // probe) perform the log and reset actions. 1786 1787 wasHandled := atomic.CompareAndSwapInt32(&tunnel.handledSSHKeepAliveFailure, 0, 1) 1788 1789 if continuousNetworkConnectivity && 1790 !isShutdown && 1791 !wasHandled { 1792 1793 _ = RecordFailedTunnelStat( 1794 tunnel.config, 1795 tunnel.dialParams, 1796 tunnel.livenessTestMetrics, 1797 bytesUp, 1798 bytesDown, 1799 err) 1800 if tunnel.extraFailureAction != nil { 1801 tunnel.extraFailureAction() 1802 } 1803 1804 // SSHKeepAliveResetOnFailureProbability is set when a late-lifecycle 1805 // impaired protocol attack is suspected. With the given probability, reset 1806 // server affinity and replay parameters for this server to avoid 1807 // continuously reconnecting to the server and/or using the same protocol 1808 // and dial parameters. 1809 1810 if resetOnFailure { 1811 NoticeInfo("Delete dial parameters for %s", tunnel.dialParams.ServerEntry.GetDiagnosticID()) 1812 err := DeleteDialParameters(tunnel.dialParams.ServerEntry.IpAddress, tunnel.dialParams.NetworkID) 1813 if err != nil { 1814 NoticeWarning("DeleteDialParameters failed: %s", err) 1815 } 1816 NoticeInfo("Delete server affinity for %s", tunnel.dialParams.ServerEntry.GetDiagnosticID()) 1817 err = DeleteServerEntryAffinity(tunnel.dialParams.ServerEntry.IpAddress) 1818 if err != nil { 1819 NoticeWarning("DeleteServerEntryAffinity failed: %s", err) 1820 } 1821 } 1822 } 1823 } 1824 1825 return err 1826 } 1827 1828 // sendStats is a helper for sending session stats to the server. 1829 func sendStats(tunnel *Tunnel) bool { 1830 1831 // Tunnel does not have a serverContext when DisableApi is set 1832 if tunnel.serverContext == nil { 1833 return true 1834 } 1835 1836 // Skip when tunnel is discarded 1837 if tunnel.IsDiscarded() { 1838 return true 1839 } 1840 1841 err := tunnel.serverContext.DoStatusRequest(tunnel) 1842 if err != nil { 1843 NoticeWarning("DoStatusRequest failed for %s: %s", 1844 tunnel.dialParams.ServerEntry.GetDiagnosticID(), err) 1845 } 1846 1847 return err == nil 1848 }