github.com/psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/refraction/refraction.go (about) 1 //go:build PSIPHON_ENABLE_REFRACTION_NETWORKING 2 // +build PSIPHON_ENABLE_REFRACTION_NETWORKING 3 4 /* 5 * Copyright (c) 2018, Psiphon Inc. 6 * All rights reserved. 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 * 21 */ 22 23 /* 24 25 Package refraction wraps github.com/refraction-networking/gotapdance with 26 net.Listener and net.Conn types that provide drop-in integration with Psiphon. 27 28 */ 29 package refraction 30 31 import ( 32 "context" 33 "crypto/sha256" 34 "fmt" 35 "io/ioutil" 36 "net" 37 "os" 38 "path/filepath" 39 "sync" 40 "sync/atomic" 41 "time" 42 43 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common" 44 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors" 45 "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/protocol" 46 "github.com/armon/go-proxyproto" 47 lrucache "github.com/cognusion/go-cache-lru" 48 refraction_networking_proto "github.com/refraction-networking/gotapdance/protobuf" 49 refraction_networking_client "github.com/refraction-networking/gotapdance/tapdance" 50 ) 51 52 const ( 53 READ_PROXY_PROTOCOL_HEADER_TIMEOUT = 5 * time.Second 54 REGISTRATION_CACHE_MAX_ENTRIES = 256 55 ) 56 57 // Enabled indicates if Refraction Networking functionality is enabled. 58 func Enabled() bool { 59 return true 60 } 61 62 // Listener is a net.Listener. 63 type Listener struct { 64 net.Listener 65 } 66 67 // Listen creates a new Refraction Networking listener. 68 // 69 // The Refraction Networking station (TapDance or Conjure) will send the 70 // original client address via the HAProxy proxy protocol v1, 71 // https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt. The original 72 // client address is read and returned by accepted conns' RemoteAddr. 73 // RemoteAddr _must_ be called non-concurrently before calling Read on 74 // accepted conns as the HAProxy proxy protocol header reading logic sets 75 // SetReadDeadline and performs a Read. 76 // 77 // Psiphon server hosts should be configured to accept tunnel connections only 78 // from Refraction Networking stations. 79 func Listen(address string) (net.Listener, error) { 80 81 tcpListener, err := net.Listen("tcp", address) 82 if err != nil { 83 return nil, errors.Trace(err) 84 } 85 86 // Setting a timeout ensures that reading the proxy protocol 87 // header completes or times out and RemoteAddr will not block. See: 88 // https://godoc.org/github.com/armon/go-proxyproto#Conn.RemoteAddr 89 90 proxyListener := &proxyproto.Listener{ 91 Listener: tcpListener, 92 ProxyHeaderTimeout: READ_PROXY_PROTOCOL_HEADER_TIMEOUT} 93 94 stationListener := &stationListener{ 95 proxyListener: proxyListener, 96 } 97 98 return &Listener{Listener: stationListener}, nil 99 } 100 101 // stationListener uses the proxyproto.Listener SourceCheck callback to 102 // capture and record the direct remote address, the station address, and 103 // wraps accepted conns to provide station address metrics via GetMetrics. 104 // These metrics enable identifying which station fronted a connection, which 105 // is useful for network operations and troubleshooting. 106 // 107 // go-proxyproto.Conn.RemoteAddr reports the originating client IP address, 108 // which is geolocated and recorded for metrics. The underlying conn's remote 109 // address, the station address, is not accessible via the go-proxyproto API. 110 // 111 // stationListener is not safe for concurrent access. 112 type stationListener struct { 113 proxyListener *proxyproto.Listener 114 } 115 116 func (l *stationListener) Accept() (net.Conn, error) { 117 var stationRemoteAddr net.Addr 118 l.proxyListener.SourceCheck = func(addr net.Addr) (bool, error) { 119 stationRemoteAddr = addr 120 return true, nil 121 } 122 conn, err := l.proxyListener.Accept() 123 if err != nil { 124 return nil, err 125 } 126 if stationRemoteAddr == nil { 127 return nil, errors.TraceNew("missing station address") 128 } 129 return &stationConn{ 130 Conn: conn, 131 stationIPAddress: common.IPAddressFromAddr(stationRemoteAddr), 132 }, nil 133 } 134 135 func (l *stationListener) Close() error { 136 return l.proxyListener.Close() 137 } 138 139 func (l *stationListener) Addr() net.Addr { 140 return l.proxyListener.Addr() 141 } 142 143 type stationConn struct { 144 net.Conn 145 stationIPAddress string 146 } 147 148 // IrregularTunnelError implements the common.IrregularIndicator interface. 149 func (c *stationConn) IrregularTunnelError() error { 150 151 // We expect a PROXY protocol header, but go-proxyproto does not produce an 152 // error if the "PROXY " prefix is absent; instead the connection will 153 // proceed. To detect this case, check if the go-proxyproto RemoteAddr IP 154 // address matches the underlying connection IP address. When these values 155 // match, there was no PROXY protocol header. 156 // 157 // Limitation: the values will match if there is a PROXY protocol header 158 // containing the same IP address as the underlying connection. This is not 159 // an expected case. 160 161 if common.IPAddressFromAddr(c.RemoteAddr()) == c.stationIPAddress { 162 return errors.TraceNew("unexpected station IP address") 163 } 164 return nil 165 } 166 167 // GetMetrics implements the common.MetricsSource interface. 168 func (c *stationConn) GetMetrics() common.LogFields { 169 170 logFields := make(common.LogFields) 171 172 // Ensure we don't log a potential non-station IP address. 173 if c.IrregularTunnelError() == nil { 174 logFields["station_ip_address"] = c.stationIPAddress 175 } 176 177 return logFields 178 } 179 180 // DialTapDance establishes a new TapDance connection to a TapDance station 181 // specified in the config assets and forwarding through to the Psiphon server 182 // specified by address. 183 // 184 // The TapDance station config assets (which are also the Conjure station 185 // assets) are read from dataDirectory/"refraction-networking". When no config 186 // is found, default assets are paved. 187 // 188 // dialer specifies the custom dialer for underlying TCP dials. 189 // 190 // The input ctx is expected to have a timeout for the dial. 191 // 192 // Limitation: the parameters emitLogs and dataDirectory are used for one-time 193 // initialization and are ignored after the first DialTapDance/Conjure call. 194 func DialTapDance( 195 ctx context.Context, 196 emitLogs bool, 197 dataDirectory string, 198 dialer common.NetDialer, 199 address string) (net.Conn, error) { 200 201 return dial( 202 ctx, 203 emitLogs, 204 dataDirectory, 205 dialer, 206 address, 207 nil) 208 } 209 210 // DialConjure establishes a new Conjure connection to a Conjure station. 211 // 212 // dialer specifies the custom dialer to use for phantom dials. Additional 213 // Conjure-specific parameters are specified in conjureConfig. 214 // 215 // See DialTapdance comment. 216 func DialConjure( 217 ctx context.Context, 218 emitLogs bool, 219 dataDirectory string, 220 dialer common.NetDialer, 221 address string, 222 conjureConfig *ConjureConfig) (net.Conn, error) { 223 224 return dial( 225 ctx, 226 emitLogs, 227 dataDirectory, 228 dialer, 229 address, 230 conjureConfig) 231 } 232 233 func dial( 234 ctx context.Context, 235 emitLogs bool, 236 dataDirectory string, 237 dialer common.NetDialer, 238 address string, 239 conjureConfig *ConjureConfig) (net.Conn, error) { 240 241 err := initRefractionNetworking(emitLogs, dataDirectory) 242 if err != nil { 243 return nil, errors.Trace(err) 244 } 245 246 if _, ok := ctx.Deadline(); !ok { 247 return nil, errors.TraceNew("dial context has no timeout") 248 } 249 250 useConjure := conjureConfig != nil 251 252 manager := newDialManager() 253 254 refractionDialer := &refraction_networking_client.Dialer{ 255 TcpDialer: manager.makeManagedDialer(dialer.DialContext), 256 UseProxyHeader: true, 257 } 258 259 conjureCached := false 260 conjureDelay := time.Duration(0) 261 262 var conjureCachedRegistration *refraction_networking_client.ConjureReg 263 var conjureRecordRegistrar *recordRegistrar 264 265 if useConjure { 266 267 // Our strategy is to try one registration per dial attempt: a cached 268 // registration, if it exists, or API or decoy registration, as configured. 269 // This assumes Psiphon establishment will try/retry many candidates as 270 // required, and that the desired mix of API/decoy registrations will be 271 // configured and generated. In good network conditions, internal gotapdance 272 // retries (via APIRegistrar.MaxRetries or APIRegistrar.SecondaryRegistrar) 273 // are unlikely to start before the Conjure dial is canceled. 274 275 // Caching registrations reduces average Conjure dial time by often 276 // eliminating the registration phase. This is especially impactful for 277 // short duration tunnels, such as on mobile. Caching also reduces domain 278 // fronted traffic and load on the API registrar and decoys. 279 // 280 // We implement a simple in-memory registration cache with the following 281 // behavior: 282 // 283 // - If a new registration succeeds, but the overall Conjure dial is 284 // _canceled_, the registration is optimistically cached. 285 // - If the Conjure phantom dial fails, any associated cached registration 286 // is discarded. 287 // - A cached registration's TTL is extended upon phantom dial success. 288 // - If the configured TTL changes, the cache is cleared. 289 // 290 // Limitations: 291 // - The cache is not persistent. 292 // - There is no TTL extension during a long connection. 293 // - Caching a successful registration when the phantom dial is canceled may 294 // skip the necessary "delay" step (however, an immediate re-establishment 295 // to the same candidate is unlikely in this case). 296 // 297 // TODO: 298 // - Revisit when gotapdance adds its own caching. 299 // - Consider "pre-registering" Conjure when already connected with a 300 // different protocol, so a Conjure registration is available on the next 301 // establishment; in this scenario, a tunneled API registration would not 302 // require domain fronting. 303 304 refractionDialer.DarkDecoy = true 305 306 // The pop operation removes the registration from the cache. This 307 // eliminates the possibility of concurrent candidates (with the same cache 308 // key) using and modifying the same registration, a potential race 309 // condition. The popped cached registration must be reinserted in the cache 310 // after canceling or success, but not on phantom dial failure. 311 312 conjureCachedRegistration = conjureRegistrationCache.pop(conjureConfig) 313 314 if conjureCachedRegistration != nil { 315 316 refractionDialer.DarkDecoyRegistrar = &cachedRegistrar{ 317 registration: conjureCachedRegistration, 318 } 319 320 conjureCached = true 321 conjureDelay = 0 // report no delay 322 323 } else if conjureConfig.APIRegistrarBidirectionalURL != "" { 324 325 if conjureConfig.APIRegistrarHTTPClient == nil { 326 // While not a guaranteed check, if the APIRegistrarHTTPClient isn't set 327 // then the API registration would certainly be unfronted, resulting in a 328 // fingerprintable connection leak. 329 return nil, errors.TraceNew("missing APIRegistrarHTTPClient") 330 } 331 332 refractionDialer.DarkDecoyRegistrar = &refraction_networking_client.APIRegistrarBidirectional{ 333 Endpoint: conjureConfig.APIRegistrarBidirectionalURL, 334 ConnectionDelay: conjureConfig.APIRegistrarDelay, 335 MaxRetries: 0, 336 Client: conjureConfig.APIRegistrarHTTPClient, 337 } 338 339 conjureDelay = conjureConfig.APIRegistrarDelay 340 341 } else if conjureConfig.DecoyRegistrarDialer != nil { 342 343 refractionDialer.DarkDecoyRegistrar = &refraction_networking_client.DecoyRegistrar{ 344 TcpDialer: manager.makeManagedDialer( 345 conjureConfig.DecoyRegistrarDialer.DialContext), 346 } 347 348 refractionDialer.Width = conjureConfig.DecoyRegistrarWidth 349 350 // Limitation: the decoy regsitration delay is not currently exposed in the 351 // gotapdance API. 352 conjureDelay = -1 // don't report delay 353 354 } else { 355 356 return nil, errors.TraceNew("no conjure registrar specified") 357 } 358 359 if conjureCachedRegistration == nil && conjureConfig.RegistrationCacheTTL != 0 { 360 361 // Record the registration result in order to cache it. 362 conjureRecordRegistrar = &recordRegistrar{ 363 registrar: refractionDialer.DarkDecoyRegistrar, 364 } 365 refractionDialer.DarkDecoyRegistrar = conjureRecordRegistrar 366 } 367 368 switch conjureConfig.Transport { 369 case protocol.CONJURE_TRANSPORT_MIN_OSSH: 370 refractionDialer.Transport = refraction_networking_proto.TransportType_Min 371 refractionDialer.TcpDialer = newMinTransportDialer(refractionDialer.TcpDialer) 372 case protocol.CONJURE_TRANSPORT_OBFS4_OSSH: 373 refractionDialer.Transport = refraction_networking_proto.TransportType_Obfs4 374 default: 375 return nil, errors.Tracef("invalid Conjure transport: %s", conjureConfig.Transport) 376 } 377 378 if conjureCachedRegistration != nil { 379 380 // When using a cached registration, patch its TcpDialer to use the custom 381 // dialer for this dial. In the non-cached code path, gotapdance will set 382 // refractionDialer.TcpDialer into a new registration. 383 conjureCachedRegistration.TcpDialer = refractionDialer.TcpDialer 384 } 385 } 386 387 // If the dial context is cancelled, use dialManager to interrupt 388 // refractionDialer.DialContext. See dialManager comment explaining why 389 // refractionDialer.DialContext may block even when the input context is 390 // cancelled. 391 dialComplete := make(chan struct{}) 392 go func() { 393 select { 394 case <-ctx.Done(): 395 case <-dialComplete: 396 } 397 select { 398 // Prioritize the dialComplete case. 399 case <-dialComplete: 400 return 401 default: 402 } 403 manager.close() 404 }() 405 406 conn, err := refractionDialer.DialContext(ctx, "tcp", address) 407 close(dialComplete) 408 409 if err != nil { 410 // Call manager.close before updating cache, to synchronously shutdown dials 411 // and ensure there are no further concurrent reads/writes to the recorded 412 // registration before referencing it. 413 manager.close() 414 } 415 416 // Cache (or put back) a successful registration. Also put back in the 417 // specific error case where the phantom dial was canceled, as the 418 // registration may still be valid. This operation implicitly extends the TTL 419 // of a reused cached registration; we assume the Conjure station is also 420 // extending the TTL by the same amount. 421 // 422 // Limitation: the cancel case shouldn't extend the TTL. 423 424 if useConjure && (conjureCachedRegistration != nil || conjureRecordRegistrar != nil) { 425 426 isCanceled := (err != nil && ctx.Err() == context.Canceled) 427 428 if err == nil || isCanceled { 429 430 registration := conjureCachedRegistration 431 if registration == nil { 432 // We assume gotapdance is no longer accessing the Registrar. 433 registration = conjureRecordRegistrar.registration 434 } 435 436 // conjureRecordRegistrar.registration will be nil if there was no cached 437 // registration _and_ registration didn't succeed before a cancel. 438 if registration != nil { 439 440 // Do not retain a reference to the custom dialer, as its context will not 441 // be valid for future dials using this cached registration. Assumes that 442 // gotapdance will no longer reference the TcpDialer now that the 443 // connection is established. 444 registration.TcpDialer = nil 445 446 conjureRegistrationCache.put(conjureConfig, registration, isCanceled) 447 } 448 449 } else if conjureCachedRegistration != nil { 450 451 conjureConfig.Logger.WithTraceFields( 452 common.LogFields{ 453 "diagnosticID": conjureConfig.DiagnosticID, 454 "reason": "phantom dial failed", 455 }).Info( 456 "drop cached registration") 457 } 458 } 459 460 if err != nil { 461 return nil, errors.Trace(err) 462 } 463 464 manager.startUsingRunCtx() 465 466 refractionConn := &refractionConn{ 467 Conn: conn, 468 manager: manager, 469 } 470 471 if useConjure { 472 // Retain these values for logging metrics. 473 refractionConn.isConjure = true 474 refractionConn.conjureCached = conjureCached 475 refractionConn.conjureDelay = conjureDelay 476 refractionConn.conjureTransport = conjureConfig.Transport 477 } 478 479 return refractionConn, nil 480 } 481 482 func DeleteCachedConjureRegistration(config *ConjureConfig) { 483 conjureRegistrationCache.delete(config) 484 } 485 486 type registrationCache struct { 487 mutex sync.Mutex 488 TTL time.Duration 489 cache *lrucache.Cache 490 } 491 492 func newRegistrationCache() *registrationCache { 493 return ®istrationCache{ 494 cache: lrucache.NewWithLRU( 495 lrucache.NoExpiration, 496 1*time.Minute, 497 REGISTRATION_CACHE_MAX_ENTRIES), 498 } 499 } 500 501 func (c *registrationCache) put( 502 config *ConjureConfig, 503 registration *refraction_networking_client.ConjureReg, 504 isCanceled bool) { 505 506 c.mutex.Lock() 507 defer c.mutex.Unlock() 508 509 // Clear the entire cache if the configured TTL changes to avoid retaining 510 // items for too long. This is expected to be an infrequent event. The 511 // go-cache-lru API does not offer a mechanism to inspect and adjust the TTL 512 // of all existing items. 513 if c.TTL != config.RegistrationCacheTTL { 514 c.cache.Flush() 515 c.TTL = config.RegistrationCacheTTL 516 } 517 518 // Drop the cached registration if another entry is found under the same key. 519 // Since the dial pops its entry out of the cache, finding an existing entry 520 // implies that another tunnel establishment candidate with the same key has 521 // successfully registered and connected (or canceled) in the meantime. 522 // Prefer that newer cached registration. 523 // 524 // For Psiphon, one scenario resulting in this condition is that the first 525 // dial to a given server, using a cached registration, is delayed long 526 // enough that a new candidate for the same server has been started and 527 // outpaced the first candidate. 528 _, found := c.cache.Get(config.RegistrationCacheKey) 529 if found { 530 config.Logger.WithTraceFields( 531 common.LogFields{ 532 "diagnosticID": config.DiagnosticID, 533 "reason": "existing entry found", 534 }).Info( 535 "drop cached registration") 536 return 537 } 538 539 reason := "connected" 540 if isCanceled { 541 reason = "canceled" 542 } 543 544 config.Logger.WithTraceFields( 545 common.LogFields{ 546 "diagnosticID": config.DiagnosticID, 547 "cacheSize": c.cache.ItemCount(), 548 "reason": reason, 549 }).Info( 550 "put cached registration") 551 552 c.cache.Set( 553 config.RegistrationCacheKey, 554 registration, 555 c.TTL) 556 } 557 558 func (c *registrationCache) pop( 559 config *ConjureConfig) *refraction_networking_client.ConjureReg { 560 561 c.mutex.Lock() 562 defer c.mutex.Unlock() 563 564 // See TTL/Flush comment in put. 565 if c.TTL != config.RegistrationCacheTTL { 566 c.cache.Flush() 567 c.TTL = config.RegistrationCacheTTL 568 } 569 570 entry, found := c.cache.Get(config.RegistrationCacheKey) 571 572 config.Logger.WithTraceFields( 573 common.LogFields{ 574 "diagnosticID": config.DiagnosticID, 575 "cacheSize": c.cache.ItemCount(), 576 "found": found, 577 }).Info( 578 "pop cached registration") 579 580 if found { 581 c.cache.Delete(config.RegistrationCacheKey) 582 return entry.(*refraction_networking_client.ConjureReg) 583 } 584 585 return nil 586 } 587 588 func (c *registrationCache) delete(config *ConjureConfig) { 589 590 c.mutex.Lock() 591 defer c.mutex.Unlock() 592 593 _, found := c.cache.Get(config.RegistrationCacheKey) 594 595 config.Logger.WithTraceFields( 596 common.LogFields{ 597 "diagnosticID": config.DiagnosticID, 598 "found": found, 599 }).Info( 600 "delete cached registration") 601 602 if found { 603 c.cache.Delete(config.RegistrationCacheKey) 604 } 605 } 606 607 var conjureRegistrationCache = newRegistrationCache() 608 609 type cachedRegistrar struct { 610 registration *refraction_networking_client.ConjureReg 611 } 612 613 func (r *cachedRegistrar) Register( 614 _ *refraction_networking_client.ConjureSession, 615 _ context.Context) (*refraction_networking_client.ConjureReg, error) { 616 617 return r.registration, nil 618 } 619 620 type recordRegistrar struct { 621 registrar refraction_networking_client.Registrar 622 registration *refraction_networking_client.ConjureReg 623 } 624 625 func (r *recordRegistrar) Register( 626 session *refraction_networking_client.ConjureSession, 627 ctx context.Context) (*refraction_networking_client.ConjureReg, error) { 628 629 registration, err := r.registrar.Register(session, ctx) 630 if err != nil { 631 return nil, errors.Trace(err) 632 } 633 r.registration = registration 634 return registration, nil 635 } 636 637 // minTransportConn buffers the first 32-byte random HMAC write performed by 638 // Conjure TransportType_Min, and prepends it to the subsequent first write 639 // made by OSSH. The purpose is to avoid a distinct fingerprint consisting of 640 // the initial TCP data packet always containing exactly 32 bytes of payload. 641 // The first write by OSSH will be a variable length multi-packet-sized 642 // sequence of random bytes. 643 type minTransportConn struct { 644 net.Conn 645 646 mutex sync.Mutex 647 state int 648 buffer []byte 649 err error 650 } 651 652 const ( 653 stateMinTransportInit = iota 654 stateMinTransportBufferedHMAC 655 stateMinTransportWroteHMAC 656 stateMinTransportFailed 657 ) 658 659 func newMinTransportConn(conn net.Conn) *minTransportConn { 660 return &minTransportConn{ 661 Conn: conn, 662 state: stateMinTransportInit, 663 } 664 } 665 666 func (conn *minTransportConn) Write(p []byte) (int, error) { 667 conn.mutex.Lock() 668 defer conn.mutex.Unlock() 669 670 switch conn.state { 671 case stateMinTransportInit: 672 if len(p) != sha256.Size { 673 conn.state = stateMinTransportFailed 674 conn.err = errors.TraceNew("unexpected HMAC write size") 675 return 0, conn.err 676 } 677 conn.buffer = make([]byte, sha256.Size) 678 copy(conn.buffer, p) 679 conn.state = stateMinTransportBufferedHMAC 680 return sha256.Size, nil 681 case stateMinTransportBufferedHMAC: 682 conn.buffer = append(conn.buffer, p...) 683 n, err := conn.Conn.Write(conn.buffer) 684 if n < sha256.Size { 685 conn.state = stateMinTransportFailed 686 conn.err = errors.TraceNew("failed to write HMAC") 687 if err == nil { 688 // As Write must return an error when failing to write the entire buffer, 689 // we don't expect to hit this case. 690 err = conn.err 691 } 692 } else { 693 conn.state = stateMinTransportWroteHMAC 694 } 695 n -= sha256.Size 696 // Do not wrap Conn.Write errors, and do not return conn.err here. 697 return n, err 698 case stateMinTransportWroteHMAC: 699 return conn.Conn.Write(p) 700 case stateMinTransportFailed: 701 return 0, conn.err 702 default: 703 return 0, errors.TraceNew("unexpected state") 704 } 705 } 706 707 func newMinTransportDialer(dialer common.Dialer) common.Dialer { 708 return func(ctx context.Context, network, address string) (net.Conn, error) { 709 conn, err := dialer(ctx, network, address) 710 if err != nil { 711 return nil, errors.Trace(err) 712 } 713 return newMinTransportConn(conn), nil 714 } 715 } 716 717 // dialManager tracks all dials performed by and dialed conns used by a 718 // refraction_networking_client conn. dialManager.close interrupts/closes 719 // all pending dials and established conns immediately. This ensures that 720 // blocking calls within refraction_networking_client, such as tls.Handhake, 721 // are interrupted: 722 // E.g., https://github.com/refraction-networking/gotapdance/blob/4d84655dad2e242b0af0459c31f687b12085dcca/tapdance/conn_raw.go#L307 723 // (...preceeding SetDeadline is insufficient for immediate cancellation.) 724 type dialManager struct { 725 ctxMutex sync.Mutex 726 useRunCtx bool 727 initialDialCtx context.Context 728 runCtx context.Context 729 stopRunning context.CancelFunc 730 731 conns *common.Conns 732 } 733 734 func newDialManager() *dialManager { 735 runCtx, stopRunning := context.WithCancel(context.Background()) 736 return &dialManager{ 737 runCtx: runCtx, 738 stopRunning: stopRunning, 739 conns: common.NewConns(), 740 } 741 } 742 743 func (manager *dialManager) makeManagedDialer(dialer common.Dialer) common.Dialer { 744 745 return func(ctx context.Context, network, address string) (net.Conn, error) { 746 return manager.dialWithDialer(dialer, ctx, network, address) 747 } 748 } 749 750 func (manager *dialManager) dialWithDialer( 751 dialer common.Dialer, 752 ctx context.Context, 753 network string, 754 address string) (net.Conn, error) { 755 756 if network != "tcp" { 757 return nil, errors.Tracef("unsupported network: %s", network) 758 } 759 760 // The context for this dial is either: 761 // - ctx, during the initial refraction_networking_client.DialContext, when 762 // this is Psiphon tunnel establishment. 763 // - manager.runCtx after the initial refraction_networking_client.Dial 764 // completes, in which case this is a TapDance protocol reconnection that 765 // occurs periodically for already established tunnels. 766 767 manager.ctxMutex.Lock() 768 if manager.useRunCtx { 769 770 // Preserve the random timeout configured by the TapDance client: 771 // https://github.com/refraction-networking/gotapdance/blob/4d84655dad2e242b0af0459c31f687b12085dcca/tapdance/conn_raw.go#L263 772 deadline, ok := ctx.Deadline() 773 if !ok { 774 return nil, errors.Tracef("unexpected nil deadline") 775 } 776 var cancelFunc context.CancelFunc 777 ctx, cancelFunc = context.WithDeadline(manager.runCtx, deadline) 778 defer cancelFunc() 779 } 780 manager.ctxMutex.Unlock() 781 782 conn, err := dialer(ctx, network, address) 783 if err != nil { 784 return nil, errors.Trace(err) 785 } 786 787 // Fail immediately if CloseWrite isn't available in the underlying dialed 788 // conn. The equivalent check in managedConn.CloseWrite isn't fatal and 789 // TapDance will run in a degraded state. 790 // Limitation: if the underlying conn _also_ passes through CloseWrite, this 791 // check may be insufficient. 792 if _, ok := conn.(common.CloseWriter); !ok { 793 return nil, errors.TraceNew("underlying conn is not a CloseWriter") 794 } 795 796 conn = &managedConn{ 797 Conn: conn, 798 manager: manager, 799 } 800 801 if !manager.conns.Add(conn) { 802 conn.Close() 803 return nil, errors.TraceNew("already closed") 804 } 805 806 return conn, nil 807 } 808 809 func (manager *dialManager) startUsingRunCtx() { 810 manager.ctxMutex.Lock() 811 manager.initialDialCtx = nil 812 manager.useRunCtx = true 813 manager.ctxMutex.Unlock() 814 } 815 816 func (manager *dialManager) close() { 817 manager.conns.CloseAll() 818 manager.stopRunning() 819 } 820 821 type managedConn struct { 822 net.Conn 823 manager *dialManager 824 } 825 826 // CloseWrite exposes the net.TCPConn.CloseWrite() functionality 827 // required by TapDance. 828 func (conn *managedConn) CloseWrite() error { 829 if closeWriter, ok := conn.Conn.(common.CloseWriter); ok { 830 return closeWriter.CloseWrite() 831 } 832 return errors.TraceNew("underlying conn is not a CloseWriter") 833 } 834 835 func (conn *managedConn) Close() error { 836 // Remove must be invoked asynchronously, as this Close may be called by 837 // conns.CloseAll, leading to a reentrant lock situation. 838 go conn.manager.conns.Remove(conn) 839 return conn.Conn.Close() 840 } 841 842 type refractionConn struct { 843 net.Conn 844 manager *dialManager 845 isClosed int32 846 847 isConjure bool 848 conjureCached bool 849 conjureDelay time.Duration 850 conjureTransport string 851 } 852 853 func (conn *refractionConn) Close() error { 854 conn.manager.close() 855 err := conn.Conn.Close() 856 atomic.StoreInt32(&conn.isClosed, 1) 857 return err 858 } 859 860 func (conn *refractionConn) IsClosed() bool { 861 return atomic.LoadInt32(&conn.isClosed) == 1 862 } 863 864 // GetMetrics implements the common.MetricsSource interface. 865 func (conn *refractionConn) GetMetrics() common.LogFields { 866 logFields := make(common.LogFields) 867 if conn.isConjure { 868 869 cached := "0" 870 if conn.conjureCached { 871 cached = "1" 872 } 873 logFields["conjure_cached"] = cached 874 875 if conn.conjureDelay != -1 { 876 logFields["conjure_delay"] = fmt.Sprintf("%d", conn.conjureDelay/time.Millisecond) 877 } 878 879 logFields["conjure_transport"] = conn.conjureTransport 880 } 881 return logFields 882 } 883 884 var initRefractionNetworkingOnce sync.Once 885 886 func initRefractionNetworking(emitLogs bool, dataDirectory string) error { 887 888 var initErr error 889 initRefractionNetworkingOnce.Do(func() { 890 891 if !emitLogs { 892 refraction_networking_client.Logger().Out = ioutil.Discard 893 } 894 895 assetsDir := filepath.Join(dataDirectory, "refraction-networking") 896 897 err := os.MkdirAll(assetsDir, 0700) 898 if err != nil { 899 initErr = errors.Trace(err) 900 return 901 } 902 903 clientConfFileName := filepath.Join(assetsDir, "ClientConf") 904 _, err = os.Stat(clientConfFileName) 905 if err != nil && os.IsNotExist(err) { 906 err = ioutil.WriteFile(clientConfFileName, getEmbeddedClientConf(), 0644) 907 } 908 if err != nil { 909 initErr = errors.Trace(err) 910 return 911 } 912 913 refraction_networking_client.AssetsSetDir(assetsDir) 914 }) 915 916 return initErr 917 }