github.com/psiphon-labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/resolver/resolver.go (about) 1 /* 2 * Copyright (c) 2022, Psiphon Inc. 3 * All rights reserved. 4 * 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 // Package resolver implements a DNS stub resolver, or DNS client, which 21 // resolves domain names. 22 // 23 // The resolver is Psiphon-specific and oriented towards blocking resistance. 24 // See ResolveIP for more details. 
package resolver

import (
	"context"
	"encoding/hex"
	"fmt"
	"net"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
	lrucache "github.com/cognusion/go-cache-lru"
	"github.com/miekg/dns"
)

// Resolver defaults and limits. resolverDefaultAttemptsPerServer,
// resolverDefaultRequestTimeout, and resolverDefaultAwaitTimeout populate the
// ResolveParameters that ResolveIP uses when it is called with nil params;
// resolverDNSPort is the port MakeResolveParameters appends to an alternate
// DNS server address that has none.
const (
	resolverCacheDefaultTTL          = 1 * time.Minute
	resolverCacheReapFrequency       = 1 * time.Minute
	resolverCacheMaxEntries          = 10000
	resolverServersUpdateTTL         = 5 * time.Second
	resolverDefaultAttemptsPerServer = 2
	resolverDefaultRequestTimeout    = 5 * time.Second
	resolverDefaultAwaitTimeout      = 10 * time.Millisecond
	resolverDefaultAnswerTTL         = 1 * time.Minute
	resolverDNSPort                  = "53"
	udpPacketBufferSize              = 1232
)

// NetworkConfig specifies network-level configuration for a Resolver.
type NetworkConfig struct {

	// GetDNSServers returns a list of system DNS server addresses (IP:port, or
	// IP only with port 53 assumed), as determined via OS APIs, in priority
	// order. GetDNSServers may be nil.
	GetDNSServers func() []string

	// BindToDevice should ensure the input file descriptor, a UDP socket, is
	// excluded from VPN routing. BindToDevice may be nil.
	BindToDevice func(fd int) (string, error)

	// AllowDefaultResolverWithBindToDevice indicates that it's safe to use
	// the default resolver when BindToDevice is configured, as the host OS
	// will automatically exclude DNS requests from the VPN.
	AllowDefaultResolverWithBindToDevice bool

	// IPv6Synthesize should apply NAT64 synthesis to the input IPv4 address,
	// returning a synthesized IPv6 address that will route to the same
	// endpoint. IPv6Synthesize may be nil.
	IPv6Synthesize func(IPv4 string) string

	// HasIPv6Route should return true when the host has an IPv6 route.
	// Resolver has an internal implementation, hasRoutableIPv6Interface, to
	// determine this, but it can fail on some platforms ("route ip+net:
	// netlinkrib: permission denied" on Android, for example; see Go issue
	// 40569). When HasIPv6Route is nil, the internal implementation is used.
	HasIPv6Route func() bool

	// LogWarning is an optional callback which is used to log warnings and
	// transient errors which would otherwise not be recorded or returned.
	LogWarning func(error)

	// LogHostnames indicates whether to log hostname in errors or not.
	LogHostnames bool

	// CacheExtensionInitialTTL specifies a minimum TTL to use when caching
	// domain resolution results. This minimum will override any TTL in the
	// DNS response. CacheExtensionInitialTTL is off when 0.
	CacheExtensionInitialTTL time.Duration

	// CacheExtensionVerifiedTTL specifies the minimum TTL to set for a cached
	// domain resolution result after the result has been verified.
	// CacheExtensionVerifiedTTL is off when 0.
	//
	// DNS cache extension is a workaround to partially mitigate issues with
	// obtaining underlying system DNS server IPs on platforms such as iOS
	// once a VPN is running and after network changes, such as changing from
	// Wi-Fi to mobile. While ResolveParameters.AlternateDNSServer can be
	// used to specify a known public DNS server, it may be the case that
	// public DNS servers are blocked or always falling back to a public DNS
	// server creates unusual traffic. And while it may be possible to use
	// the default system resolver, it lacks certain circumvention
	// capabilities.
	//
	// Extending the TTL for cached responses allows Psiphon to redial domains
	// using recently successful IPs.
	//
	// CacheExtensionInitialTTL allows for a greater initial minimum TTL, so
	// that the response entry remains in the cache long enough for a dial to
	// fully complete and verify the endpoint. Psiphon will call
	// Resolver.VerifyCacheExtension once a dial has authenticated, for
	// example, the destination Psiphon server. VerifyCacheExtension will
	// further extend the corresponding TTL to CacheExtensionVerifiedTTL, a
	// longer TTL. CacheExtensionInitialTTL is intended to be on the order of
	// minutes and CacheExtensionVerifiedTTL may be on the order of hours.
	//
	// When CacheExtensionVerifiedTTL is on, the DNS cache is not flushed on
	// network changes, to allow for the previously cached entries to remain
	// available in the problematic scenario. Like adjusting TTLs, this is an
	// explicit trade-off which doesn't adhere to standard best practise, but
	// is expected to be more blocking resistant; this approach also assumes
	// that endpoints such as CDN IPs are typically available on any network.
	CacheExtensionVerifiedTTL time.Duration
}

// allowDefaultResolver reports whether the standard library resolver may be
// used: either BindToDevice is not configured, or
// AllowDefaultResolverWithBindToDevice is set.
func (c *NetworkConfig) allowDefaultResolver() bool {
	// When BindToDevice is configured, the standard library resolver is not
	// used, as the system resolver may not route outside of the VPN.
	return c.BindToDevice == nil || c.AllowDefaultResolverWithBindToDevice
}

// logWarning forwards err to the optional LogWarning callback; it is a no-op
// when LogWarning is nil.
func (c *NetworkConfig) logWarning(err error) {
	if c.LogWarning != nil {
		c.LogWarning(err)
	}
}

// ResolveParameters specifies the configuration and behavior of a single
// ResolveIP call, a single domain name resolution.
//
// New ResolveParameters may be generated by calling MakeResolveParameters,
// which takes tactics parameters as an input.
//
// ResolveParameters may be persisted for replay.
154 type ResolveParameters struct { 155 156 // AttemptsPerServer specifies how many requests to send to each DNS 157 // server before trying the next server. IPv4 and IPv6 requests are sent 158 // concurrently and count as one attempt. 159 AttemptsPerServer int 160 161 // AttemptsPerPreferredServer is AttemptsPerServer for a preferred 162 // alternate DNS server. 163 AttemptsPerPreferredServer int 164 165 // RequestTimeout specifies how long to wait for a valid response before 166 // moving on to the next attempt. 167 RequestTimeout time.Duration 168 169 // AwaitTimeout specifies how long to await an additional response after 170 // the first response is received. This additional wait time applies only 171 // when there is either no IPv4 or IPv6 response. 172 AwaitTimeout time.Duration 173 174 // PreresolvedIPAddress specifies an IP address result to be used in place 175 // of making a request. 176 PreresolvedIPAddress string 177 178 // AlternateDNSServer specifies an alterate DNS server (IP:port, or IP 179 // only with port 53 assumed) to be used when either no system DNS 180 // servers are available or when PreferAlternateDNSServer is set. 181 AlternateDNSServer string 182 183 // PreferAlternateDNSServer indicates whether to prioritize using the 184 // AlternateDNSServer. When set, the AlternateDNSServer is attempted 185 // before any system DNS servers. 186 PreferAlternateDNSServer bool 187 188 // ProtocolTransformName specifies the name associated with 189 // ProtocolTransformSpec and is used for metrics. 190 ProtocolTransformName string 191 192 // ProtocolTransformSpec specifies a transform to apply to the DNS request packet. 193 // See: "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms". 194 // 195 // As transforms operate on strings and DNS requests are binary, 196 // transforms should be expressed using hex characters. 197 // 198 // DNS transforms include strategies discovered by the Geneva team, 199 // https://geneva.cs.umd.edu. 
200 ProtocolTransformSpec transforms.Spec 201 202 // ProtocolTransformSeed specifies the seed to use for generating random 203 // data in the ProtocolTransformSpec transform. To replay a transform, 204 // specify the same seed. 205 ProtocolTransformSeed *prng.Seed 206 207 // IncludeEDNS0 indicates whether to include the EDNS(0) UDP maximum 208 // response size extension in DNS requests. The resolver can handle 209 // responses larger than 512 bytes (RFC 1035 maximum) regardless of 210 // whether the extension is included; the extension may be included as 211 // part of appearing similar to other DNS traffic. 212 IncludeEDNS0 bool 213 214 firstAttemptWithAnswer int32 215 } 216 217 // GetFirstAttemptWithAnswer returns the index of the first request attempt 218 // that received a valid response, for the most recent ResolveIP call using 219 // this ResolveParameters. This information is used for logging metrics. The 220 // first attempt has index 1. GetFirstAttemptWithAnswer return 0 when no 221 // request attempt has reported a valid response. 222 // 223 // The caller is responsible for synchronizing use of a ResolveParameters 224 // instance (e.g, use a distinct ResolveParameters per ResolveIP to ensure 225 // GetFirstAttemptWithAnswer refers to a specific ResolveIP). 226 func (r *ResolveParameters) GetFirstAttemptWithAnswer() int { 227 return int(atomic.LoadInt32(&r.firstAttemptWithAnswer)) 228 } 229 230 func (r *ResolveParameters) setFirstAttemptWithAnswer(attempt int) { 231 atomic.StoreInt32(&r.firstAttemptWithAnswer, int32(attempt)) 232 } 233 234 // Implementation note: Go's standard net.Resolver supports specifying a 235 // custom Dial function. This could be used to implement at least a large 236 // subset of the Resolver functionality on top of Go's standard library 237 // resolver. However, net.Resolver is limited to using the CGO resolver on 238 // Android, https://github.com/golang/go/issues/8877, in which case the 239 // custom Dial function is not used. 
Furthermore, the the pure Go resolver in 240 // net/dnsclient_unix.go appears to not be used on Windows at this time. 241 // 242 // Go also provides golang.org/x/net/dns/dnsmessage, a DNS message marshaller, 243 // which could potentially be used in place of github.com/miekg/dns. 244 245 // Resolver is a DNS stub resolver, or DNS client, which resolves domain 246 // names. A Resolver instance maintains a cache, a network state snapshot, 247 // and metrics. All ResolveIP calls will share the same cache and state. 248 // Multiple concurrent ResolveIP calls are supported. 249 type Resolver struct { 250 networkConfig *NetworkConfig 251 252 mutex sync.Mutex 253 networkID string 254 hasIPv6Route bool 255 systemServers []string 256 lastServersUpdate time.Time 257 cache *lrucache.Cache 258 metrics resolverMetrics 259 } 260 261 type resolverMetrics struct { 262 resolves int 263 cacheHits int 264 verifiedCacheExtensions int 265 requestsIPv4 int 266 requestsIPv6 int 267 responsesIPv4 int 268 responsesIPv6 int 269 defaultResolves int 270 defaultSuccesses int 271 peakInFlight int64 272 minRTT time.Duration 273 maxRTT time.Duration 274 } 275 276 func newResolverMetrics() resolverMetrics { 277 return resolverMetrics{minRTT: -1} 278 } 279 280 // NewResolver creates a new Resolver instance. 281 func NewResolver(networkConfig *NetworkConfig, networkID string) *Resolver { 282 283 r := &Resolver{ 284 networkConfig: networkConfig, 285 metrics: newResolverMetrics(), 286 } 287 288 // updateNetworkState will initialize the cache and network state, 289 // including system DNS servers. 290 r.updateNetworkState(networkID) 291 292 return r 293 } 294 295 // Stop clears the Resolver cache and resets metrics. Stop must be called only 296 // after ceasing all in-flight ResolveIP goroutines, or else the cache or 297 // metrics may repopulate. A Resolver may be resumed after calling Stop, but 298 // Update must be called first. 
299 func (r *Resolver) Stop() { 300 r.mutex.Lock() 301 defer r.mutex.Unlock() 302 303 // r.networkConfig is not set to nil to avoid possible nil pointer 304 // dereferences by concurrent ResolveIP calls. 305 306 r.networkID = "" 307 r.hasIPv6Route = false 308 r.systemServers = nil 309 r.cache.Flush() 310 r.metrics = newResolverMetrics() 311 } 312 313 // MakeResolveParameters generates ResolveParameters using the input tactics 314 // parameters and optional frontingProviderID context. 315 func (r *Resolver) MakeResolveParameters( 316 p parameters.ParametersAccessor, 317 frontingProviderID string) (*ResolveParameters, error) { 318 319 params := &ResolveParameters{ 320 AttemptsPerServer: p.Int(parameters.DNSResolverAttemptsPerServer), 321 AttemptsPerPreferredServer: p.Int(parameters.DNSResolverAttemptsPerPreferredServer), 322 RequestTimeout: p.Duration(parameters.DNSResolverRequestTimeout), 323 AwaitTimeout: p.Duration(parameters.DNSResolverAwaitTimeout), 324 } 325 326 // When a frontingProviderID is specified, generate a pre-resolved IP 327 // address, based on tactics configuration. 328 if frontingProviderID != "" { 329 if p.WeightedCoinFlip(parameters.DNSResolverPreresolvedIPAddressProbability) { 330 CIDRs := p.LabeledCIDRs(parameters.DNSResolverPreresolvedIPAddressCIDRs, frontingProviderID) 331 if len(CIDRs) > 0 { 332 CIDR := CIDRs[prng.Intn(len(CIDRs))] 333 IP, err := generateIPAddressFromCIDR(CIDR) 334 if err != nil { 335 return nil, errors.Trace(err) 336 } 337 params.PreresolvedIPAddress = IP.String() 338 } 339 } 340 } 341 342 // When PreresolvedIPAddress is set, there's no DNS request and the 343 // following params can be skipped. 344 if params.PreresolvedIPAddress != "" { 345 return params, nil 346 } 347 348 // When preferring an alternate DNS server, select the alternate from 349 // DNSResolverPreferredAlternateServers. This list is for circumvention 350 // operations, such as using a public DNS server with a protocol 351 // transform. 
Otherwise, select from DNSResolverAlternateServers, which 352 // is a fallback list of DNS servers to be used when the system DNS 353 // servers cannot be obtained. 354 355 preferredServers := p.Strings(parameters.DNSResolverPreferredAlternateServers) 356 preferAlternateDNSServer := len(preferredServers) > 0 && p.WeightedCoinFlip( 357 parameters.DNSResolverPreferAlternateServerProbability) 358 359 alternateServers := preferredServers 360 if !preferAlternateDNSServer { 361 alternateServers = p.Strings(parameters.DNSResolverAlternateServers) 362 } 363 364 // Select an alternate DNS server, typically a public DNS server. Ensure 365 // tactics is configured with an empty DNSResolverAlternateServers list 366 // in cases where attempts to public DNS server are unwanted. 367 if len(alternateServers) > 0 { 368 369 alternateServer := alternateServers[prng.Intn(len(alternateServers))] 370 371 // Check that the alternateServer has a well-formed IP address; and add 372 // a default port if none it present. 373 host, _, err := net.SplitHostPort(alternateServer) 374 if err != nil { 375 // Assume the SplitHostPort error is due to missing port. 376 host = alternateServer 377 alternateServer = net.JoinHostPort(alternateServer, resolverDNSPort) 378 } 379 if net.ParseIP(host) == nil { 380 // Log warning and proceed without this DNS server. 381 r.networkConfig.logWarning( 382 errors.TraceNew("invalid alternate DNS server IP address")) 383 384 } else { 385 386 params.AlternateDNSServer = alternateServer 387 params.PreferAlternateDNSServer = preferAlternateDNSServer 388 } 389 390 } 391 392 // Select a DNS transform. DNS request transforms are "scoped" by 393 // alternate DNS server (IP address without port); that is, when an 394 // alternate DNS server is certain to be attempted first, a transform 395 // associated with and known to work with that DNS server will be 396 // selected. Otherwise, a transform from the default scope 397 // (transforms.SCOPE_ANY == "") is selected. 
398 // 399 // In any case, ResolveIP will only apply a transform on the first request 400 // attempt. 401 if p.WeightedCoinFlip(parameters.DNSResolverProtocolTransformProbability) { 402 403 specs := p.ProtocolTransformSpecs( 404 parameters.DNSResolverProtocolTransformSpecs) 405 scopedSpecNames := p.ProtocolTransformScopedSpecNames( 406 parameters.DNSResolverProtocolTransformScopedSpecNames) 407 408 // The alternate DNS server will be the first attempt if 409 // PreferAlternateDNSServer or the list of system DNS servers is empty. 410 // 411 // Limitation: the system DNS server list may change, due to a later 412 // Resolver.update call when ResolveIP is called with these 413 // ResolveParameters. 414 _, systemServers := r.getNetworkState() 415 scope := transforms.SCOPE_ANY 416 if params.AlternateDNSServer != "" && 417 (params.PreferAlternateDNSServer || len(systemServers) == 0) { 418 419 // Remove the port number, as the scope key is an IP address only. 420 // 421 // TODO: when we only just added the default port above, which is 422 // the common case, we could avoid this extra split. 423 host, _, err := net.SplitHostPort(params.AlternateDNSServer) 424 if err != nil { 425 return nil, errors.Trace(err) 426 } 427 scope = host 428 } 429 430 name, spec := specs.Select(scope, scopedSpecNames) 431 432 if spec != nil { 433 params.ProtocolTransformName = name 434 params.ProtocolTransformSpec = spec 435 var err error 436 params.ProtocolTransformSeed, err = prng.NewSeed() 437 if err != nil { 438 return nil, errors.Trace(err) 439 } 440 } 441 } 442 443 if p.WeightedCoinFlip(parameters.DNSResolverIncludeEDNS0Probability) { 444 params.IncludeEDNS0 = true 445 } 446 447 return params, nil 448 } 449 450 // ResolveAddress splits the input host:port address, calls ResolveIP to 451 // resolve the IP address of the host, selects an IP if there are multiple, 452 // and returns a rejoined IP:port. 
453 func (r *Resolver) ResolveAddress( 454 ctx context.Context, 455 networkID string, 456 params *ResolveParameters, 457 address string) (string, error) { 458 459 hostname, port, err := net.SplitHostPort(address) 460 if err != nil { 461 return "", errors.Trace(err) 462 } 463 464 IPs, err := r.ResolveIP(ctx, networkID, params, hostname) 465 if err != nil { 466 return "", errors.Trace(err) 467 } 468 469 return net.JoinHostPort(IPs[prng.Intn(len(IPs))].String(), port), nil 470 } 471 472 // ResolveIP resolves a domain name. 473 // 474 // The input params may be nil, in which case default timeouts are used. 475 // 476 // ResolveIP performs concurrent A and AAAA lookups, returns any valid 477 // response IPs, and caches results. An error is returned when there are 478 // no valid response IPs. 479 // 480 // ResolveIP is not a general purpose resolver and is Psiphon-specific. For 481 // example, resolved domains are expected to exist; ResolveIP does not 482 // fallback to TCP; does not consult any "hosts" file; does not perform RFC 483 // 3484 sorting logic (see Go issue 18518); only implements a subset of 484 // Go/glibc/resolv.conf(5) resolver parameters (attempts and timeouts, but 485 // not rotate, single-request etc.) ResolveIP does not implement singleflight 486 // logic, as the Go resolver does, and allows multiple concurrent request for 487 // the same domain -- Psiphon won't often resolve the exact same domain 488 // multiple times concurrently, and, when it does, there's a circumvention 489 // benefit to attempting different DNS servers and protocol transforms. 490 // 491 // ResolveIP does not currently support DoT, DoH, or TCP; those protocols are 492 // often blocked or less common. Instead, ResolveIP makes a best effort to 493 // evade plaintext UDP DNS interference by ignoring invalid responses and by 494 // optionally applying protocol transforms that may evade blocking. 
495 func (r *Resolver) ResolveIP( 496 ctx context.Context, 497 networkID string, 498 params *ResolveParameters, 499 hostname string) ([]net.IP, error) { 500 501 // ResolveIP does _not_ lock r.mutex for the lifetime of the function, to 502 // ensure many ResolveIP calls can run concurrently. 503 504 // If the hostname is already an IP address, just return that. For 505 // metrics, this does not count as a resolve, as the caller may invoke 506 // ResolveIP for all dials. 507 IP := net.ParseIP(hostname) 508 if IP != nil { 509 return []net.IP{IP}, nil 510 } 511 512 // Count all resolves of an actual domain, including cached and 513 // pre-resolved cases. 514 r.updateMetricResolves() 515 516 // Call updateNetworkState immediately before resolving, as a best effort 517 // to ensure that system DNS servers and IPv6 routing network state 518 // reflects the current network. updateNetworkState locks the Resolver 519 // mutex for its duration, and so concurrent ResolveIP calls may block at 520 // this point. However, all updateNetworkState operations are local to 521 // the host or device; and, if the networkID is unchanged since the last 522 // call, updateNetworkState may not perform any operations; and after the 523 // updateNetworkState call, ResolveIP proceeds without holding the mutex 524 // lock. As a result, this step should not prevent ResolveIP concurrency. 525 r.updateNetworkState(networkID) 526 527 if params == nil { 528 // Supply default ResolveParameters 529 params = &ResolveParameters{ 530 AttemptsPerServer: resolverDefaultAttemptsPerServer, 531 AttemptsPerPreferredServer: resolverDefaultAttemptsPerServer, 532 RequestTimeout: resolverDefaultRequestTimeout, 533 AwaitTimeout: resolverDefaultAwaitTimeout, 534 } 535 } 536 537 // When PreresolvedIPAddress is set, tactics parameters determined the IP address 538 // in this case. 
539 if params.PreresolvedIPAddress != "" { 540 IP := net.ParseIP(params.PreresolvedIPAddress) 541 if IP == nil { 542 // Unexpected case, as MakeResolveParameters selects the IP address. 543 return nil, errors.TraceNew("invalid IP address") 544 } 545 return []net.IP{IP}, nil 546 } 547 548 // Use a snapshot of the current network state, including IPv6 routing and 549 // system DNS servers. 550 // 551 // Limitation: these values are used even if the network changes in the 552 // middle of a ResolveIP call; ResolveIP is not interrupted if the 553 // network changes. 554 hasIPv6Route, systemServers := r.getNetworkState() 555 556 // Use the standard library resolver when there's no GetDNSServers, or the 557 // system server list is otherwise empty, and no alternate DNS server is 558 // configured. 559 // 560 // Note that in the case where there are no system DNS servers and there 561 // is an AlternateDNSServer, if the AlternateDNSServer attempt fails, 562 // control does not flow back to defaultResolverLookupIP. On platforms 563 // without GetDNSServers, the caller must arrange for distinct attempts 564 // that try a AlternateDNSServer, or just use the standard library 565 // resolver. 566 // 567 // ResolveIP should always be called, even when defaultResolverLookupIP is 568 // expected to be used, to ensure correct metrics counts and ensure a 569 // consistent error message log stack for all DNS-related failures. 570 // 571 if len(systemServers) == 0 && 572 params.AlternateDNSServer == "" && 573 r.networkConfig.allowDefaultResolver() { 574 575 IPs, err := defaultResolverLookupIP(ctx, hostname, r.networkConfig.LogHostnames) 576 r.updateMetricDefaultResolver(err == nil) 577 if err != nil { 578 return nil, errors.Trace(err) 579 } 580 return IPs, err 581 } 582 583 // Consult the cache before making queries. This comes after the standard 584 // library case, to allow the standard library to provide its own caching 585 // logic. 
586 IPs := r.getCache(hostname) 587 if IPs != nil { 588 return IPs, nil 589 } 590 591 // Set the list of DNS servers to attempt. AlternateDNSServer is used 592 // first when PreferAlternateDNSServer is set; otherwise 593 // AlternateDNSServer is used only when there is no system DNS server. 594 var servers []string 595 if params.AlternateDNSServer != "" && 596 (len(systemServers) == 0 || params.PreferAlternateDNSServer) { 597 servers = []string{params.AlternateDNSServer} 598 } 599 servers = append(servers, systemServers...) 600 if len(servers) == 0 { 601 return nil, errors.TraceNew("no DNS servers") 602 } 603 604 // Set the request timeout and set up a reusable timer for handling 605 // request and await timeouts. 606 // 607 // We expect to always have a request timeout. Handle the unexpected no 608 // timeout, 0, case by setting the longest timeout possible, ~290 years; 609 // always having a non-zero timeout makes the following code marginally 610 // simpler. 611 requestTimeout := params.RequestTimeout 612 if requestTimeout == 0 { 613 requestTimeout = 1<<63 - 1 614 } 615 var timer *time.Timer 616 timerDrained := true 617 resetTimer := func(timeout time.Duration) { 618 if timer == nil { 619 timer = time.NewTimer(timeout) 620 } else { 621 if !timerDrained && !timer.Stop() { 622 <-timer.C 623 } 624 timer.Reset(timeout) 625 } 626 timerDrained = false 627 } 628 629 // Orchestrate the DNS requests 630 631 resolveCtx, cancelFunc := context.WithCancel(ctx) 632 defer cancelFunc() 633 waitGroup := new(sync.WaitGroup) 634 conns := common.NewConns() 635 type answer struct { 636 attempt int 637 IPs []net.IP 638 TTLs []time.Duration 639 } 640 var maxAttempts int 641 if params.PreferAlternateDNSServer { 642 maxAttempts = params.AttemptsPerPreferredServer 643 maxAttempts += (len(servers) - 1) * params.AttemptsPerServer 644 } else { 645 maxAttempts = len(servers) * params.AttemptsPerServer 646 } 647 answerChan := make(chan *answer, maxAttempts*2) 648 inFlight := int64(0) 649 
awaitA := int32(1) 650 awaitAAAA := int32(1) 651 if !hasIPv6Route { 652 awaitAAAA = 0 653 } 654 var result *answer 655 var lastErr atomic.Value 656 657 stop := false 658 for i := 0; !stop && i < maxAttempts; i++ { 659 660 var index int 661 if params.PreferAlternateDNSServer { 662 if i < params.AttemptsPerPreferredServer { 663 index = 0 664 } else { 665 index = 1 + ((i - params.AttemptsPerPreferredServer) / params.AttemptsPerServer) 666 } 667 } else { 668 index = i / params.AttemptsPerServer 669 } 670 671 server := servers[index] 672 673 // Only the first attempt pair tries transforms, as it's not certain 674 // the transforms will be compatible with DNS servers. 675 useProtocolTransform := (i == 0 && params.ProtocolTransformSpec != nil) 676 677 // Send A and AAAA requests concurrently. 678 questionTypes := []resolverQuestionType{resolverQuestionTypeA, resolverQuestionTypeAAAA} 679 if !hasIPv6Route { 680 questionTypes = questionTypes[0:1] 681 } 682 683 for _, questionType := range questionTypes { 684 685 waitGroup.Add(1) 686 687 // For metrics, track peak concurrent in-flight requests for 688 // a _single_ ResolveIP. inFlight for this ResolveIP is also used 689 // to determine whether to await additional responses once the 690 // first, valid response is received. For that logic to be 691 // correct, we must increment inFlight in this outer goroutine to 692 // ensure the await logic sees either inFlight > 0 or an answer 693 // in the channel. 694 r.updateMetricPeakInFlight(atomic.AddInt64(&inFlight, 1)) 695 696 go func(attempt int, questionType resolverQuestionType, useProtocolTransform bool) { 697 defer waitGroup.Done() 698 699 // We must decrement inFlight only after sending an answer and 700 // setting awaitA or awaitAAAA to ensure that the await logic 701 // in the outer goroutine will see inFlight 0 only once those 702 // operations are complete. 
703 // 704 // We cannot wait and decrement inFlight when the outer 705 // goroutine receives answers, as no answer is sent in some 706 // cases, such as when the resolve fails due to NXDOMAIN. 707 defer atomic.AddInt64(&inFlight, -1) 708 709 // The request count metric counts the _intention_ to send 710 // requests, as there's a possibility that newResolverConn or 711 // performDNSQuery fail locally before sending a request packet. 712 switch questionType { 713 case resolverQuestionTypeA: 714 r.updateMetricRequestsIPv4() 715 case resolverQuestionTypeAAAA: 716 r.updateMetricRequestsIPv6() 717 } 718 719 // While it's possible, and potentially more optimal, to use 720 // the same UDP socket for both the A and AAAA request, we 721 // use a distinct socket per request, as common DNS clients do. 722 conn, err := r.newResolverConn(r.networkConfig.logWarning, server) 723 if err != nil { 724 lastErr.Store(errors.Trace(err)) 725 return 726 } 727 defer conn.Close() 728 729 // There's no context.Context support in the underlying API 730 // used by performDNSQuery, so instead collect all the 731 // request conns so that they can be closed, and any blocking 732 // network I/O interrupted, below, if resolveCtx is done. 733 if !conns.Add(conn) { 734 // Add fails when conns is already closed. 735 return 736 } 737 738 // performDNSQuery will send the request and read a response. 739 // performDNSQuery will continue reading responses until it 740 // receives a valid response, which can mitigate a subset of 741 // DNS injection attacks (to the limited extent possible for 742 // plaintext DNS). 743 // 744 // For IPv4, NXDOMAIN or a response with no IPs is not 745 // expected for domains resolved by Psiphon, so 746 // performDNSQuery treats such a response as invalid. For 747 // IPv6, a response with no IPs, may be valid(even though the 748 // response could be forged); the resolver will continue its 749 // attempts loop if it has no other IPs. 
750 // 751 // Each performDNSQuery has no timeout and runs 752 // until it has read a valid response or the requestCtx is 753 // done. This allows for slow arriving, valid responses to 754 // eventually succeed, even if the read time exceeds 755 // requestTimeout, as long as the read time is less than the 756 // requestCtx timeout. 757 // 758 // With this approach, the overall ResolveIP call may have 759 // more than 2 performDNSQuery requests in-flight at a time, 760 // as requestTimeout is used to schedule sending the next 761 // attempt but not cancel the current attempt. For 762 // connectionless UDP, the resulting network traffic should 763 // be similar to common DNS clients which do cancel request 764 // before beginning the next attempt. 765 IPs, TTLs, RTT, err := performDNSQuery( 766 resolveCtx, 767 r.networkConfig.logWarning, 768 params, 769 useProtocolTransform, 770 conn, 771 questionType, 772 hostname) 773 774 // Update the min/max RTT metric when reported (>=0) even if 775 // the result is an error; i.e., the even if there was an 776 // invalid response. 777 // 778 // Limitation: since individual requests aren't cancelled 779 // after requestTimeout, RTT metrics won't reflect 780 // no-response cases, although request and response count 781 // disparities will still show up in the metrics. 782 if RTT >= 0 { 783 r.updateMetricRTT(RTT) 784 } 785 786 if err != nil { 787 lastErr.Store(errors.Trace(err)) 788 return 789 } 790 791 if len(IPs) > 0 { 792 select { 793 case answerChan <- &answer{attempt: attempt, IPs: IPs, TTLs: TTLs}: 794 default: 795 } 796 } 797 798 // Mark no longer awaiting A or AAAA as long as there is a 799 // valid response, even if there are no IPs in the IPv6 case. 
800 switch questionType { 801 case resolverQuestionTypeA: 802 r.updateMetricResponsesIPv4() 803 atomic.StoreInt32(&awaitA, 0) 804 case resolverQuestionTypeAAAA: 805 r.updateMetricResponsesIPv6() 806 atomic.StoreInt32(&awaitAAAA, 0) 807 default: 808 } 809 810 }(i+1, questionType, useProtocolTransform) 811 } 812 813 resetTimer(requestTimeout) 814 815 select { 816 case result = <-answerChan: 817 // When the first answer, a response with valid IPs, arrives, exit 818 // the attempts loop. The following await branch may collect 819 // additional answers. 820 params.setFirstAttemptWithAnswer(result.attempt) 821 stop = true 822 case <-timer.C: 823 // When requestTimeout arrives, loop around and launch the next 824 // attempt; leave the existing requests running in case they 825 // eventually respond. 826 timerDrained = true 827 case <-resolveCtx.Done(): 828 // When resolveCtx is done, exit the attempts loop. 829 // 830 // Append the existing lastErr, which may convey useful 831 // information to be reported in a failed_tunnel error message. 832 lastErr.Store(errors.Tracef("%v (lastErr: %v)", ctx.Err(), lastErr.Load())) 833 stop = true 834 } 835 } 836 837 // Receive any additional answers, now present in the channel, which 838 // arrived concurrent with the first answer. This receive avoids a race 839 // condition where inFlight may now be 0, with additional answers 840 // enqueued, in which case the following await branch is not taken. 841 // 842 // It's possible for the attempts loop to exit with no received answer due 843 // to timeouts or cancellation while, concurrently, an answer is sent to 844 // the channel. In this case, when result == nil, we ignore the answers 845 // and leave this as a failed resolve. 846 if result != nil { 847 for loop := true; loop; { 848 select { 849 case nextAnswer := <-answerChan: 850 result.IPs = append(result.IPs, nextAnswer.IPs...) 851 result.TTLs = append(result.TTLs, nextAnswer.TTLs...) 
852 default: 853 loop = false 854 } 855 } 856 } 857 858 // When we have an answer, await -- for a short time, 859 // params.AwaitTimeout -- extra answers from any remaining in-flight 860 // requests. Only await if the request isn't cancelled and we don't 861 // already have at least one IPv4 and one IPv6 response; only await AAAA 862 // if it was sent; note that a valid AAAA response may include no IPs 863 // lastErr is not set in timeout/cancelled cases here, since we already 864 // have an answer. 865 if result != nil && 866 resolveCtx.Err() == nil && 867 atomic.LoadInt64(&inFlight) > 0 && 868 (atomic.LoadInt32(&awaitA) != 0 || atomic.LoadInt32(&awaitAAAA) != 0) && 869 params.AwaitTimeout > 0 { 870 871 resetTimer(params.AwaitTimeout) 872 873 for { 874 875 stop := false 876 select { 877 case nextAnswer := <-answerChan: 878 result.IPs = append(result.IPs, nextAnswer.IPs...) 879 result.TTLs = append(result.TTLs, nextAnswer.TTLs...) 880 case <-timer.C: 881 timerDrained = true 882 stop = true 883 case <-resolveCtx.Done(): 884 stop = true 885 } 886 887 if stop || 888 atomic.LoadInt64(&inFlight) == 0 || 889 (atomic.LoadInt32(&awaitA) == 0 && atomic.LoadInt32(&awaitAAAA) == 0) { 890 break 891 } 892 } 893 } 894 895 timer.Stop() 896 897 // Interrupt all workers. 898 cancelFunc() 899 conns.CloseAll() 900 waitGroup.Wait() 901 902 // When there's no answer, return the last error. 903 if result == nil { 904 err := lastErr.Load() 905 if err == nil { 906 err = errors.TraceNew("unexpected missing error") 907 } 908 if r.networkConfig.LogHostnames { 909 err = fmt.Errorf("resolve %s : %w", hostname, err.(error)) 910 } 911 return nil, errors.Trace(err.(error)) 912 } 913 914 if len(result.IPs) == 0 { 915 // Unexpected, since a len(IPs) > 0 check precedes sending to answerChan. 916 return nil, errors.TraceNew("unexpected no IPs") 917 } 918 919 // Update the cache now, after all results are gathered. 
920 r.setCache(hostname, result.IPs, result.TTLs) 921 922 return result.IPs, nil 923 } 924 925 // VerifyCacheExtension extends the TTL for any cached result for the 926 // specified hostname to at least NetworkConfig.CacheExtensionVerifiedTTL. 927 func (r *Resolver) VerifyCacheExtension(hostname string) { 928 r.mutex.Lock() 929 defer r.mutex.Unlock() 930 931 if r.networkConfig.CacheExtensionVerifiedTTL == 0 { 932 return 933 } 934 935 if net.ParseIP(hostname) != nil { 936 return 937 } 938 939 entry, expires, ok := r.cache.GetWithExpiration(hostname) 940 if !ok { 941 return 942 } 943 944 // Change the TTL only if the entry expires and the existing TTL isn't 945 // longer than the extension. 946 neverExpires := time.Time{} 947 if expires == neverExpires || 948 expires.After(time.Now().Add(r.networkConfig.CacheExtensionVerifiedTTL)) { 949 return 950 } 951 952 r.cache.Set(hostname, entry, r.networkConfig.CacheExtensionVerifiedTTL) 953 954 r.metrics.verifiedCacheExtensions += 1 955 } 956 957 // GetMetrics returns a summary of DNS metrics. 958 func (r *Resolver) GetMetrics() string { 959 r.mutex.Lock() 960 defer r.mutex.Unlock() 961 962 // When r.metrics.minRTT < 0, min/maxRTT is unset. 963 minRTT := "n/a" 964 maxRTT := minRTT 965 if r.metrics.minRTT >= 0 { 966 minRTT = fmt.Sprintf("%d", r.metrics.minRTT/time.Millisecond) 967 maxRTT = fmt.Sprintf("%d", r.metrics.maxRTT/time.Millisecond) 968 } 969 970 extend := "" 971 if r.networkConfig.CacheExtensionVerifiedTTL > 0 { 972 extend = fmt.Sprintf("| extend %d ", r.metrics.verifiedCacheExtensions) 973 } 974 975 defaultResolves := "" 976 if r.networkConfig.allowDefaultResolver() { 977 defaultResolves = fmt.Sprintf( 978 " | def %d/%d", r.metrics.defaultResolves, r.metrics.defaultSuccesses) 979 } 980 981 // Note that the number of system resolvers is a point-in-time value, 982 // while the others are cumulative. 983 984 return fmt.Sprintf("resolves %d | hit %d %s| req v4/v6 %d/%d | resp %d/%d | peak %d | rtt %s - %s ms. 
| sys %d%s", 985 r.metrics.resolves, 986 r.metrics.cacheHits, 987 extend, 988 r.metrics.requestsIPv4, 989 r.metrics.requestsIPv6, 990 r.metrics.responsesIPv4, 991 r.metrics.responsesIPv6, 992 r.metrics.peakInFlight, 993 minRTT, 994 maxRTT, 995 len(r.systemServers), 996 defaultResolves) 997 } 998 999 // updateNetworkState updates the system DNS server list, IPv6 state, and the 1000 // cache. 1001 // 1002 // Any errors that occur while querying network state are logged; in error 1003 // conditions the functionality of the resolver may be reduced, but the 1004 // resolver remains operational. 1005 func (r *Resolver) updateNetworkState(networkID string) { 1006 r.mutex.Lock() 1007 defer r.mutex.Unlock() 1008 1009 // Only perform blocking/expensive update operations when necessary. 1010 updateAll := false 1011 updateIPv6Route := false 1012 updateServers := false 1013 flushCache := false 1014 1015 // If r.cache is nil, this is the first update call in NewResolver. Create 1016 // the cache and perform all updates. 1017 if r.cache == nil { 1018 r.cache = lrucache.NewWithLRU( 1019 resolverCacheDefaultTTL, 1020 resolverCacheReapFrequency, 1021 resolverCacheMaxEntries) 1022 updateAll = true 1023 } 1024 1025 // Perform all updates when the networkID has changed, which indicates a 1026 // different network. 1027 if r.networkID != networkID { 1028 updateAll = true 1029 } 1030 1031 if updateAll { 1032 updateIPv6Route = true 1033 updateServers = true 1034 flushCache = true 1035 } 1036 1037 // Even when the networkID has not changed, update DNS servers 1038 // periodically. This is similar to how other DNS clients 1039 // poll /etc/resolv.conf, including the period of 5s. 1040 if time.Since(r.lastServersUpdate) > resolverServersUpdateTTL { 1041 updateServers = true 1042 } 1043 1044 // Update hasIPv6Route, which indicates whether the current network has an 1045 // IPv6 route and so if DNS requests for AAAA records will be sent. 
1046 // There's no use for AAAA records on IPv4-only networks; and other 1047 // common DNS clients omit AAAA requests on IPv4-only records, so these 1048 // requests would otherwise be unusual. 1049 // 1050 // There's no hasIPv4Route as we always need to resolve A records, 1051 // particularly for IPv4-only endpoints; for IPv6-only networks, 1052 // NetworkConfig.IPv6Synthesize should be used to accomodate IPv4 DNS 1053 // server addresses, and dials performed outside the Resolver will 1054 // similarly use NAT 64 (on iOS; on Android, 464XLAT will handle this 1055 // transparently). 1056 if updateIPv6Route { 1057 1058 if r.networkConfig.HasIPv6Route != nil { 1059 1060 r.hasIPv6Route = r.networkConfig.HasIPv6Route() 1061 1062 } else { 1063 1064 hasIPv6Route, err := hasRoutableIPv6Interface() 1065 if err != nil { 1066 // Log warning and proceed without IPv6. 1067 r.networkConfig.logWarning( 1068 errors.Tracef("unable to determine IPv6 route: %v", err)) 1069 hasIPv6Route = false 1070 } 1071 r.hasIPv6Route = hasIPv6Route 1072 } 1073 } 1074 1075 // Update the list of system DNS servers. It's not an error condition here 1076 // if the list is empty: a subsequent ResolveIP may use 1077 // ResolveParameters which specifies an AlternateDNSServer. 1078 if updateServers && r.networkConfig.GetDNSServers != nil { 1079 1080 systemServers := []string{} 1081 for _, systemServer := range r.networkConfig.GetDNSServers() { 1082 host, _, err := net.SplitHostPort(systemServer) 1083 if err != nil { 1084 // Assume the SplitHostPort error is due to systemServer being 1085 // an IP only, and append the default port, 53. If 1086 // systemServer _isn't_ an IP, the following ParseIP will fail. 1087 host = systemServer 1088 systemServer = net.JoinHostPort(systemServer, resolverDNSPort) 1089 } 1090 if net.ParseIP(host) == nil { 1091 // Log warning and proceed without this DNS server. 
1092 r.networkConfig.logWarning( 1093 errors.TraceNew("invalid DNS server IP address")) 1094 continue 1095 } 1096 systemServers = append(systemServers, systemServer) 1097 } 1098 1099 // Check if the list of servers has changed, including order. If 1100 // changed, flush the cache even if the networkID has not changed. 1101 // Cached results are only considered valid as long as the system DNS 1102 // configuration remains the same. 1103 equal := len(r.systemServers) == len(systemServers) 1104 if equal { 1105 for i := 0; i < len(r.systemServers); i++ { 1106 if r.systemServers[i] != systemServers[i] { 1107 equal = false 1108 break 1109 } 1110 } 1111 } 1112 flushCache = flushCache || !equal 1113 1114 // Concurrency note: once the r.systemServers slice is set, the 1115 // contents of the backing array must not be modified due to 1116 // concurrent ResolveIP calls. 1117 r.systemServers = systemServers 1118 1119 r.lastServersUpdate = time.Now() 1120 } 1121 1122 // Skip cache flushes when the extended DNS caching mechanism is enabled. 1123 // TODO: retain only verified cache entries? 1124 if flushCache && r.networkConfig.CacheExtensionVerifiedTTL == 0 { 1125 r.cache.Flush() 1126 } 1127 1128 // Set r.networkID only after all operations complete without errors; if 1129 // r.networkID were set earlier, a subsequent 1130 // ResolveIP/updateNetworkState call might proceed as if the network 1131 // state were updated for the specified network ID. 1132 r.networkID = networkID 1133 } 1134 1135 func (r *Resolver) getNetworkState() (bool, []string) { 1136 r.mutex.Lock() 1137 defer r.mutex.Unlock() 1138 1139 return r.hasIPv6Route, r.systemServers 1140 } 1141 1142 func (r *Resolver) setCache(hostname string, IPs []net.IP, TTLs []time.Duration) { 1143 r.mutex.Lock() 1144 defer r.mutex.Unlock() 1145 1146 // The shortest TTL is used. In some cases, a DNS server may omit the TTL 1147 // or set a 0 TTL, in which case the default is used. 
1148 TTL := resolverDefaultAnswerTTL 1149 for _, answerTTL := range TTLs { 1150 if answerTTL > 0 && answerTTL < TTL { 1151 TTL = answerTTL 1152 } 1153 } 1154 1155 // When NetworkConfig.CacheExtensionInitialTTL configured, ensure the TTL 1156 // is no shorter than CacheExtensionInitialTTL. 1157 if r.networkConfig.CacheExtensionInitialTTL != 0 && 1158 TTL < r.networkConfig.CacheExtensionInitialTTL { 1159 1160 TTL = r.networkConfig.CacheExtensionInitialTTL 1161 } 1162 1163 // Limitation: with concurrent ResolveIPs for the same domain, the last 1164 // setCache call determines the cache value. The results are not merged. 1165 1166 r.cache.Set(hostname, IPs, TTL) 1167 } 1168 1169 func (r *Resolver) getCache(hostname string) []net.IP { 1170 r.mutex.Lock() 1171 defer r.mutex.Unlock() 1172 1173 entry, ok := r.cache.Get(hostname) 1174 if !ok { 1175 return nil 1176 } 1177 r.metrics.cacheHits += 1 1178 return entry.([]net.IP) 1179 } 1180 1181 // newResolverConn creates a UDP socket that will send packets to serverAddr. 1182 // serverAddr is an IP:port, which allows specifying the port for testing or 1183 // in rare cases where the port isn't 53. 1184 func (r *Resolver) newResolverConn( 1185 logWarning func(error), 1186 serverAddr string) (retConn net.Conn, retErr error) { 1187 1188 defer func() { 1189 if retErr != nil { 1190 logWarning(retErr) 1191 } 1192 }() 1193 1194 // When configured, attempt to synthesize an IPv6 address from 1195 // an IPv4 address for compatibility on DNS64/NAT64 networks. 1196 // If synthesize fails, try the original address. 
1197 if r.networkConfig.IPv6Synthesize != nil { 1198 serverIPStr, port, err := net.SplitHostPort(serverAddr) 1199 if err != nil { 1200 return nil, errors.Trace(err) 1201 } 1202 serverIP := net.ParseIP(serverIPStr) 1203 if serverIP != nil && serverIP.To4() != nil { 1204 synthesized := r.networkConfig.IPv6Synthesize(serverIPStr) 1205 if synthesized != "" && net.ParseIP(synthesized) != nil { 1206 serverAddr = net.JoinHostPort(synthesized, port) 1207 } 1208 } 1209 } 1210 1211 dialer := &net.Dialer{} 1212 if r.networkConfig.BindToDevice != nil { 1213 dialer.Control = func(_, _ string, c syscall.RawConn) error { 1214 var controlErr error 1215 err := c.Control(func(fd uintptr) { 1216 _, err := r.networkConfig.BindToDevice(int(fd)) 1217 if err != nil { 1218 controlErr = errors.Tracef("BindToDevice failed: %v", err) 1219 return 1220 } 1221 }) 1222 if controlErr != nil { 1223 return errors.Trace(controlErr) 1224 } 1225 return errors.Trace(err) 1226 } 1227 } 1228 1229 // context.Background is ok in this case as the UDP dial is just a local 1230 // syscall to create the socket. 
1231 conn, err := dialer.DialContext(context.Background(), "udp", serverAddr) 1232 if err != nil { 1233 return nil, errors.Trace(err) 1234 } 1235 1236 return conn, nil 1237 } 1238 1239 func (r *Resolver) updateMetricResolves() { 1240 r.mutex.Lock() 1241 defer r.mutex.Unlock() 1242 1243 r.metrics.resolves += 1 1244 } 1245 1246 func (r *Resolver) updateMetricRequestsIPv4() { 1247 r.mutex.Lock() 1248 defer r.mutex.Unlock() 1249 1250 r.metrics.requestsIPv4 += 1 1251 } 1252 1253 func (r *Resolver) updateMetricRequestsIPv6() { 1254 r.mutex.Lock() 1255 defer r.mutex.Unlock() 1256 1257 r.metrics.requestsIPv6 += 1 1258 } 1259 1260 func (r *Resolver) updateMetricResponsesIPv4() { 1261 r.mutex.Lock() 1262 defer r.mutex.Unlock() 1263 1264 r.metrics.responsesIPv4 += 1 1265 } 1266 1267 func (r *Resolver) updateMetricResponsesIPv6() { 1268 r.mutex.Lock() 1269 defer r.mutex.Unlock() 1270 1271 r.metrics.responsesIPv6 += 1 1272 } 1273 1274 func (r *Resolver) updateMetricDefaultResolver(success bool) { 1275 r.mutex.Lock() 1276 defer r.mutex.Unlock() 1277 1278 r.metrics.defaultResolves += 1 1279 if success { 1280 r.metrics.defaultSuccesses += 1 1281 } 1282 } 1283 1284 func (r *Resolver) updateMetricPeakInFlight(inFlight int64) { 1285 r.mutex.Lock() 1286 defer r.mutex.Unlock() 1287 1288 if inFlight > r.metrics.peakInFlight { 1289 r.metrics.peakInFlight = inFlight 1290 } 1291 } 1292 1293 func (r *Resolver) updateMetricRTT(rtt time.Duration) { 1294 r.mutex.Lock() 1295 defer r.mutex.Unlock() 1296 1297 if rtt < 0 { 1298 // Ignore invalid input. 1299 return 1300 } 1301 1302 // When r.metrics.minRTT < 0, min/maxRTT is unset. 
1303 if r.metrics.minRTT < 0 || rtt < r.metrics.minRTT { 1304 r.metrics.minRTT = rtt 1305 } 1306 1307 if rtt > r.metrics.maxRTT { 1308 r.metrics.maxRTT = rtt 1309 } 1310 } 1311 1312 func hasRoutableIPv6Interface() (bool, error) { 1313 1314 interfaces, err := net.Interfaces() 1315 if err != nil { 1316 return false, errors.Trace(err) 1317 } 1318 1319 for _, in := range interfaces { 1320 1321 if (in.Flags&net.FlagUp == 0) || 1322 (in.Flags&(net.FlagLoopback|net.FlagPointToPoint)) != 0 { 1323 continue 1324 } 1325 1326 addrs, err := in.Addrs() 1327 if err != nil { 1328 return false, errors.Trace(err) 1329 } 1330 1331 for _, addr := range addrs { 1332 if IPNet, ok := addr.(*net.IPNet); ok && 1333 IPNet.IP.To4() == nil && 1334 !IPNet.IP.IsLinkLocalUnicast() { 1335 1336 return true, nil 1337 } 1338 } 1339 } 1340 1341 return false, nil 1342 } 1343 1344 func generateIPAddressFromCIDR(CIDR string) (net.IP, error) { 1345 _, IPNet, err := net.ParseCIDR(CIDR) 1346 if err != nil { 1347 return nil, errors.Trace(err) 1348 } 1349 // A retry is required, since a CIDR may include broadcast IPs (a.b.c.0) or 1350 // other invalid values. The number of retries is limited to ensure we 1351 // don't hang in the case of a misconfiguration. 1352 for i := 0; i < 10; i++ { 1353 randBytes := prng.Bytes(len(IPNet.IP)) 1354 IP := make(net.IP, len(IPNet.IP)) 1355 // The 1 bits in the mask must apply to the IP in the CIDR and the 0 1356 // bits in the mask are available to randomize. 
1357 for i := 0; i < len(IP); i++ { 1358 IP[i] = (IPNet.IP[i] & IPNet.Mask[i]) | (randBytes[i] & ^IPNet.Mask[i]) 1359 } 1360 if IP.IsGlobalUnicast() && !common.IsBogon(IP) { 1361 return IP, nil 1362 } 1363 } 1364 return nil, errors.TraceNew("failed to generate random IP") 1365 } 1366 1367 type resolverQuestionType int 1368 1369 const ( 1370 resolverQuestionTypeA = 0 1371 resolverQuestionTypeAAAA = 1 1372 ) 1373 1374 func performDNSQuery( 1375 resolveCtx context.Context, 1376 logWarning func(error), 1377 params *ResolveParameters, 1378 useProtocolTransform bool, 1379 conn net.Conn, 1380 questionType resolverQuestionType, 1381 hostname string) ([]net.IP, []time.Duration, time.Duration, error) { 1382 1383 if useProtocolTransform { 1384 if params.ProtocolTransformSpec == nil || 1385 params.ProtocolTransformSeed == nil { 1386 return nil, nil, -1, errors.TraceNew("invalid protocol transform configuration") 1387 } 1388 // miekg/dns expects conn to be a net.PacketConn or else it writes the 1389 // TCP length prefix 1390 udpConn, ok := conn.(*net.UDPConn) 1391 if !ok { 1392 return nil, nil, -1, errors.TraceNew("conn is not a *net.UDPConn") 1393 } 1394 conn = &transformDNSPacketConn{ 1395 UDPConn: udpConn, 1396 transform: params.ProtocolTransformSpec, 1397 seed: params.ProtocolTransformSeed, 1398 } 1399 } 1400 1401 // UDPSize sets the receive buffer to > 512, even when we don't include 1402 // EDNS(0), which will mitigate issues with RFC 1035 non-compliant 1403 // servers. See Go issue 51127. 
1404 dnsConn := &dns.Conn{ 1405 Conn: conn, 1406 UDPSize: udpPacketBufferSize, 1407 } 1408 defer dnsConn.Close() 1409 1410 // SetQuestion initializes request.MsgHdr.Id to a random value 1411 request := &dns.Msg{MsgHdr: dns.MsgHdr{RecursionDesired: true}} 1412 switch questionType { 1413 case resolverQuestionTypeA: 1414 request.SetQuestion(dns.Fqdn(hostname), dns.TypeA) 1415 case resolverQuestionTypeAAAA: 1416 request.SetQuestion(dns.Fqdn(hostname), dns.TypeAAAA) 1417 default: 1418 return nil, nil, -1, errors.TraceNew("unknown DNS request question type") 1419 } 1420 if params.IncludeEDNS0 { 1421 // miekg/dns: "RFC 6891, Section 6.1.1 allows the OPT record to appear 1422 // anywhere in the additional record section, but it's usually at the 1423 // end..." 1424 request.SetEdns0(udpPacketBufferSize, false) 1425 } 1426 1427 startTime := time.Now() 1428 1429 // Send the DNS request 1430 dnsConn.WriteMsg(request) 1431 1432 // Read and process the DNS response 1433 var IPs []net.IP 1434 var TTLs []time.Duration 1435 var lastErr error 1436 RTT := time.Duration(-1) 1437 for { 1438 1439 // Stop when resolveCtx is done; the caller, ResolveIP, will also 1440 // close conn, which will interrupt a blocking dnsConn.ReadMsg. 1441 if resolveCtx.Err() != nil { 1442 1443 // ResolveIP, which calls performDNSQuery, already records the 1444 // context error (e.g., context timeout), so instead report 1445 // lastErr, when present, as it may contain more useful 1446 // information about why a response was rejected. 1447 err := lastErr 1448 if err == nil { 1449 err = errors.Trace(resolveCtx.Err()) 1450 } 1451 1452 return nil, nil, RTT, err 1453 } 1454 1455 // Read a response. RTT is the elapsed time between sending the 1456 // request and reading the last received response. 
1457 response, err := dnsConn.ReadMsg() 1458 RTT = time.Since(startTime) 1459 if err == nil && response.MsgHdr.Id != request.MsgHdr.Id { 1460 err = dns.ErrId 1461 } 1462 if err != nil { 1463 // Try reading again, in case the first response packet failed to 1464 // unmarshal or had an invalid ID. The Go resolver also does this; 1465 // see Go issue 13281. 1466 if resolveCtx.Err() == nil { 1467 // Only log if resolveCtx is not done; otherwise the error could 1468 // be due to conn being closed by ResolveIP. 1469 lastErr = errors.Tracef("invalid response: %v", err) 1470 logWarning(lastErr) 1471 } 1472 continue 1473 } 1474 1475 // Check the RCode. 1476 // 1477 // For IPv4, we expect RCodeSuccess as Psiphon will typically only 1478 // resolve domains that exist and have a valid IP (when this isn't 1479 // the case, and we retry, the overall ResolveIP and its parent dial 1480 // will still abort after resolveCtx is done, or RequestTimeout 1481 // expires for maxAttempts). 1482 // 1483 // For IPv6, we should also expect RCodeSuccess even if there is no 1484 // AAAA record, as long as the domain exists and has an A record. 1485 // However, per RFC 6147 section 5.1.2, we may receive 1486 // NXDOMAIN: "...some servers respond with RCODE=3 to a AAAA query 1487 // even if there is an A record available for that owner name. Those 1488 // servers are in clear violation of the meaning of RCODE 3...". In 1489 // this case, we coalesce NXDOMAIN into success to treat the response 1490 // the same as success with no AAAA record. 1491 // 1492 // All other RCodes, which are unexpected, lead to a read retry. 
1493 if response.MsgHdr.Rcode != dns.RcodeSuccess && 1494 !(questionType == resolverQuestionTypeAAAA && response.MsgHdr.Rcode == dns.RcodeNameError) { 1495 1496 errMsg, ok := dns.RcodeToString[response.MsgHdr.Rcode] 1497 if !ok { 1498 errMsg = fmt.Sprintf("Rcode: %d", response.MsgHdr.Rcode) 1499 } 1500 lastErr = errors.Tracef("unexpected RCode: %v", errMsg) 1501 logWarning(lastErr) 1502 continue 1503 } 1504 1505 // Extract all IP answers, along with corresponding TTLs for caching. 1506 // Perform additional validation, which may lead to another read 1507 // retry. However, if _any_ valid IP is found, stop reading and 1508 // return that result. Again, the validation is only best effort. 1509 1510 checkFailed := false 1511 for _, answer := range response.Answer { 1512 haveAnswer := false 1513 var IP net.IP 1514 var TTLSec uint32 1515 switch questionType { 1516 case resolverQuestionTypeA: 1517 if a, ok := answer.(*dns.A); ok { 1518 IP = a.A 1519 TTLSec = a.Hdr.Ttl 1520 haveAnswer = true 1521 } 1522 case resolverQuestionTypeAAAA: 1523 if aaaa, ok := answer.(*dns.AAAA); ok { 1524 IP = aaaa.AAAA 1525 TTLSec = aaaa.Hdr.Ttl 1526 haveAnswer = true 1527 } 1528 } 1529 if !haveAnswer { 1530 continue 1531 } 1532 err := checkDNSAnswerIP(IP) 1533 if err != nil { 1534 checkFailed = true 1535 lastErr = errors.Tracef("invalid IP: %v", err) 1536 logWarning(lastErr) 1537 // Check the next answer 1538 continue 1539 } 1540 IPs = append(IPs, IP) 1541 TTLs = append(TTLs, time.Duration(TTLSec)*time.Second) 1542 } 1543 1544 // For IPv4, an IP is expected, as noted in the comment above. 1545 // 1546 // In potential cases where we resolve a domain that has only an IPv6 1547 // address, the concurrent AAAA request will deliver its result to 1548 // ResolveIP, and that answer will be selected, so only the "await" 1549 // logic will delay the parent dial in that case. 
1550 if questionType == resolverQuestionTypeA && len(IPs) == 0 && !checkFailed { 1551 checkFailed = true 1552 lastErr = errors.TraceNew("unexpected empty A response") 1553 logWarning(lastErr) 1554 } 1555 1556 // Retry if there are no valid IPs and any error; if no error, this 1557 // may be a valid AAAA response with no IPs, in which case return the 1558 // result. 1559 if len(IPs) == 0 && checkFailed { 1560 continue 1561 } 1562 1563 return IPs, TTLs, RTT, nil 1564 } 1565 } 1566 1567 func checkDNSAnswerIP(IP net.IP) error { 1568 1569 if IP == nil { 1570 return errors.TraceNew("IP is nil") 1571 } 1572 1573 // Limitation: this could still be a phony/injected response, it's not 1574 // possible to verify with plaintext DNS, but a "bogon" IP is clearly 1575 // invalid. 1576 if common.IsBogon(IP) { 1577 return errors.TraceNew("IP is bogon") 1578 } 1579 1580 // Create a temporary socket bound to the destination IP. This checks 1581 // thats the local host has a route to this IP. If not, we'll reject the 1582 // IP. This prevents selecting an IP which is guaranteed to fail to dial. 1583 // Use UDP as this results in no network traffic; the destination port is 1584 // arbitrary. The Go resolver performs a similar operation. 1585 // 1586 // Limitations: 1587 // - We may cache the IP and reuse it without checking routability again; 1588 // the cache should be flushed when network state changes. 1589 // - Given that the AAAA is requested only when the host has an IPv6 1590 // route, we don't expect this to often fail with a _valid_ response. 1591 // However, this remains a possibility and in this case, 1592 // performDNSQuery will keep awaiting a response which can trigger 1593 // the "await" logic. 
1594 conn, err := net.DialUDP("udp", nil, &net.UDPAddr{IP: IP, Port: 443}) 1595 if err != nil { 1596 return errors.Trace(err) 1597 } 1598 conn.Close() 1599 1600 return nil 1601 } 1602 1603 func defaultResolverLookupIP( 1604 ctx context.Context, hostname string, logHostnames bool) ([]net.IP, error) { 1605 1606 addrs, err := net.DefaultResolver.LookupIPAddr(ctx, hostname) 1607 1608 if err != nil && !logHostnames { 1609 // Remove domain names from "net" error messages. 1610 err = common.RedactNetError(err) 1611 } 1612 1613 if err != nil { 1614 return nil, errors.Trace(err) 1615 } 1616 1617 ips := make([]net.IP, len(addrs)) 1618 for i, addr := range addrs { 1619 ips[i] = addr.IP 1620 } 1621 1622 return ips, nil 1623 } 1624 1625 // transformDNSPacketConn wraps a *net.UDPConn, intercepting Write calls and 1626 // applying the specified protocol transform. 1627 // 1628 // As transforms operate on strings and DNS requests are binary, the transform 1629 // should be expressed using hex characters. The DNS packet to be written 1630 // (input the Write) is converted to hex, transformed, and converted back to 1631 // binary and then actually written to the UDP socket. 1632 type transformDNSPacketConn struct { 1633 *net.UDPConn 1634 transform transforms.Spec 1635 seed *prng.Seed 1636 } 1637 1638 func (conn *transformDNSPacketConn) Write(b []byte) (int, error) { 1639 1640 // Limitation: there is no check that a transformed packet remains within 1641 // the network packet MTU. 1642 1643 input := hex.EncodeToString(b) 1644 output, err := conn.transform.Apply(conn.seed, input) 1645 if err != nil { 1646 return 0, errors.Trace(err) 1647 } 1648 packet, err := hex.DecodeString(output) 1649 if err != nil { 1650 return 0, errors.Trace(err) 1651 } 1652 1653 _, err = conn.UDPConn.Write(packet) 1654 if err != nil { 1655 // In the error case, don't report bytes written as the number could 1656 // exceed the pre-transform length. 
1657 return 0, errors.Trace(err) 1658 } 1659 1660 // Report the pre-transform length as bytes written, as the caller may check 1661 // that the requested len(b) bytes were written. 1662 return len(b), nil 1663 }