github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/resolver/resolver.go (about)

     1  /*
     2   * Copyright (c) 2022, Psiphon Inc.
     3   * All rights reserved.
     4   *
     5   * This program is free software: you can redistribute it and/or modify
     6   * it under the terms of the GNU General Public License as published by
     7   * the Free Software Foundation, either version 3 of the License, or
     8   * (at your option) any later version.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  // Package resolver implements a DNS stub resolver, or DNS client, which
    21  // resolves domain names.
    22  //
    23  // The resolver is Psiphon-specific and oriented towards blocking resistance.
    24  // See ResolveIP for more details.
    25  package resolver
    26  
    27  import (
    28  	"context"
    29  	"encoding/hex"
    30  	"fmt"
    31  	"net"
    32  	"sync"
    33  	"sync/atomic"
    34  	"syscall"
    35  	"time"
    36  
    37  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common"
    38  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/errors"
    39  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/parameters"
    40  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/prng"
    41  	"github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms"
    42  	lrucache "github.com/cognusion/go-cache-lru"
    43  	"github.com/miekg/dns"
    44  )
    45  
// Resolver tuning constants.
//
// The three resolverDefault* attempt/timeout values populate the default
// ResolveParameters that ResolveIP builds when it is called with nil params,
// and resolverDNSPort is the port appended to an alternate DNS server address
// that does not specify one. The remaining constants are not referenced in
// the reviewed portion of this file; judging by their names they configure
// the resolver cache (default TTL, reap frequency, maximum entries), the
// refresh interval for the system DNS server list, a fallback answer TTL,
// and the UDP packet buffer size -- TODO confirm at their use sites.
const (
	resolverCacheDefaultTTL          = 1 * time.Minute
	resolverCacheReapFrequency       = 1 * time.Minute
	resolverCacheMaxEntries          = 10000
	resolverServersUpdateTTL         = 5 * time.Second
	resolverDefaultAttemptsPerServer = 2
	resolverDefaultRequestTimeout    = 5 * time.Second
	resolverDefaultAwaitTimeout      = 10 * time.Millisecond
	resolverDefaultAnswerTTL         = 1 * time.Minute
	resolverDNSPort                  = "53"
	udpPacketBufferSize              = 1232
)
    58  
// NetworkConfig specifies network-level configuration for a Resolver.
//
// All callback fields are optional; each field's comment describes the
// behavior when it is nil.
type NetworkConfig struct {

	// GetDNSServers returns a list of system DNS server addresses (IP:port, or
	// IP only with port 53 assumed), as determined via OS APIs, in priority
	// order. GetDNSServers may be nil.
	GetDNSServers func() []string

	// BindToDevice should ensure the input file descriptor, a UDP socket, is
	// excluded from VPN routing. BindToDevice may be nil.
	BindToDevice func(fd int) (string, error)

	// AllowDefaultResolverWithBindToDevice indicates that it's safe to use
	// the default resolver when BindToDevice is configured, as the host OS
	// will automatically exclude DNS requests from the VPN.
	AllowDefaultResolverWithBindToDevice bool

	// IPv6Synthesize should apply NAT64 synthesis to the input IPv4 address,
	// returning a synthesized IPv6 address that will route to the same
	// endpoint. IPv6Synthesize may be nil.
	IPv6Synthesize func(IPv4 string) string

	// HasIPv6Route should return true when the host has an IPv6 route.
	// Resolver has an internal implementation, hasRoutableIPv6Interface, to
	// determine this, but it can fail on some platforms ("route ip+net:
	// netlinkrib: permission denied" on Android, for example; see Go issue
	// 40569). When HasIPv6Route is nil, the internal implementation is used.
	HasIPv6Route func() bool

	// LogWarning is an optional callback which is used to log warnings and
	// transient errors which would otherwise not be recorded or returned.
	LogWarning func(error)

	// LogHostnames indicates whether to log hostnames in errors or not.
	LogHostnames bool

	// CacheExtensionInitialTTL specifies a minimum TTL to use when caching
	// domain resolution results. This minimum will override any TTL in the
	// DNS response. CacheExtensionInitialTTL is off when 0.
	CacheExtensionInitialTTL time.Duration

	// CacheExtensionVerifiedTTL specifies the minimum TTL to set for a cached
	// domain resolution result after the result has been verified.
	// CacheExtensionVerifiedTTL is off when 0.
	//
	// DNS cache extension is a workaround to partially mitigate issues with
	// obtaining underlying system DNS server IPs on platforms such as iOS
	// once a VPN is running and after network changes, such as changing from
	// Wi-Fi to mobile. While ResolveParameters.AlternateDNSServer can be
	// used to specify a known public DNS server, it may be the case that
	// public DNS servers are blocked or always falling back to a public DNS
	// server creates unusual traffic. And while it may be possible to use
	// the default system resolver, it lacks certain circumvention
	// capabilities.
	//
	// Extending the TTL for cached responses allows Psiphon to redial domains
	// using recently successful IPs.
	//
	// CacheExtensionInitialTTL allows for a greater initial minimum TTL, so
	// that the response entry remains in the cache long enough for a dial to
	// fully complete and verify the endpoint. Psiphon will call
	// Resolver.VerifyExtendCacheTTL once a dial has authenticated, for
	// example, the destination Psiphon server. VerifyCacheExtension will
	// further extend the corresponding TTL to CacheExtensionVerifiedTTL, a
	// longer TTL. CacheExtensionInitialTTL is intended to be on the order of
	// minutes and CacheExtensionVerifiedTTL may be on the order of hours.
	//
	// NOTE(review): the paragraph above refers to the verification call as
	// both Resolver.VerifyExtendCacheTTL and VerifyCacheExtension; confirm
	// which name the Resolver method actually has and use it consistently.
	//
	// When CacheExtensionVerifiedTTL is on, the DNS cache is not flushed on
	// network changes, to allow for the previously cached entries to remain
	// available in the problematic scenario. Like adjusting TTLs, this is an
	// explicit trade-off which doesn't adhere to standard best practice, but
	// is expected to be more blocking resistant; this approach also assumes
	// that endpoints such as CDN IPs are typically available on any network.
	CacheExtensionVerifiedTTL time.Duration
}
   134  
   135  func (c *NetworkConfig) allowDefaultResolver() bool {
   136  	// When BindToDevice is configured, the standard library resolver is not
   137  	// used, as the system resolver may not route outside of the VPN.
   138  	return c.BindToDevice == nil || c.AllowDefaultResolverWithBindToDevice
   139  }
   140  
   141  func (c *NetworkConfig) logWarning(err error) {
   142  	if c.LogWarning != nil {
   143  		c.LogWarning(err)
   144  	}
   145  }
   146  
// ResolveParameters specifies the configuration and behavior of a single
// ResolveIP call, a single domain name resolution.
//
// New ResolveParameters may be generated by calling MakeResolveParameters,
// which takes tactics parameters as an input.
//
// ResolveParameters may be persisted for replay.
type ResolveParameters struct {

	// AttemptsPerServer specifies how many requests to send to each DNS
	// server before trying the next server. IPv4 and IPv6 requests are sent
	// concurrently and count as one attempt.
	AttemptsPerServer int

	// AttemptsPerPreferredServer is AttemptsPerServer for a preferred
	// alternate DNS server.
	AttemptsPerPreferredServer int

	// RequestTimeout specifies how long to wait for a valid response before
	// moving on to the next attempt.
	RequestTimeout time.Duration

	// AwaitTimeout specifies how long to await an additional response after
	// the first response is received. This additional wait time applies only
	// when there is either no IPv4 or IPv6 response.
	AwaitTimeout time.Duration

	// PreresolvedIPAddress specifies an IP address result to be used in place
	// of making a request.
	PreresolvedIPAddress string

	// AlternateDNSServer specifies an alternate DNS server (IP:port, or IP
	// only with port 53 assumed) to be used when either no system DNS
	// servers are available or when PreferAlternateDNSServer is set.
	AlternateDNSServer string

	// PreferAlternateDNSServer indicates whether to prioritize using the
	// AlternateDNSServer. When set, the AlternateDNSServer is attempted
	// before any system DNS servers.
	PreferAlternateDNSServer bool

	// ProtocolTransformName specifies the name associated with
	// ProtocolTransformSpec and is used for metrics.
	ProtocolTransformName string

	// ProtocolTransformSpec specifies a transform to apply to the DNS request packet.
	// See: "github.com/Psiphon-Labs/psiphon-tunnel-core/psiphon/common/transforms".
	//
	// As transforms operate on strings and DNS requests are binary,
	// transforms should be expressed using hex characters.
	//
	// DNS transforms include strategies discovered by the Geneva team,
	// https://geneva.cs.umd.edu.
	ProtocolTransformSpec transforms.Spec

	// ProtocolTransformSeed specifies the seed to use for generating random
	// data in the ProtocolTransformSpec transform. To replay a transform,
	// specify the same seed.
	ProtocolTransformSeed *prng.Seed

	// IncludeEDNS0 indicates whether to include the EDNS(0) UDP maximum
	// response size extension in DNS requests. The resolver can handle
	// responses larger than 512 bytes (RFC 1035 maximum) regardless of
	// whether the extension is included; the extension may be included as
	// part of appearing similar to other DNS traffic.
	IncludeEDNS0 bool

	// firstAttemptWithAnswer backs GetFirstAttemptWithAnswer. It is read and
	// written only with sync/atomic operations, via GetFirstAttemptWithAnswer
	// and setFirstAttemptWithAnswer; 0 means no attempt has reported a valid
	// response.
	firstAttemptWithAnswer int32
}
   216  
   217  // GetFirstAttemptWithAnswer returns the index of the first request attempt
   218  // that received a valid response, for the most recent ResolveIP call using
   219  // this ResolveParameters. This information is used for logging metrics. The
   220  // first attempt has index 1. GetFirstAttemptWithAnswer return 0 when no
   221  // request attempt has reported a valid response.
   222  //
   223  // The caller is responsible for synchronizing use of a ResolveParameters
   224  // instance (e.g, use a distinct ResolveParameters per ResolveIP to ensure
   225  // GetFirstAttemptWithAnswer refers to a specific ResolveIP).
   226  func (r *ResolveParameters) GetFirstAttemptWithAnswer() int {
   227  	return int(atomic.LoadInt32(&r.firstAttemptWithAnswer))
   228  }
   229  
   230  func (r *ResolveParameters) setFirstAttemptWithAnswer(attempt int) {
   231  	atomic.StoreInt32(&r.firstAttemptWithAnswer, int32(attempt))
   232  }
   233  
   234  // Implementation note: Go's standard net.Resolver supports specifying a
   235  // custom Dial function. This could be used to implement at least a large
   236  // subset of the Resolver functionality on top of Go's standard library
   237  // resolver. However, net.Resolver is limited to using the CGO resolver on
   238  // Android, https://github.com/golang/go/issues/8877, in which case the
// custom Dial function is not used. Furthermore, the pure Go resolver in
   240  // net/dnsclient_unix.go appears to not be used on Windows at this time.
   241  //
   242  // Go also provides golang.org/x/net/dns/dnsmessage, a DNS message marshaller,
   243  // which could potentially be used in place of github.com/miekg/dns.
   244  
// Resolver is a DNS stub resolver, or DNS client, which resolves domain
// names. A Resolver instance maintains a cache, a network state snapshot,
// and metrics. All ResolveIP calls will share the same cache and state.
// Multiple concurrent ResolveIP calls are supported.
//
// networkConfig is assigned in NewResolver and deliberately left in place by
// Stop, so that concurrent ResolveIP calls never observe a nil pointer. The
// fields declared after mutex (network ID, IPv6 route flag, system DNS server
// list, cache, and metrics) are reset together by Stop while holding mutex;
// presumably mutex guards every access to them -- TODO confirm in the
// accessor helpers. lastServersUpdate is not touched in the reviewed portion
// of this file.
type Resolver struct {
	networkConfig *NetworkConfig

	mutex             sync.Mutex
	networkID         string
	hasIPv6Route      bool
	systemServers     []string
	lastServersUpdate time.Time
	cache             *lrucache.Cache
	metrics           resolverMetrics
}
   260  
// resolverMetrics holds the counters and RTT extremes accumulated by a
// Resolver. A fresh value is created by newResolverMetrics, which initializes
// minRTT to -1 (presumably a "no RTT recorded yet" sentinel -- TODO confirm
// where RTTs are recorded); Stop replaces the Resolver's metrics with a fresh
// value.
//
// ResolveIP updates these fields through the Resolver's updateMetric*
// helpers. Per the comments in ResolveIP: resolves counts every resolve of an
// actual domain, including cached and pre-resolved cases;
// requestsIPv4/requestsIPv6 count the intention to send a request, which may
// still fail locally before a packet is sent; and peakInFlight tracks the
// peak number of concurrent in-flight requests within a single ResolveIP
// call. defaultResolves/defaultSuccesses are updated when the standard
// library resolver is used.
type resolverMetrics struct {
	resolves                int
	cacheHits               int
	verifiedCacheExtensions int
	requestsIPv4            int
	requestsIPv6            int
	responsesIPv4           int
	responsesIPv6           int
	defaultResolves         int
	defaultSuccesses        int
	peakInFlight            int64
	minRTT                  time.Duration
	maxRTT                  time.Duration
}
   275  
   276  func newResolverMetrics() resolverMetrics {
   277  	return resolverMetrics{minRTT: -1}
   278  }
   279  
   280  // NewResolver creates a new Resolver instance.
   281  func NewResolver(networkConfig *NetworkConfig, networkID string) *Resolver {
   282  
   283  	r := &Resolver{
   284  		networkConfig: networkConfig,
   285  		metrics:       newResolverMetrics(),
   286  	}
   287  
   288  	// updateNetworkState will initialize the cache and network state,
   289  	// including system DNS servers.
   290  	r.updateNetworkState(networkID)
   291  
   292  	return r
   293  }
   294  
   295  // Stop clears the Resolver cache and resets metrics. Stop must be called only
   296  // after ceasing all in-flight ResolveIP goroutines, or else the cache or
   297  // metrics may repopulate. A Resolver may be resumed after calling Stop, but
   298  // Update must be called first.
   299  func (r *Resolver) Stop() {
   300  	r.mutex.Lock()
   301  	defer r.mutex.Unlock()
   302  
   303  	// r.networkConfig is not set to nil to avoid possible nil pointer
   304  	// dereferences by concurrent ResolveIP calls.
   305  
   306  	r.networkID = ""
   307  	r.hasIPv6Route = false
   308  	r.systemServers = nil
   309  	r.cache.Flush()
   310  	r.metrics = newResolverMetrics()
   311  }
   312  
   313  // MakeResolveParameters generates ResolveParameters using the input tactics
   314  // parameters and optional frontingProviderID context.
   315  func (r *Resolver) MakeResolveParameters(
   316  	p parameters.ParametersAccessor,
   317  	frontingProviderID string) (*ResolveParameters, error) {
   318  
   319  	params := &ResolveParameters{
   320  		AttemptsPerServer:          p.Int(parameters.DNSResolverAttemptsPerServer),
   321  		AttemptsPerPreferredServer: p.Int(parameters.DNSResolverAttemptsPerPreferredServer),
   322  		RequestTimeout:             p.Duration(parameters.DNSResolverRequestTimeout),
   323  		AwaitTimeout:               p.Duration(parameters.DNSResolverAwaitTimeout),
   324  	}
   325  
   326  	// When a frontingProviderID is specified, generate a pre-resolved IP
   327  	// address, based on tactics configuration.
   328  	if frontingProviderID != "" {
   329  		if p.WeightedCoinFlip(parameters.DNSResolverPreresolvedIPAddressProbability) {
   330  			CIDRs := p.LabeledCIDRs(parameters.DNSResolverPreresolvedIPAddressCIDRs, frontingProviderID)
   331  			if len(CIDRs) > 0 {
   332  				CIDR := CIDRs[prng.Intn(len(CIDRs))]
   333  				IP, err := generateIPAddressFromCIDR(CIDR)
   334  				if err != nil {
   335  					return nil, errors.Trace(err)
   336  				}
   337  				params.PreresolvedIPAddress = IP.String()
   338  			}
   339  		}
   340  	}
   341  
   342  	// When PreresolvedIPAddress is set, there's no DNS request and the
   343  	// following params can be skipped.
   344  	if params.PreresolvedIPAddress != "" {
   345  		return params, nil
   346  	}
   347  
   348  	// When preferring an alternate DNS server, select the alternate from
   349  	// DNSResolverPreferredAlternateServers. This list is for circumvention
   350  	// operations, such as using a public DNS server with a protocol
   351  	// transform. Otherwise, select from DNSResolverAlternateServers, which
   352  	// is a fallback list of DNS servers to be used when the system DNS
   353  	// servers cannot be obtained.
   354  
   355  	preferredServers := p.Strings(parameters.DNSResolverPreferredAlternateServers)
   356  	preferAlternateDNSServer := len(preferredServers) > 0 && p.WeightedCoinFlip(
   357  		parameters.DNSResolverPreferAlternateServerProbability)
   358  
   359  	alternateServers := preferredServers
   360  	if !preferAlternateDNSServer {
   361  		alternateServers = p.Strings(parameters.DNSResolverAlternateServers)
   362  	}
   363  
   364  	// Select an alternate DNS server, typically a public DNS server. Ensure
   365  	// tactics is configured with an empty DNSResolverAlternateServers list
   366  	// in cases where attempts to public DNS server are unwanted.
   367  	if len(alternateServers) > 0 {
   368  
   369  		alternateServer := alternateServers[prng.Intn(len(alternateServers))]
   370  
   371  		// Check that the alternateServer has a well-formed IP address; and add
   372  		// a default port if none it present.
   373  		host, _, err := net.SplitHostPort(alternateServer)
   374  		if err != nil {
   375  			// Assume the SplitHostPort error is due to missing port.
   376  			host = alternateServer
   377  			alternateServer = net.JoinHostPort(alternateServer, resolverDNSPort)
   378  		}
   379  		if net.ParseIP(host) == nil {
   380  			// Log warning and proceed without this DNS server.
   381  			r.networkConfig.logWarning(
   382  				errors.TraceNew("invalid alternate DNS server IP address"))
   383  
   384  		} else {
   385  
   386  			params.AlternateDNSServer = alternateServer
   387  			params.PreferAlternateDNSServer = preferAlternateDNSServer
   388  		}
   389  
   390  	}
   391  
   392  	// Select a DNS transform. DNS request transforms are "scoped" by
   393  	// alternate DNS server (IP address without port); that is, when an
   394  	// alternate DNS server is certain to be attempted first, a transform
   395  	// associated with and known to work with that DNS server will be
   396  	// selected. Otherwise, a transform from the default scope
   397  	// (transforms.SCOPE_ANY == "") is selected.
   398  	//
   399  	// In any case, ResolveIP will only apply a transform on the first request
   400  	// attempt.
   401  	if p.WeightedCoinFlip(parameters.DNSResolverProtocolTransformProbability) {
   402  
   403  		specs := p.ProtocolTransformSpecs(
   404  			parameters.DNSResolverProtocolTransformSpecs)
   405  		scopedSpecNames := p.ProtocolTransformScopedSpecNames(
   406  			parameters.DNSResolverProtocolTransformScopedSpecNames)
   407  
   408  		// The alternate DNS server will be the first attempt if
   409  		// PreferAlternateDNSServer or the list of system DNS servers is empty.
   410  		//
   411  		// Limitation: the system DNS server list may change, due to a later
   412  		// Resolver.update call when ResolveIP is called with these
   413  		// ResolveParameters.
   414  		_, systemServers := r.getNetworkState()
   415  		scope := transforms.SCOPE_ANY
   416  		if params.AlternateDNSServer != "" &&
   417  			(params.PreferAlternateDNSServer || len(systemServers) == 0) {
   418  
   419  			// Remove the port number, as the scope key is an IP address only.
   420  			//
   421  			// TODO: when we only just added the default port above, which is
   422  			// the common case, we could avoid this extra split.
   423  			host, _, err := net.SplitHostPort(params.AlternateDNSServer)
   424  			if err != nil {
   425  				return nil, errors.Trace(err)
   426  			}
   427  			scope = host
   428  		}
   429  
   430  		name, spec := specs.Select(scope, scopedSpecNames)
   431  
   432  		if spec != nil {
   433  			params.ProtocolTransformName = name
   434  			params.ProtocolTransformSpec = spec
   435  			var err error
   436  			params.ProtocolTransformSeed, err = prng.NewSeed()
   437  			if err != nil {
   438  				return nil, errors.Trace(err)
   439  			}
   440  		}
   441  	}
   442  
   443  	if p.WeightedCoinFlip(parameters.DNSResolverIncludeEDNS0Probability) {
   444  		params.IncludeEDNS0 = true
   445  	}
   446  
   447  	return params, nil
   448  }
   449  
   450  // ResolveAddress splits the input host:port address, calls ResolveIP to
   451  // resolve the IP address of the host, selects an IP if there are multiple,
   452  // and returns a rejoined IP:port.
   453  func (r *Resolver) ResolveAddress(
   454  	ctx context.Context,
   455  	networkID string,
   456  	params *ResolveParameters,
   457  	address string) (string, error) {
   458  
   459  	hostname, port, err := net.SplitHostPort(address)
   460  	if err != nil {
   461  		return "", errors.Trace(err)
   462  	}
   463  
   464  	IPs, err := r.ResolveIP(ctx, networkID, params, hostname)
   465  	if err != nil {
   466  		return "", errors.Trace(err)
   467  	}
   468  
   469  	return net.JoinHostPort(IPs[prng.Intn(len(IPs))].String(), port), nil
   470  }
   471  
   472  // ResolveIP resolves a domain name.
   473  //
   474  // The input params may be nil, in which case default timeouts are used.
   475  //
   476  // ResolveIP performs concurrent A and AAAA lookups, returns any valid
   477  // response IPs, and caches results. An error is returned when there are
   478  // no valid response IPs.
   479  //
   480  // ResolveIP is not a general purpose resolver and is Psiphon-specific. For
   481  // example, resolved domains are expected to exist; ResolveIP does not
   482  // fallback to TCP; does not consult any "hosts" file; does not perform RFC
   483  // 3484 sorting logic (see Go issue 18518); only implements a subset of
   484  // Go/glibc/resolv.conf(5) resolver parameters (attempts and timeouts, but
   485  // not rotate, single-request etc.) ResolveIP does not implement singleflight
// logic, as the Go resolver does, and allows multiple concurrent requests for
   487  // the same domain -- Psiphon won't often resolve the exact same domain
   488  // multiple times concurrently, and, when it does, there's a circumvention
   489  // benefit to attempting different DNS servers and protocol transforms.
   490  //
   491  // ResolveIP does not currently support DoT, DoH, or TCP; those protocols are
   492  // often blocked or less common. Instead, ResolveIP makes a best effort to
   493  // evade plaintext UDP DNS interference by ignoring invalid responses and by
   494  // optionally applying protocol transforms that may evade blocking.
   495  func (r *Resolver) ResolveIP(
   496  	ctx context.Context,
   497  	networkID string,
   498  	params *ResolveParameters,
   499  	hostname string) ([]net.IP, error) {
   500  
   501  	// ResolveIP does _not_ lock r.mutex for the lifetime of the function, to
   502  	// ensure many ResolveIP calls can run concurrently.
   503  
   504  	// If the hostname is already an IP address, just return that. For
   505  	// metrics, this does not count as a resolve, as the caller may invoke
   506  	// ResolveIP for all dials.
   507  	IP := net.ParseIP(hostname)
   508  	if IP != nil {
   509  		return []net.IP{IP}, nil
   510  	}
   511  
   512  	// Count all resolves of an actual domain, including cached and
   513  	// pre-resolved cases.
   514  	r.updateMetricResolves()
   515  
   516  	// Call updateNetworkState immediately before resolving, as a best effort
   517  	// to ensure that system DNS servers and IPv6 routing network state
   518  	// reflects the current network. updateNetworkState locks the Resolver
   519  	// mutex for its duration, and so concurrent ResolveIP calls may block at
   520  	// this point. However, all updateNetworkState operations are local to
   521  	// the host or device; and, if the networkID is unchanged since the last
   522  	// call, updateNetworkState may not perform any operations; and after the
   523  	// updateNetworkState call, ResolveIP proceeds without holding the mutex
   524  	// lock. As a result, this step should not prevent ResolveIP concurrency.
   525  	r.updateNetworkState(networkID)
   526  
   527  	if params == nil {
   528  		// Supply default ResolveParameters
   529  		params = &ResolveParameters{
   530  			AttemptsPerServer:          resolverDefaultAttemptsPerServer,
   531  			AttemptsPerPreferredServer: resolverDefaultAttemptsPerServer,
   532  			RequestTimeout:             resolverDefaultRequestTimeout,
   533  			AwaitTimeout:               resolverDefaultAwaitTimeout,
   534  		}
   535  	}
   536  
   537  	// When PreresolvedIPAddress is set, tactics parameters determined the IP address
   538  	// in this case.
   539  	if params.PreresolvedIPAddress != "" {
   540  		IP := net.ParseIP(params.PreresolvedIPAddress)
   541  		if IP == nil {
   542  			// Unexpected case, as MakeResolveParameters selects the IP address.
   543  			return nil, errors.TraceNew("invalid IP address")
   544  		}
   545  		return []net.IP{IP}, nil
   546  	}
   547  
   548  	// Use a snapshot of the current network state, including IPv6 routing and
   549  	// system DNS servers.
   550  	//
   551  	// Limitation: these values are used even if the network changes in the
   552  	// middle of a ResolveIP call; ResolveIP is not interrupted if the
   553  	// network changes.
   554  	hasIPv6Route, systemServers := r.getNetworkState()
   555  
   556  	// Use the standard library resolver when there's no GetDNSServers, or the
   557  	// system server list is otherwise empty, and no alternate DNS server is
   558  	// configured.
   559  	//
   560  	// Note that in the case where there are no system DNS servers and there
   561  	// is an AlternateDNSServer, if the AlternateDNSServer attempt fails,
   562  	// control does not flow back to defaultResolverLookupIP. On platforms
   563  	// without GetDNSServers, the caller must arrange for distinct attempts
   564  	// that try a AlternateDNSServer, or just use the standard library
   565  	// resolver.
   566  	//
   567  	// ResolveIP should always be called, even when defaultResolverLookupIP is
   568  	// expected to be used, to ensure correct metrics counts and ensure a
   569  	// consistent error message log stack for all DNS-related failures.
   570  	//
   571  	if len(systemServers) == 0 &&
   572  		params.AlternateDNSServer == "" &&
   573  		r.networkConfig.allowDefaultResolver() {
   574  
   575  		IPs, err := defaultResolverLookupIP(ctx, hostname, r.networkConfig.LogHostnames)
   576  		r.updateMetricDefaultResolver(err == nil)
   577  		if err != nil {
   578  			return nil, errors.Trace(err)
   579  		}
   580  		return IPs, err
   581  	}
   582  
   583  	// Consult the cache before making queries. This comes after the standard
   584  	// library case, to allow the standard library to provide its own caching
   585  	// logic.
   586  	IPs := r.getCache(hostname)
   587  	if IPs != nil {
   588  		return IPs, nil
   589  	}
   590  
   591  	// Set the list of DNS servers to attempt. AlternateDNSServer is used
   592  	// first when PreferAlternateDNSServer is set; otherwise
   593  	// AlternateDNSServer is used only when there is no system DNS server.
   594  	var servers []string
   595  	if params.AlternateDNSServer != "" &&
   596  		(len(systemServers) == 0 || params.PreferAlternateDNSServer) {
   597  		servers = []string{params.AlternateDNSServer}
   598  	}
   599  	servers = append(servers, systemServers...)
   600  	if len(servers) == 0 {
   601  		return nil, errors.TraceNew("no DNS servers")
   602  	}
   603  
   604  	// Set the request timeout and set up a reusable timer for handling
   605  	// request and await timeouts.
   606  	//
   607  	// We expect to always have a request timeout. Handle the unexpected no
   608  	// timeout, 0, case by setting the longest timeout possible, ~290 years;
   609  	// always having a non-zero timeout makes the following code marginally
   610  	// simpler.
   611  	requestTimeout := params.RequestTimeout
   612  	if requestTimeout == 0 {
   613  		requestTimeout = 1<<63 - 1
   614  	}
   615  	var timer *time.Timer
   616  	timerDrained := true
   617  	resetTimer := func(timeout time.Duration) {
   618  		if timer == nil {
   619  			timer = time.NewTimer(timeout)
   620  		} else {
   621  			if !timerDrained && !timer.Stop() {
   622  				<-timer.C
   623  			}
   624  			timer.Reset(timeout)
   625  		}
   626  		timerDrained = false
   627  	}
   628  
   629  	// Orchestrate the DNS requests
   630  
   631  	resolveCtx, cancelFunc := context.WithCancel(ctx)
   632  	defer cancelFunc()
   633  	waitGroup := new(sync.WaitGroup)
   634  	conns := common.NewConns()
   635  	type answer struct {
   636  		attempt int
   637  		IPs     []net.IP
   638  		TTLs    []time.Duration
   639  	}
   640  	var maxAttempts int
   641  	if params.PreferAlternateDNSServer {
   642  		maxAttempts = params.AttemptsPerPreferredServer
   643  		maxAttempts += (len(servers) - 1) * params.AttemptsPerServer
   644  	} else {
   645  		maxAttempts = len(servers) * params.AttemptsPerServer
   646  	}
   647  	answerChan := make(chan *answer, maxAttempts*2)
   648  	inFlight := int64(0)
   649  	awaitA := int32(1)
   650  	awaitAAAA := int32(1)
   651  	if !hasIPv6Route {
   652  		awaitAAAA = 0
   653  	}
   654  	var result *answer
   655  	var lastErr atomic.Value
   656  
   657  	stop := false
   658  	for i := 0; !stop && i < maxAttempts; i++ {
   659  
   660  		var index int
   661  		if params.PreferAlternateDNSServer {
   662  			if i < params.AttemptsPerPreferredServer {
   663  				index = 0
   664  			} else {
   665  				index = 1 + ((i - params.AttemptsPerPreferredServer) / params.AttemptsPerServer)
   666  			}
   667  		} else {
   668  			index = i / params.AttemptsPerServer
   669  		}
   670  
   671  		server := servers[index]
   672  
   673  		// Only the first attempt pair tries transforms, as it's not certain
   674  		// the transforms will be compatible with DNS servers.
   675  		useProtocolTransform := (i == 0 && params.ProtocolTransformSpec != nil)
   676  
   677  		// Send A and AAAA requests concurrently.
   678  		questionTypes := []resolverQuestionType{resolverQuestionTypeA, resolverQuestionTypeAAAA}
   679  		if !hasIPv6Route {
   680  			questionTypes = questionTypes[0:1]
   681  		}
   682  
   683  		for _, questionType := range questionTypes {
   684  
   685  			waitGroup.Add(1)
   686  
   687  			// For metrics, track peak concurrent in-flight requests for
   688  			// a _single_ ResolveIP. inFlight for this ResolveIP is also used
   689  			// to determine whether to await additional responses once the
   690  			// first, valid response is received. For that logic to be
   691  			// correct, we must increment inFlight in this outer goroutine to
   692  			// ensure the await logic sees either inFlight > 0 or an answer
   693  			// in the channel.
   694  			r.updateMetricPeakInFlight(atomic.AddInt64(&inFlight, 1))
   695  
   696  			go func(attempt int, questionType resolverQuestionType, useProtocolTransform bool) {
   697  				defer waitGroup.Done()
   698  
   699  				// We must decrement inFlight only after sending an answer and
   700  				// setting awaitA or awaitAAAA to ensure that the await logic
   701  				// in the outer goroutine will see inFlight 0 only once those
   702  				// operations are complete.
   703  				//
   704  				// We cannot wait and decrement inFlight when the outer
   705  				// goroutine receives answers, as no answer is sent in some
   706  				// cases, such as when the resolve fails due to NXDOMAIN.
   707  				defer atomic.AddInt64(&inFlight, -1)
   708  
   709  				// The request count metric counts the _intention_ to send
   710  				// requests, as there's a possibility that newResolverConn or
   711  				// performDNSQuery fail locally before sending a request packet.
   712  				switch questionType {
   713  				case resolverQuestionTypeA:
   714  					r.updateMetricRequestsIPv4()
   715  				case resolverQuestionTypeAAAA:
   716  					r.updateMetricRequestsIPv6()
   717  				}
   718  
   719  				// While it's possible, and potentially more optimal, to use
   720  				// the same UDP socket for both the A and AAAA request, we
   721  				// use a distinct socket per request, as common DNS clients do.
   722  				conn, err := r.newResolverConn(r.networkConfig.logWarning, server)
   723  				if err != nil {
   724  					lastErr.Store(errors.Trace(err))
   725  					return
   726  				}
   727  				defer conn.Close()
   728  
   729  				// There's no context.Context support in the underlying API
   730  				// used by performDNSQuery, so instead collect all the
   731  				// request conns so that they can be closed, and any blocking
   732  				// network I/O interrupted, below, if resolveCtx is done.
   733  				if !conns.Add(conn) {
   734  					// Add fails when conns is already closed.
   735  					return
   736  				}
   737  
   738  				// performDNSQuery will send the request and read a response.
   739  				// performDNSQuery will continue reading responses until it
   740  				// receives a valid response, which can mitigate a subset of
   741  				// DNS injection attacks (to the limited extent possible for
   742  				// plaintext DNS).
   743  				//
   744  				// For IPv4, NXDOMAIN or a response with no IPs is not
   745  				// expected for domains resolved by Psiphon, so
   746  				// performDNSQuery treats such a response as invalid. For
   747  				// IPv6, a response with no IPs, may be valid(even though the
   748  				// response could be forged); the resolver will continue its
   749  				// attempts loop if it has no other IPs.
   750  				//
   751  				// Each performDNSQuery has no timeout and runs
   752  				// until it has read a valid response or the requestCtx is
   753  				// done. This allows for slow arriving, valid responses to
   754  				// eventually succeed, even if the read time exceeds
   755  				// requestTimeout, as long as the read time is less than the
   756  				// requestCtx timeout.
   757  				//
   758  				// With this approach, the overall ResolveIP call may have
   759  				// more than 2 performDNSQuery requests in-flight at a time,
   760  				// as requestTimeout is used to schedule sending the next
   761  				// attempt but not cancel the current attempt. For
   762  				// connectionless UDP, the resulting network traffic should
   763  				// be similar to common DNS clients which do cancel request
   764  				// before beginning the next attempt.
   765  				IPs, TTLs, RTT, err := performDNSQuery(
   766  					resolveCtx,
   767  					r.networkConfig.logWarning,
   768  					params,
   769  					useProtocolTransform,
   770  					conn,
   771  					questionType,
   772  					hostname)
   773  
   774  				// Update the min/max RTT metric when reported (>=0) even if
   775  				// the result is an error; i.e., the even if there was an
   776  				// invalid response.
   777  				//
   778  				// Limitation: since individual requests aren't cancelled
   779  				// after requestTimeout, RTT metrics won't reflect
   780  				// no-response cases, although request and response count
   781  				// disparities will still show up in the metrics.
   782  				if RTT >= 0 {
   783  					r.updateMetricRTT(RTT)
   784  				}
   785  
   786  				if err != nil {
   787  					lastErr.Store(errors.Trace(err))
   788  					return
   789  				}
   790  
   791  				if len(IPs) > 0 {
   792  					select {
   793  					case answerChan <- &answer{attempt: attempt, IPs: IPs, TTLs: TTLs}:
   794  					default:
   795  					}
   796  				}
   797  
   798  				// Mark no longer awaiting A or AAAA as long as there is a
   799  				// valid response, even if there are no IPs in the IPv6 case.
   800  				switch questionType {
   801  				case resolverQuestionTypeA:
   802  					r.updateMetricResponsesIPv4()
   803  					atomic.StoreInt32(&awaitA, 0)
   804  				case resolverQuestionTypeAAAA:
   805  					r.updateMetricResponsesIPv6()
   806  					atomic.StoreInt32(&awaitAAAA, 0)
   807  				default:
   808  				}
   809  
   810  			}(i+1, questionType, useProtocolTransform)
   811  		}
   812  
   813  		resetTimer(requestTimeout)
   814  
   815  		select {
   816  		case result = <-answerChan:
   817  			// When the first answer, a response with valid IPs, arrives, exit
   818  			// the attempts loop. The following await branch may collect
   819  			// additional answers.
   820  			params.setFirstAttemptWithAnswer(result.attempt)
   821  			stop = true
   822  		case <-timer.C:
   823  			// When requestTimeout arrives, loop around and launch the next
   824  			// attempt; leave the existing requests running in case they
   825  			// eventually respond.
   826  			timerDrained = true
   827  		case <-resolveCtx.Done():
   828  			// When resolveCtx is done, exit the attempts loop.
   829  			//
   830  			// Append the existing lastErr, which may convey useful
   831  			// information to be reported in a failed_tunnel error message.
   832  			lastErr.Store(errors.Tracef("%v (lastErr: %v)", ctx.Err(), lastErr.Load()))
   833  			stop = true
   834  		}
   835  	}
   836  
   837  	// Receive any additional answers, now present in the channel, which
   838  	// arrived concurrent with the first answer. This receive avoids a race
   839  	// condition where inFlight may now be 0, with additional answers
   840  	// enqueued, in which case the following await branch is not taken.
   841  	//
   842  	// It's possible for the attempts loop to exit with no received answer due
   843  	// to timeouts or cancellation while, concurrently, an answer is sent to
   844  	// the channel. In this case, when result == nil, we ignore the answers
   845  	// and leave this as a failed resolve.
   846  	if result != nil {
   847  		for loop := true; loop; {
   848  			select {
   849  			case nextAnswer := <-answerChan:
   850  				result.IPs = append(result.IPs, nextAnswer.IPs...)
   851  				result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
   852  			default:
   853  				loop = false
   854  			}
   855  		}
   856  	}
   857  
   858  	// When we have an answer, await -- for a short time,
   859  	// params.AwaitTimeout -- extra answers from any remaining in-flight
   860  	// requests. Only await if the request isn't cancelled and we don't
   861  	// already have at least one IPv4 and one IPv6 response; only await AAAA
   862  	// if it was sent; note that a valid AAAA response may include no IPs
   863  	// lastErr is not set in timeout/cancelled cases here, since we already
   864  	// have an answer.
   865  	if result != nil &&
   866  		resolveCtx.Err() == nil &&
   867  		atomic.LoadInt64(&inFlight) > 0 &&
   868  		(atomic.LoadInt32(&awaitA) != 0 || atomic.LoadInt32(&awaitAAAA) != 0) &&
   869  		params.AwaitTimeout > 0 {
   870  
   871  		resetTimer(params.AwaitTimeout)
   872  
   873  		for {
   874  
   875  			stop := false
   876  			select {
   877  			case nextAnswer := <-answerChan:
   878  				result.IPs = append(result.IPs, nextAnswer.IPs...)
   879  				result.TTLs = append(result.TTLs, nextAnswer.TTLs...)
   880  			case <-timer.C:
   881  				timerDrained = true
   882  				stop = true
   883  			case <-resolveCtx.Done():
   884  				stop = true
   885  			}
   886  
   887  			if stop ||
   888  				atomic.LoadInt64(&inFlight) == 0 ||
   889  				(atomic.LoadInt32(&awaitA) == 0 && atomic.LoadInt32(&awaitAAAA) == 0) {
   890  				break
   891  			}
   892  		}
   893  	}
   894  
   895  	timer.Stop()
   896  
   897  	// Interrupt all workers.
   898  	cancelFunc()
   899  	conns.CloseAll()
   900  	waitGroup.Wait()
   901  
   902  	// When there's no answer, return the last error.
   903  	if result == nil {
   904  		err := lastErr.Load()
   905  		if err == nil {
   906  			err = errors.TraceNew("unexpected missing error")
   907  		}
   908  		if r.networkConfig.LogHostnames {
   909  			err = fmt.Errorf("resolve %s : %w", hostname, err.(error))
   910  		}
   911  		return nil, errors.Trace(err.(error))
   912  	}
   913  
   914  	if len(result.IPs) == 0 {
   915  		// Unexpected, since a len(IPs) > 0 check precedes sending to answerChan.
   916  		return nil, errors.TraceNew("unexpected no IPs")
   917  	}
   918  
   919  	// Update the cache now, after all results are gathered.
   920  	r.setCache(hostname, result.IPs, result.TTLs)
   921  
   922  	return result.IPs, nil
   923  }
   924  
   925  // VerifyCacheExtension extends the TTL for any cached result for the
   926  // specified hostname to at least NetworkConfig.CacheExtensionVerifiedTTL.
   927  func (r *Resolver) VerifyCacheExtension(hostname string) {
   928  	r.mutex.Lock()
   929  	defer r.mutex.Unlock()
   930  
   931  	if r.networkConfig.CacheExtensionVerifiedTTL == 0 {
   932  		return
   933  	}
   934  
   935  	if net.ParseIP(hostname) != nil {
   936  		return
   937  	}
   938  
   939  	entry, expires, ok := r.cache.GetWithExpiration(hostname)
   940  	if !ok {
   941  		return
   942  	}
   943  
   944  	// Change the TTL only if the entry expires and the existing TTL isn't
   945  	// longer than the extension.
   946  	neverExpires := time.Time{}
   947  	if expires == neverExpires ||
   948  		expires.After(time.Now().Add(r.networkConfig.CacheExtensionVerifiedTTL)) {
   949  		return
   950  	}
   951  
   952  	r.cache.Set(hostname, entry, r.networkConfig.CacheExtensionVerifiedTTL)
   953  
   954  	r.metrics.verifiedCacheExtensions += 1
   955  }
   956  
   957  // GetMetrics returns a summary of DNS metrics.
   958  func (r *Resolver) GetMetrics() string {
   959  	r.mutex.Lock()
   960  	defer r.mutex.Unlock()
   961  
   962  	// When r.metrics.minRTT < 0, min/maxRTT is unset.
   963  	minRTT := "n/a"
   964  	maxRTT := minRTT
   965  	if r.metrics.minRTT >= 0 {
   966  		minRTT = fmt.Sprintf("%d", r.metrics.minRTT/time.Millisecond)
   967  		maxRTT = fmt.Sprintf("%d", r.metrics.maxRTT/time.Millisecond)
   968  	}
   969  
   970  	extend := ""
   971  	if r.networkConfig.CacheExtensionVerifiedTTL > 0 {
   972  		extend = fmt.Sprintf("| extend %d ", r.metrics.verifiedCacheExtensions)
   973  	}
   974  
   975  	defaultResolves := ""
   976  	if r.networkConfig.allowDefaultResolver() {
   977  		defaultResolves = fmt.Sprintf(
   978  			" | def %d/%d", r.metrics.defaultResolves, r.metrics.defaultSuccesses)
   979  	}
   980  
   981  	// Note that the number of system resolvers is a point-in-time value,
   982  	// while the others are cumulative.
   983  
   984  	return fmt.Sprintf("resolves %d | hit %d %s| req v4/v6 %d/%d | resp %d/%d | peak %d | rtt %s - %s ms. | sys %d%s",
   985  		r.metrics.resolves,
   986  		r.metrics.cacheHits,
   987  		extend,
   988  		r.metrics.requestsIPv4,
   989  		r.metrics.requestsIPv6,
   990  		r.metrics.responsesIPv4,
   991  		r.metrics.responsesIPv6,
   992  		r.metrics.peakInFlight,
   993  		minRTT,
   994  		maxRTT,
   995  		len(r.systemServers),
   996  		defaultResolves)
   997  }
   998  
   999  // updateNetworkState updates the system DNS server list, IPv6 state, and the
  1000  // cache.
  1001  //
  1002  // Any errors that occur while querying network state are logged; in error
  1003  // conditions the functionality of the resolver may be reduced, but the
  1004  // resolver remains operational.
  1005  func (r *Resolver) updateNetworkState(networkID string) {
  1006  	r.mutex.Lock()
  1007  	defer r.mutex.Unlock()
  1008  
  1009  	// Only perform blocking/expensive update operations when necessary.
  1010  	updateAll := false
  1011  	updateIPv6Route := false
  1012  	updateServers := false
  1013  	flushCache := false
  1014  
  1015  	// If r.cache is nil, this is the first update call in NewResolver. Create
  1016  	// the cache and perform all updates.
  1017  	if r.cache == nil {
  1018  		r.cache = lrucache.NewWithLRU(
  1019  			resolverCacheDefaultTTL,
  1020  			resolverCacheReapFrequency,
  1021  			resolverCacheMaxEntries)
  1022  		updateAll = true
  1023  	}
  1024  
  1025  	// Perform all updates when the networkID has changed, which indicates a
  1026  	// different network.
  1027  	if r.networkID != networkID {
  1028  		updateAll = true
  1029  	}
  1030  
  1031  	if updateAll {
  1032  		updateIPv6Route = true
  1033  		updateServers = true
  1034  		flushCache = true
  1035  	}
  1036  
  1037  	// Even when the networkID has not changed, update DNS servers
  1038  	// periodically. This is similar to how other DNS clients
  1039  	// poll /etc/resolv.conf, including the period of 5s.
  1040  	if time.Since(r.lastServersUpdate) > resolverServersUpdateTTL {
  1041  		updateServers = true
  1042  	}
  1043  
  1044  	// Update hasIPv6Route, which indicates whether the current network has an
  1045  	// IPv6 route and so if DNS requests for AAAA records will be sent.
  1046  	// There's no use for AAAA records on IPv4-only networks; and other
  1047  	// common DNS clients omit AAAA requests on IPv4-only records, so these
  1048  	// requests would otherwise be unusual.
  1049  	//
  1050  	// There's no hasIPv4Route as we always need to resolve A records,
  1051  	// particularly for IPv4-only endpoints; for IPv6-only networks,
  1052  	// NetworkConfig.IPv6Synthesize should be used to accomodate IPv4 DNS
  1053  	// server addresses, and dials performed outside the Resolver will
  1054  	// similarly use NAT 64 (on iOS; on Android, 464XLAT will handle this
  1055  	// transparently).
  1056  	if updateIPv6Route {
  1057  
  1058  		if r.networkConfig.HasIPv6Route != nil {
  1059  
  1060  			r.hasIPv6Route = r.networkConfig.HasIPv6Route()
  1061  
  1062  		} else {
  1063  
  1064  			hasIPv6Route, err := hasRoutableIPv6Interface()
  1065  			if err != nil {
  1066  				// Log warning and proceed without IPv6.
  1067  				r.networkConfig.logWarning(
  1068  					errors.Tracef("unable to determine IPv6 route: %v", err))
  1069  				hasIPv6Route = false
  1070  			}
  1071  			r.hasIPv6Route = hasIPv6Route
  1072  		}
  1073  	}
  1074  
  1075  	// Update the list of system DNS servers. It's not an error condition here
  1076  	// if the list is empty: a subsequent ResolveIP may use
  1077  	// ResolveParameters which specifies an AlternateDNSServer.
  1078  	if updateServers && r.networkConfig.GetDNSServers != nil {
  1079  
  1080  		systemServers := []string{}
  1081  		for _, systemServer := range r.networkConfig.GetDNSServers() {
  1082  			host, _, err := net.SplitHostPort(systemServer)
  1083  			if err != nil {
  1084  				// Assume the SplitHostPort error is due to systemServer being
  1085  				// an IP only, and append the default port, 53. If
  1086  				// systemServer _isn't_ an IP, the following ParseIP will fail.
  1087  				host = systemServer
  1088  				systemServer = net.JoinHostPort(systemServer, resolverDNSPort)
  1089  			}
  1090  			if net.ParseIP(host) == nil {
  1091  				// Log warning and proceed without this DNS server.
  1092  				r.networkConfig.logWarning(
  1093  					errors.TraceNew("invalid DNS server IP address"))
  1094  				continue
  1095  			}
  1096  			systemServers = append(systemServers, systemServer)
  1097  		}
  1098  
  1099  		// Check if the list of servers has changed, including order. If
  1100  		// changed, flush the cache even if the networkID has not changed.
  1101  		// Cached results are only considered valid as long as the system DNS
  1102  		// configuration remains the same.
  1103  		equal := len(r.systemServers) == len(systemServers)
  1104  		if equal {
  1105  			for i := 0; i < len(r.systemServers); i++ {
  1106  				if r.systemServers[i] != systemServers[i] {
  1107  					equal = false
  1108  					break
  1109  				}
  1110  			}
  1111  		}
  1112  		flushCache = flushCache || !equal
  1113  
  1114  		// Concurrency note: once the r.systemServers slice is set, the
  1115  		// contents of the backing array must not be modified due to
  1116  		// concurrent ResolveIP calls.
  1117  		r.systemServers = systemServers
  1118  
  1119  		r.lastServersUpdate = time.Now()
  1120  	}
  1121  
  1122  	// Skip cache flushes when the extended DNS caching mechanism is enabled.
  1123  	// TODO: retain only verified cache entries?
  1124  	if flushCache && r.networkConfig.CacheExtensionVerifiedTTL == 0 {
  1125  		r.cache.Flush()
  1126  	}
  1127  
  1128  	// Set r.networkID only after all operations complete without errors; if
  1129  	// r.networkID were set earlier, a subsequent
  1130  	// ResolveIP/updateNetworkState call might proceed as if the network
  1131  	// state were updated for the specified network ID.
  1132  	r.networkID = networkID
  1133  }
  1134  
  1135  func (r *Resolver) getNetworkState() (bool, []string) {
  1136  	r.mutex.Lock()
  1137  	defer r.mutex.Unlock()
  1138  
  1139  	return r.hasIPv6Route, r.systemServers
  1140  }
  1141  
  1142  func (r *Resolver) setCache(hostname string, IPs []net.IP, TTLs []time.Duration) {
  1143  	r.mutex.Lock()
  1144  	defer r.mutex.Unlock()
  1145  
  1146  	// The shortest TTL is used. In some cases, a DNS server may omit the TTL
  1147  	// or set a 0 TTL, in which case the default is used.
  1148  	TTL := resolverDefaultAnswerTTL
  1149  	for _, answerTTL := range TTLs {
  1150  		if answerTTL > 0 && answerTTL < TTL {
  1151  			TTL = answerTTL
  1152  		}
  1153  	}
  1154  
  1155  	// When NetworkConfig.CacheExtensionInitialTTL configured, ensure the TTL
  1156  	// is no shorter than CacheExtensionInitialTTL.
  1157  	if r.networkConfig.CacheExtensionInitialTTL != 0 &&
  1158  		TTL < r.networkConfig.CacheExtensionInitialTTL {
  1159  
  1160  		TTL = r.networkConfig.CacheExtensionInitialTTL
  1161  	}
  1162  
  1163  	// Limitation: with concurrent ResolveIPs for the same domain, the last
  1164  	// setCache call determines the cache value. The results are not merged.
  1165  
  1166  	r.cache.Set(hostname, IPs, TTL)
  1167  }
  1168  
  1169  func (r *Resolver) getCache(hostname string) []net.IP {
  1170  	r.mutex.Lock()
  1171  	defer r.mutex.Unlock()
  1172  
  1173  	entry, ok := r.cache.Get(hostname)
  1174  	if !ok {
  1175  		return nil
  1176  	}
  1177  	r.metrics.cacheHits += 1
  1178  	return entry.([]net.IP)
  1179  }
  1180  
  1181  // newResolverConn creates a UDP socket that will send packets to serverAddr.
  1182  // serverAddr is an IP:port, which allows specifying the port for testing or
  1183  // in rare cases where the port isn't 53.
  1184  func (r *Resolver) newResolverConn(
  1185  	logWarning func(error),
  1186  	serverAddr string) (retConn net.Conn, retErr error) {
  1187  
  1188  	defer func() {
  1189  		if retErr != nil {
  1190  			logWarning(retErr)
  1191  		}
  1192  	}()
  1193  
  1194  	// When configured, attempt to synthesize an IPv6 address from
  1195  	// an IPv4 address for compatibility on DNS64/NAT64 networks.
  1196  	// If synthesize fails, try the original address.
  1197  	if r.networkConfig.IPv6Synthesize != nil {
  1198  		serverIPStr, port, err := net.SplitHostPort(serverAddr)
  1199  		if err != nil {
  1200  			return nil, errors.Trace(err)
  1201  		}
  1202  		serverIP := net.ParseIP(serverIPStr)
  1203  		if serverIP != nil && serverIP.To4() != nil {
  1204  			synthesized := r.networkConfig.IPv6Synthesize(serverIPStr)
  1205  			if synthesized != "" && net.ParseIP(synthesized) != nil {
  1206  				serverAddr = net.JoinHostPort(synthesized, port)
  1207  			}
  1208  		}
  1209  	}
  1210  
  1211  	dialer := &net.Dialer{}
  1212  	if r.networkConfig.BindToDevice != nil {
  1213  		dialer.Control = func(_, _ string, c syscall.RawConn) error {
  1214  			var controlErr error
  1215  			err := c.Control(func(fd uintptr) {
  1216  				_, err := r.networkConfig.BindToDevice(int(fd))
  1217  				if err != nil {
  1218  					controlErr = errors.Tracef("BindToDevice failed: %v", err)
  1219  					return
  1220  				}
  1221  			})
  1222  			if controlErr != nil {
  1223  				return errors.Trace(controlErr)
  1224  			}
  1225  			return errors.Trace(err)
  1226  		}
  1227  	}
  1228  
  1229  	// context.Background is ok in this case as the UDP dial is just a local
  1230  	// syscall to create the socket.
  1231  	conn, err := dialer.DialContext(context.Background(), "udp", serverAddr)
  1232  	if err != nil {
  1233  		return nil, errors.Trace(err)
  1234  	}
  1235  
  1236  	return conn, nil
  1237  }
  1238  
  1239  func (r *Resolver) updateMetricResolves() {
  1240  	r.mutex.Lock()
  1241  	defer r.mutex.Unlock()
  1242  
  1243  	r.metrics.resolves += 1
  1244  }
  1245  
  1246  func (r *Resolver) updateMetricRequestsIPv4() {
  1247  	r.mutex.Lock()
  1248  	defer r.mutex.Unlock()
  1249  
  1250  	r.metrics.requestsIPv4 += 1
  1251  }
  1252  
  1253  func (r *Resolver) updateMetricRequestsIPv6() {
  1254  	r.mutex.Lock()
  1255  	defer r.mutex.Unlock()
  1256  
  1257  	r.metrics.requestsIPv6 += 1
  1258  }
  1259  
  1260  func (r *Resolver) updateMetricResponsesIPv4() {
  1261  	r.mutex.Lock()
  1262  	defer r.mutex.Unlock()
  1263  
  1264  	r.metrics.responsesIPv4 += 1
  1265  }
  1266  
  1267  func (r *Resolver) updateMetricResponsesIPv6() {
  1268  	r.mutex.Lock()
  1269  	defer r.mutex.Unlock()
  1270  
  1271  	r.metrics.responsesIPv6 += 1
  1272  }
  1273  
  1274  func (r *Resolver) updateMetricDefaultResolver(success bool) {
  1275  	r.mutex.Lock()
  1276  	defer r.mutex.Unlock()
  1277  
  1278  	r.metrics.defaultResolves += 1
  1279  	if success {
  1280  		r.metrics.defaultSuccesses += 1
  1281  	}
  1282  }
  1283  
  1284  func (r *Resolver) updateMetricPeakInFlight(inFlight int64) {
  1285  	r.mutex.Lock()
  1286  	defer r.mutex.Unlock()
  1287  
  1288  	if inFlight > r.metrics.peakInFlight {
  1289  		r.metrics.peakInFlight = inFlight
  1290  	}
  1291  }
  1292  
  1293  func (r *Resolver) updateMetricRTT(rtt time.Duration) {
  1294  	r.mutex.Lock()
  1295  	defer r.mutex.Unlock()
  1296  
  1297  	if rtt < 0 {
  1298  		// Ignore invalid input.
  1299  		return
  1300  	}
  1301  
  1302  	// When r.metrics.minRTT < 0, min/maxRTT is unset.
  1303  	if r.metrics.minRTT < 0 || rtt < r.metrics.minRTT {
  1304  		r.metrics.minRTT = rtt
  1305  	}
  1306  
  1307  	if rtt > r.metrics.maxRTT {
  1308  		r.metrics.maxRTT = rtt
  1309  	}
  1310  }
  1311  
  1312  func hasRoutableIPv6Interface() (bool, error) {
  1313  
  1314  	interfaces, err := net.Interfaces()
  1315  	if err != nil {
  1316  		return false, errors.Trace(err)
  1317  	}
  1318  
  1319  	for _, in := range interfaces {
  1320  
  1321  		if (in.Flags&net.FlagUp == 0) ||
  1322  			(in.Flags&(net.FlagLoopback|net.FlagPointToPoint)) != 0 {
  1323  			continue
  1324  		}
  1325  
  1326  		addrs, err := in.Addrs()
  1327  		if err != nil {
  1328  			return false, errors.Trace(err)
  1329  		}
  1330  
  1331  		for _, addr := range addrs {
  1332  			if IPNet, ok := addr.(*net.IPNet); ok &&
  1333  				IPNet.IP.To4() == nil &&
  1334  				!IPNet.IP.IsLinkLocalUnicast() {
  1335  
  1336  				return true, nil
  1337  			}
  1338  		}
  1339  	}
  1340  
  1341  	return false, nil
  1342  }
  1343  
  1344  func generateIPAddressFromCIDR(CIDR string) (net.IP, error) {
  1345  	_, IPNet, err := net.ParseCIDR(CIDR)
  1346  	if err != nil {
  1347  		return nil, errors.Trace(err)
  1348  	}
  1349  	// A retry is required, since a CIDR may include broadcast IPs (a.b.c.0) or
  1350  	// other invalid values. The number of retries is limited to ensure we
  1351  	// don't hang in the case of a misconfiguration.
  1352  	for i := 0; i < 10; i++ {
  1353  		randBytes := prng.Bytes(len(IPNet.IP))
  1354  		IP := make(net.IP, len(IPNet.IP))
  1355  		// The 1 bits in the mask must apply to the IP in the CIDR and the 0
  1356  		// bits in the mask are available to randomize.
  1357  		for i := 0; i < len(IP); i++ {
  1358  			IP[i] = (IPNet.IP[i] & IPNet.Mask[i]) | (randBytes[i] & ^IPNet.Mask[i])
  1359  		}
  1360  		if IP.IsGlobalUnicast() && !common.IsBogon(IP) {
  1361  			return IP, nil
  1362  		}
  1363  	}
  1364  	return nil, errors.TraceNew("failed to generate random IP")
  1365  }
  1366  
  1367  type resolverQuestionType int
  1368  
  1369  const (
  1370  	resolverQuestionTypeA    = 0
  1371  	resolverQuestionTypeAAAA = 1
  1372  )
  1373  
  1374  func performDNSQuery(
  1375  	resolveCtx context.Context,
  1376  	logWarning func(error),
  1377  	params *ResolveParameters,
  1378  	useProtocolTransform bool,
  1379  	conn net.Conn,
  1380  	questionType resolverQuestionType,
  1381  	hostname string) ([]net.IP, []time.Duration, time.Duration, error) {
  1382  
  1383  	if useProtocolTransform {
  1384  		if params.ProtocolTransformSpec == nil ||
  1385  			params.ProtocolTransformSeed == nil {
  1386  			return nil, nil, -1, errors.TraceNew("invalid protocol transform configuration")
  1387  		}
  1388  		// miekg/dns expects conn to be a net.PacketConn or else it writes the
  1389  		// TCP length prefix
  1390  		udpConn, ok := conn.(*net.UDPConn)
  1391  		if !ok {
  1392  			return nil, nil, -1, errors.TraceNew("conn is not a *net.UDPConn")
  1393  		}
  1394  		conn = &transformDNSPacketConn{
  1395  			UDPConn:   udpConn,
  1396  			transform: params.ProtocolTransformSpec,
  1397  			seed:      params.ProtocolTransformSeed,
  1398  		}
  1399  	}
  1400  
  1401  	// UDPSize sets the receive buffer to > 512, even when we don't include
  1402  	// EDNS(0), which will mitigate issues with RFC 1035 non-compliant
  1403  	// servers. See Go issue 51127.
  1404  	dnsConn := &dns.Conn{
  1405  		Conn:    conn,
  1406  		UDPSize: udpPacketBufferSize,
  1407  	}
  1408  	defer dnsConn.Close()
  1409  
  1410  	// SetQuestion initializes request.MsgHdr.Id to a random value
  1411  	request := &dns.Msg{MsgHdr: dns.MsgHdr{RecursionDesired: true}}
  1412  	switch questionType {
  1413  	case resolverQuestionTypeA:
  1414  		request.SetQuestion(dns.Fqdn(hostname), dns.TypeA)
  1415  	case resolverQuestionTypeAAAA:
  1416  		request.SetQuestion(dns.Fqdn(hostname), dns.TypeAAAA)
  1417  	default:
  1418  		return nil, nil, -1, errors.TraceNew("unknown DNS request question type")
  1419  	}
  1420  	if params.IncludeEDNS0 {
  1421  		// miekg/dns: "RFC 6891, Section 6.1.1 allows the OPT record to appear
  1422  		// anywhere in the additional record section, but it's usually at the
  1423  		// end..."
  1424  		request.SetEdns0(udpPacketBufferSize, false)
  1425  	}
  1426  
  1427  	startTime := time.Now()
  1428  
  1429  	// Send the DNS request
  1430  	dnsConn.WriteMsg(request)
  1431  
  1432  	// Read and process the DNS response
  1433  	var IPs []net.IP
  1434  	var TTLs []time.Duration
  1435  	var lastErr error
  1436  	RTT := time.Duration(-1)
  1437  	for {
  1438  
  1439  		// Stop when resolveCtx is done; the caller, ResolveIP, will also
  1440  		// close conn, which will interrupt a blocking dnsConn.ReadMsg.
  1441  		if resolveCtx.Err() != nil {
  1442  
  1443  			// ResolveIP, which calls performDNSQuery, already records the
  1444  			// context error (e.g., context timeout), so instead report
  1445  			// lastErr, when present, as it may contain more useful
  1446  			// information about why a response was rejected.
  1447  			err := lastErr
  1448  			if err == nil {
  1449  				err = errors.Trace(resolveCtx.Err())
  1450  			}
  1451  
  1452  			return nil, nil, RTT, err
  1453  		}
  1454  
  1455  		// Read a response. RTT is the elapsed time between sending the
  1456  		// request and reading the last received response.
  1457  		response, err := dnsConn.ReadMsg()
  1458  		RTT = time.Since(startTime)
  1459  		if err == nil && response.MsgHdr.Id != request.MsgHdr.Id {
  1460  			err = dns.ErrId
  1461  		}
  1462  		if err != nil {
  1463  			// Try reading again, in case the first response packet failed to
  1464  			// unmarshal or had an invalid ID. The Go resolver also does this;
  1465  			// see Go issue 13281.
  1466  			if resolveCtx.Err() == nil {
  1467  				// Only log if resolveCtx is not done; otherwise the error could
  1468  				// be due to conn being closed by ResolveIP.
  1469  				lastErr = errors.Tracef("invalid response: %v", err)
  1470  				logWarning(lastErr)
  1471  			}
  1472  			continue
  1473  		}
  1474  
  1475  		// Check the RCode.
  1476  		//
  1477  		// For IPv4, we expect RCodeSuccess as Psiphon will typically only
  1478  		// resolve domains that exist and have a valid IP (when this isn't
  1479  		// the case, and we retry, the overall ResolveIP and its parent dial
  1480  		// will still abort after resolveCtx is done, or RequestTimeout
  1481  		// expires for maxAttempts).
  1482  		//
  1483  		// For IPv6, we should also expect RCodeSuccess even if there is no
  1484  		// AAAA record, as long as the domain exists and has an A record.
  1485  		// However, per RFC 6147 section 5.1.2, we may receive
  1486  		// NXDOMAIN: "...some servers respond with RCODE=3 to a AAAA query
  1487  		// even if there is an A record available for that owner name. Those
  1488  		// servers are in clear violation of the meaning of RCODE 3...". In
  1489  		// this case, we coalesce NXDOMAIN into success to treat the response
  1490  		// the same as success with no AAAA record.
  1491  		//
  1492  		// All other RCodes, which are unexpected, lead to a read retry.
  1493  		if response.MsgHdr.Rcode != dns.RcodeSuccess &&
  1494  			!(questionType == resolverQuestionTypeAAAA && response.MsgHdr.Rcode == dns.RcodeNameError) {
  1495  
  1496  			errMsg, ok := dns.RcodeToString[response.MsgHdr.Rcode]
  1497  			if !ok {
  1498  				errMsg = fmt.Sprintf("Rcode: %d", response.MsgHdr.Rcode)
  1499  			}
  1500  			lastErr = errors.Tracef("unexpected RCode: %v", errMsg)
  1501  			logWarning(lastErr)
  1502  			continue
  1503  		}
  1504  
  1505  		// Extract all IP answers, along with corresponding TTLs for caching.
  1506  		// Perform additional validation, which may lead to another read
  1507  		// retry. However, if _any_ valid IP is found, stop reading and
  1508  		// return that result. Again, the validation is only best effort.
  1509  
  1510  		checkFailed := false
  1511  		for _, answer := range response.Answer {
  1512  			haveAnswer := false
  1513  			var IP net.IP
  1514  			var TTLSec uint32
  1515  			switch questionType {
  1516  			case resolverQuestionTypeA:
  1517  				if a, ok := answer.(*dns.A); ok {
  1518  					IP = a.A
  1519  					TTLSec = a.Hdr.Ttl
  1520  					haveAnswer = true
  1521  				}
  1522  			case resolverQuestionTypeAAAA:
  1523  				if aaaa, ok := answer.(*dns.AAAA); ok {
  1524  					IP = aaaa.AAAA
  1525  					TTLSec = aaaa.Hdr.Ttl
  1526  					haveAnswer = true
  1527  				}
  1528  			}
  1529  			if !haveAnswer {
  1530  				continue
  1531  			}
  1532  			err := checkDNSAnswerIP(IP)
  1533  			if err != nil {
  1534  				checkFailed = true
  1535  				lastErr = errors.Tracef("invalid IP: %v", err)
  1536  				logWarning(lastErr)
  1537  				// Check the next answer
  1538  				continue
  1539  			}
  1540  			IPs = append(IPs, IP)
  1541  			TTLs = append(TTLs, time.Duration(TTLSec)*time.Second)
  1542  		}
  1543  
  1544  		// For IPv4, an IP is expected, as noted in the comment above.
  1545  		//
  1546  		// In potential cases where we resolve a domain that has only an IPv6
  1547  		// address, the concurrent AAAA request will deliver its result to
  1548  		// ResolveIP, and that answer will be selected, so only the "await"
  1549  		// logic will delay the parent dial in that case.
  1550  		if questionType == resolverQuestionTypeA && len(IPs) == 0 && !checkFailed {
  1551  			checkFailed = true
  1552  			lastErr = errors.TraceNew("unexpected empty A response")
  1553  			logWarning(lastErr)
  1554  		}
  1555  
  1556  		// Retry if there are no valid IPs and any error; if no error, this
  1557  		// may be a valid AAAA response with no IPs, in which case return the
  1558  		// result.
  1559  		if len(IPs) == 0 && checkFailed {
  1560  			continue
  1561  		}
  1562  
  1563  		return IPs, TTLs, RTT, nil
  1564  	}
  1565  }
  1566  
  1567  func checkDNSAnswerIP(IP net.IP) error {
  1568  
  1569  	if IP == nil {
  1570  		return errors.TraceNew("IP is nil")
  1571  	}
  1572  
  1573  	// Limitation: this could still be a phony/injected response, it's not
  1574  	// possible to verify with plaintext DNS, but a "bogon" IP is clearly
  1575  	// invalid.
  1576  	if common.IsBogon(IP) {
  1577  		return errors.TraceNew("IP is bogon")
  1578  	}
  1579  
  1580  	// Create a temporary socket bound to the destination IP. This checks
  1581  	// thats the local host has a route to this IP. If not, we'll reject the
  1582  	// IP. This prevents selecting an IP which is guaranteed to fail to dial.
  1583  	// Use UDP as this results in no network traffic; the destination port is
  1584  	// arbitrary. The Go resolver performs a similar operation.
  1585  	//
  1586  	// Limitations:
  1587  	// - We may cache the IP and reuse it without checking routability again;
  1588  	//   the cache should be flushed when network state changes.
  1589  	// - Given that the AAAA is requested only when the host has an IPv6
  1590  	//   route, we don't expect this to often fail with a _valid_ response.
  1591  	//   However, this remains a possibility and in this case,
  1592  	//   performDNSQuery will keep awaiting a response which can trigger
  1593  	//   the "await" logic.
  1594  	conn, err := net.DialUDP("udp", nil, &net.UDPAddr{IP: IP, Port: 443})
  1595  	if err != nil {
  1596  		return errors.Trace(err)
  1597  	}
  1598  	conn.Close()
  1599  
  1600  	return nil
  1601  }
  1602  
  1603  func defaultResolverLookupIP(
  1604  	ctx context.Context, hostname string, logHostnames bool) ([]net.IP, error) {
  1605  
  1606  	addrs, err := net.DefaultResolver.LookupIPAddr(ctx, hostname)
  1607  
  1608  	if err != nil && !logHostnames {
  1609  		// Remove domain names from "net" error messages.
  1610  		err = common.RedactNetError(err)
  1611  	}
  1612  
  1613  	if err != nil {
  1614  		return nil, errors.Trace(err)
  1615  	}
  1616  
  1617  	ips := make([]net.IP, len(addrs))
  1618  	for i, addr := range addrs {
  1619  		ips[i] = addr.IP
  1620  	}
  1621  
  1622  	return ips, nil
  1623  }
  1624  
// transformDNSPacketConn wraps a *net.UDPConn, intercepting Write calls and
// applying the specified protocol transform.
//
// As transforms operate on strings and DNS requests are binary, the transform
// should be expressed using hex characters. The DNS packet to be written
// (the input to Write) is converted to hex, transformed, converted back to
// binary, and then actually written to the UDP socket.
type transformDNSPacketConn struct {
	// UDPConn is the underlying socket. It is embedded, so all of its
	// methods other than the overridden Write are promoted unchanged.
	*net.UDPConn
	// transform is the spec applied to the hex encoding of each packet
	// passed to Write.
	transform transforms.Spec
	// seed is the PRNG seed passed to transform.Apply on every Write.
	seed      *prng.Seed
}
  1637  
  1638  func (conn *transformDNSPacketConn) Write(b []byte) (int, error) {
  1639  
  1640  	// Limitation: there is no check that a transformed packet remains within
  1641  	// the network packet MTU.
  1642  
  1643  	input := hex.EncodeToString(b)
  1644  	output, err := conn.transform.Apply(conn.seed, input)
  1645  	if err != nil {
  1646  		return 0, errors.Trace(err)
  1647  	}
  1648  	packet, err := hex.DecodeString(output)
  1649  	if err != nil {
  1650  		return 0, errors.Trace(err)
  1651  	}
  1652  
  1653  	_, err = conn.UDPConn.Write(packet)
  1654  	if err != nil {
  1655  		// In the error case, don't report bytes written as the number could
  1656  		// exceed the pre-transform length.
  1657  		return 0, errors.Trace(err)
  1658  	}
  1659  
  1660  	// Report the pre-transform length as bytes written, as the caller may check
  1661  	// that the requested len(b) bytes were written.
  1662  	return len(b), nil
  1663  }