github.com/kelleygo/clashcore@v1.0.2/dns/doq.go (about)

     1  package dns
     2  
     3  import (
     4  	"context"
     5  	"crypto/tls"
     6  	"encoding/binary"
     7  	"errors"
     8  	"fmt"
     9  	"net"
    10  	"runtime"
    11  	"strconv"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/kelleygo/clashcore/component/ca"
    16  	C "github.com/kelleygo/clashcore/constant"
    17  	"github.com/kelleygo/clashcore/log"
    18  	"github.com/metacubex/quic-go"
    19  
    20  	D "github.com/miekg/dns"
    21  )
    22  
// NextProtoDQ is the ALPN protocol identifier offered in the TLS handshake
// when connecting to a DNS-over-QUIC upstream.
const NextProtoDQ = "doq"
const (
	// QUICCodeNoError is used when the connection or stream needs to be closed,
	// but there is no error to signal.
	QUICCodeNoError = quic.ApplicationErrorCode(0)
	// QUICCodeInternalError signals that the DoQ implementation encountered
	// an internal error and is incapable of pursuing the transaction or the
	// connection.
	QUICCodeInternalError = quic.ApplicationErrorCode(1)
	// QUICKeepAlivePeriod is the value that we pass to *quic.Config and that
	// controls the period with which keep-alive frames are being sent to the
	// connection.  We set it to 20s as it would be in the quic-go@v0.27.1 with
	// KeepAlive field set to true.  This value is specified in
	// https://pkg.go.dev/github.com/metacubex/quic-go/internal/protocol#MaxKeepAliveInterval.
	//
	// TODO(ameshkov):  Consider making it configurable.
	QUICKeepAlivePeriod = time.Second * 20
	// DefaultTimeout is a 5 second timeout; it is not referenced by the DoQ
	// code in this file.
	DefaultTimeout = time.Second * 5
)
    42  
// dnsOverQUIC is a struct that implements the Upstream interface for the
// DNS-over-QUIC protocol (spec: https://www.rfc-editor.org/rfc/rfc9250.html).
type dnsOverQUIC struct {
	// quicConfig is the QUIC configuration that is used for establishing
	// connections to the upstream.  This configuration includes the TokenStore
	// that needs to be stored for the lifetime of dnsOverQUIC since we can
	// re-create the connection.
	quicConfig *quic.Config
	// quicConfigGuard protects quicConfig, which is swapped for a clone with
	// a fresh TokenStore by resetQUICConfig.
	quicConfigGuard sync.Mutex

	// conn is the current active QUIC connection.  It can be closed and
	// re-opened when needed.
	conn quic.Connection
	// connMu protects conn.
	connMu sync.RWMutex

	// bytesPool is a *sync.Pool we use to store byte buffers in.  These byte
	// buffers are used to read responses from the upstream.  It is created
	// lazily by getBytesPool.
	bytesPool *sync.Pool
	// bytesPoolGuard protects the lazy initialization of bytesPool.
	bytesPoolGuard sync.Mutex

	// addr is the upstream address in host:port form; it is returned by
	// Address and used as the dial target.
	addr string
	// proxyAdapter, proxyName and r are handed to getDialHandler and
	// listenPacket when a new connection is opened.
	proxyAdapter C.ProxyAdapter
	proxyName    string
	r            *Resolver
}
    68  
// Compile-time check that *dnsOverQUIC satisfies the dnsClient interface.
var _ dnsClient = (*dnsOverQUIC)(nil)
    71  
    72  // newDoQ returns the DNS-over-QUIC Upstream.
    73  func newDoQ(resolver *Resolver, addr string, proxyAdapter C.ProxyAdapter, proxyName string) (dnsClient, error) {
    74  	doq := &dnsOverQUIC{
    75  		addr:         addr,
    76  		proxyAdapter: proxyAdapter,
    77  		proxyName:    proxyName,
    78  		r:            resolver,
    79  		quicConfig: &quic.Config{
    80  			KeepAlivePeriod: QUICKeepAlivePeriod,
    81  			TokenStore:      newQUICTokenStore(),
    82  		},
    83  	}
    84  
    85  	runtime.SetFinalizer(doq, (*dnsOverQUIC).Close)
    86  	return doq, nil
    87  }
    88  
    89  // Address implements the Upstream interface for *dnsOverQUIC.
    90  func (doq *dnsOverQUIC) Address() string { return doq.addr }
    91  
    92  func (doq *dnsOverQUIC) ExchangeContext(ctx context.Context, m *D.Msg) (msg *D.Msg, err error) {
    93  	// When sending queries over a QUIC connection, the DNS Message ID MUST be
    94  	// set to zero.
    95  	m = m.Copy()
    96  	id := m.Id
    97  	m.Id = 0
    98  	defer func() {
    99  		// Restore the original ID to not break compatibility with proxies.
   100  		m.Id = id
   101  		if msg != nil {
   102  			msg.Id = id
   103  		}
   104  	}()
   105  
   106  	// Check if there was already an active conn before sending the request.
   107  	// We'll only attempt to re-connect if there was one.
   108  	hasConnection := doq.hasConnection()
   109  
   110  	// Make the first attempt to send the DNS query.
   111  	msg, err = doq.exchangeQUIC(ctx, m)
   112  
   113  	// Make up to 2 attempts to re-open the QUIC connection and send the request
   114  	// again.  There are several cases where this workaround is necessary to
   115  	// make DoQ usable.  We need to make 2 attempts in the case when the
   116  	// connection was closed (due to inactivity for example) AND the server
   117  	// refuses to open a 0-RTT connection.
   118  	for i := 0; hasConnection && doq.shouldRetry(err) && i < 2; i++ {
   119  		log.Debugln("re-creating the QUIC connection and retrying due to %v", err)
   120  
   121  		// Close the active connection to make sure we'll try to re-connect.
   122  		doq.closeConnWithError(err)
   123  
   124  		// Retry sending the request.
   125  		msg, err = doq.exchangeQUIC(ctx, m)
   126  	}
   127  
   128  	if err != nil {
   129  		// If we're unable to exchange messages, make sure the connection is
   130  		// closed and signal about an internal error.
   131  		doq.closeConnWithError(err)
   132  	}
   133  
   134  	return msg, err
   135  }
   136  
   137  // Close implements the Upstream interface for *dnsOverQUIC.
   138  func (doq *dnsOverQUIC) Close() (err error) {
   139  	doq.connMu.Lock()
   140  	defer doq.connMu.Unlock()
   141  
   142  	runtime.SetFinalizer(doq, nil)
   143  
   144  	if doq.conn != nil {
   145  		err = doq.conn.CloseWithError(QUICCodeNoError, "")
   146  	}
   147  
   148  	return err
   149  }
   150  
   151  // exchangeQUIC attempts to open a QUIC connection, send the DNS message
   152  // through it and return the response it got from the server.
   153  func (doq *dnsOverQUIC) exchangeQUIC(ctx context.Context, msg *D.Msg) (resp *D.Msg, err error) {
   154  	var conn quic.Connection
   155  	conn, err = doq.getConnection(ctx, true)
   156  	if err != nil {
   157  		return nil, err
   158  	}
   159  
   160  	var buf []byte
   161  	buf, err = msg.Pack()
   162  	if err != nil {
   163  		return nil, fmt.Errorf("failed to pack DNS message for DoQ: %w", err)
   164  	}
   165  
   166  	var stream quic.Stream
   167  	stream, err = doq.openStream(ctx, conn)
   168  	if err != nil {
   169  		return nil, err
   170  	}
   171  
   172  	_, err = stream.Write(AddPrefix(buf))
   173  	if err != nil {
   174  		return nil, fmt.Errorf("failed to write to a QUIC stream: %w", err)
   175  	}
   176  
   177  	// The client MUST send the DNS query over the selected stream, and MUST
   178  	// indicate through the STREAM FIN mechanism that no further data will
   179  	// be sent on that stream. Note, that stream.Close() closes the
   180  	// write-direction of the stream, but does not prevent reading from it.
   181  	_ = stream.Close()
   182  
   183  	return doq.readMsg(stream)
   184  }
   185  
   186  // AddPrefix adds a 2-byte prefix with the DNS message length.
   187  func AddPrefix(b []byte) (m []byte) {
   188  	m = make([]byte, 2+len(b))
   189  	binary.BigEndian.PutUint16(m, uint16(len(b)))
   190  	copy(m[2:], b)
   191  
   192  	return m
   193  }
   194  
   195  // shouldRetry checks what error we received and decides whether it is required
   196  // to re-open the connection and retry sending the request.
   197  func (doq *dnsOverQUIC) shouldRetry(err error) (ok bool) {
   198  	return isQUICRetryError(err)
   199  }
   200  
   201  // getBytesPool returns (creates if needed) a pool we store byte buffers in.
   202  func (doq *dnsOverQUIC) getBytesPool() (pool *sync.Pool) {
   203  	doq.bytesPoolGuard.Lock()
   204  	defer doq.bytesPoolGuard.Unlock()
   205  
   206  	if doq.bytesPool == nil {
   207  		doq.bytesPool = &sync.Pool{
   208  			New: func() interface{} {
   209  				b := make([]byte, MaxMsgSize)
   210  
   211  				return &b
   212  			},
   213  		}
   214  	}
   215  
   216  	return doq.bytesPool
   217  }
   218  
   219  // getConnection opens or returns an existing quic.Connection. useCached
   220  // argument controls whether we should try to use the existing cached
   221  // connection.  If it is false, we will forcibly create a new connection and
   222  // close the existing one if needed.
   223  func (doq *dnsOverQUIC) getConnection(ctx context.Context, useCached bool) (quic.Connection, error) {
   224  	var conn quic.Connection
   225  	doq.connMu.RLock()
   226  	conn = doq.conn
   227  	if conn != nil && useCached {
   228  		doq.connMu.RUnlock()
   229  
   230  		return conn, nil
   231  	}
   232  	if conn != nil {
   233  		// we're recreating the connection, let's create a new one.
   234  		_ = conn.CloseWithError(QUICCodeNoError, "")
   235  	}
   236  	doq.connMu.RUnlock()
   237  
   238  	doq.connMu.Lock()
   239  	defer doq.connMu.Unlock()
   240  
   241  	var err error
   242  	conn, err = doq.openConnection(ctx)
   243  	if err != nil {
   244  		return nil, err
   245  	}
   246  	doq.conn = conn
   247  
   248  	return conn, nil
   249  }
   250  
   251  // hasConnection returns true if there's an active QUIC connection.
   252  func (doq *dnsOverQUIC) hasConnection() (ok bool) {
   253  	doq.connMu.Lock()
   254  	defer doq.connMu.Unlock()
   255  
   256  	return doq.conn != nil
   257  }
   258  
   259  // getQUICConfig returns the QUIC config in a thread-safe manner.  Note, that
   260  // this method returns a pointer, it is forbidden to change its properties.
   261  func (doq *dnsOverQUIC) getQUICConfig() (c *quic.Config) {
   262  	doq.quicConfigGuard.Lock()
   263  	defer doq.quicConfigGuard.Unlock()
   264  
   265  	return doq.quicConfig
   266  }
   267  
   268  // resetQUICConfig re-creates the tokens store as we may need to use a new one
   269  // if we failed to connect.
   270  func (doq *dnsOverQUIC) resetQUICConfig() {
   271  	doq.quicConfigGuard.Lock()
   272  	defer doq.quicConfigGuard.Unlock()
   273  
   274  	doq.quicConfig = doq.quicConfig.Clone()
   275  	doq.quicConfig.TokenStore = newQUICTokenStore()
   276  }
   277  
   278  // openStream opens a new QUIC stream for the specified connection.
   279  func (doq *dnsOverQUIC) openStream(ctx context.Context, conn quic.Connection) (quic.Stream, error) {
   280  	ctx, cancel := context.WithCancel(ctx)
   281  	defer cancel()
   282  
   283  	stream, err := conn.OpenStreamSync(ctx)
   284  	if err == nil {
   285  		return stream, nil
   286  	}
   287  
   288  	// We can get here if the old QUIC connection is not valid anymore.  We
   289  	// should try to re-create the connection again in this case.
   290  	newConn, err := doq.getConnection(ctx, false)
   291  	if err != nil {
   292  		return nil, err
   293  	}
   294  	// Open a new stream.
   295  	return newConn.OpenStreamSync(ctx)
   296  }
   297  
   298  // openConnection opens a new QUIC connection.
   299  func (doq *dnsOverQUIC) openConnection(ctx context.Context) (conn quic.Connection, err error) {
   300  	// we're using bootstrapped address instead of what's passed to the function
   301  	// it does not create an actual connection, but it helps us determine
   302  	// what IP is actually reachable (when there're v4/v6 addresses).
   303  	rawConn, err := getDialHandler(doq.r, doq.proxyAdapter, doq.proxyName)(ctx, "udp", doq.addr)
   304  	if err != nil {
   305  		return nil, fmt.Errorf("failed to open a QUIC connection: %w", err)
   306  	}
   307  	addr := rawConn.RemoteAddr().String()
   308  	// It's never actually used
   309  	_ = rawConn.Close()
   310  
   311  	ip, port, err := net.SplitHostPort(addr)
   312  	if err != nil {
   313  		return nil, err
   314  	}
   315  
   316  	p, err := strconv.Atoi(port)
   317  	udpAddr := net.UDPAddr{IP: net.ParseIP(ip), Port: p}
   318  	udp, err := listenPacket(ctx, doq.proxyAdapter, doq.proxyName, "udp", addr, doq.r)
   319  	if err != nil {
   320  		return nil, err
   321  	}
   322  
   323  	host, _, err := net.SplitHostPort(doq.addr)
   324  	if err != nil {
   325  		return nil, err
   326  	}
   327  
   328  	tlsConfig := ca.GetGlobalTLSConfig(
   329  		&tls.Config{
   330  			ServerName:         host,
   331  			InsecureSkipVerify: false,
   332  			NextProtos: []string{
   333  				NextProtoDQ,
   334  			},
   335  			SessionTicketsDisabled: false,
   336  		})
   337  
   338  	transport := quic.Transport{Conn: udp}
   339  	transport.SetCreatedConn(true) // auto close conn
   340  	transport.SetSingleUse(true)   // auto close transport
   341  	conn, err = transport.Dial(ctx, &udpAddr, tlsConfig, doq.getQUICConfig())
   342  	if err != nil {
   343  		return nil, fmt.Errorf("opening quic connection to %s: %w", doq.addr, err)
   344  	}
   345  
   346  	return conn, nil
   347  }
   348  
   349  // closeConnWithError closes the active connection with error to make sure that
   350  // new queries were processed in another connection.  We can do that in the case
   351  // of a fatal error.
   352  func (doq *dnsOverQUIC) closeConnWithError(err error) {
   353  	doq.connMu.Lock()
   354  	defer doq.connMu.Unlock()
   355  
   356  	if doq.conn == nil {
   357  		// Do nothing, there's no active conn anyways.
   358  		return
   359  	}
   360  
   361  	code := QUICCodeNoError
   362  	if err != nil {
   363  		code = QUICCodeInternalError
   364  	}
   365  
   366  	if errors.Is(err, quic.Err0RTTRejected) {
   367  		// Reset the TokenStore only if 0-RTT was rejected.
   368  		doq.resetQUICConfig()
   369  	}
   370  
   371  	err = doq.conn.CloseWithError(code, "")
   372  	if err != nil {
   373  		log.Errorln("failed to close the conn: %v", err)
   374  	}
   375  	doq.conn = nil
   376  }
   377  
   378  // readMsg reads the incoming DNS message from the QUIC stream.
   379  func (doq *dnsOverQUIC) readMsg(stream quic.Stream) (m *D.Msg, err error) {
   380  	pool := doq.getBytesPool()
   381  	bufPtr := pool.Get().(*[]byte)
   382  
   383  	defer pool.Put(bufPtr)
   384  
   385  	respBuf := *bufPtr
   386  	n, err := stream.Read(respBuf)
   387  	if err != nil && n == 0 {
   388  		return nil, fmt.Errorf("reading response from %s: %w", doq.Address(), err)
   389  	}
   390  
   391  	// All DNS messages (queries and responses) sent over DoQ connections MUST
   392  	// be encoded as a 2-octet length field followed by the message content as
   393  	// specified in [RFC1035].
   394  	// IMPORTANT: Note, that we ignore this prefix here as this implementation
   395  	// does not support receiving multiple messages over a single connection.
   396  	m = new(D.Msg)
   397  	err = m.Unpack(respBuf[2:])
   398  	if err != nil {
   399  		return nil, fmt.Errorf("unpacking response from %s: %w", doq.Address(), err)
   400  	}
   401  
   402  	return m, nil
   403  }
   404  
   405  // newQUICTokenStore creates a new quic.TokenStore that is necessary to have
   406  // in order to benefit from 0-RTT.
   407  func newQUICTokenStore() (s quic.TokenStore) {
   408  	// You can read more on address validation here:
   409  	// https://datatracker.ietf.org/doc/html/rfc9000#section-8.1
   410  	// Setting maxOrigins to 1 and tokensPerOrigin to 10 assuming that this is
   411  	// more than enough for the way we use it (one connection per upstream).
   412  	return quic.NewLRUTokenStore(1, 10)
   413  }
   414  
   415  // isQUICRetryError checks the error and determines whether it may signal that
   416  // we should re-create the QUIC connection.  This requirement is caused by
   417  // quic-go issues, see the comments inside this function.
   418  // TODO(ameshkov): re-test when updating quic-go.
   419  func isQUICRetryError(err error) (ok bool) {
   420  	var qAppErr *quic.ApplicationError
   421  	if errors.As(err, &qAppErr) && qAppErr.ErrorCode == 0 {
   422  		// This error is often returned when the server has been restarted,
   423  		// and we try to use the same connection on the client-side. It seems,
   424  		// that the old connections aren't closed immediately on the server-side
   425  		// and that's why one can run into this.
   426  		// In addition to that, quic-go HTTP3 client implementation does not
   427  		// clean up dead connections (this one is specific to DoH3 upstream):
   428  		// https://github.com/metacubex/quic-go/issues/765
   429  		return true
   430  	}
   431  
   432  	var qIdleErr *quic.IdleTimeoutError
   433  	if errors.As(err, &qIdleErr) {
   434  		// This error means that the connection was closed due to being idle.
   435  		// In this case we should forcibly re-create the QUIC connection.
   436  		// Reproducing is rather simple, stop the server and wait for 30 seconds
   437  		// then try to send another request via the same upstream.
   438  		return true
   439  	}
   440  
   441  	var resetErr *quic.StatelessResetError
   442  	if errors.As(err, &resetErr) {
   443  		// A stateless reset is sent when a server receives a QUIC packet that
   444  		// it doesn't know how to decrypt.  For instance, it may happen when
   445  		// the server was recently rebooted.  We should reconnect and try again
   446  		// in this case.
   447  		return true
   448  	}
   449  
   450  	var qTransportError *quic.TransportError
   451  	if errors.As(err, &qTransportError) && qTransportError.ErrorCode == quic.NoError {
   452  		// A transport error with the NO_ERROR error code could be sent by the
   453  		// server when it considers that it's time to close the connection.
   454  		// For example, Google DNS eventually closes an active connection with
   455  		// the NO_ERROR code and "Connection max age expired" message:
   456  		// https://github.com/AdguardTeam/dnsproxy/issues/283
   457  		return true
   458  	}
   459  
   460  	if errors.Is(err, quic.Err0RTTRejected) {
   461  		// This error happens when we try to establish a 0-RTT connection with
   462  		// a token the server is no more aware of.  This can be reproduced by
   463  		// restarting the QUIC server (it will clear its tokens cache).  The
   464  		// next connection attempt will return this error until the client's
   465  		// tokens cache is purged.
   466  		return true
   467  	}
   468  
   469  	return false
   470  }