github.com/kelleygo/clashcore@v1.0.2/dns/doq.go (about) 1 package dns 2 3 import ( 4 "context" 5 "crypto/tls" 6 "encoding/binary" 7 "errors" 8 "fmt" 9 "net" 10 "runtime" 11 "strconv" 12 "sync" 13 "time" 14 15 "github.com/kelleygo/clashcore/component/ca" 16 C "github.com/kelleygo/clashcore/constant" 17 "github.com/kelleygo/clashcore/log" 18 "github.com/metacubex/quic-go" 19 20 D "github.com/miekg/dns" 21 ) 22 23 const NextProtoDQ = "doq" 24 const ( 25 // QUICCodeNoError is used when the connection or stream needs to be closed, 26 // but there is no error to signal. 27 QUICCodeNoError = quic.ApplicationErrorCode(0) 28 // QUICCodeInternalError signals that the DoQ implementation encountered 29 // an internal error and is incapable of pursuing the transaction or the 30 // connection. 31 QUICCodeInternalError = quic.ApplicationErrorCode(1) 32 // QUICKeepAlivePeriod is the value that we pass to *quic.Config and that 33 // controls the period with with keep-alive frames are being sent to the 34 // connection. We set it to 20s as it would be in the quic-go@v0.27.1 with 35 // KeepAlive field set to true This value is specified in 36 // https://pkg.go.dev/github.com/metacubex/quic-go/internal/protocol#MaxKeepAliveInterval. 37 // 38 // TODO(ameshkov): Consider making it configurable. 39 QUICKeepAlivePeriod = time.Second * 20 40 DefaultTimeout = time.Second * 5 41 ) 42 43 // dnsOverQUIC is a struct that implements the Upstream interface for the 44 // DNS-over-QUIC protocol (spec: https://www.rfc-editor.org/rfc/rfc9250.html). 45 type dnsOverQUIC struct { 46 // quicConfig is the QUIC configuration that is used for establishing 47 // connections to the upstream. This configuration includes the TokenStore 48 // that needs to be stored for the lifetime of dnsOverQUIC since we can 49 // re-create the connection. 50 quicConfig *quic.Config 51 quicConfigGuard sync.Mutex 52 53 // conn is the current active QUIC connection. It can be closed and 54 // re-opened when needed. 55 conn quic.Connection 56 connMu sync.RWMutex 57 58 // bytesPool is a *sync.Pool we use to store byte buffers in. These byte 59 // buffers are used to read responses from the upstream. 60 bytesPool *sync.Pool 61 bytesPoolGuard sync.Mutex 62 63 addr string 64 proxyAdapter C.ProxyAdapter 65 proxyName string 66 r *Resolver 67 } 68 69 // type check 70 var _ dnsClient = (*dnsOverQUIC)(nil) 71 72 // newDoQ returns the DNS-over-QUIC Upstream. 73 func newDoQ(resolver *Resolver, addr string, proxyAdapter C.ProxyAdapter, proxyName string) (dnsClient, error) { 74 doq := &dnsOverQUIC{ 75 addr: addr, 76 proxyAdapter: proxyAdapter, 77 proxyName: proxyName, 78 r: resolver, 79 quicConfig: &quic.Config{ 80 KeepAlivePeriod: QUICKeepAlivePeriod, 81 TokenStore: newQUICTokenStore(), 82 }, 83 } 84 85 runtime.SetFinalizer(doq, (*dnsOverQUIC).Close) 86 return doq, nil 87 } 88 89 // Address implements the Upstream interface for *dnsOverQUIC. 90 func (doq *dnsOverQUIC) Address() string { return doq.addr } 91 92 func (doq *dnsOverQUIC) ExchangeContext(ctx context.Context, m *D.Msg) (msg *D.Msg, err error) { 93 // When sending queries over a QUIC connection, the DNS Message ID MUST be 94 // set to zero. 95 m = m.Copy() 96 id := m.Id 97 m.Id = 0 98 defer func() { 99 // Restore the original ID to not break compatibility with proxies. 100 m.Id = id 101 if msg != nil { 102 msg.Id = id 103 } 104 }() 105 106 // Check if there was already an active conn before sending the request. 107 // We'll only attempt to re-connect if there was one. 108 hasConnection := doq.hasConnection() 109 110 // Make the first attempt to send the DNS query. 111 msg, err = doq.exchangeQUIC(ctx, m) 112 113 // Make up to 2 attempts to re-open the QUIC connection and send the request 114 // again. There are several cases where this workaround is necessary to 115 // make DoQ usable. We need to make 2 attempts in the case when the 116 // connection was closed (due to inactivity for example) AND the server 117 // refuses to open a 0-RTT connection. 118 for i := 0; hasConnection && doq.shouldRetry(err) && i < 2; i++ { 119 log.Debugln("re-creating the QUIC connection and retrying due to %v", err) 120 121 // Close the active connection to make sure we'll try to re-connect. 122 doq.closeConnWithError(err) 123 124 // Retry sending the request. 125 msg, err = doq.exchangeQUIC(ctx, m) 126 } 127 128 if err != nil { 129 // If we're unable to exchange messages, make sure the connection is 130 // closed and signal about an internal error. 131 doq.closeConnWithError(err) 132 } 133 134 return msg, err 135 } 136 137 // Close implements the Upstream interface for *dnsOverQUIC. 138 func (doq *dnsOverQUIC) Close() (err error) { 139 doq.connMu.Lock() 140 defer doq.connMu.Unlock() 141 142 runtime.SetFinalizer(doq, nil) 143 144 if doq.conn != nil { 145 err = doq.conn.CloseWithError(QUICCodeNoError, "") 146 } 147 148 return err 149 } 150 151 // exchangeQUIC attempts to open a QUIC connection, send the DNS message 152 // through it and return the response it got from the server. 153 func (doq *dnsOverQUIC) exchangeQUIC(ctx context.Context, msg *D.Msg) (resp *D.Msg, err error) { 154 var conn quic.Connection 155 conn, err = doq.getConnection(ctx, true) 156 if err != nil { 157 return nil, err 158 } 159 160 var buf []byte 161 buf, err = msg.Pack() 162 if err != nil { 163 return nil, fmt.Errorf("failed to pack DNS message for DoQ: %w", err) 164 } 165 166 var stream quic.Stream 167 stream, err = doq.openStream(ctx, conn) 168 if err != nil { 169 return nil, err 170 } 171 172 _, err = stream.Write(AddPrefix(buf)) 173 if err != nil { 174 return nil, fmt.Errorf("failed to write to a QUIC stream: %w", err) 175 } 176 177 // The client MUST send the DNS query over the selected stream, and MUST 178 // indicate through the STREAM FIN mechanism that no further data will 179 // be sent on that stream. Note, that stream.Close() closes the 180 // write-direction of the stream, but does not prevent reading from it. 181 _ = stream.Close() 182 183 return doq.readMsg(stream) 184 } 185 186 // AddPrefix adds a 2-byte prefix with the DNS message length. 187 func AddPrefix(b []byte) (m []byte) { 188 m = make([]byte, 2+len(b)) 189 binary.BigEndian.PutUint16(m, uint16(len(b))) 190 copy(m[2:], b) 191 192 return m 193 } 194 195 // shouldRetry checks what error we received and decides whether it is required 196 // to re-open the connection and retry sending the request. 197 func (doq *dnsOverQUIC) shouldRetry(err error) (ok bool) { 198 return isQUICRetryError(err) 199 } 200 201 // getBytesPool returns (creates if needed) a pool we store byte buffers in. 202 func (doq *dnsOverQUIC) getBytesPool() (pool *sync.Pool) { 203 doq.bytesPoolGuard.Lock() 204 defer doq.bytesPoolGuard.Unlock() 205 206 if doq.bytesPool == nil { 207 doq.bytesPool = &sync.Pool{ 208 New: func() interface{} { 209 b := make([]byte, MaxMsgSize) 210 211 return &b 212 }, 213 } 214 } 215 216 return doq.bytesPool 217 } 218 219 // getConnection opens or returns an existing quic.Connection. useCached 220 // argument controls whether we should try to use the existing cached 221 // connection. If it is false, we will forcibly create a new connection and 222 // close the existing one if needed. 223 func (doq *dnsOverQUIC) getConnection(ctx context.Context, useCached bool) (quic.Connection, error) { 224 var conn quic.Connection 225 doq.connMu.RLock() 226 conn = doq.conn 227 if conn != nil && useCached { 228 doq.connMu.RUnlock() 229 230 return conn, nil 231 } 232 if conn != nil { 233 // we're recreating the connection, let's create a new one. 234 _ = conn.CloseWithError(QUICCodeNoError, "") 235 } 236 doq.connMu.RUnlock() 237 238 doq.connMu.Lock() 239 defer doq.connMu.Unlock() 240 241 var err error 242 conn, err = doq.openConnection(ctx) 243 if err != nil { 244 return nil, err 245 } 246 doq.conn = conn 247 248 return conn, nil 249 } 250 251 // hasConnection returns true if there's an active QUIC connection. 252 func (doq *dnsOverQUIC) hasConnection() (ok bool) { 253 doq.connMu.Lock() 254 defer doq.connMu.Unlock() 255 256 return doq.conn != nil 257 } 258 259 // getQUICConfig returns the QUIC config in a thread-safe manner. Note, that 260 // this method returns a pointer, it is forbidden to change its properties. 261 func (doq *dnsOverQUIC) getQUICConfig() (c *quic.Config) { 262 doq.quicConfigGuard.Lock() 263 defer doq.quicConfigGuard.Unlock() 264 265 return doq.quicConfig 266 } 267 268 // resetQUICConfig re-creates the tokens store as we may need to use a new one 269 // if we failed to connect. 270 func (doq *dnsOverQUIC) resetQUICConfig() { 271 doq.quicConfigGuard.Lock() 272 defer doq.quicConfigGuard.Unlock() 273 274 doq.quicConfig = doq.quicConfig.Clone() 275 doq.quicConfig.TokenStore = newQUICTokenStore() 276 } 277 278 // openStream opens a new QUIC stream for the specified connection. 279 func (doq *dnsOverQUIC) openStream(ctx context.Context, conn quic.Connection) (quic.Stream, error) { 280 ctx, cancel := context.WithCancel(ctx) 281 defer cancel() 282 283 stream, err := conn.OpenStreamSync(ctx) 284 if err == nil { 285 return stream, nil 286 } 287 288 // We can get here if the old QUIC connection is not valid anymore. We 289 // should try to re-create the connection again in this case. 290 newConn, err := doq.getConnection(ctx, false) 291 if err != nil { 292 return nil, err 293 } 294 // Open a new stream. 295 return newConn.OpenStreamSync(ctx) 296 } 297 298 // openConnection opens a new QUIC connection. 299 func (doq *dnsOverQUIC) openConnection(ctx context.Context) (conn quic.Connection, err error) { 300 // we're using bootstrapped address instead of what's passed to the function 301 // it does not create an actual connection, but it helps us determine 302 // what IP is actually reachable (when there're v4/v6 addresses). 303 rawConn, err := getDialHandler(doq.r, doq.proxyAdapter, doq.proxyName)(ctx, "udp", doq.addr) 304 if err != nil { 305 return nil, fmt.Errorf("failed to open a QUIC connection: %w", err) 306 } 307 addr := rawConn.RemoteAddr().String() 308 // It's never actually used 309 _ = rawConn.Close() 310 311 ip, port, err := net.SplitHostPort(addr) 312 if err != nil { 313 return nil, err 314 } 315 316 p, err := strconv.Atoi(port) 317 udpAddr := net.UDPAddr{IP: net.ParseIP(ip), Port: p} 318 udp, err := listenPacket(ctx, doq.proxyAdapter, doq.proxyName, "udp", addr, doq.r) 319 if err != nil { 320 return nil, err 321 } 322 323 host, _, err := net.SplitHostPort(doq.addr) 324 if err != nil { 325 return nil, err 326 } 327 328 tlsConfig := ca.GetGlobalTLSConfig( 329 &tls.Config{ 330 ServerName: host, 331 InsecureSkipVerify: false, 332 NextProtos: []string{ 333 NextProtoDQ, 334 }, 335 SessionTicketsDisabled: false, 336 }) 337 338 transport := quic.Transport{Conn: udp} 339 transport.SetCreatedConn(true) // auto close conn 340 transport.SetSingleUse(true) // auto close transport 341 conn, err = transport.Dial(ctx, &udpAddr, tlsConfig, doq.getQUICConfig()) 342 if err != nil { 343 return nil, fmt.Errorf("opening quic connection to %s: %w", doq.addr, err) 344 } 345 346 return conn, nil 347 } 348 349 // closeConnWithError closes the active connection with error to make sure that 350 // new queries were processed in another connection. We can do that in the case 351 // of a fatal error. 352 func (doq *dnsOverQUIC) closeConnWithError(err error) { 353 doq.connMu.Lock() 354 defer doq.connMu.Unlock() 355 356 if doq.conn == nil { 357 // Do nothing, there's no active conn anyways. 358 return 359 } 360 361 code := QUICCodeNoError 362 if err != nil { 363 code = QUICCodeInternalError 364 } 365 366 if errors.Is(err, quic.Err0RTTRejected) { 367 // Reset the TokenStore only if 0-RTT was rejected. 368 doq.resetQUICConfig() 369 } 370 371 err = doq.conn.CloseWithError(code, "") 372 if err != nil { 373 log.Errorln("failed to close the conn: %v", err) 374 } 375 doq.conn = nil 376 } 377 378 // readMsg reads the incoming DNS message from the QUIC stream. 379 func (doq *dnsOverQUIC) readMsg(stream quic.Stream) (m *D.Msg, err error) { 380 pool := doq.getBytesPool() 381 bufPtr := pool.Get().(*[]byte) 382 383 defer pool.Put(bufPtr) 384 385 respBuf := *bufPtr 386 n, err := stream.Read(respBuf) 387 if err != nil && n == 0 { 388 return nil, fmt.Errorf("reading response from %s: %w", doq.Address(), err) 389 } 390 391 // All DNS messages (queries and responses) sent over DoQ connections MUST 392 // be encoded as a 2-octet length field followed by the message content as 393 // specified in [RFC1035]. 394 // IMPORTANT: Note, that we ignore this prefix here as this implementation 395 // does not support receiving multiple messages over a single connection. 396 m = new(D.Msg) 397 err = m.Unpack(respBuf[2:]) 398 if err != nil { 399 return nil, fmt.Errorf("unpacking response from %s: %w", doq.Address(), err) 400 } 401 402 return m, nil 403 } 404 405 // newQUICTokenStore creates a new quic.TokenStore that is necessary to have 406 // in order to benefit from 0-RTT. 407 func newQUICTokenStore() (s quic.TokenStore) { 408 // You can read more on address validation here: 409 // https://datatracker.ietf.org/doc/html/rfc9000#section-8.1 410 // Setting maxOrigins to 1 and tokensPerOrigin to 10 assuming that this is 411 // more than enough for the way we use it (one connection per upstream). 412 return quic.NewLRUTokenStore(1, 10) 413 } 414 415 // isQUICRetryError checks the error and determines whether it may signal that 416 // we should re-create the QUIC connection. This requirement is caused by 417 // quic-go issues, see the comments inside this function. 418 // TODO(ameshkov): re-test when updating quic-go. 419 func isQUICRetryError(err error) (ok bool) { 420 var qAppErr *quic.ApplicationError 421 if errors.As(err, &qAppErr) && qAppErr.ErrorCode == 0 { 422 // This error is often returned when the server has been restarted, 423 // and we try to use the same connection on the client-side. It seems, 424 // that the old connections aren't closed immediately on the server-side 425 // and that's why one can run into this. 426 // In addition to that, quic-go HTTP3 client implementation does not 427 // clean up dead connections (this one is specific to DoH3 upstream): 428 // https://github.com/metacubex/quic-go/issues/765 429 return true 430 } 431 432 var qIdleErr *quic.IdleTimeoutError 433 if errors.As(err, &qIdleErr) { 434 // This error means that the connection was closed due to being idle. 435 // In this case we should forcibly re-create the QUIC connection. 436 // Reproducing is rather simple, stop the server and wait for 30 seconds 437 // then try to send another request via the same upstream. 438 return true 439 } 440 441 var resetErr *quic.StatelessResetError 442 if errors.As(err, &resetErr) { 443 // A stateless reset is sent when a server receives a QUIC packet that 444 // it doesn't know how to decrypt. For instance, it may happen when 445 // the server was recently rebooted. We should reconnect and try again 446 // in this case. 447 return true 448 } 449 450 var qTransportError *quic.TransportError 451 if errors.As(err, &qTransportError) && qTransportError.ErrorCode == quic.NoError { 452 // A transport error with the NO_ERROR error code could be sent by the 453 // server when it considers that it's time to close the connection. 454 // For example, Google DNS eventually closes an active connection with 455 // the NO_ERROR code and "Connection max age expired" message: 456 // https://github.com/AdguardTeam/dnsproxy/issues/283 457 return true 458 } 459 460 if errors.Is(err, quic.Err0RTTRejected) { 461 // This error happens when we try to establish a 0-RTT connection with 462 // a token the server is no more aware of. This can be reproduced by 463 // restarting the QUIC server (it will clear its tokens cache). The 464 // next connection attempt will return this error until the client's 465 // tokens cache is purged. 466 return true 467 } 468 469 return false 470 }