dubbo.apache.org/dubbo-go/v3@v3.1.1/xds/server/listener_wrapper.go (about)

     1  /*
     2   * Licensed to the Apache Software Foundation (ASF) under one or more
     3   * contributor license agreements.  See the NOTICE file distributed with
     4   * this work for additional information regarding copyright ownership.
     5   * The ASF licenses this file to You under the Apache License, Version 2.0
     6   * (the "License"); you may not use this file except in compliance with
     7   * the License.  You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  /*
    19   *
    20   * Copyright 2021 gRPC authors.
    21   *
    22   */
    23  
    24  // Package server contains internal server-side functionality used by the public
    25  // facing xds package.
    26  package server
    27  
    28  import (
    29  	"errors"
    30  	"fmt"
    31  	"net"
    32  	"sync"
    33  	"sync/atomic"
    34  	"time"
    35  	"unsafe"
    36  )
    37  
    38  import (
    39  	dubbogoLogger "github.com/dubbogo/gost/log/logger"
    40  
    41  	"google.golang.org/grpc/backoff"
    42  
    43  	"google.golang.org/grpc/connectivity"
    44  
    45  	"google.golang.org/grpc/grpclog"
    46  )
    47  
    48  import (
    49  	"dubbo.apache.org/dubbo-go/v3/xds/client/bootstrap"
    50  	"dubbo.apache.org/dubbo-go/v3/xds/client/resource"
    51  	internalbackoff "dubbo.apache.org/dubbo-go/v3/xds/utils/backoff"
    52  	"dubbo.apache.org/dubbo-go/v3/xds/utils/envconfig"
    53  	"dubbo.apache.org/dubbo-go/v3/xds/utils/grpcsync"
    54  )
    55  
var (
	// logger is the grpclog component logger registered under the name "xds".
	logger = grpclog.Component("xds")

	// Backoff strategy for temporary errors received from Accept(). If this
	// needs to be configurable, we can inject it through ListenerWrapperParams.
	// It is configured with a 5ms base delay, a multiplier of 2.0 and a
	// maximum delay of 1s.
	bs = internalbackoff.Exponential{Config: backoff.Config{
		BaseDelay:  5 * time.Millisecond,
		Multiplier: 2.0,
		MaxDelay:   1 * time.Second,
	}}
	// backoffFunc converts the number of consecutive failed Accept() attempts
	// into the duration Accept() waits before trying again.
	backoffFunc = bs.Backoff
)
    68  
// ServingModeCallback is the callback that users can register to get notified
// about the server's serving mode changes. The callback is invoked with the
// address of the listener and its new mode. The err parameter is set to a
// non-nil error if the server has transitioned into not-serving mode.
//
// Within this file the callback is invoked from switchMode while the
// listenerWrapper's mutex is held.
type ServingModeCallback func(addr net.Addr, mode connectivity.ServingMode, err error)
    74  
// DrainCallback is the callback that an xDS-enabled server registers to get
// notified about updates to the Listener configuration. The server is expected
// to gracefully shutdown existing connections, thereby forcing clients to
// reconnect and have the new configuration applied to the newly created
// connections.
//
// Within this file the callback is invoked with the wrapped listener's address
// from handleLDSUpdate, and only when envconfig.XDSRBAC is set and the
// callback is non-nil.
type DrainCallback func(addr net.Addr)
    81  
// XDSClient wraps the methods on the XDSClient which are required by
// the listenerWrapper.
type XDSClient interface {
	// WatchListener registers a callback for the named Listener resource.
	// NewListenerWrapper uses the returned function to cancel the watch.
	WatchListener(string, func(resource.ListenerUpdate, error)) func()
	// WatchRouteConfig is the route configuration counterpart of
	// WatchListener. It is not called directly in this file.
	// NOTE(review): the returned function is presumably a cancel function as
	// well — confirm against the implementation.
	WatchRouteConfig(string, func(resource.RouteConfigUpdate, error)) func()
	// BootstrapConfig returns the bootstrap configuration held by the client.
	BootstrapConfig() *bootstrap.Config
}
    89  
// ListenerWrapperParams wraps parameters required to create a listenerWrapper.
type ListenerWrapperParams struct {
	// Listener is the net.Listener passed by the user that is to be wrapped.
	// Its Addr() must return a *net.TCPAddr: NewListenerWrapper performs an
	// unchecked type assertion on it and panics otherwise.
	Listener net.Listener
	// ListenerResourceName is the xDS Listener resource to request.
	ListenerResourceName string
	// XDSCredsInUse specifies whether or not the user expressed interest to
	// receive security configuration from the control plane.
	XDSCredsInUse bool
	// XDSClient provides the functionality from the XDSClient required here.
	// It must be non-nil, since NewListenerWrapper calls WatchListener on it.
	XDSClient XDSClient
	// ModeCallback is the callback to invoke when the serving mode changes.
	// It may be nil, in which case no notification is delivered.
	ModeCallback ServingModeCallback
	// DrainCallback is the callback to invoke when the Listener gets a LDS
	// update. It may be nil, in which case no notification is delivered.
	DrainCallback DrainCallback
}
   107  
   108  // NewListenerWrapper creates a new listenerWrapper with params. It returns a
   109  // net.Listener and a channel which is written to, indicating that the former is
   110  // ready to be passed to grpc.Serve().
   111  //
   112  // Only TCP listeners are supported.
   113  func NewListenerWrapper(params ListenerWrapperParams) (net.Listener, <-chan struct{}) {
   114  	lw := &listenerWrapper{
   115  		Listener:          params.Listener,
   116  		name:              params.ListenerResourceName,
   117  		xdsCredsInUse:     params.XDSCredsInUse,
   118  		xdsC:              params.XDSClient,
   119  		modeCallback:      params.ModeCallback,
   120  		drainCallback:     params.DrainCallback,
   121  		isUnspecifiedAddr: params.Listener.Addr().(*net.TCPAddr).IP.IsUnspecified(),
   122  
   123  		closed:      grpcsync.NewEvent(),
   124  		goodUpdate:  grpcsync.NewEvent(),
   125  		ldsUpdateCh: make(chan ldsUpdateWithError, 1),
   126  		rdsUpdateCh: make(chan rdsHandlerUpdate, 1),
   127  	}
   128  	lw.logger = dubbogoLogger.GetLogger()
   129  
   130  	// Serve() verifies that Addr() returns a valid TCPAddr. So, it is safe to
   131  	// ignore the error from SplitHostPort().
   132  	lisAddr := lw.Listener.Addr().String()
   133  	lw.addr, lw.port, _ = net.SplitHostPort(lisAddr)
   134  
   135  	lw.rdsHandler = newRDSHandler(lw.xdsC, lw.rdsUpdateCh)
   136  
   137  	cancelWatch := lw.xdsC.WatchListener(lw.name, lw.handleListenerUpdate)
   138  	lw.logger.Infof("Watch started on resource name %v", lw.name)
   139  	lw.cancelWatch = func() {
   140  		cancelWatch()
   141  		lw.logger.Infof("Watch canceled on resource name %v", lw.name)
   142  	}
   143  	go lw.run()
   144  	return lw, lw.goodUpdate.Done()
   145  }
   146  
// ldsUpdateWithError pairs a Listener resource update with the error reported
// alongside it, so that both can travel over ldsUpdateCh as a single value
// from handleListenerUpdate to the run goroutine.
type ldsUpdateWithError struct {
	update resource.ListenerUpdate
	err    error
}
   151  
// listenerWrapper wraps the net.Listener associated with the listening address
// passed to Serve(). It also contains all other state associated with this
// particular invocation of Serve().
type listenerWrapper struct {
	// The wrapped listener. Accept() and Close() are overridden by methods on
	// listenerWrapper; the remaining net.Listener methods are promoted.
	net.Listener
	// logger is obtained from dubbogoLogger.GetLogger() in NewListenerWrapper.
	logger dubbogoLogger.Logger

	// Values copied from ListenerWrapperParams, plus cancelWatch, which
	// cancels the Listener watch started in NewListenerWrapper and logs the
	// cancellation.
	name          string
	xdsCredsInUse bool
	xdsC          XDSClient
	cancelWatch   func()
	modeCallback  ServingModeCallback
	drainCallback DrainCallback

	// Set to true if the listener is bound to the IP_ANY address (which is
	// "0.0.0.0" for IPv4 and "::" for IPv6).
	isUnspecifiedAddr bool
	// Listening address and port. Used to validate the socket address in the
	// Listener resource received from the control plane.
	addr, port string

	// This is used to notify that a good update has been received and that
	// Serve() can be invoked on the underlying gRPC server. Using an event
	// instead of a vanilla channel simplifies the update handler as it need not
	// keep track of whether the received update is the first one or not.
	goodUpdate *grpcsync.Event
	// A small race exists in the XDSClient code between the receipt of an xDS
	// response and the user canceling the associated watch. In this window,
	// the registered callback may be invoked after the watch is canceled, and
	// the user is expected to work around this. This event signifies that the
	// listener is closed (and hence the watch is canceled), and we drop any
	// updates received in the callback if this event has fired.
	closed *grpcsync.Event

	// mu guards access to the current serving mode and the filter chains. The
	// reason for using an rw lock here is that these fields are read in
	// Accept() for all incoming connections, but writes happen rarely (when we
	// get a Listener resource update).
	mu sync.RWMutex
	// Current serving mode.
	mode connectivity.ServingMode
	// Filter chains received as part of the last good update.
	filterChains *resource.FilterChainManager

	// rdsHandler is used for any dynamic RDS resources specified in a LDS
	// update.
	rdsHandler *rdsHandler
	// rdsUpdates are the RDS resources received from the management
	// server, keyed on the RouteName of the RDS resource. It is only accessed
	// through atomic.LoadPointer / atomic.StorePointer.
	rdsUpdates unsafe.Pointer // *map[string]resource.RouteConfigUpdate
	// ldsUpdateCh is a channel for XDSClient LDS updates.
	ldsUpdateCh chan ldsUpdateWithError
	// rdsUpdateCh is a channel for XDSClient RDS updates.
	rdsUpdateCh chan rdsHandlerUpdate
}
   207  
   208  // Accept blocks on an Accept() on the underlying listener, and wraps the
   209  // returned net.connWrapper with the configured certificate providers.
   210  func (l *listenerWrapper) Accept() (net.Conn, error) {
   211  	var retries int
   212  	for {
   213  		conn, err := l.Listener.Accept()
   214  		if err != nil {
   215  			// Temporary() method is implemented by certain error types returned
   216  			// from the net package, and it is useful for us to not shutdown the
   217  			// server in these conditions. The listen queue being full is one
   218  			// such case.
   219  			if ne, ok := err.(interface{ Temporary() bool }); !ok || !ne.Temporary() {
   220  				return nil, err
   221  			}
   222  			retries++
   223  			timer := time.NewTimer(backoffFunc(retries))
   224  			select {
   225  			case <-timer.C:
   226  			case <-l.closed.Done():
   227  				timer.Stop()
   228  				// Continuing here will cause us to call Accept() again
   229  				// which will return a non-temporary error.
   230  				continue
   231  			}
   232  			continue
   233  		}
   234  		// Reset retries after a successful Accept().
   235  		retries = 0
   236  
   237  		// Since the net.Conn represents an incoming connection, the source and
   238  		// destination address can be retrieved from the local address and
   239  		// remote address of the net.Conn respectively.
   240  		destAddr, ok1 := conn.LocalAddr().(*net.TCPAddr)
   241  		srcAddr, ok2 := conn.RemoteAddr().(*net.TCPAddr)
   242  		if !ok1 || !ok2 {
   243  			// If the incoming connection is not a TCP connection, which is
   244  			// really unexpected since we check whether the provided listener is
   245  			// a TCP listener in Serve(), we return an error which would cause
   246  			// us to stop serving.
   247  			return nil, fmt.Errorf("received connection with non-TCP address (local: %T, remote %T)", conn.LocalAddr(), conn.RemoteAddr())
   248  		}
   249  
   250  		l.mu.RLock()
   251  		if l.mode == connectivity.ServingModeNotServing {
   252  			// Close connections as soon as we accept them when we are in
   253  			// "not-serving" mode. Since we accept a net.Listener from the user
   254  			// in Serve(), we cannot close the listener when we move to
   255  			// "not-serving". Closing the connection immediately upon accepting
   256  			// is one of the other ways to implement the "not-serving" mode as
   257  			// outlined in gRFC A36.
   258  			l.mu.RUnlock()
   259  			conn.Close()
   260  			continue
   261  		}
   262  		fc, err := l.filterChains.Lookup(resource.FilterChainLookupParams{
   263  			IsUnspecifiedListener: l.isUnspecifiedAddr,
   264  			DestAddr:              destAddr.IP,
   265  			SourceAddr:            srcAddr.IP,
   266  			SourcePort:            srcAddr.Port,
   267  		})
   268  		l.mu.RUnlock()
   269  		if err != nil {
   270  			// When a matching filter chain is not found, we close the
   271  			// connection right away, but do not return an error back to
   272  			// `grpc.Serve()` from where this Accept() was invoked. Returning an
   273  			// error to `grpc.Serve()` causes the server to shutdown. If we want
   274  			// to avoid the server from shutting down, we would need to return
   275  			// an error type which implements the `Temporary() bool` method,
   276  			// which is invoked by `grpc.Serve()` to see if the returned error
   277  			// represents a temporary condition. In the case of a temporary
   278  			// error, `grpc.Serve()` method sleeps for a small duration and
   279  			// therefore ends up blocking all connection attempts during that
   280  			// time frame, which is also not ideal for an error like this.
   281  			l.logger.Warnf("connection from %s to %s failed to find any matching filter chain", conn.RemoteAddr().String(), conn.LocalAddr().String())
   282  			conn.Close()
   283  			continue
   284  		}
   285  		if !envconfig.XDSRBAC {
   286  			return &connWrapper{Conn: conn, filterChain: fc, parent: l}, nil
   287  		}
   288  		var rc resource.RouteConfigUpdate
   289  		if fc.InlineRouteConfig != nil {
   290  			rc = *fc.InlineRouteConfig
   291  		} else {
   292  			rcPtr := atomic.LoadPointer(&l.rdsUpdates)
   293  			rcuPtr := (*map[string]resource.RouteConfigUpdate)(rcPtr)
   294  			// This shouldn't happen, but this error protects against a panic.
   295  			if rcuPtr == nil {
   296  				return nil, errors.New("route configuration pointer is nil")
   297  			}
   298  			rcu := *rcuPtr
   299  			rc = rcu[fc.RouteConfigName]
   300  		}
   301  		// The filter chain will construct a usuable route table on each
   302  		// connection accept. This is done because preinstantiating every route
   303  		// table before it is needed for a connection would potentially lead to
   304  		// a lot of cpu time and memory allocated for route tables that will
   305  		// never be used. There was also a thought to cache this configuration,
   306  		// and reuse it for the next accepted connection. However, this would
   307  		// lead to a lot of code complexity (RDS Updates for a given route name
   308  		// can come it at any time), and connections aren't accepted too often,
   309  		// so this reinstantation of the Route Configuration is an acceptable
   310  		// tradeoff for simplicity.
   311  		vhswi, err := fc.ConstructUsableRouteConfiguration(rc)
   312  		if err != nil {
   313  			l.logger.Warnf("route configuration construction: %v", err)
   314  			conn.Close()
   315  			continue
   316  		}
   317  		return &connWrapper{Conn: conn, filterChain: fc, parent: l, virtualHosts: vhswi}, nil
   318  	}
   319  }
   320  
   321  // Close closes the underlying listener. It also cancels the xDS watch
   322  // registered in Serve() and closes any certificate provider instances created
   323  // based on security configuration received in the LDS response.
   324  func (l *listenerWrapper) Close() error {
   325  	l.closed.Fire()
   326  	l.Listener.Close()
   327  	if l.cancelWatch != nil {
   328  		l.cancelWatch()
   329  	}
   330  	l.rdsHandler.close()
   331  	return nil
   332  }
   333  
   334  // run is a long running goroutine which handles all xds updates. LDS and RDS
   335  // push updates onto a channel which is read and acted upon from this goroutine.
   336  func (l *listenerWrapper) run() {
   337  	for {
   338  		select {
   339  		case <-l.closed.Done():
   340  			return
   341  		case u := <-l.ldsUpdateCh:
   342  			l.handleLDSUpdate(u)
   343  		case u := <-l.rdsUpdateCh:
   344  			l.handleRDSUpdate(u)
   345  		}
   346  	}
   347  }
   348  
   349  // handleLDSUpdate is the callback which handles LDS Updates. It writes the
   350  // received update to the update channel, which is picked up by the run
   351  // goroutine.
   352  func (l *listenerWrapper) handleListenerUpdate(update resource.ListenerUpdate, err error) {
   353  	if l.closed.HasFired() {
   354  		l.logger.Warnf("Resource %q received update: %v with error: %v, after listener was closed", l.name, update, err)
   355  		return
   356  	}
   357  	// Remove any existing entry in ldsUpdateCh and replace with the new one, as the only update
   358  	// listener cares about is most recent update.
   359  	select {
   360  	case <-l.ldsUpdateCh:
   361  	default:
   362  	}
   363  	l.ldsUpdateCh <- ldsUpdateWithError{update: update, err: err}
   364  }
   365  
   366  // handleRDSUpdate handles a full rds update from rds handler. On a successful
   367  // update, the server will switch to ServingModeServing as the full
   368  // configuration (both LDS and RDS) has been received.
   369  func (l *listenerWrapper) handleRDSUpdate(update rdsHandlerUpdate) {
   370  	if l.closed.HasFired() {
   371  		l.logger.Warnf("RDS received update: %v with error: %v, after listener was closed", update.updates, update.err)
   372  		return
   373  	}
   374  	if update.err != nil {
   375  		l.logger.Warnf("Received error for rds names specified in resource %q: %+v", l.name, update.err)
   376  		if resource.ErrType(update.err) == resource.ErrorTypeResourceNotFound {
   377  			l.switchMode(nil, connectivity.ServingModeNotServing, update.err)
   378  		}
   379  		// For errors which are anything other than "resource-not-found", we
   380  		// continue to use the old configuration.
   381  		return
   382  	}
   383  	atomic.StorePointer(&l.rdsUpdates, unsafe.Pointer(&update.updates))
   384  
   385  	l.switchMode(l.filterChains, connectivity.ServingModeServing, nil)
   386  	l.goodUpdate.Fire()
   387  }
   388  
   389  func (l *listenerWrapper) handleLDSUpdate(update ldsUpdateWithError) {
   390  	if update.err != nil {
   391  		l.logger.Warnf("Received error for resource %q: %+v", l.name, update.err)
   392  		if resource.ErrType(update.err) == resource.ErrorTypeResourceNotFound {
   393  			l.switchMode(nil, connectivity.ServingModeNotServing, update.err)
   394  		}
   395  		// For errors which are anything other than "resource-not-found", we
   396  		// continue to use the old configuration.
   397  		return
   398  	}
   399  	l.logger.Infof("Received update for resource %q: %+v", l.name, update.update)
   400  
   401  	// Make sure that the socket address on the received Listener resource
   402  	// matches the address of the net.Listener passed to us by the user. This
   403  	// check is done here instead of at the XDSClient layer because of the
   404  	// following couple of reasons:
   405  	// - XDSClient cannot know the listening address of every listener in the
   406  	//   system, and hence cannot perform this check.
   407  	// - this is a very context-dependent check and only the server has the
   408  	//   appropriate context to perform this check.
   409  	//
   410  	// What this means is that the XDSClient has ACKed a resource which can push
   411  	// the server into a "not serving" mode. This is not ideal, but this is
   412  	// what we have decided to do. See gRPC A36 for more details.
   413  	ilc := update.update.InboundListenerCfg
   414  	if ilc.Address != l.addr || ilc.Port != l.port {
   415  		l.switchMode(nil, connectivity.ServingModeNotServing, fmt.Errorf("address (%s:%s) in Listener update does not match listening address: (%s:%s)", ilc.Address, ilc.Port, l.addr, l.port))
   416  		return
   417  	}
   418  
   419  	// "Updates to a Listener cause all older connections on that Listener to be
   420  	// gracefully shut down with a grace period of 10 minutes for long-lived
   421  	// RPC's, such that clients will reconnect and have the updated
   422  	// configuration apply." - A36 Note that this is not the same as moving the
   423  	// Server's state to ServingModeNotServing. That prevents new connections
   424  	// from being accepted, whereas here we simply want the clients to reconnect
   425  	// to get the updated configuration.
   426  	if envconfig.XDSRBAC {
   427  		if l.drainCallback != nil {
   428  			l.drainCallback(l.Listener.Addr())
   429  		}
   430  	}
   431  	l.rdsHandler.updateRouteNamesToWatch(ilc.FilterChains.RouteConfigNames)
   432  	// If there are no dynamic RDS Configurations still needed to be received
   433  	// from the management server, this listener has all the configuration
   434  	// needed, and is ready to serve.
   435  	if len(ilc.FilterChains.RouteConfigNames) == 0 {
   436  		l.switchMode(ilc.FilterChains, connectivity.ServingModeServing, nil)
   437  		l.goodUpdate.Fire()
   438  	}
   439  }
   440  
   441  func (l *listenerWrapper) switchMode(fcs *resource.FilterChainManager, newMode connectivity.ServingMode, err error) {
   442  	l.mu.Lock()
   443  	defer l.mu.Unlock()
   444  
   445  	l.filterChains = fcs
   446  	l.mode = newMode
   447  	if l.modeCallback != nil {
   448  		l.modeCallback(l.Listener.Addr(), newMode, err)
   449  	}
   450  	l.logger.Warnf("Listener %q entering mode: %q due to error: %v", l.Addr(), newMode, err)
   451  }