github.com/criteo-forks/consul@v1.4.5-criteonogrpc/connect/resolver.go (about)

     1  package connect
     2  
import (
	"context"
	"fmt"
	"math/rand"
	"net"
	"strconv"
	"strings"

	"github.com/hashicorp/consul/agent/connect"
	"github.com/hashicorp/consul/api"
)
    12  
// Resolver is the interface implemented by a service discovery mechanism to get
// the address and identity of an instance to connect to via Connect as a
// client.
type Resolver interface {
	// Resolve returns a single service instance to connect to. Implementations
	// may attempt to ensure the instance returned is currently available. It is
	// expected that a client will re-dial on a connection failure, so making an
	// effort to return a different service instance each time where available
	// increases reliability.
	//
	// The context passed can be used to impose timeouts which may or may not be
	// respected by implementations that make network calls to resolve the
	// service.
	//
	// The addr returned is a string in any valid form for passing directly to
	// `net.Dial("tcp", addr)`. The certURI represents the identity of the
	// service instance. It will be matched against the TLS certificate URI SAN
	// presented by the server and the connection rejected if they don't match.
	Resolve(ctx context.Context) (addr string, certURI connect.CertURI, err error)
}
    30  
// StaticResolver is a statically defined resolver. This can be used to Dial a
// known Connect endpoint without performing service discovery.
type StaticResolver struct {
	// Addr is the network address (including port) of the instance. It must be
	// the connect-enabled mTLS listener and may be a proxy in front of the actual
	// target service process. It is a string in any valid form for passing
	// directly to net.Dial("tcp", addr).
	Addr string

	// CertURI is the identity we expect the server to present in its TLS
	// certificate. It must be an exact URI string match or the connection will be
	// rejected.
	CertURI connect.CertURI
}
    45  
    46  // Resolve implements Resolver by returning the static values.
    47  func (sr *StaticResolver) Resolve(ctx context.Context) (string, connect.CertURI, error) {
    48  	return sr.Addr, sr.CertURI, nil
    49  }
    50  
// Supported values for ConsulResolver.Type.
const (
	// ConsulResolverTypeService indicates resolving healthy service nodes. It
	// is the zero value, so it is what an unset ConsulResolver.Type selects.
	ConsulResolverTypeService int = iota

	// ConsulResolverTypePreparedQuery indicates resolving via prepared query.
	ConsulResolverTypePreparedQuery
)
    58  
// ConsulResolver queries Consul for a service instance.
type ConsulResolver struct {
	// Client is the Consul API client to use. Must be non-nil or Resolve will
	// panic.
	Client *api.Client

	// Namespace of the query target.
	// NOTE(review): the resolve methods in this file do not read this field;
	// the expected certificate identity is built with the namespace "default".
	Namespace string

	// Name of the query target: the service name for
	// ConsulResolverTypeService, or the value passed to the prepared query
	// execution for ConsulResolverTypePreparedQuery.
	Name string

	// Type of the query target. Should be one of the defined ConsulResolverType*
	// constants. Currently defaults to ConsulResolverTypeService. Any other
	// value makes Resolve return an error.
	Type int

	// Datacenter to resolve in, empty indicates agent's local DC.
	Datacenter string
}
    78  
    79  // Resolve performs service discovery against the local Consul agent and returns
    80  // the address and expected identity of a suitable service instance.
    81  func (cr *ConsulResolver) Resolve(ctx context.Context) (string, connect.CertURI, error) {
    82  	switch cr.Type {
    83  	case ConsulResolverTypeService:
    84  		return cr.resolveService(ctx)
    85  	case ConsulResolverTypePreparedQuery:
    86  		return cr.resolveQuery(ctx)
    87  	default:
    88  		return "", nil, fmt.Errorf("unknown resolver type")
    89  	}
    90  }
    91  
    92  func (cr *ConsulResolver) resolveService(ctx context.Context) (string, connect.CertURI, error) {
    93  	health := cr.Client.Health()
    94  
    95  	svcs, _, err := health.Connect(cr.Name, "", true, cr.queryOptions(ctx))
    96  	if err != nil {
    97  		return "", nil, err
    98  	}
    99  
   100  	if len(svcs) < 1 {
   101  		return "", nil, fmt.Errorf("no healthy instances found")
   102  	}
   103  
   104  	// Services are not shuffled by HTTP API, pick one at (pseudo) random.
   105  	idx := 0
   106  	if len(svcs) > 1 {
   107  		idx = rand.Intn(len(svcs))
   108  	}
   109  
   110  	return cr.resolveServiceEntry(svcs[idx])
   111  }
   112  
   113  func (cr *ConsulResolver) resolveQuery(ctx context.Context) (string, connect.CertURI, error) {
   114  	resp, _, err := cr.Client.PreparedQuery().Execute(cr.Name, cr.queryOptions(ctx))
   115  	if err != nil {
   116  		return "", nil, err
   117  	}
   118  
   119  	svcs := resp.Nodes
   120  	if len(svcs) < 1 {
   121  		return "", nil, fmt.Errorf("no healthy instances found")
   122  	}
   123  
   124  	// Services are not shuffled by HTTP API, pick one at (pseudo) random.
   125  	idx := 0
   126  	if len(svcs) > 1 {
   127  		idx = rand.Intn(len(svcs))
   128  	}
   129  
   130  	return cr.resolveServiceEntry(&svcs[idx])
   131  }
   132  
   133  func (cr *ConsulResolver) resolveServiceEntry(entry *api.ServiceEntry) (string, connect.CertURI, error) {
   134  	addr := entry.Service.Address
   135  	if addr == "" {
   136  		addr = entry.Node.Address
   137  	}
   138  	port := entry.Service.Port
   139  
   140  	service := entry.Service.Proxy.DestinationServiceName
   141  	if entry.Service.Connect != nil && entry.Service.Connect.Native {
   142  		service = entry.Service.Service
   143  	}
   144  	if service == "" {
   145  		// Shouldn't happen but to protect against bugs in agent API returning bad
   146  		// service response...
   147  		return "", nil, fmt.Errorf("not a valid connect service")
   148  	}
   149  
   150  	// Generate the expected CertURI
   151  	certURI := &connect.SpiffeIDService{
   152  		// No host since we don't validate trust domain here (we rely on x509 to
   153  		// prove trust).
   154  		Namespace:  "default",
   155  		Datacenter: entry.Node.Datacenter,
   156  		Service:    service,
   157  	}
   158  
   159  	return fmt.Sprintf("%s:%d", addr, port), certURI, nil
   160  }
   161  
   162  func (cr *ConsulResolver) queryOptions(ctx context.Context) *api.QueryOptions {
   163  	q := &api.QueryOptions{
   164  		// We may make this configurable one day but we may also implement our own
   165  		// caching which is even more stale so...
   166  		AllowStale: true,
   167  		Datacenter: cr.Datacenter,
   168  
   169  		// For prepared queries
   170  		Connect: true,
   171  	}
   172  	return q.WithContext(ctx)
   173  }
   174  
   175  // ConsulResolverFromAddrFunc returns a function for constructing ConsulResolver
   176  // from a consul DNS formatted hostname (e.g. foo.service.consul or
   177  // foo.query.consul).
   178  //
   179  // Note, the returned ConsulResolver resolves the query via regular agent HTTP
   180  // discovery API. DNS is not needed or used for discovery, only the hostname
   181  // format re-used for consistency.
   182  func ConsulResolverFromAddrFunc(client *api.Client) func(addr string) (Resolver, error) {
   183  	// Capture client dependency
   184  	return func(addr string) (Resolver, error) {
   185  		// Http clients might provide hostname and port
   186  		host := strings.ToLower(stripPort(addr))
   187  
   188  		// For now we force use of `.consul` TLD regardless of the configured domain
   189  		// on the cluster. That's because we don't know that domain here and it
   190  		// would be really complicated to discover it inline here. We do however
   191  		// need to be able to distinguish a hostname with the optional datacenter
   192  		// segment which we can't do unambiguously if we allow arbitrary trailing
   193  		// domains.
   194  		domain := ".consul"
   195  		if !strings.HasSuffix(host, domain) {
   196  			return nil, fmt.Errorf("invalid Consul DNS domain: note Connect SDK " +
   197  				"currently requires use of .consul domain even if cluster is " +
   198  				"configured with a different domain.")
   199  		}
   200  
   201  		// Remove the domain suffix
   202  		host = host[0 : len(host)-len(domain)]
   203  
   204  		parts := strings.Split(host, ".")
   205  		numParts := len(parts)
   206  
   207  		r := &ConsulResolver{
   208  			Client:    client,
   209  			Namespace: "default",
   210  		}
   211  
   212  		// Note that 3 segments may be a valid DNS name like
   213  		// <tag>.<service>.service.consul but not one we support, it might also be
   214  		// <service>.service.<datacenter>.consul which we do want to support so we
   215  		// have to figure out if the last segment is supported keyword and if not
   216  		// check if the supported keyword is further up...
   217  
   218  		// To simplify logic for now, we must match one of the following (not domain
   219  		// is stripped):
   220  		//  <name>.[service|query]
   221  		//  <name>.[service|query].<dc>
   222  		if numParts < 2 || numParts > 3 || !supportedTypeLabel(parts[1]) {
   223  			return nil, fmt.Errorf("unsupported Consul DNS domain: must be either " +
   224  				"<name>.service[.<datacenter>].consul or " +
   225  				"<name>.query[.<datacenter>].consul")
   226  		}
   227  
   228  		if numParts == 3 {
   229  			// Must be datacenter case
   230  			r.Datacenter = parts[2]
   231  		}
   232  
   233  		// By know we must have a supported query type which means at least 2
   234  		// elements with first 2 being name, and type respectively.
   235  		r.Name = parts[0]
   236  		switch parts[1] {
   237  		case "service":
   238  			r.Type = ConsulResolverTypeService
   239  		case "query":
   240  			r.Type = ConsulResolverTypePreparedQuery
   241  		default:
   242  			// This should never happen (tm) unless the supportedTypeLabel
   243  			// implementation is changed and this switch isn't.
   244  			return nil, fmt.Errorf("invalid discovery type")
   245  		}
   246  
   247  		return r, nil
   248  	}
   249  }
   250  
   251  func supportedTypeLabel(label string) bool {
   252  	return label == "service" || label == "query"
   253  }
   254  
   255  // stripPort copied from net/url/url.go
   256  func stripPort(hostport string) string {
   257  	colon := strings.IndexByte(hostport, ':')
   258  	if colon == -1 {
   259  		return hostport
   260  	}
   261  	if i := strings.IndexByte(hostport, ']'); i != -1 {
   262  		return strings.TrimPrefix(hostport[:i], "[")
   263  	}
   264  	return hostport[:colon]
   265  }