github.phpd.cn/hashicorp/consul@v1.4.5/agent/consul/prepared_query_endpoint.go (about)

     1  package consul
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"log"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/armon/go-metrics"
    11  	"github.com/hashicorp/consul/acl"
    12  	"github.com/hashicorp/consul/agent/consul/state"
    13  	"github.com/hashicorp/consul/agent/structs"
    14  	"github.com/hashicorp/go-memdb"
    15  	"github.com/hashicorp/go-uuid"
    16  )
    17  
    18  var (
    19  	// ErrQueryNotFound is returned if the query lookup failed.
    20  	ErrQueryNotFound = errors.New("Query not found")
    21  )
    22  
    23  // PreparedQuery manages the prepared query endpoint.
    24  type PreparedQuery struct {
    25  	srv *Server
    26  }
    27  
    28  // Apply is used to apply a modifying request to the data store. This should
    29  // only be used for operations that modify the data. The ID of the session is
    30  // returned in the reply.
    31  func (p *PreparedQuery) Apply(args *structs.PreparedQueryRequest, reply *string) (err error) {
    32  	if done, err := p.srv.forward("PreparedQuery.Apply", args, args, reply); done {
    33  		return err
    34  	}
    35  	defer metrics.MeasureSince([]string{"prepared-query", "apply"}, time.Now())
    36  
    37  	// Validate the ID. We must create new IDs before applying to the Raft
    38  	// log since it's not deterministic.
    39  	if args.Op == structs.PreparedQueryCreate {
    40  		if args.Query.ID != "" {
    41  			return fmt.Errorf("ID must be empty when creating a new prepared query")
    42  		}
    43  
    44  		// We are relying on the fact that UUIDs are random and unlikely
    45  		// to collide since this isn't inside a write transaction.
    46  		state := p.srv.fsm.State()
    47  		for {
    48  			if args.Query.ID, err = uuid.GenerateUUID(); err != nil {
    49  				return fmt.Errorf("UUID generation for prepared query failed: %v", err)
    50  			}
    51  			_, query, err := state.PreparedQueryGet(nil, args.Query.ID)
    52  			if err != nil {
    53  				return fmt.Errorf("Prepared query lookup failed: %v", err)
    54  			}
    55  			if query == nil {
    56  				break
    57  			}
    58  		}
    59  	}
    60  	*reply = args.Query.ID
    61  
    62  	// Get the ACL token for the request for the checks below.
    63  	rule, err := p.srv.ResolveToken(args.Token)
    64  	if err != nil {
    65  		return err
    66  	}
    67  
    68  	// If prefix ACLs apply to the incoming query, then do an ACL check. We
    69  	// need to make sure they have write access for whatever they are
    70  	// proposing.
    71  	if prefix, ok := args.Query.GetACLPrefix(); ok {
    72  		if rule != nil && !rule.PreparedQueryWrite(prefix) {
    73  			p.srv.logger.Printf("[WARN] consul.prepared_query: Operation on prepared query '%s' denied due to ACLs", args.Query.ID)
    74  			return acl.ErrPermissionDenied
    75  		}
    76  	}
    77  
    78  	// This is the second part of the check above. If they are referencing
    79  	// an existing query then make sure it exists and that they have write
    80  	// access to whatever they are changing, if prefix ACLs apply to it.
    81  	if args.Op != structs.PreparedQueryCreate {
    82  		state := p.srv.fsm.State()
    83  		_, query, err := state.PreparedQueryGet(nil, args.Query.ID)
    84  		if err != nil {
    85  			return fmt.Errorf("Prepared Query lookup failed: %v", err)
    86  		}
    87  		if query == nil {
    88  			return fmt.Errorf("Cannot modify non-existent prepared query: '%s'", args.Query.ID)
    89  		}
    90  
    91  		if prefix, ok := query.GetACLPrefix(); ok {
    92  			if rule != nil && !rule.PreparedQueryWrite(prefix) {
    93  				p.srv.logger.Printf("[WARN] consul.prepared_query: Operation on prepared query '%s' denied due to ACLs", args.Query.ID)
    94  				return acl.ErrPermissionDenied
    95  			}
    96  		}
    97  	}
    98  
    99  	// Parse the query and prep it for the state store.
   100  	switch args.Op {
   101  	case structs.PreparedQueryCreate, structs.PreparedQueryUpdate:
   102  		if err := parseQuery(args.Query, p.srv.config.ACLEnforceVersion8); err != nil {
   103  			return fmt.Errorf("Invalid prepared query: %v", err)
   104  		}
   105  
   106  	case structs.PreparedQueryDelete:
   107  		// Nothing else to verify here, just do the delete (we only look
   108  		// at the ID field for this op).
   109  
   110  	default:
   111  		return fmt.Errorf("Unknown prepared query operation: %s", args.Op)
   112  	}
   113  
   114  	// Commit the query to the state store.
   115  	resp, err := p.srv.raftApply(structs.PreparedQueryRequestType, args)
   116  	if err != nil {
   117  		p.srv.logger.Printf("[ERR] consul.prepared_query: Apply failed %v", err)
   118  		return err
   119  	}
   120  	if respErr, ok := resp.(error); ok {
   121  		return respErr
   122  	}
   123  
   124  	return nil
   125  }
   126  
   127  // parseQuery makes sure the entries of a query are valid for a create or
   128  // update operation. Some of the fields are not checked or are partially
   129  // checked, as noted in the comments below. This also updates all the parsed
   130  // fields of the query.
   131  func parseQuery(query *structs.PreparedQuery, enforceVersion8 bool) error {
   132  	// We skip a few fields:
   133  	// - ID is checked outside this fn.
   134  	// - Name is optional with no restrictions, except for uniqueness which
   135  	//   is checked for integrity during the transaction. We also make sure
   136  	//   names do not overlap with IDs, which is also checked during the
   137  	//   transaction. Otherwise, people could "steal" queries that they don't
   138  	//   have proper ACL rights to change.
   139  	// - Template is checked during the transaction since that's where we
   140  	//   compile it.
   141  
   142  	// Anonymous queries require a session or need to be part of a template.
   143  	if enforceVersion8 {
   144  		if query.Name == "" && query.Template.Type == "" && query.Session == "" {
   145  			return fmt.Errorf("Must be bound to a session")
   146  		}
   147  	}
   148  
   149  	// Token is checked when the query is executed, but we do make sure the
   150  	// user hasn't accidentally pasted-in the special redacted token name,
   151  	// which if we allowed in would be super hard to debug and understand.
   152  	if query.Token == redactedToken {
   153  		return fmt.Errorf("Bad Token '%s', it looks like a query definition with a redacted token was submitted", query.Token)
   154  	}
   155  
   156  	// Parse the service query sub-structure.
   157  	if err := parseService(&query.Service); err != nil {
   158  		return err
   159  	}
   160  
   161  	// Parse the DNS options sub-structure.
   162  	if err := parseDNS(&query.DNS); err != nil {
   163  		return err
   164  	}
   165  
   166  	return nil
   167  }
   168  
   169  // parseService makes sure the entries of a query are valid for a create or
   170  // update operation. Some of the fields are not checked or are partially
   171  // checked, as noted in the comments below. This also updates all the parsed
   172  // fields of the query.
   173  func parseService(svc *structs.ServiceQuery) error {
   174  	// Service is required.
   175  	if svc.Service == "" {
   176  		return fmt.Errorf("Must provide a Service name to query")
   177  	}
   178  
   179  	// NearestN can be 0 which means "don't fail over by RTT".
   180  	if svc.Failover.NearestN < 0 {
   181  		return fmt.Errorf("Bad NearestN '%d', must be >= 0", svc.Failover.NearestN)
   182  	}
   183  
   184  	// Make sure the metadata filters are valid
   185  	if err := structs.ValidateMetadata(svc.NodeMeta, true); err != nil {
   186  		return err
   187  	}
   188  
   189  	// We skip a few fields:
   190  	// - There's no validation for Datacenters; we skip any unknown entries
   191  	//   at execution time.
   192  	// - OnlyPassing is just a boolean so doesn't need further validation.
   193  	// - Tags is a free-form list of tags and doesn't need further validation.
   194  
   195  	return nil
   196  }
   197  
   198  // parseDNS makes sure the entries of a query are valid for a create or
   199  // update operation. This also updates all the parsed fields of the query.
   200  func parseDNS(dns *structs.QueryDNSOptions) error {
   201  	if dns.TTL != "" {
   202  		ttl, err := time.ParseDuration(dns.TTL)
   203  		if err != nil {
   204  			return fmt.Errorf("Bad DNS TTL '%s': %v", dns.TTL, err)
   205  		}
   206  
   207  		if ttl < 0 {
   208  			return fmt.Errorf("DNS TTL '%d', must be >=0", ttl)
   209  		}
   210  	}
   211  
   212  	return nil
   213  }
   214  
   215  // Get returns a single prepared query by ID.
   216  func (p *PreparedQuery) Get(args *structs.PreparedQuerySpecificRequest,
   217  	reply *structs.IndexedPreparedQueries) error {
   218  	if done, err := p.srv.forward("PreparedQuery.Get", args, args, reply); done {
   219  		return err
   220  	}
   221  
   222  	return p.srv.blockingQuery(
   223  		&args.QueryOptions,
   224  		&reply.QueryMeta,
   225  		func(ws memdb.WatchSet, state *state.Store) error {
   226  			index, query, err := state.PreparedQueryGet(ws, args.QueryID)
   227  			if err != nil {
   228  				return err
   229  			}
   230  			if query == nil {
   231  				return ErrQueryNotFound
   232  			}
   233  
   234  			// If no prefix ACL applies to this query, then they are
   235  			// always allowed to see it if they have the ID. We still
   236  			// have to filter the remaining object for tokens.
   237  			reply.Index = index
   238  			reply.Queries = structs.PreparedQueries{query}
   239  			if _, ok := query.GetACLPrefix(); !ok {
   240  				return p.srv.filterACL(args.Token, &reply.Queries[0])
   241  			}
   242  
   243  			// Otherwise, attempt to filter it the usual way.
   244  			if err := p.srv.filterACL(args.Token, reply); err != nil {
   245  				return err
   246  			}
   247  
   248  			// Since this is a GET of a specific query, if ACLs have
   249  			// prevented us from returning something that exists,
   250  			// then alert the user with a permission denied error.
   251  			if len(reply.Queries) == 0 {
   252  				p.srv.logger.Printf("[WARN] consul.prepared_query: Request to get prepared query '%s' denied due to ACLs", args.QueryID)
   253  				return acl.ErrPermissionDenied
   254  			}
   255  
   256  			return nil
   257  		})
   258  }
   259  
   260  // List returns all the prepared queries.
   261  func (p *PreparedQuery) List(args *structs.DCSpecificRequest, reply *structs.IndexedPreparedQueries) error {
   262  	if done, err := p.srv.forward("PreparedQuery.List", args, args, reply); done {
   263  		return err
   264  	}
   265  
   266  	return p.srv.blockingQuery(
   267  		&args.QueryOptions,
   268  		&reply.QueryMeta,
   269  		func(ws memdb.WatchSet, state *state.Store) error {
   270  			index, queries, err := state.PreparedQueryList(ws)
   271  			if err != nil {
   272  				return err
   273  			}
   274  
   275  			reply.Index, reply.Queries = index, queries
   276  			return p.srv.filterACL(args.Token, reply)
   277  		})
   278  }
   279  
   280  // Explain resolves a prepared query and returns the (possibly rendered template)
   281  // to the caller. This is useful for letting operators figure out which query is
   282  // picking up a given name. We can also add additional info about how the query
   283  // will be executed here.
   284  func (p *PreparedQuery) Explain(args *structs.PreparedQueryExecuteRequest,
   285  	reply *structs.PreparedQueryExplainResponse) error {
   286  	if done, err := p.srv.forward("PreparedQuery.Explain", args, args, reply); done {
   287  		return err
   288  	}
   289  	defer metrics.MeasureSince([]string{"prepared-query", "explain"}, time.Now())
   290  
   291  	// We have to do this ourselves since we are not doing a blocking RPC.
   292  	p.srv.setQueryMeta(&reply.QueryMeta)
   293  	if args.RequireConsistent {
   294  		if err := p.srv.consistentRead(); err != nil {
   295  			return err
   296  		}
   297  	}
   298  
   299  	// Try to locate the query.
   300  	state := p.srv.fsm.State()
   301  	_, query, err := state.PreparedQueryResolve(args.QueryIDOrName, args.Agent)
   302  	if err != nil {
   303  		return err
   304  	}
   305  	if query == nil {
   306  		return ErrQueryNotFound
   307  	}
   308  
   309  	// Place the query into a list so we can run the standard ACL filter on
   310  	// it.
   311  	queries := &structs.IndexedPreparedQueries{
   312  		Queries: structs.PreparedQueries{query},
   313  	}
   314  	if err := p.srv.filterACL(args.Token, queries); err != nil {
   315  		return err
   316  	}
   317  
   318  	// If the query was filtered out, return an error.
   319  	if len(queries.Queries) == 0 {
   320  		p.srv.logger.Printf("[WARN] consul.prepared_query: Explain on prepared query '%s' denied due to ACLs", query.ID)
   321  		return acl.ErrPermissionDenied
   322  	}
   323  
   324  	reply.Query = *(queries.Queries[0])
   325  	return nil
   326  }
   327  
   328  // Execute runs a prepared query and returns the results. This will perform the
   329  // failover logic if no local results are available. This is typically called as
   330  // part of a DNS lookup, or when executing prepared queries from the HTTP API.
   331  func (p *PreparedQuery) Execute(args *structs.PreparedQueryExecuteRequest,
   332  	reply *structs.PreparedQueryExecuteResponse) error {
   333  	if done, err := p.srv.forward("PreparedQuery.Execute", args, args, reply); done {
   334  		return err
   335  	}
   336  	defer metrics.MeasureSince([]string{"prepared-query", "execute"}, time.Now())
   337  
   338  	// We have to do this ourselves since we are not doing a blocking RPC.
   339  	p.srv.setQueryMeta(&reply.QueryMeta)
   340  	if args.RequireConsistent {
   341  		if err := p.srv.consistentRead(); err != nil {
   342  			return err
   343  		}
   344  	}
   345  
   346  	// Try to locate the query.
   347  	state := p.srv.fsm.State()
   348  	_, query, err := state.PreparedQueryResolve(args.QueryIDOrName, args.Agent)
   349  	if err != nil {
   350  		return err
   351  	}
   352  	if query == nil {
   353  		return ErrQueryNotFound
   354  	}
   355  
   356  	// Execute the query for the local DC.
   357  	if err := p.execute(query, reply, args.Connect); err != nil {
   358  		return err
   359  	}
   360  
   361  	// If they supplied a token with the query, use that, otherwise use the
   362  	// token passed in with the request.
   363  	token := args.QueryOptions.Token
   364  	if query.Token != "" {
   365  		token = query.Token
   366  	}
   367  	if err := p.srv.filterACL(token, &reply.Nodes); err != nil {
   368  		return err
   369  	}
   370  
   371  	// TODO (slackpad) We could add a special case here that will avoid the
   372  	// fail over if we filtered everything due to ACLs. This seems like it
   373  	// might not be worth the code complexity and behavior differences,
   374  	// though, since this is essentially a misconfiguration.
   375  
   376  	// Shuffle the results in case coordinates are not available if they
   377  	// requested an RTT sort.
   378  	reply.Nodes.Shuffle()
   379  
   380  	// Build the query source. This can be provided by the client, or by
   381  	// the prepared query. Client-specified takes priority.
   382  	qs := args.Source
   383  	if qs.Datacenter == "" {
   384  		qs.Datacenter = args.Agent.Datacenter
   385  	}
   386  	if query.Service.Near != "" && qs.Node == "" {
   387  		qs.Node = query.Service.Near
   388  	}
   389  
   390  	// Respect the magic "_agent" flag.
   391  	if qs.Node == "_agent" {
   392  		qs.Node = args.Agent.Node
   393  	} else if qs.Node == "_ip" {
   394  		if args.Source.Ip != "" {
   395  			_, nodes, err := state.Nodes(nil)
   396  			if err != nil {
   397  				return err
   398  			}
   399  
   400  			for _, node := range nodes {
   401  				if args.Source.Ip == node.Address {
   402  					qs.Node = node.Node
   403  					break
   404  				}
   405  			}
   406  		} else {
   407  			p.srv.logger.Printf("[WARN] Prepared Query using near=_ip requires " +
   408  				"the source IP to be set but none was provided. No distance " +
   409  				"sorting will be done.")
   410  
   411  		}
   412  
   413  		// Either a source IP was given but we couldnt find the associated node
   414  		// or no source ip was given. In both cases we should wipe the Node value
   415  		if qs.Node == "_ip" {
   416  			qs.Node = ""
   417  		}
   418  	}
   419  
   420  	// Perform the distance sort
   421  	err = p.srv.sortNodesByDistanceFrom(qs, reply.Nodes)
   422  	if err != nil {
   423  		return err
   424  	}
   425  
   426  	// If we applied a distance sort, make sure that the node queried for is in
   427  	// position 0, provided the results are from the same datacenter.
   428  	if qs.Node != "" && reply.Datacenter == qs.Datacenter {
   429  		for i, node := range reply.Nodes {
   430  			if node.Node.Node == qs.Node {
   431  				reply.Nodes[0], reply.Nodes[i] = reply.Nodes[i], reply.Nodes[0]
   432  				break
   433  			}
   434  
   435  			// Put a cap on the depth of the search. The local agent should
   436  			// never be further in than this if distance sorting was applied.
   437  			if i == 9 {
   438  				break
   439  			}
   440  		}
   441  	}
   442  
   443  	// Apply the limit if given.
   444  	if args.Limit > 0 && len(reply.Nodes) > args.Limit {
   445  		reply.Nodes = reply.Nodes[:args.Limit]
   446  	}
   447  
   448  	// In the happy path where we found some healthy nodes we go with that
   449  	// and bail out. Otherwise, we fail over and try remote DCs, as allowed
   450  	// by the query setup.
   451  	if len(reply.Nodes) == 0 {
   452  		wrapper := &queryServerWrapper{p.srv}
   453  		if err := queryFailover(wrapper, query, args, reply); err != nil {
   454  			return err
   455  		}
   456  	}
   457  
   458  	return nil
   459  }
   460  
   461  // ExecuteRemote is used when a local node doesn't have any instances of a
   462  // service available and needs to probe remote DCs. This sends the full query
   463  // over since the remote side won't have it in its state store, and this doesn't
   464  // do the failover logic since that's already being run on the originating DC.
   465  // We don't want things to fan out further than one level.
   466  func (p *PreparedQuery) ExecuteRemote(args *structs.PreparedQueryExecuteRemoteRequest,
   467  	reply *structs.PreparedQueryExecuteResponse) error {
   468  	if done, err := p.srv.forward("PreparedQuery.ExecuteRemote", args, args, reply); done {
   469  		return err
   470  	}
   471  	defer metrics.MeasureSince([]string{"prepared-query", "execute_remote"}, time.Now())
   472  
   473  	// We have to do this ourselves since we are not doing a blocking RPC.
   474  	p.srv.setQueryMeta(&reply.QueryMeta)
   475  	if args.RequireConsistent {
   476  		if err := p.srv.consistentRead(); err != nil {
   477  			return err
   478  		}
   479  	}
   480  
   481  	// Run the query locally to see what we can find.
   482  	if err := p.execute(&args.Query, reply, args.Connect); err != nil {
   483  		return err
   484  	}
   485  
   486  	// If they supplied a token with the query, use that, otherwise use the
   487  	// token passed in with the request.
   488  	token := args.QueryOptions.Token
   489  	if args.Query.Token != "" {
   490  		token = args.Query.Token
   491  	}
   492  	if err := p.srv.filterACL(token, &reply.Nodes); err != nil {
   493  		return err
   494  	}
   495  
   496  	// We don't bother trying to do an RTT sort here since we are by
   497  	// definition in another DC. We just shuffle to make sure that we
   498  	// balance the load across the results.
   499  	reply.Nodes.Shuffle()
   500  
   501  	// Apply the limit if given.
   502  	if args.Limit > 0 && len(reply.Nodes) > args.Limit {
   503  		reply.Nodes = reply.Nodes[:args.Limit]
   504  	}
   505  
   506  	return nil
   507  }
   508  
   509  // execute runs a prepared query in the local DC without any failover. We don't
   510  // apply any sorting options or ACL checks at this level - it should be done up above.
   511  func (p *PreparedQuery) execute(query *structs.PreparedQuery,
   512  	reply *structs.PreparedQueryExecuteResponse,
   513  	forceConnect bool) error {
   514  	state := p.srv.fsm.State()
   515  
   516  	// If we're requesting Connect-capable services, then switch the
   517  	// lookup to be the Connect function.
   518  	f := state.CheckServiceNodes
   519  	if query.Service.Connect || forceConnect {
   520  		f = state.CheckConnectServiceNodes
   521  	}
   522  
   523  	_, nodes, err := f(nil, query.Service.Service)
   524  	if err != nil {
   525  		return err
   526  	}
   527  
   528  	// Filter out any unhealthy nodes.
   529  	nodes = nodes.FilterIgnore(query.Service.OnlyPassing,
   530  		query.Service.IgnoreCheckIDs)
   531  
   532  	// Apply the node metadata filters, if any.
   533  	if len(query.Service.NodeMeta) > 0 {
   534  		nodes = nodeMetaFilter(query.Service.NodeMeta, nodes)
   535  	}
   536  
   537  	// Apply the service metadata filters, if any.
   538  	if len(query.Service.ServiceMeta) > 0 {
   539  		nodes = serviceMetaFilter(query.Service.ServiceMeta, nodes)
   540  	}
   541  
   542  	// Apply the tag filters, if any.
   543  	if len(query.Service.Tags) > 0 {
   544  		nodes = tagFilter(query.Service.Tags, nodes)
   545  	}
   546  
   547  	// Capture the nodes and pass the DNS information through to the reply.
   548  	reply.Service = query.Service.Service
   549  	reply.Nodes = nodes
   550  	reply.DNS = query.DNS
   551  
   552  	// Stamp the result for this datacenter.
   553  	reply.Datacenter = p.srv.config.Datacenter
   554  
   555  	return nil
   556  }
   557  
   558  // tagFilter returns a list of nodes who satisfy the given tags. Nodes must have
   559  // ALL the given tags, and NONE of the forbidden tags (prefixed with !). Note
   560  // for performance this modifies the original slice.
   561  func tagFilter(tags []string, nodes structs.CheckServiceNodes) structs.CheckServiceNodes {
   562  	// Build up lists of required and disallowed tags.
   563  	must, not := make([]string, 0), make([]string, 0)
   564  	for _, tag := range tags {
   565  		tag = strings.ToLower(tag)
   566  		if strings.HasPrefix(tag, "!") {
   567  			tag = tag[1:]
   568  			not = append(not, tag)
   569  		} else {
   570  			must = append(must, tag)
   571  		}
   572  	}
   573  
   574  	n := len(nodes)
   575  	for i := 0; i < n; i++ {
   576  		node := nodes[i]
   577  
   578  		// Index the tags so lookups this way are cheaper.
   579  		index := make(map[string]struct{})
   580  		if node.Service != nil {
   581  			for _, tag := range node.Service.Tags {
   582  				tag = strings.ToLower(tag)
   583  				index[tag] = struct{}{}
   584  			}
   585  		}
   586  
   587  		// Bail if any of the required tags are missing.
   588  		for _, tag := range must {
   589  			if _, ok := index[tag]; !ok {
   590  				goto DELETE
   591  			}
   592  		}
   593  
   594  		// Bail if any of the disallowed tags are present.
   595  		for _, tag := range not {
   596  			if _, ok := index[tag]; ok {
   597  				goto DELETE
   598  			}
   599  		}
   600  
   601  		// At this point, the service is ok to leave in the list.
   602  		continue
   603  
   604  	DELETE:
   605  		nodes[i], nodes[n-1] = nodes[n-1], structs.CheckServiceNode{}
   606  		n--
   607  		i--
   608  	}
   609  	return nodes[:n]
   610  }
   611  
   612  // nodeMetaFilter returns a list of the nodes who satisfy the given metadata filters. Nodes
   613  // must have ALL the given tags.
   614  func nodeMetaFilter(filters map[string]string, nodes structs.CheckServiceNodes) structs.CheckServiceNodes {
   615  	var filtered structs.CheckServiceNodes
   616  	for _, node := range nodes {
   617  		if structs.SatisfiesMetaFilters(node.Node.Meta, filters) {
   618  			filtered = append(filtered, node)
   619  		}
   620  	}
   621  	return filtered
   622  }
   623  
   624  func serviceMetaFilter(filters map[string]string, nodes structs.CheckServiceNodes) structs.CheckServiceNodes {
   625  	var filtered structs.CheckServiceNodes
   626  	for _, node := range nodes {
   627  		if structs.SatisfiesMetaFilters(node.Service.Meta, filters) {
   628  			filtered = append(filtered, node)
   629  		}
   630  	}
   631  	return filtered
   632  }
   633  
   634  // queryServer is a wrapper that makes it easier to test the failover logic.
   635  type queryServer interface {
   636  	GetLogger() *log.Logger
   637  	GetOtherDatacentersByDistance() ([]string, error)
   638  	ForwardDC(method, dc string, args interface{}, reply interface{}) error
   639  }
   640  
   641  // queryServerWrapper applies the queryServer interface to a Server.
   642  type queryServerWrapper struct {
   643  	srv *Server
   644  }
   645  
   646  // GetLogger returns the server's logger.
   647  func (q *queryServerWrapper) GetLogger() *log.Logger {
   648  	return q.srv.logger
   649  }
   650  
   651  // GetOtherDatacentersByDistance calls into the server's fn and filters out the
   652  // server's own DC.
   653  func (q *queryServerWrapper) GetOtherDatacentersByDistance() ([]string, error) {
   654  	// TODO (slackpad) - We should cache this result since it's expensive to
   655  	// compute.
   656  	dcs, err := q.srv.router.GetDatacentersByDistance()
   657  	if err != nil {
   658  		return nil, err
   659  	}
   660  
   661  	var result []string
   662  	for _, dc := range dcs {
   663  		if dc != q.srv.config.Datacenter {
   664  			result = append(result, dc)
   665  		}
   666  	}
   667  	return result, nil
   668  }
   669  
   670  // ForwardDC calls into the server's RPC forwarder.
   671  func (q *queryServerWrapper) ForwardDC(method, dc string, args interface{}, reply interface{}) error {
   672  	return q.srv.forwardDC(method, dc, args, reply)
   673  }
   674  
   675  // queryFailover runs an algorithm to determine which DCs to try and then calls
   676  // them to try to locate alternative services.
   677  func queryFailover(q queryServer, query *structs.PreparedQuery,
   678  	args *structs.PreparedQueryExecuteRequest,
   679  	reply *structs.PreparedQueryExecuteResponse) error {
   680  
   681  	// Pull the list of other DCs. This is sorted by RTT in case the user
   682  	// has selected that.
   683  	nearest, err := q.GetOtherDatacentersByDistance()
   684  	if err != nil {
   685  		return err
   686  	}
   687  
   688  	// This will help us filter unknown DCs supplied by the user.
   689  	known := make(map[string]struct{})
   690  	for _, dc := range nearest {
   691  		known[dc] = struct{}{}
   692  	}
   693  
   694  	// Build a candidate list of DCs to try, starting with the nearest N
   695  	// from RTTs.
   696  	var dcs []string
   697  	index := make(map[string]struct{})
   698  	if query.Service.Failover.NearestN > 0 {
   699  		for i, dc := range nearest {
   700  			if !(i < query.Service.Failover.NearestN) {
   701  				break
   702  			}
   703  
   704  			dcs = append(dcs, dc)
   705  			index[dc] = struct{}{}
   706  		}
   707  	}
   708  
   709  	// Then add any DCs explicitly listed that weren't selected above.
   710  	for _, dc := range query.Service.Failover.Datacenters {
   711  		// This will prevent a log of other log spammage if we do not
   712  		// attempt to talk to datacenters we don't know about.
   713  		if _, ok := known[dc]; !ok {
   714  			q.GetLogger().Printf("[DEBUG] consul.prepared_query: Skipping unknown datacenter '%s' in prepared query", dc)
   715  			continue
   716  		}
   717  
   718  		// This will make sure we don't re-try something that fails
   719  		// from the NearestN list.
   720  		if _, ok := index[dc]; !ok {
   721  			dcs = append(dcs, dc)
   722  		}
   723  	}
   724  
   725  	// Now try the selected DCs in priority order.
   726  	failovers := 0
   727  	for _, dc := range dcs {
   728  		// This keeps track of how many iterations we actually run.
   729  		failovers++
   730  
   731  		// Be super paranoid and set the nodes slice to nil since it's
   732  		// the same slice we used before. We know there's nothing in
   733  		// there, but the underlying msgpack library has a policy of
   734  		// updating the slice when it's non-nil, and that feels dirty.
   735  		// Let's just set it to nil so there's no way to communicate
   736  		// through this slice across successive RPC calls.
   737  		reply.Nodes = nil
   738  
   739  		// Note that we pass along the limit since it can be applied
   740  		// remotely to save bandwidth. We also pass along the consistency
   741  		// mode information and token we were given, so that applies to
   742  		// the remote query as well.
   743  		remote := &structs.PreparedQueryExecuteRemoteRequest{
   744  			Datacenter:   dc,
   745  			Query:        *query,
   746  			Limit:        args.Limit,
   747  			QueryOptions: args.QueryOptions,
   748  			Connect:      args.Connect,
   749  		}
   750  		if err := q.ForwardDC("PreparedQuery.ExecuteRemote", dc, remote, reply); err != nil {
   751  			q.GetLogger().Printf("[WARN] consul.prepared_query: Failed querying for service '%s' in datacenter '%s': %s", query.Service.Service, dc, err)
   752  			continue
   753  		}
   754  
   755  		// We can stop if we found some nodes.
   756  		if len(reply.Nodes) > 0 {
   757  			break
   758  		}
   759  	}
   760  
   761  	// Set this at the end because the response from the remote doesn't have
   762  	// this information.
   763  	reply.Failovers = failovers
   764  
   765  	return nil
   766  }