get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/test/service_latency_test.go (about)

     1  // Copyright 2019-2021 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package test
    15  
    16  import (
    17  	"encoding/json"
    18  	"fmt"
    19  	"math/rand"
    20  	"net/http"
    21  	"os"
    22  	"path/filepath"
    23  	"strings"
    24  	"sync"
    25  	"sync/atomic"
    26  	"testing"
    27  	"time"
    28  
    29  	"get.pme.sh/pnats/server"
    30  	"github.com/nats-io/jwt/v2"
    31  	"github.com/nats-io/nats.go"
    32  	"github.com/nats-io/nkeys"
    33  )
    34  
// supercluster groups several gateway-connected clusters so tests can set up
// and tear down a whole supercluster as a single unit.
type supercluster struct {
	t        *testing.T // owning test; used for fatal errors during setup
	clusters []*cluster // member clusters in creation order
}
    40  
    41  func (sc *supercluster) shutdown() {
    42  	if sc == nil {
    43  		return
    44  	}
    45  	for _, c := range sc.clusters {
    46  		shutdownCluster(c)
    47  	}
    48  }
    49  
    50  const digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    51  const base = 36
    52  const cnlen = 8
    53  
    54  func randClusterName() string {
    55  	var name []byte
    56  	rn := rand.Int63()
    57  	for i := 0; i < cnlen; i++ {
    58  		name = append(name, digits[rn%base])
    59  		rn /= base
    60  	}
    61  	return string(name[:cnlen])
    62  }
    63  
    64  func createSuperCluster(t *testing.T, numServersPer, numClusters int) *supercluster {
    65  	clusters := []*cluster{}
    66  
    67  	for i := 0; i < numClusters; i++ {
    68  		// Pick cluster name and setup default accounts.
    69  		c := createClusterEx(t, true, 5*time.Millisecond, true, randClusterName(), numServersPer, clusters...)
    70  		clusters = append(clusters, c)
    71  	}
    72  	return &supercluster{t, clusters}
    73  }
    74  
    75  func (sc *supercluster) setResponseThreshold(t *testing.T, maxTime time.Duration) {
    76  	t.Helper()
    77  	for _, c := range sc.clusters {
    78  		for _, s := range c.servers {
    79  			foo, err := s.LookupAccount("FOO")
    80  			if err != nil {
    81  				t.Fatalf("Error looking up account 'FOO': %v", err)
    82  			}
    83  			if err := foo.SetServiceExportResponseThreshold("ngs.usage.*", maxTime); err != nil {
    84  				t.Fatalf("Error setting response threshold")
    85  			}
    86  		}
    87  	}
    88  }
    89  
    90  func (sc *supercluster) setImportShare(t *testing.T) {
    91  	t.Helper()
    92  	for _, c := range sc.clusters {
    93  		for _, s := range c.servers {
    94  			foo, err := s.LookupAccount("FOO")
    95  			if err != nil {
    96  				t.Fatalf("Error looking up account 'FOO': %v", err)
    97  			}
    98  			bar, err := s.LookupAccount("BAR")
    99  			if err != nil {
   100  				t.Fatalf("Error looking up account 'BAR': %v", err)
   101  			}
   102  			if err := bar.SetServiceImportSharing(foo, "ngs.usage.bar", true); err != nil {
   103  				t.Fatalf("Error setting import sharing: %v", err)
   104  			}
   105  		}
   106  	}
   107  }
   108  
   109  func (sc *supercluster) setupLatencyTracking(t *testing.T, p int) {
   110  	t.Helper()
   111  	for _, c := range sc.clusters {
   112  		for _, s := range c.servers {
   113  			foo, err := s.LookupAccount("FOO")
   114  			if err != nil {
   115  				t.Fatalf("Error looking up account 'FOO': %v", err)
   116  			}
   117  			bar, err := s.LookupAccount("BAR")
   118  			if err != nil {
   119  				t.Fatalf("Error looking up account 'BAR': %v", err)
   120  			}
   121  			if err := foo.AddServiceExport("ngs.usage.*", nil); err != nil {
   122  				t.Fatalf("Error adding service export to 'FOO': %v", err)
   123  			}
   124  			if err := foo.TrackServiceExportWithSampling("ngs.usage.*", "results", p); err != nil {
   125  				t.Fatalf("Error adding latency tracking to 'FOO': %v", err)
   126  			}
   127  			if err := bar.AddServiceImport(foo, "ngs.usage", "ngs.usage.bar"); err != nil {
   128  				t.Fatalf("Error adding service import to 'ngs.usage': %v", err)
   129  			}
   130  		}
   131  	}
   132  }
   133  
   134  func (sc *supercluster) removeLatencyTracking(t *testing.T) {
   135  	t.Helper()
   136  	for _, c := range sc.clusters {
   137  		for _, s := range c.servers {
   138  			foo, err := s.LookupAccount("FOO")
   139  			if err != nil {
   140  				t.Fatalf("Error looking up account 'FOO': %v", err)
   141  			}
   142  			foo.UnTrackServiceExport("ngs.usage.*")
   143  		}
   144  	}
   145  }
   146  
   147  func (sc *supercluster) totalSubs() int {
   148  	totalSubs := 0
   149  	for _, c := range sc.clusters {
   150  		totalSubs += c.totalSubs()
   151  	}
   152  	return totalSubs
   153  }
   154  
   155  func clientConnectWithName(t *testing.T, opts *server.Options, user, appname string) *nats.Conn {
   156  	t.Helper()
   157  	url := fmt.Sprintf("nats://%s:pass@%s:%d", user, opts.Host, opts.Port)
   158  	nc, err := nats.Connect(url, nats.Name(appname))
   159  	if err != nil {
   160  		t.Fatalf("Error on connect: %v", err)
   161  	}
   162  	return nc
   163  }
   164  
   165  func clientConnect(t *testing.T, opts *server.Options, user string) *nats.Conn {
   166  	t.Helper()
   167  	return clientConnectWithName(t, opts, user, "")
   168  }
   169  
   170  func clientConnectOldRequest(t *testing.T, opts *server.Options, user string) *nats.Conn {
   171  	t.Helper()
   172  	url := fmt.Sprintf("nats://%s:pass@%s:%d", user, opts.Host, opts.Port)
   173  	nc, err := nats.Connect(url, nats.UseOldRequestStyle())
   174  	if err != nil {
   175  		t.Fatalf("Error on connect: %v", err)
   176  	}
   177  	return nc
   178  }
   179  
   180  func checkServiceLatency(t *testing.T, sl server.ServiceLatency, start time.Time, serviceTime time.Duration) {
   181  	t.Helper()
   182  
   183  	if sl.Status != 200 {
   184  		t.Fatalf("Bad status received, wanted 200 got %d", sl.Status)
   185  	}
   186  
   187  	serviceTime = serviceTime.Round(time.Millisecond)
   188  
   189  	startDelta := sl.RequestStart.Sub(start)
   190  	// Original test was 5ms, but got GitHub Action failure with "Bad start delta 5.033929ms",
   191  	// and Travis will get something like: "Bad start delta 15.046059ms", so be more generous.
   192  	if startDelta > 20*time.Millisecond {
   193  		t.Fatalf("Bad start delta %v", startDelta)
   194  	}
   195  	// Since RTT during tests is estimate we remove from calculation.
   196  	if (sl.ServiceLatency + sl.Responder.RTT) < time.Duration(float64(serviceTime)*0.8) {
   197  		t.Fatalf("Bad service latency: %v (%v)", sl.ServiceLatency, serviceTime)
   198  	}
   199  	if sl.TotalLatency < sl.ServiceLatency {
   200  		t.Fatalf("Bad total latency: %v (%v)", sl.TotalLatency, sl.ServiceLatency)
   201  	}
   202  	// We should have NATS latency here that is non-zero with real clients.
   203  	if sl.Requestor.RTT == 0 {
   204  		t.Fatalf("Expected non-zero NATS Requestor latency")
   205  	}
   206  	if sl.Responder.RTT == 0 {
   207  		t.Fatalf("Expected non-zero NATS Requestor latency")
   208  	}
   209  
   210  	// Make sure they add up
   211  	got := sl.TotalLatency
   212  	expected := sl.ServiceLatency + sl.NATSTotalTime()
   213  	if got != expected {
   214  		t.Fatalf("Numbers do not add up: %+v,\ngot: %v\nexpected: %v", sl, got, expected)
   215  	}
   216  }
   217  
   218  func extendedCheck(t *testing.T, lc *server.ClientInfo, eUser, appName, eServer string) {
   219  	t.Helper()
   220  	if lc.User != eUser {
   221  		t.Fatalf("Expected user of %q, got %q", eUser, lc.User)
   222  	}
   223  	if appName != "" && appName != lc.Name {
   224  		t.Fatalf("Expected appname of %q, got %q\n", appName, lc.Name)
   225  	}
   226  	if lc.Host == "" {
   227  		t.Fatalf("Expected non-empty IP")
   228  	}
   229  	if lc.ID < 1 || lc.ID > 20 {
   230  		t.Fatalf("Expected a ID in range, got %d", lc.ID)
   231  	}
   232  	if eServer != "" && eServer != lc.Server {
   233  		t.Fatalf("Expected server of %q, got %q", eServer, lc.Server)
   234  	}
   235  }
   236  
   237  func noShareCheck(t *testing.T, lc *server.ClientInfo) {
   238  	t.Helper()
   239  	if lc.Name != "" {
   240  		t.Fatalf("appname should not have been shared, got %q", lc.Name)
   241  	}
   242  	if lc.User != "" {
   243  		t.Fatalf("user should not have been shared, got %q", lc.User)
   244  	}
   245  	if lc.Host != "" {
   246  		t.Fatalf("client ip should not have been shared, got %q", lc.Host)
   247  	}
   248  	if lc.ID != 0 {
   249  		t.Fatalf("client id should not have been shared, got %d", lc.ID)
   250  	}
   251  	if lc.Server != "" {
   252  		t.Fatalf("client' server should not have been shared, got %q", lc.Server)
   253  	}
   254  }
   255  
   256  func TestServiceLatencySingleServerConnect(t *testing.T) {
   257  	sc := createSuperCluster(t, 3, 2)
   258  	defer sc.shutdown()
   259  
   260  	// Now add in new service export to FOO and have bar import that with tracking enabled.
   261  	sc.setupLatencyTracking(t, 100)
   262  
   263  	// Now we can setup and test, do single node only first.
   264  	// This is the service provider.
   265  	nc := clientConnectWithName(t, sc.clusters[0].opts[0], "foo", "service22")
   266  	defer nc.Close()
   267  
   268  	// The service listener.
   269  	serviceTime := 25 * time.Millisecond
   270  	nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
   271  		time.Sleep(serviceTime)
   272  		msg.Respond([]byte("22 msgs"))
   273  	})
   274  
   275  	// Listen for metrics
   276  	rsub, _ := nc.SubscribeSync("results")
   277  
   278  	// Requestor
   279  	nc2 := clientConnect(t, sc.clusters[0].opts[0], "bar")
   280  	defer nc2.Close()
   281  
   282  	// Send the request.
   283  	start := time.Now()
   284  	if _, err := nc2.Request("ngs.usage", []byte("1h"), time.Second); err != nil {
   285  		t.Fatalf("Expected a response")
   286  	}
   287  
   288  	var sl server.ServiceLatency
   289  	rmsg, _ := rsub.NextMsg(time.Second)
   290  	json.Unmarshal(rmsg.Data, &sl)
   291  
   292  	checkServiceLatency(t, sl, start, serviceTime)
   293  
   294  	rs := sc.clusters[0].servers[0]
   295  	extendedCheck(t, sl.Responder, "foo", "service22", rs.Name())
   296  	// Normally requestor's don't share
   297  	noShareCheck(t, sl.Requestor)
   298  
   299  	// Now make sure normal use case works with old request style.
   300  	nc3 := clientConnectOldRequest(t, sc.clusters[0].opts[0], "bar")
   301  	defer nc3.Close()
   302  
   303  	start = time.Now()
   304  	if _, err := nc3.Request("ngs.usage", []byte("1h"), time.Second); err != nil {
   305  		t.Fatalf("Expected a response")
   306  	}
   307  	nc3.Close()
   308  
   309  	checkServiceLatency(t, sl, start, serviceTime)
   310  	extendedCheck(t, sl.Responder, "foo", "service22", rs.Name())
   311  	// Normally requestor's don't share
   312  	noShareCheck(t, sl.Requestor)
   313  }
   314  
// If a client has a longer RTT than the effective RTT for NATS + responder
// the requestor RTT will be marked as 0. This can happen quite often with
// utility programs that are far away from a cluster like NGS but the service
// response time has a shorter RTT.
func TestServiceLatencyClientRTTSlowerVsServiceRTT(t *testing.T) {
	sc := createSuperCluster(t, 2, 2)
	defer sc.shutdown()

	// Now add in new service export to FOO and have BAR import that with tracking enabled.
	sc.setupLatencyTracking(t, 100)

	nc := clientConnect(t, sc.clusters[0].opts[0], "foo")
	defer nc.Close()

	// The service listener. Mostly instant response.
	nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
		time.Sleep(time.Millisecond)
		msg.Respond([]byte("22 msgs"))
	})

	// Listen for metrics
	rsub, _ := nc.SubscribeSync("results")
	nc.Flush()

	// Requestor and processing. Connects a requestor through a slow proxy to
	// the given cluster/server and validates the reported latencies.
	requestAndCheck := func(cindex, sindex int) {
		t.Helper()
		sopts := sc.clusters[cindex].opts[sindex]

		// No results should be pending from a previous invocation.
		if nmsgs, _, err := rsub.Pending(); err != nil || nmsgs != 0 {
			t.Fatalf("Did not expect any latency results, got %d", nmsgs)
		}

		rtt := 10 * time.Millisecond
		bw := 1024 * 1024
		// The proxy adds 5ms headroom so the client RTT is reliably > rtt.
		sp := newSlowProxy(rtt+5*time.Millisecond, bw, bw, sopts)
		defer sp.stop()

		nc2 := clientConnect(t, sp.opts(), "bar")
		defer nc2.Close()

		start := time.Now()
		nc2.Flush()
		// Check rtt for slow proxy
		if d := time.Since(start); d < rtt {
			t.Fatalf("Expected an rtt of at least %v, got %v", rtt, d)
		}

		// Send the request.
		_, err := nc2.Request("ngs.usage", []byte("1h"), time.Second)
		if err != nil {
			t.Fatalf("Expected a response")
		}

		var sl server.ServiceLatency
		rmsg, err := rsub.NextMsg(time.Second)
		if err != nil || rmsg == nil {
			t.Fatalf("Did not receive latency results")
		}
		json.Unmarshal(rmsg.Data, &sl)

		if sl.Status != 200 {
			t.Fatalf("Expected a status 200 Ok, got [%d] %q", sl.Status, sl.Error)
		}
		// We want to test here that when the client requestor RTT is larger then the response time
		// we still deliver a requestor value > 0.
		// Now check that it is close to rtt.
		if sl.Requestor.RTT < rtt {
			t.Fatalf("Expected requestor latency to be > %v, got %v", rtt, sl.Requestor.RTT)
		}
		if sl.TotalLatency < rtt {
			t.Fatalf("Expected total latency to be > %v, got %v", rtt, sl.TotalLatency)
		}

		rs := sc.clusters[0].servers[0]
		extendedCheck(t, sl.Responder, "foo", "", rs.Name())
		// Normally requestor's don't share
		noShareCheck(t, sl.Requestor)

		// Check for trailing duplicates..
		rmsg, err = rsub.NextMsg(100 * time.Millisecond)
		if err == nil {
			t.Fatalf("Duplicate metric result, %q", rmsg.Data)
		}
	}

	// Check same server.
	requestAndCheck(0, 0)
	// Check from remote server across GW.
	requestAndCheck(1, 1)
	// Same cluster but different server
	requestAndCheck(0, 1)
}
   408  
   409  func connRTT(nc *nats.Conn) time.Duration {
   410  	// Do 5x to flatten
   411  	total := time.Duration(0)
   412  	for i := 0; i < 5; i++ {
   413  		start := time.Now()
   414  		nc.Flush()
   415  		total += time.Since(start)
   416  	}
   417  	return total / 5
   418  }
   419  
// TestServiceLatencyRemoteConnect verifies latency tracking when the
// requestor is on a different server of the same cluster, and on a remote
// cluster across a gateway, and that metrics stop once tracking is removed.
func TestServiceLatencyRemoteConnect(t *testing.T) {
	sc := createSuperCluster(t, 3, 2)
	defer sc.shutdown()

	// Now add in new service export to FOO and have bar import that with tracking enabled.
	sc.setupLatencyTracking(t, 100)

	// Now we can setup and test, do single node only first.
	// This is the service provider.
	nc := clientConnect(t, sc.clusters[0].opts[0], "foo")
	defer nc.Close()

	subsBefore := int(sc.clusters[0].servers[0].NumSubscriptions())

	// The service listener.
	serviceTime := 25 * time.Millisecond
	nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
		time.Sleep(serviceTime)
		msg.Respond([]byte("22 msgs"))
	})

	// Listen for metrics
	rsub, _ := nc.SubscribeSync("results")
	nc.Flush()

	// Wait for our two new subscriptions to propagate through the cluster.
	if err := checkExpectedSubs(subsBefore+2, sc.clusters[0].servers...); err != nil {
		t.Fatal(err.Error())
	}

	// Same Cluster Requestor
	nc2 := clientConnect(t, sc.clusters[0].opts[2], "bar")
	defer nc2.Close()

	// Send the request.
	start := time.Now()
	_, err := nc2.Request("ngs.usage", []byte("1h"), time.Second)
	if err != nil {
		t.Fatalf("Expected a response")
	}

	var sl server.ServiceLatency
	rmsg, err := rsub.NextMsg(time.Second)
	if err != nil {
		t.Fatalf("Error getting latency measurement: %v", err)
	}
	json.Unmarshal(rmsg.Data, &sl)
	checkServiceLatency(t, sl, start, serviceTime)
	rs := sc.clusters[0].servers[0]
	extendedCheck(t, sl.Responder, "foo", "", rs.Name())
	// Normally requestor's don't share
	noShareCheck(t, sl.Requestor)

	// Lastly here, we need to make sure we are properly tracking the extra hops.
	// We will make sure that NATS latency is close to what we see from the outside in terms of RTT.
	if crtt := connRTT(nc) + connRTT(nc2); sl.NATSTotalTime() < crtt {
		t.Fatalf("Not tracking second measurement for NATS latency across servers: %v vs %v", sl.NATSTotalTime(), crtt)
	}

	// Gateway Requestor
	nc2 = clientConnect(t, sc.clusters[1].opts[1], "bar")
	defer nc2.Close()

	// Send the request.
	start = time.Now()
	_, err = nc2.Request("ngs.usage", []byte("1h"), time.Second)
	if err != nil {
		t.Fatalf("Expected a response")
	}

	rmsg, err = rsub.NextMsg(time.Second)
	if err != nil {
		t.Fatalf("Error getting latency measurement: %v", err)
	}
	json.Unmarshal(rmsg.Data, &sl)
	checkServiceLatency(t, sl, start, serviceTime)
	extendedCheck(t, sl.Responder, "foo", "", rs.Name())
	// Normally requestor's don't share
	noShareCheck(t, sl.Requestor)

	// Lastly here, we need to make sure we are properly tracking the extra hops.
	// We will make sure that NATS latency is close to what we see from the outside in terms of RTT.
	if crtt := connRTT(nc) + connRTT(nc2); sl.NATSTotalTime() < crtt {
		t.Fatalf("Not tracking second measurement for NATS latency across servers: %v vs %v", sl.NATSTotalTime(), crtt)
	}

	// Now turn off and make sure we no longer receive updates.
	sc.removeLatencyTracking(t)
	_, err = nc2.Request("ngs.usage", []byte("1h"), time.Second)
	if err != nil {
		t.Fatalf("Expected a response")
	}

	_, err = rsub.NextMsg(100 * time.Millisecond)
	if err == nil {
		t.Fatalf("Did not expect to receive a latency metric")
	}
}
   517  
// TestServiceLatencySampling verifies that with a 50% sampling rate roughly
// half of the requests (within a 10% tolerance) produce latency metrics.
func TestServiceLatencySampling(t *testing.T) {
	sc := createSuperCluster(t, 3, 2)
	defer sc.shutdown()

	// Now add in new service export to FOO and have bar import that with tracking enabled.
	// Sampling is set to 50 percent here.
	sc.setupLatencyTracking(t, 50)

	// Now we can setup and test, do single node only first.
	// This is the service provider.
	nc := clientConnect(t, sc.clusters[0].opts[0], "foo")
	defer nc.Close()

	// The service listener.
	nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
		msg.Respond([]byte("22 msgs"))
	})

	// Listen for metrics and count them atomically; the handler runs on the
	// client's callback goroutine.
	received := int32(0)

	nc.Subscribe("results", func(msg *nats.Msg) {
		atomic.AddInt32(&received, 1)
	})

	// Same Cluster Requestor
	nc2 := clientConnect(t, sc.clusters[0].opts[2], "bar")
	defer nc2.Close()

	toSend := 1000
	for i := 0; i < toSend; i++ {
		nc2.Request("ngs.usage", []byte("1h"), time.Second)
	}
	// Wait for results to flow in.
	time.Sleep(100 * time.Millisecond)

	// Expect about half the requests sampled, +/- 10% of the total.
	mid := toSend / 2
	delta := toSend / 10 // 10%
	got := int(atomic.LoadInt32(&received))

	if got > mid+delta || got < mid-delta {
		t.Fatalf("Sampling number incorrect: %d vs %d", mid, got)
	}
}
   561  
// TestServiceLatencyNoSubsLeak verifies that repeated tracked requests do
// not leak subscriptions or per-request service imports.
func TestServiceLatencyNoSubsLeak(t *testing.T) {
	sc := createSuperCluster(t, 3, 3)
	defer sc.shutdown()

	// Now add in new service export to FOO and have bar import that with tracking enabled.
	sc.setupLatencyTracking(t, 100)

	nc := clientConnectWithName(t, sc.clusters[0].opts[1], "foo", "dlc22")
	defer nc.Close()

	// The service listener.
	nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
		msg.Respond([]byte("22 msgs"))
	})
	nc.Flush()
	// Propagation of sub through super cluster.
	time.Sleep(100 * time.Millisecond)

	// Baselines to compare against after the request burst.
	startSubs := sc.totalSubs()

	fooAcc, _ := sc.clusters[1].servers[1].LookupAccount("FOO")
	startNumSis := fooAcc.NumServiceImports()

	// Issue 100 requests, each from a fresh connection.
	for i := 0; i < 100; i++ {
		nc := clientConnect(t, sc.clusters[1].opts[1], "bar")
		defer nc.Close()
		if _, err := nc.Request("ngs.usage", []byte("1h"), time.Second); err != nil {
			t.Fatalf("Error on request: %v", err)
		}
		nc.Close()
	}

	// We are adding 3 here for the wildcard response subject for service replies.
	// we only have one but it will show in three places.
	startSubs += 3

	checkFor(t, time.Second, 50*time.Millisecond, func() error {
		if numSubs := sc.totalSubs(); numSubs != startSubs {
			return fmt.Errorf("Leaked %d subs", numSubs-startSubs)
		}
		return nil
	})

	// Now also check to make sure the service imports created for the request go away as well.
	checkFor(t, time.Second, 50*time.Millisecond, func() error {
		if numSis := fooAcc.NumServiceImports(); numSis != startNumSis {
			return fmt.Errorf("Leaked %d service imports", numSis-startNumSis)
		}
		return nil
	})
}
   613  
// TestServiceLatencyWithName verifies that the responder's application name
// is included in the latency metric on a single-server setup.
func TestServiceLatencyWithName(t *testing.T) {
	sc := createSuperCluster(t, 1, 1)
	defer sc.shutdown()

	// Now add in new service export to FOO and have bar import that with tracking enabled.
	sc.setupLatencyTracking(t, 100)

	opts := sc.clusters[0].opts[0]

	nc := clientConnectWithName(t, opts, "foo", "dlc22")
	defer nc.Close()

	// The service listener.
	nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
		msg.Respond([]byte("22 msgs"))
	})

	// Listen for metrics
	rsub, _ := nc.SubscribeSync("results")
	nc.Flush()

	nc2 := clientConnect(t, opts, "bar")
	defer nc2.Close()
	nc2.Request("ngs.usage", []byte("1h"), time.Second)

	var sl server.ServiceLatency
	rmsg, err := rsub.NextMsg(time.Second)
	if err != nil {
		t.Fatalf("Error getting message: %v", err)
	}
	json.Unmarshal(rmsg.Data, &sl)

	// Make sure we have AppName set.
	rs := sc.clusters[0].servers[0]
	extendedCheck(t, sl.Responder, "foo", "dlc22", rs.Name())
	// Normally requestor's don't share
	noShareCheck(t, sl.Requestor)
}
   652  
   653  func TestServiceLatencyWithNameMultiServer(t *testing.T) {
   654  	sc := createSuperCluster(t, 3, 2)
   655  	defer sc.shutdown()
   656  
   657  	// Now add in new service export to FOO and have bar import that with tracking enabled.
   658  	sc.setupLatencyTracking(t, 100)
   659  
   660  	nc := clientConnectWithName(t, sc.clusters[0].opts[1], "foo", "dlc22")
   661  	defer nc.Close()
   662  
   663  	// The service listener.
   664  	nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
   665  		msg.Respond([]byte("22 msgs"))
   666  	})
   667  
   668  	// Listen for metrics
   669  	rsub, _ := nc.SubscribeSync("results")
   670  	nc.Flush()
   671  
   672  	nc2 := clientConnect(t, sc.clusters[1].opts[1], "bar")
   673  	defer nc2.Close()
   674  	nc2.Request("ngs.usage", []byte("1h"), time.Second)
   675  
   676  	var sl server.ServiceLatency
   677  	rmsg, _ := rsub.NextMsg(time.Second)
   678  	json.Unmarshal(rmsg.Data, &sl)
   679  
   680  	// Make sure we have AppName set.
   681  	rs := sc.clusters[0].servers[1]
   682  	extendedCheck(t, sl.Responder, "foo", "dlc22", rs.Name())
   683  	// Normally requestor's don't share
   684  	noShareCheck(t, sl.Requestor)
   685  }
   686  
   687  func createAccountWithJWT(t *testing.T) (string, nkeys.KeyPair, *jwt.AccountClaims) {
   688  	t.Helper()
   689  	okp, _ := nkeys.FromSeed(oSeed)
   690  	akp, _ := nkeys.CreateAccount()
   691  	pub, _ := akp.PublicKey()
   692  	nac := jwt.NewAccountClaims(pub)
   693  	jwt, _ := nac.Encode(okp)
   694  	return jwt, akp, nac
   695  }
   696  
// TestServiceLatencyWithJWT verifies latency tracking driven entirely by
// account JWTs: tracking is off until the service account's export gains a
// Latency config via a simulated JWT update, and off again once removed.
func TestServiceLatencyWithJWT(t *testing.T) {
	okp, _ := nkeys.FromSeed(oSeed)

	// Create three accounts, system, service and normal account.
	sysJWT, sysKP, _ := createAccountWithJWT(t)
	sysPub, _ := sysKP.PublicKey()

	_, svcKP, svcAcc := createAccountWithJWT(t)
	svcPub, _ := svcKP.PublicKey()

	// Add in the service export with latency tracking here.
	serviceExport := &jwt.Export{Subject: "req.*", Type: jwt.Service}
	svcAcc.Exports.Add(serviceExport)
	svcJWT, err := svcAcc.Encode(okp)
	if err != nil {
		t.Fatalf("Error generating account JWT: %v", err)
	}

	_, accKP, accAcc := createAccountWithJWT(t)
	accPub, _ := accKP.PublicKey()

	// Add in the import.
	serviceImport := &jwt.Import{Account: svcPub, Subject: "request", To: "req.echo", Type: jwt.Service}
	accAcc.Imports.Add(serviceImport)
	accJWT, err := accAcc.Encode(okp)
	if err != nil {
		t.Fatalf("Error generating account JWT: %v", err)
	}

	// Server config template; %s slots are routes, system account, and the
	// three account JWTs preloaded into the memory resolver.
	cf := `
	listen: 127.0.0.1:-1
	cluster {
		name: "A"
		listen: 127.0.0.1:-1
		authorization {
			timeout: 2.2
		} %s
	}

	operator = "./configs/nkeys/op.jwt"
	system_account = "%s"

	resolver = MEMORY
	resolver_preload = {
		%s : "%s"
		%s : "%s"
		%s : "%s"
	}
	`
	contents := strings.Replace(fmt.Sprintf(cf, "", sysPub, sysPub, sysJWT, svcPub, svcJWT, accPub, accJWT), "\n\t", "\n", -1)
	conf := createConfFile(t, []byte(contents))

	s, opts := RunServerWithConfig(conf)
	defer s.Shutdown()

	// Create a new server and route to main one.
	routeStr := fmt.Sprintf("\n\t\troutes = [nats-route://%s:%d]", opts.Cluster.Host, opts.Cluster.Port)
	contents2 := strings.Replace(fmt.Sprintf(cf, routeStr, sysPub, sysPub, sysJWT, svcPub, svcJWT, accPub, accJWT), "\n\t", "\n", -1)

	conf2 := createConfFile(t, []byte(contents2))

	s2, opts2 := RunServerWithConfig(conf2)
	defer s2.Shutdown()

	checkClusterFormed(t, s, s2)

	// Create service provider.
	url := fmt.Sprintf("nats://%s:%d", opts.Host, opts.Port)
	copt, pubUser := createUserCredsOption(t, s, svcKP)
	nc, err := nats.Connect(url, copt, nats.Name("fooService"))
	if err != nil {
		t.Fatalf("Error on connect: %v", err)
	}
	defer nc.Close()

	// The service listener.
	serviceTime := 25 * time.Millisecond
	nc.Subscribe("req.echo", func(msg *nats.Msg) {
		time.Sleep(serviceTime)
		msg.Respond(msg.Data)
	})

	// Listen for metrics
	rsub, _ := nc.SubscribeSync("results")
	nc.Flush()

	// Create second client and send request from this one.
	url2 := fmt.Sprintf("nats://%s:%d/", opts2.Host, opts2.Port)
	nc2, err := nats.Connect(url2, createUserCreds(t, s2, accKP))
	if err != nil {
		t.Fatalf("Error creating client: %v\n", err)
	}
	defer nc2.Close()

	// Send the request.
	_, err = nc2.Request("request", []byte("hello"), time.Second)
	if err != nil {
		t.Fatalf("Expected a response")
	}

	// We should not receive latency at this time.
	_, err = rsub.NextMsg(100 * time.Millisecond)
	if err == nil {
		t.Fatalf("Did not expect to receive a latency metric")
	}

	// Now turn it on..
	// updateAccount pushes the (mutated) service account claims to both
	// servers, simulating a JWT update from the account resolver.
	updateAccount := func() {
		t.Helper()
		for _, s := range []*server.Server{s, s2} {
			svcAccount, err := s.LookupAccount(svcPub)
			if err != nil {
				t.Fatalf("Could not lookup service account from server %+v", s)
			}
			s.UpdateAccountClaims(svcAccount, svcAcc)
		}
	}
	serviceExport.Latency = &jwt.ServiceLatency{Sampling: 100, Results: "results"}
	updateAccount()

	// Grab the service responder's user.

	// Send the request.
	start := time.Now()
	_, err = nc2.Request("request", []byte("hello"), time.Second)
	if err != nil {
		t.Fatalf("Expected a response")
	}

	var sl server.ServiceLatency
	rmsg, err := rsub.NextMsg(time.Second)
	if err != nil || rmsg == nil {
		t.Fatalf("Did not receive a latency metric")
	}
	json.Unmarshal(rmsg.Data, &sl)
	checkServiceLatency(t, sl, start, serviceTime)
	extendedCheck(t, sl.Responder, pubUser, "fooService", s.Name())
	// Normally requestor's don't share
	noShareCheck(t, sl.Requestor)

	// Now we will remove tracking. Do this by simulating a JWT update.
	serviceExport.Latency = nil
	updateAccount()

	// Now we should not get any tracking data.
	_, err = nc2.Request("request", []byte("hello"), time.Second)
	if err != nil {
		t.Fatalf("Expected a response")
	}
	_, err = rsub.NextMsg(100 * time.Millisecond)
	if err == nil {
		t.Fatalf("Did not expect to receive a latency metric")
	}
}
   851  
// TestServiceLatencyAdjustNegativeLatencyValues verifies that reported
// service latencies are never negative (clock-skew adjustments are applied)
// across a burst of tracked requests.
func TestServiceLatencyAdjustNegativeLatencyValues(t *testing.T) {
	sc := createSuperCluster(t, 3, 2)
	defer sc.shutdown()

	// Now add in new service export to FOO and have bar import
	// that with tracking enabled.
	sc.setupLatencyTracking(t, 100)

	// Now we can setup and test, do single node only first.
	// This is the service provider.
	nc := clientConnect(t, sc.clusters[0].opts[0], "foo")
	defer nc.Close()

	// The service listener.
	nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
		msg.Respond([]byte("22 msgs"))
	})

	// Listen for metrics
	rsub, err := nc.SubscribeSync("results")
	if err != nil {
		t.Fatal(err)
	}
	nc.Flush()

	// Requestor
	nc2 := clientConnect(t, sc.clusters[0].opts[0], "bar")
	defer nc2.Close()

	// Send the request.
	totalSamples := 50
	for i := 0; i < totalSamples; i++ {
		if _, err := nc2.Request("ngs.usage", []byte("1h"), time.Second); err != nil {
			t.Fatalf("Expected a response")
		}
	}

	// With sampling at 100% every request must yield a metric; assert each
	// reported service latency is non-negative.
	var sl server.ServiceLatency
	for i := 0; i < totalSamples; i++ {
		rmsg, err := rsub.NextMsg(time.Second)
		if err != nil {
			t.Fatalf("Expected to receive latency metric: %d, %s", i, err)
		}
		if err := json.Unmarshal(rmsg.Data, &sl); err != nil {
			t.Errorf("Unexpected error processing latency metric: %s", err)
		}
		if sl.ServiceLatency < 0 {
			t.Fatalf("Unexpected negative latency value: %v", sl)
		}
	}
}
   903  
   904  func TestServiceLatencyRemoteConnectAdjustNegativeValues(t *testing.T) {
   905  	sc := createSuperCluster(t, 3, 2)
   906  	defer sc.shutdown()
   907  
   908  	// Now add in new service export to FOO and have bar import that with tracking enabled.
   909  	sc.setupLatencyTracking(t, 100)
   910  
   911  	// Now we can setup and test, do single node only first.
   912  	// This is the service provider.
   913  	nc := clientConnect(t, sc.clusters[0].opts[0], "foo")
   914  	defer nc.Close()
   915  
   916  	// The service listener.
   917  	nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
   918  		msg.Respond([]byte("22 msgs"))
   919  	})
   920  
   921  	// Listen for metrics
   922  	rsub, err := nc.SubscribeSync("results")
   923  	if err != nil {
   924  		t.Fatal(err)
   925  	}
   926  	nc.Flush()
   927  
   928  	// Same Cluster Requestor
   929  	nc2 := clientConnect(t, sc.clusters[0].opts[2], "bar")
   930  	defer nc2.Close()
   931  
   932  	// Gateway Requestor
   933  	nc3 := clientConnect(t, sc.clusters[1].opts[1], "bar")
   934  	defer nc3.Close()
   935  
   936  	// Send a few initial requests to ensure interest is propagated
   937  	// both for cluster and gateway requestors.
   938  	checkFor(t, 3*time.Second, time.Second, func() error {
   939  		_, err1 := nc2.Request("ngs.usage", []byte("1h"), time.Second)
   940  		_, err2 := nc3.Request("ngs.usage", []byte("1h"), time.Second)
   941  
   942  		if err1 != nil || err2 != nil {
   943  			return fmt.Errorf("Timed out waiting for super cluster to be ready")
   944  		}
   945  		return nil
   946  	})
   947  
   948  	// Send the request.
   949  	totalSamples := 20
   950  	for i := 0; i < totalSamples; i++ {
   951  		if _, err := nc2.Request("ngs.usage", []byte("1h"), time.Second); err != nil {
   952  			t.Fatalf("Expected a response")
   953  		}
   954  	}
   955  
   956  	for i := 0; i < totalSamples; i++ {
   957  		if _, err := nc3.Request("ngs.usage", []byte("1h"), time.Second); err != nil {
   958  			t.Fatalf("Expected a response")
   959  		}
   960  	}
   961  
   962  	var sl server.ServiceLatency
   963  	for i := 0; i < totalSamples*2; i++ {
   964  		rmsg, err := rsub.NextMsg(time.Second)
   965  		if err != nil {
   966  			t.Fatalf("Expected to receive latency metric: %d, %s", i, err)
   967  		}
   968  		if err = json.Unmarshal(rmsg.Data, &sl); err != nil {
   969  			t.Errorf("Unexpected error processing latency metric: %s", err)
   970  		}
   971  		if sl.ServiceLatency < 0 {
   972  			t.Fatalf("Unexpected negative service latency value: %v", sl)
   973  		}
   974  		if sl.SystemLatency < 0 {
   975  			t.Fatalf("Unexpected negative system latency value: %v", sl)
   976  		}
   977  	}
   978  }
   979  
// Verifies the failure status codes embedded in latency metrics on a single
// server: 400 (request had no reply subject), 503 (no responders),
// 504 (responder exceeded the response threshold) and 408 (old-style
// requestor gave up before the responder answered).
func TestServiceLatencyFailureReportingSingleServer(t *testing.T) {
	sc := createSuperCluster(t, 1, 1)
	defer sc.shutdown()

	// Now add in new service export to FOO and have bar import that with tracking enabled.
	sc.setupLatencyTracking(t, 100)
	sc.setResponseThreshold(t, 20*time.Millisecond)

	nc := clientConnect(t, sc.clusters[0].opts[0], "foo")
	defer nc.Close()

	// Listen for metrics
	rsub, err := nc.SubscribeSync("results")
	if err != nil {
		t.Fatal(err)
	}
	nc.Flush()

	// Pulls the next latency metric off the results subject and decodes it.
	getMetricResult := func() *server.ServiceLatency {
		t.Helper()
		rmsg, err := rsub.NextMsg(time.Second)
		if err != nil {
			t.Fatalf("Expected to receive latency metric: %v", err)
		}
		var sl server.ServiceLatency
		if err = json.Unmarshal(rmsg.Data, &sl); err != nil {
			t.Errorf("Unexpected error processing latency metric: %s", err)
		}
		return &sl
	}

	// Same server
	nc2 := clientConnect(t, sc.clusters[0].opts[0], "bar")
	defer nc2.Close()

	// Test a request with no reply subject
	nc2.Publish("ngs.usage", []byte("1h"))
	sl := getMetricResult()

	if sl.Status != 400 {
		t.Fatalf("Expected to get a bad request status [400], got %d", sl.Status)
	}

	// Proper request, but no responders. The request error is deliberately
	// ignored here; only the resulting metric matters.
	nc2.Request("ngs.usage", []byte("1h"), 20*time.Millisecond)
	sl = getMetricResult()
	if sl.Status != 503 {
		t.Fatalf("Expected to get a service unavailable status [503], got %d", sl.Status)
	}

	// The service listener. Make it slow. 20ms is respThreshold, so take 2X
	sub, _ := nc.Subscribe("ngs.usage.bar", func(msg *nats.Msg) {
		time.Sleep(40 * time.Millisecond)
		msg.Respond([]byte("22 msgs"))
	})
	nc.Flush()

	nc2.Request("ngs.usage", []byte("1h"), 20*time.Millisecond)
	sl = getMetricResult()
	if sl.Status != 504 {
		t.Fatalf("Expected to get a service timeout status [504], got %d", sl.Status)
	}

	// Make sure we do not get duplicates.
	if rmsg, err := rsub.NextMsg(50 * time.Millisecond); err == nil {
		t.Fatalf("Unexpected second response metric: %q\n", rmsg.Data)
	}

	// Now setup a responder that will respond under the threshold.
	sub.Unsubscribe()
	nc.Subscribe("ngs.usage.bar", func(msg *nats.Msg) {
		time.Sleep(5 * time.Millisecond)
		msg.Respond([]byte("22 msgs"))
	})
	nc.Flush()
	// Give the replacement subscription time to settle before requesting.
	time.Sleep(100 * time.Millisecond)

	// Now create a responder using old request and we will do a short timeout.
	nc3 := clientConnectOldRequest(t, sc.clusters[0].opts[0], "bar")
	defer nc3.Close()

	// The 1ms timeout is shorter than the responder's 5ms delay, so the
	// requestor gives up first and the metric should report a request timeout.
	nc3.Request("ngs.usage", []byte("1h"), time.Millisecond)
	sl = getMetricResult()
	if sl.Status != 408 {
		t.Fatalf("Expected to get a request timeout status [408], got %d", sl.Status)
	}
}
  1067  
// Verifies the 400/503/504 failure status codes in latency metrics when the
// requestor connects to the same server, a different server in the same
// cluster, and a server in a different cluster (across a gateway).
func TestServiceLatencyFailureReportingMultipleServers(t *testing.T) {
	sc := createSuperCluster(t, 3, 3)
	defer sc.shutdown()

	// Now add in new service export to FOO and have bar import that with tracking enabled.
	sc.setupLatencyTracking(t, 100)
	sc.setResponseThreshold(t, 10*time.Millisecond)

	nc := clientConnect(t, sc.clusters[0].opts[0], "foo")
	defer nc.Close()

	// Listen for metrics
	rsub, err := nc.SubscribeSync("results")
	if err != nil {
		t.Fatal(err)
	}
	nc.Flush()

	// Pulls the next latency metric off the results subject and decodes it.
	getMetricResult := func() *server.ServiceLatency {
		t.Helper()
		rmsg, err := rsub.NextMsg(time.Second)
		if err != nil {
			t.Fatalf("Expected to receive latency metric: %v", err)
		}
		var sl server.ServiceLatency
		if err = json.Unmarshal(rmsg.Data, &sl); err != nil {
			t.Errorf("Unexpected error processing latency metric: %s", err)
		}
		return &sl
	}

	// ci/si select the cluster and server the requestor connects to.
	cases := []struct {
		ci, si int
		desc   string
	}{
		{0, 0, "same server"},
		{0, 1, "same cluster, different server"},
		{1, 1, "different cluster"},
	}

	for _, cs := range cases {
		// Select the server to send request from.
		nc2 := clientConnect(t, sc.clusters[cs.ci].opts[cs.si], "bar")
		// NOTE: defers inside the loop only fire at test exit; the explicit
		// Close/Unsubscribe at the bottom perform the per-iteration cleanup.
		defer nc2.Close()

		// Test a request with no reply subject
		nc2.Publish("ngs.usage", []byte("1h"))
		sl := getMetricResult()
		if sl.Status != 400 {
			t.Fatalf("Test %q, Expected to get a bad request status [400], got %d", cs.desc, sl.Status)
		}

		// We wait here for the gateways to report no interest b/c optimistic mode.
		time.Sleep(50 * time.Millisecond)

		// Proper request, but no responders. Request error intentionally ignored;
		// the metric carries the result we assert on.
		nc2.Request("ngs.usage", []byte("1h"), 10*time.Millisecond)
		sl = getMetricResult()
		if sl.Status != 503 {
			t.Fatalf("Test %q, Expected to get a service unavailable status [503], got %d", cs.desc, sl.Status)
		}

		// The service listener. Make it slow. 10ms is respThreshold, so make 3X
		sub, _ := nc.Subscribe("ngs.usage.bar", func(msg *nats.Msg) {
			time.Sleep(30 * time.Millisecond)
			msg.Respond([]byte("22 msgs"))
		})
		defer sub.Unsubscribe()
		nc.Flush()
		// Wait to propagate.
		time.Sleep(200 * time.Millisecond)

		nc2.Request("ngs.usage", []byte("1h"), 10*time.Millisecond)
		sl = getMetricResult()
		if sl.Status != 504 {
			t.Fatalf("Test %q, Expected to get a service timeout status [504], got %d", cs.desc, sl.Status)
		}

		// Clean up subscriber and requestor
		nc2.Close()
		sub.Unsubscribe()
		nc.Flush()
		// Wait to propagate.
		time.Sleep(200 * time.Millisecond)
	}
}
  1154  
  1155  // To test a bug rip@nats.io is seeing.
  1156  func TestServiceLatencyOldRequestStyleSingleServer(t *testing.T) {
  1157  	conf := createConfFile(t, []byte(`
  1158  		listen: 127.0.0.1:-1
  1159  		accounts: {
  1160  		    SVC: {
  1161  		        users: [ {user: svc, password: pass} ]
  1162  		        exports: [  {
  1163  					service: "svc.echo"
  1164  					accounts: [CLIENT]
  1165  					latency: {
  1166  						sampling: 100%
  1167  						subject: latency.svc
  1168  					}
  1169  				} ]
  1170  		    },
  1171  		    CLIENT: {
  1172  		        users: [{user: client, password: pass} ]
  1173  			    imports: [ {service: {account: SVC, subject: svc.echo}, to: SVC} ]
  1174  		    },
  1175  		    SYS: { users: [{user: admin, password: pass}] }
  1176  		}
  1177  
  1178  		system_account: SYS
  1179  	`))
  1180  
  1181  	srv, opts := RunServerWithConfig(conf)
  1182  	defer srv.Shutdown()
  1183  
  1184  	// Responder
  1185  	nc, err := nats.Connect(fmt.Sprintf("nats://svc:pass@%s:%d", opts.Host, opts.Port))
  1186  	if err != nil {
  1187  		t.Fatalf("Error on connect: %v", err)
  1188  	}
  1189  	defer nc.Close()
  1190  
  1191  	// Listen for metrics
  1192  	rsub, _ := nc.SubscribeSync("latency.svc")
  1193  
  1194  	// Requestor
  1195  	nc2, err := nats.Connect(fmt.Sprintf("nats://client:pass@%s:%d", opts.Host, opts.Port), nats.UseOldRequestStyle())
  1196  	if err != nil {
  1197  		t.Fatalf("Error on connect: %v", err)
  1198  	}
  1199  	defer nc2.Close()
  1200  
  1201  	// Setup responder
  1202  	serviceTime := 25 * time.Millisecond
  1203  	sub, _ := nc.Subscribe("svc.echo", func(msg *nats.Msg) {
  1204  		time.Sleep(serviceTime)
  1205  		msg.Respond([]byte("world"))
  1206  	})
  1207  	nc.Flush()
  1208  	defer sub.Unsubscribe()
  1209  
  1210  	// Send a request
  1211  	start := time.Now()
  1212  	if _, err := nc2.Request("SVC", []byte("1h"), time.Second); err != nil {
  1213  		t.Fatalf("Expected a response")
  1214  	}
  1215  
  1216  	var sl server.ServiceLatency
  1217  	rmsg, _ := rsub.NextMsg(time.Second)
  1218  	json.Unmarshal(rmsg.Data, &sl)
  1219  
  1220  	checkServiceLatency(t, sl, start, serviceTime)
  1221  	extendedCheck(t, sl.Responder, "svc", "", srv.Name())
  1222  	noShareCheck(t, sl.Requestor)
  1223  }
  1224  
  1225  // To test a bug wally@nats.io is seeing.
  1226  func TestServiceAndStreamStackOverflow(t *testing.T) {
  1227  	conf := createConfFile(t, []byte(`
  1228  		accounts {
  1229  		  STATIC {
  1230  		    users = [ { user: "static", pass: "foo" } ]
  1231  		    exports [
  1232  		      { stream: > }
  1233  		      { service: my.service }
  1234  		    ]
  1235  		  }
  1236  		  DYN {
  1237  		    users = [ { user: "foo", pass: "bar" } ]
  1238  		    imports [
  1239  		      { stream { subject: >, account: STATIC } }
  1240  		      { service { subject: my.service, account: STATIC } }
  1241  		    ]
  1242  		  }
  1243  		}
  1244  	`))
  1245  
  1246  	srv, opts := RunServerWithConfig(conf)
  1247  	defer srv.Shutdown()
  1248  
  1249  	// Responder (just request sub)
  1250  	nc, err := nats.Connect(fmt.Sprintf("nats://static:foo@%s:%d", opts.Host, opts.Port))
  1251  	if err != nil {
  1252  		t.Fatalf("Error on connect: %v", err)
  1253  	}
  1254  	defer nc.Close()
  1255  
  1256  	sub, _ := nc.SubscribeSync("my.service")
  1257  	nc.Flush()
  1258  
  1259  	// Requestor
  1260  	nc2, err := nats.Connect(fmt.Sprintf("nats://foo:bar@%s:%d", opts.Host, opts.Port))
  1261  	if err != nil {
  1262  		t.Fatalf("Error on connect: %v", err)
  1263  	}
  1264  	defer nc2.Close()
  1265  
  1266  	// Send a single request.
  1267  	nc2.PublishRequest("my.service", "foo", []byte("hi"))
  1268  	checkFor(t, time.Second, 10*time.Millisecond, func() error {
  1269  		if nm, _, err := sub.Pending(); err != nil || nm != 1 {
  1270  			return fmt.Errorf("Expected one request, got %d", nm)
  1271  		}
  1272  		return nil
  1273  	})
  1274  
  1275  	// Make sure works for queue subscribers as well.
  1276  	sub.Unsubscribe()
  1277  	sub, _ = nc.QueueSubscribeSync("my.service", "prod")
  1278  	nc.Flush()
  1279  
  1280  	// Send a single request.
  1281  	nc2.PublishRequest("my.service", "foo", []byte("hi"))
  1282  	checkFor(t, time.Second, 10*time.Millisecond, func() error {
  1283  		if nm, _, err := sub.Pending(); err != nil || nm != 1 {
  1284  			return fmt.Errorf("Expected one request, got %d", nm)
  1285  		}
  1286  		return nil
  1287  	})
  1288  
  1289  	// Now create an interest in the stream from nc2. that is a queue subscriber.
  1290  	sub2, _ := nc2.QueueSubscribeSync("my.service", "prod")
  1291  	defer sub2.Unsubscribe()
  1292  	nc2.Flush()
  1293  
  1294  	// Send a single request.
  1295  	nc2.PublishRequest("my.service", "foo", []byte("hi"))
  1296  	time.Sleep(10 * time.Millisecond)
  1297  	checkFor(t, time.Second, 10*time.Millisecond, func() error {
  1298  		if nm, _, err := sub.Pending(); err != nil || nm != 2 {
  1299  			return fmt.Errorf("Expected two requests, got %d", nm)
  1300  		}
  1301  		return nil
  1302  	})
  1303  }
  1304  
// Check we get the proper detailed information for the requestor when allowed.
// Import sharing is enabled, so every failure metric (400/503/504) must also
// carry the requestor's extended details (user and server name).
func TestServiceLatencyRequestorSharesDetailedInfo(t *testing.T) {
	sc := createSuperCluster(t, 3, 3)
	defer sc.shutdown()

	// Now add in new service export to FOO and have bar import that with tracking enabled.
	sc.setupLatencyTracking(t, 100)
	sc.setResponseThreshold(t, 10*time.Millisecond)
	sc.setImportShare(t)

	nc := clientConnect(t, sc.clusters[0].opts[0], "foo")
	defer nc.Close()

	// Listen for metrics
	rsub, err := nc.SubscribeSync("results")
	if err != nil {
		t.Fatal(err)
	}
	nc.Flush()

	// Pulls the next latency metric off the results subject and decodes it.
	getMetricResult := func() *server.ServiceLatency {
		t.Helper()
		rmsg, err := rsub.NextMsg(time.Second)
		if err != nil {
			t.Fatalf("Expected to receive latency metric: %v", err)
		}
		var sl server.ServiceLatency
		if err = json.Unmarshal(rmsg.Data, &sl); err != nil {
			t.Errorf("Unexpected error processing latency metric: %s", err)
		}
		return &sl
	}

	// ci/si select the cluster and server the requestor connects to.
	cases := []struct {
		ci, si int
		desc   string
	}{
		{0, 0, "same server"},
		{0, 1, "same cluster, different server"},
		{1, 1, "different cluster"},
	}

	for _, cs := range cases {
		// Select the server to send request from.
		nc2 := clientConnect(t, sc.clusters[cs.ci].opts[cs.si], "bar")
		defer nc2.Close()

		// Server the requestor is attached to; its name must appear in the
		// requestor's extended info.
		rs := sc.clusters[cs.ci].servers[cs.si]

		// Test a request with no reply subject
		nc2.Publish("ngs.usage", []byte("1h"))
		sl := getMetricResult()
		if sl.Status != 400 {
			t.Fatalf("Test %q, Expected to get a bad request status [400], got %d", cs.desc, sl.Status)
		}
		extendedCheck(t, sl.Requestor, "bar", "", rs.Name())

		// We wait here for the gateways to report no interest b/c optimistic mode.
		time.Sleep(50 * time.Millisecond)

		// Proper request, but no responders.
		nc2.Request("ngs.usage", []byte("1h"), 10*time.Millisecond)
		sl = getMetricResult()
		if sl.Status != 503 {
			t.Fatalf("Test %q, Expected to get a service unavailable status [503], got %d", cs.desc, sl.Status)
		}
		extendedCheck(t, sl.Requestor, "bar", "", rs.Name())

		// The service listener. Make it slow. 10ms is respThreshold, so take 2.5X
		sub, _ := nc.Subscribe("ngs.usage.bar", func(msg *nats.Msg) {
			time.Sleep(25 * time.Millisecond)
			msg.Respond([]byte("22 msgs"))
		})
		defer sub.Unsubscribe()
		nc.Flush()
		// Wait to propagate.
		time.Sleep(200 * time.Millisecond)

		nc2.Request("ngs.usage", []byte("1h"), 10*time.Millisecond)
		sl = getMetricResult()
		if sl.Status != 504 {
			t.Fatalf("Test %q, Expected to get a service timeout status [504], got %d", cs.desc, sl.Status)
		}
		extendedCheck(t, sl.Requestor, "bar", "", rs.Name())

		// Clean up subscriber and requestor
		nc2.Close()
		sub.Unsubscribe()
		nc.Flush()
		// Wait to propagate.
		time.Sleep(200 * time.Millisecond)
	}
}
  1398  
  1399  func TestServiceLatencyRequestorSharesConfig(t *testing.T) {
  1400  	conf := createConfFile(t, []byte(`
  1401  		listen: 127.0.0.1:-1
  1402  		accounts: {
  1403  		    SVC: {
  1404  		        users: [ {user: svc, password: pass} ]
  1405  		        exports: [  {
  1406  					service: "svc.echo"
  1407  					accounts: [CLIENT]
  1408  					latency: {
  1409  						sampling: 100%
  1410  						subject: latency.svc
  1411  					}
  1412  				} ]
  1413  		    },
  1414  		    CLIENT: {
  1415  		        users: [{user: client, password: pass} ]
  1416  			    imports: [ {service: {account: SVC, subject: svc.echo}, to: SVC, share:true} ]
  1417  		    },
  1418  		    SYS: { users: [{user: admin, password: pass}] }
  1419  		}
  1420  
  1421  		system_account: SYS
  1422  	`))
  1423  
  1424  	srv, opts := RunServerWithConfig(conf)
  1425  	defer srv.Shutdown()
  1426  
  1427  	// Responder
  1428  	nc, err := nats.Connect(fmt.Sprintf("nats://svc:pass@%s:%d", opts.Host, opts.Port))
  1429  	if err != nil {
  1430  		t.Fatalf("Error on connect: %v", err)
  1431  	}
  1432  	defer nc.Close()
  1433  
  1434  	// Listen for metrics
  1435  	rsub, _ := nc.SubscribeSync("latency.svc")
  1436  
  1437  	// Requestor
  1438  	nc2, err := nats.Connect(fmt.Sprintf("nats://client:pass@%s:%d", opts.Host, opts.Port), nats.UseOldRequestStyle())
  1439  	if err != nil {
  1440  		t.Fatalf("Error on connect: %v", err)
  1441  	}
  1442  	defer nc2.Close()
  1443  
  1444  	// Setup responder
  1445  	serviceTime := 25 * time.Millisecond
  1446  	sub, _ := nc.Subscribe("svc.echo", func(msg *nats.Msg) {
  1447  		time.Sleep(serviceTime)
  1448  		msg.Respond([]byte("world"))
  1449  	})
  1450  	nc.Flush()
  1451  	defer sub.Unsubscribe()
  1452  
  1453  	// Send a request
  1454  	start := time.Now()
  1455  	if _, err := nc2.Request("SVC", []byte("1h"), time.Second); err != nil {
  1456  		t.Fatalf("Expected a response")
  1457  	}
  1458  
  1459  	var sl server.ServiceLatency
  1460  	rmsg, _ := rsub.NextMsg(time.Second)
  1461  	json.Unmarshal(rmsg.Data, &sl)
  1462  
  1463  	checkServiceLatency(t, sl, start, serviceTime)
  1464  	extendedCheck(t, sl.Responder, "svc", "", srv.Name())
  1465  	extendedCheck(t, sl.Requestor, "client", "", srv.Name())
  1466  
  1467  	// Check reload.
  1468  	newConf := []byte(`
  1469  		listen: 127.0.0.1:-1
  1470  		accounts: {
  1471  		    SVC: {
  1472  		        users: [ {user: svc, password: pass} ]
  1473  		        exports: [  {
  1474  					service: "svc.echo"
  1475  					accounts: [CLIENT]
  1476  					latency: {
  1477  						sampling: 100%
  1478  						subject: latency.svc
  1479  					}
  1480  				} ]
  1481  		    },
  1482  		    CLIENT: {
  1483  		        users: [{user: client, password: pass} ]
  1484  			    imports: [ {service: {account: SVC, subject: svc.echo}, to: SVC} ]
  1485  		    },
  1486  		    SYS: { users: [{user: admin, password: pass}] }
  1487  		}
  1488  
  1489  		system_account: SYS
  1490  	`)
  1491  	if err := os.WriteFile(conf, newConf, 0600); err != nil {
  1492  		t.Fatalf("Error rewriting server's config file: %v", err)
  1493  	}
  1494  	if err := srv.Reload(); err != nil {
  1495  		t.Fatalf("Error on server reload: %v", err)
  1496  	}
  1497  
  1498  	if _, err = nc2.Request("SVC", []byte("1h"), time.Second); err != nil {
  1499  		t.Fatalf("Expected a response")
  1500  	}
  1501  
  1502  	var sl2 server.ServiceLatency
  1503  	rmsg, _ = rsub.NextMsg(time.Second)
  1504  	json.Unmarshal(rmsg.Data, &sl2)
  1505  	noShareCheck(t, sl2.Requestor)
  1506  }
  1507  
// Verifies behavior when a request times out and its latency sample arrives
// after the sample for a newer request (reordering), using header-triggered
// sampling with a 500ms response threshold.
func TestServiceLatencyLossTest(t *testing.T) {
	// assure that behavior with respect to requests timing out (and samples being reordered) is as expected.
	conf := createConfFile(t, []byte(`
		listen: 127.0.0.1:-1
		accounts: {
		    SVC: {
		        users: [ {user: svc, password: pass} ]
		        exports: [  {
					service: "svc.echo"
					threshold: "500ms"
					accounts: [CLIENT]
					latency: {
						sampling: headers
						subject: latency.svc
					}
				} ]
		    },
		    CLIENT: {
		        users: [{user: client, password: pass} ]
			    imports: [ {service: {account: SVC, subject: svc.echo}, to: SVC, share:true} ]
		    },
		    SYS: { users: [{user: admin, password: pass}] }
		}
		system_account: SYS
	`))
	srv, opts := RunServerWithConfig(conf)
	defer srv.Shutdown()

	// Responder connection
	ncr, err := nats.Connect(fmt.Sprintf("nats://svc:pass@%s:%d", opts.Host, opts.Port), nats.Name("responder"))
	if err != nil {
		t.Fatalf("Error on connect: %v", err)
	}
	defer ncr.Close()

	// Dedicated connection for receiving latency metrics.
	ncl, err := nats.Connect(fmt.Sprintf("nats://svc:pass@%s:%d", opts.Host, opts.Port), nats.Name("latency"))
	if err != nil {
		t.Fatalf("Error on connect: %v", err)
	}
	defer ncl.Close()
	// Table of expected state for which message.
	// This also codifies that the first message, in response to second request is ok.
	// Second message, in response to first request times out.
	expectedState := map[int]int{1: http.StatusOK, 2: http.StatusGatewayTimeout}
	msgCnt := 0
	start := time.Now().Add(250 * time.Millisecond)

	// Errors observed inside the latency callback; only read after wg.Wait(),
	// which provides the necessary happens-before ordering.
	var latErr []error
	// Listen for metrics
	wg := sync.WaitGroup{}
	wg.Add(2)
	rsub, _ := ncl.Subscribe("latency.svc", func(rmsg *nats.Msg) {
		defer wg.Done()
		var sl server.ServiceLatency
		json.Unmarshal(rmsg.Data, &sl)
		msgCnt++
		if want := expectedState[msgCnt]; want != sl.Status {
			latErr = append(latErr, fmt.Errorf("Expected different status for msg #%d: %d != %d", msgCnt, want, sl.Status))
		}
		if msgCnt > 1 {
			// The metric for the older (timed-out) request must arrive after
			// the newer one, i.e. the samples are reordered.
			if start.Before(sl.RequestStart) {
				latErr = append(latErr, fmt.Errorf("start times should indicate reordering %v : %v", start, sl.RequestStart))
			}
		}
		start = sl.RequestStart
		// NOTE(review): this only flags an error when the header value EQUALS
		// "msg-N", which never happens (send() sets X-B3-Sampled, not
		// Uber-Trace-Id), so this check is effectively a no-op. Looks like the
		// intent was to flag a missing header — confirm before changing.
		if strings.EqualFold(sl.RequestHeader.Get("Uber-Trace-Id"), fmt.Sprintf("msg-%d", msgCnt)) {
			latErr = append(latErr, fmt.Errorf("no header present"))
		}
	})
	defer rsub.Unsubscribe()
	// Setup requestor
	nc2, err := nats.Connect(fmt.Sprintf("nats://client:pass@%s:%d", opts.Host, opts.Port),
		nats.UseOldRequestStyle(), nats.Name("requestor"))
	if err != nil {
		t.Fatalf("Error on connect: %v", err)
	}
	defer nc2.Close()

	// Count responses delivered to the shared reply inbox.
	respCnt := int64(0)
	reply := nc2.NewRespInbox()
	repSub, _ := nc2.Subscribe(reply, func(msg *nats.Msg) {
		atomic.AddInt64(&respCnt, 1)
	})
	defer repSub.Unsubscribe()
	nc2.Flush()
	// use dedicated send that publishes requests using same reply subject
	// (the X-B3-Sampled header triggers latency sampling for each request)
	send := func(msg string) {
		if err := nc2.PublishMsg(&nats.Msg{Subject: "SVC", Data: []byte(msg), Reply: reply,
			Header: nats.Header{"X-B3-Sampled": []string{"1"}}}); err != nil {
			t.Fatalf("Expected a response got: %v", err)
		}
	}
	// Setup responder that skips responding and triggers next request OR responds
	sub, _ := ncr.Subscribe("svc.echo", func(msg *nats.Msg) {
		if string(msg.Data) != "msg2" {
			msg.Respond([]byte("response"))
		} else {
			wg.Add(1)
			go func() { // second request (use go routine to not block in responders callback)
				defer wg.Done()
				time.Sleep(250 * time.Millisecond)
				send("msg1") // will cause the first latency measurement
			}()
		}
	})
	ncr.Flush()
	ncl.Flush()
	nc2.Flush()
	defer sub.Unsubscribe()
	// Send first request, which is expected to timeout
	send("msg2")
	// wait till we got enough responses
	wg.Wait()
	if len(latErr) > 0 {
		t.Fatalf("Got errors %v", latErr)
	}
	// Only msg1 was answered; msg2 deliberately never got a reply.
	if atomic.LoadInt64(&respCnt) != 1 {
		t.Fatalf("Expected only one message")
	}
}
  1628  
// Verifies that latency sampling in "headers" mode is triggered by the
// standard distributed-tracing headers (Uber/Jaeger, B3, W3C traceparent),
// with and without requestor sharing, across the OK, service-unavailable and
// bad-request (no reply subject) outcomes.
func TestServiceLatencyHeaderTriggered(t *testing.T) {
	// receiveAndTest reads one latency metric and validates its status, the
	// responder/requestor details, and that exactly the tracing headers (and
	// no other request header) were recorded.
	receiveAndTest := func(t *testing.T, rsub *nats.Subscription, shared bool, header nats.Header, status int, srvName string) server.ServiceLatency {
		t.Helper()
		var sl server.ServiceLatency
		rmsg, _ := rsub.NextMsg(time.Second)
		if rmsg == nil {
			t.Fatal("Expected message")
			return sl
		}
		json.Unmarshal(rmsg.Data, &sl)
		if sl.Status != status {
			t.Fatalf("Expected different status %d != %d", status, sl.Status)
		}
		if status == http.StatusOK {
			extendedCheck(t, sl.Responder, "svc", "", srvName)
		}
		if shared {
			extendedCheck(t, sl.Requestor, "client", "", srvName)
		} else {
			noShareCheck(t, sl.Requestor)
		}
		// header are always included
		if v := sl.RequestHeader.Get("Some-Other"); v != "" {
			t.Fatalf("Expected header to be gone")
		}
		for k, value := range header {
			if v := sl.RequestHeader.Get(k); v != value[0] {
				t.Fatalf("Expected header %q to be set", k)
			}
		}
		return sl
	}
	// Each case is one tracing-header combination, run with requestor
	// sharing either enabled or disabled.
	for _, v := range []struct {
		shared bool
		header nats.Header
	}{
		{true, nats.Header{"Uber-Trace-Id": []string{"479fefe9525eddb:479fefe9525eddb:0:1"}}},
		{true, nats.Header{"X-B3-Sampled": []string{"1"}}},
		{true, nats.Header{"X-B3-TraceId": []string{"80f198ee56343ba864fe8b2a57d3eff7"}}},
		{true, nats.Header{"B3": []string{"80f198ee56343ba864fe8b2a57d3eff7-e457b5a2e4d86bd1-1-05e3ac9a4f6e3b90"}}},
		{true, nats.Header{"Traceparent": []string{"00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"}}},
		{false, nats.Header{"Uber-Trace-Id": []string{"479fefe9525eddb:479fefe9525eddb:0:1"}}},
		{false, nats.Header{"X-B3-Sampled": []string{"1"}}},
		{false, nats.Header{"X-B3-TraceId": []string{"80f198ee56343ba864fe8b2a57d3eff7"}}},
		{false, nats.Header{"B3": []string{"80f198ee56343ba864fe8b2a57d3eff7-e457b5a2e4d86bd1-1-05e3ac9a4f6e3b90"}}},
		{false, nats.Header{"Traceparent": []string{"00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"}}},
		{false, nats.Header{
			"X-B3-TraceId":      []string{"80f198ee56343ba864fe8b2a57d3eff7"},
			"X-B3-ParentSpanId": []string{"05e3ac9a4f6e3b90"},
			"X-B3-SpanId":       []string{"e457b5a2e4d86bd1"},
			"X-B3-Sampled":      []string{"1"},
		}},
		{false, nats.Header{
			"X-B3-TraceId":      []string{"80f198ee56343ba864fe8b2a57d3eff7"},
			"X-B3-ParentSpanId": []string{"05e3ac9a4f6e3b90"},
			"X-B3-SpanId":       []string{"e457b5a2e4d86bd1"},
		}},
		{false, nats.Header{
			"Uber-Trace-Id": []string{"479fefe9525eddb:479fefe9525eddb:0:1"},
			"Uberctx-X":     []string{"foo"},
		}},
		{false, nats.Header{
			"Traceparent": []string{"00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"},
			"Tracestate":  []string{"rojo=00f067aa0ba902b7,congo=t61rcWkgMzE"},
		}},
	} {
		t.Run(fmt.Sprintf("%s_%t_%s", t.Name(), v.shared, v.header), func(t *testing.T) {
			// The share flag is templated into the CLIENT import below.
			conf := createConfFile(t, []byte(fmt.Sprintf(`
				listen: 127.0.0.1:-1
				accounts: {
					SVC: {
						users: [ {user: svc, password: pass} ]
						exports: [  {
							service: "svc.echo"
							accounts: [CLIENT]
							latency: {
								sampling: headers
								subject: latency.svc
							}
						}]
					},
					CLIENT: {
						users: [{user: client, password: pass} ]
						imports: [ {service: {account: SVC, subject: svc.echo}, to: SVC, share:%t} ]
					},
					SYS: { users: [{user: admin, password: pass}] }
				}

				system_account: SYS
			`, v.shared)))
			srv, opts := RunServerWithConfig(conf)
			defer srv.Shutdown()

			// Responder
			nc, err := nats.Connect(fmt.Sprintf("nats://svc:pass@%s:%d", opts.Host, opts.Port))
			if err != nil {
				t.Fatalf("Error on connect: %v", err)
			}
			defer nc.Close()

			// Listen for metrics
			rsub, _ := nc.SubscribeSync("latency.svc")
			defer rsub.Unsubscribe()

			// Setup responder
			serviceTime := 25 * time.Millisecond
			sub, _ := nc.Subscribe("svc.echo", func(msg *nats.Msg) {
				time.Sleep(serviceTime)
				msg.Respond([]byte("world"))
			})
			nc.Flush()
			defer sub.Unsubscribe()

			// Setup requestor
			nc2, err := nats.Connect(fmt.Sprintf("nats://client:pass@%s:%d", opts.Host, opts.Port), nats.UseOldRequestStyle())
			if err != nil {
				t.Fatalf("Error on connect: %v", err)
			}
			defer nc2.Close()

			// Send a request carrying the tracing header(s) plus one unrelated
			// header that must NOT show up in the metric.
			start := time.Now()
			msg := &nats.Msg{
				Subject: "SVC",
				Data:    []byte("1h"),
				Header:  make(nats.Header),
			}
			for k, v := range v.header {
				for _, val := range v {
					msg.Header.Add(k, val)
				}
			}
			msg.Header.Add("Some-Other", "value")
			if _, err := nc2.RequestMsg(msg, 50*time.Millisecond); err != nil {
				t.Fatalf("Expected a response")
			}
			sl := receiveAndTest(t, rsub, v.shared, v.header, http.StatusOK, srv.Name())
			checkServiceLatency(t, sl, start, serviceTime)
			// shut down responder to test various error scenarios
			sub.Unsubscribe()
			nc.Flush()
			// Send a request without responder
			if _, err := nc2.RequestMsg(msg, 50*time.Millisecond); err == nil {
				t.Fatalf("Expected no response")
			}
			receiveAndTest(t, rsub, v.shared, v.header, http.StatusServiceUnavailable, srv.Name())

			// send a message without a response
			msg.Reply = ""
			if err := nc2.PublishMsg(msg); err != nil {
				t.Fatalf("Expected no error got %v", err)
			}
			receiveAndTest(t, rsub, v.shared, v.header, http.StatusBadRequest, srv.Name())
		})
	}
}
  1785  
  1786  // From a report by rip@nats.io on simple latency reporting missing in 2 server cluster setup.
  1787  func TestServiceLatencyMissingResults(t *testing.T) {
  1788  	accConf := createConfFile(t, []byte(`
  1789  		accounts {
  1790  		  one: {
  1791  		    users = [ {user: one, password: password} ]
  1792  		    imports = [ {service: {account: weather, subject: service.weather.requests.>}, to: service.weather.>, share: true} ]
  1793  		  }
  1794  		  weather: {
  1795  		    users = [ {user: weather, password: password} ]
  1796  		    exports = [ {
  1797  		        service: service.weather.requests.>
  1798  		        accounts: [one]
  1799  		        latency: { sampling: 100%, subject: service.weather.latency }
  1800  		      } ]
  1801  		  }
  1802  		}
  1803  	`))
  1804  
  1805  	s1Conf := createConfFile(t, []byte(fmt.Sprintf(`
  1806  		listen: 127.0.0.1:-1
  1807  		server_name: s1
  1808  		cluster { port: -1 }
  1809  		include %q
  1810  	`, filepath.Base(accConf))))
  1811  
  1812  	// Link accConf for relative import from s1Conf
  1813  	os.Link(accConf, filepath.Join(filepath.Dir(s1Conf), filepath.Base(accConf)))
  1814  
  1815  	s1, opts1 := RunServerWithConfig(s1Conf)
  1816  	defer s1.Shutdown()
  1817  
  1818  	s2Conf := createConfFile(t, []byte(fmt.Sprintf(`
  1819  		listen: 127.0.0.1:-1
  1820  		server_name: s2
  1821  		cluster {
  1822  			port: -1
  1823  			routes = [ nats-route://127.0.0.1:%d ]
  1824  		}
  1825  		include %q
  1826  	`, opts1.Cluster.Port, filepath.Base(accConf))))
  1827  
  1828  	// Link accConf for relative import from s2Conf
  1829  	os.Link(accConf, filepath.Join(filepath.Dir(s2Conf), filepath.Base(accConf)))
  1830  
  1831  	s2, opts2 := RunServerWithConfig(s2Conf)
  1832  	defer s2.Shutdown()
  1833  
  1834  	checkClusterFormed(t, s1, s2)
  1835  
  1836  	nc1, err := nats.Connect(fmt.Sprintf("nats://%s:%s@%s:%d", "weather", "password", opts1.Host, opts1.Port))
  1837  	if err != nil {
  1838  		t.Fatalf("Error on connect: %v", err)
  1839  	}
  1840  	defer nc1.Close()
  1841  
  1842  	// Create responder
  1843  	sub, _ := nc1.Subscribe("service.weather.requests.>", func(msg *nats.Msg) {
  1844  		time.Sleep(25 * time.Millisecond)
  1845  		msg.Respond([]byte("sunny!"))
  1846  	})
  1847  	defer sub.Unsubscribe()
  1848  
  1849  	// Create sync listener for latency.
  1850  	latSubj := "service.weather.latency"
  1851  	lsub, _ := nc1.SubscribeSync(latSubj)
  1852  	defer lsub.Unsubscribe()
  1853  	nc1.Flush()
  1854  
  1855  	// Make sure the subscription propagates to s2 server.
  1856  	checkSubInterest(t, s2, "weather", latSubj, time.Second)
  1857  
  1858  	// Create requestor on s2.
  1859  	nc2, err := nats.Connect(fmt.Sprintf("nats://%s:%s@%s:%d", "one", "password", opts2.Host, opts2.Port), nats.UseOldRequestStyle())
  1860  	if err != nil {
  1861  		t.Fatalf("Error on connect: %v", err)
  1862  	}
  1863  	defer nc2.Close()
  1864  
  1865  	nc2.Request("service.weather.los_angeles", nil, time.Second)
  1866  
  1867  	lr, err := lsub.NextMsg(time.Second)
  1868  	if err != nil {
  1869  		t.Fatalf("Expected a latency result, got %v", err)
  1870  	}
  1871  	// Make sure we reported ok and have valid results for service, system and total.
  1872  	var sl server.ServiceLatency
  1873  	json.Unmarshal(lr.Data, &sl)
  1874  
  1875  	if sl.Status != 200 {
  1876  		t.Fatalf("Expected a 200 status, got %d\n", sl.Status)
  1877  	}
  1878  	if sl.ServiceLatency == 0 || sl.SystemLatency == 0 || sl.TotalLatency == 0 {
  1879  		t.Fatalf("Received invalid tracking measurements, %d %d %d", sl.ServiceLatency, sl.SystemLatency, sl.TotalLatency)
  1880  	}
  1881  }
  1882  
  1883  // To test a bug I was seeing.
  1884  func TestServiceLatencyDoubleResponse(t *testing.T) {
  1885  	sc := createSuperCluster(t, 3, 1)
  1886  	defer sc.shutdown()
  1887  
  1888  	// Now add in new service export to FOO and have bar import that with tracking enabled.
  1889  	sc.setupLatencyTracking(t, 100)
  1890  
  1891  	// Responder
  1892  	nc := clientConnectWithName(t, sc.clusters[0].opts[0], "foo", "service22")
  1893  	defer nc.Close()
  1894  
  1895  	// Setup responder
  1896  	sub, _ := nc.Subscribe("ngs.usage.*", func(msg *nats.Msg) {
  1897  		msg.Respond([]byte("22 msgs"))
  1898  		msg.Respond([]byte("boom"))
  1899  	})
  1900  	nc.Flush()
  1901  	defer sub.Unsubscribe()
  1902  
  1903  	// Listen for metrics
  1904  	rsub, _ := nc.SubscribeSync("latency.svc")
  1905  
  1906  	// Requestor
  1907  	nc2 := clientConnect(t, sc.clusters[0].opts[2], "bar")
  1908  	defer nc2.Close()
  1909  
  1910  	// Send a request
  1911  	if _, err := nc2.Request("ngs.usage", []byte("1h"), time.Second); err != nil {
  1912  		t.Fatalf("Expected a response")
  1913  	}
  1914  
  1915  	rsub.NextMsg(time.Second)
  1916  	time.Sleep(time.Second)
  1917  }