github.com/grafana/pyroscope@v1.18.0/pkg/test/integration/cluster/cluster_v2.go (about)

     1  package cluster
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  
     9  	"google.golang.org/grpc"
    10  	"google.golang.org/grpc/credentials/insecure"
    11  
    12  	"github.com/grafana/pyroscope/pkg/metastore/raftnode/raftnodepb"
    13  )
    14  
    15  func WithV2() ClusterOption {
    16  	return func(c *Cluster) {
    17  		c.v2 = true
    18  		c.expectedComponents = []string{
    19  			"distributor",
    20  			"distributor",
    21  			"segment-writer",
    22  			"segment-writer",
    23  			"metastore",
    24  			"metastore",
    25  			"metastore",
    26  			"query-frontend",
    27  			"query-backend",
    28  			"compaction-worker",
    29  		}
    30  	}
    31  }
    32  
    33  func (c *Cluster) metastoreConfig() (string, error) {
    34  	cfgPath := filepath.Join(c.tmpDir, "metastore.yaml")
    35  
    36  	// check if the file exists
    37  	if _, err := os.Stat(cfgPath); err == nil {
    38  		return cfgPath, nil
    39  	} else if !os.IsNotExist(err) {
    40  		return "", err
    41  	}
    42  
    43  	// ensure compaction worker are picking up l0 compaction straight away
    44  	metastoreConfig := `
    45  metastore:
    46      levels:
    47          - maxblocks: 20
    48            maxage: 2000000000 # 2 seconds
    49  `
    50  	tmpFile, err := os.Create(cfgPath)
    51  	if err != nil {
    52  		return "", err
    53  	}
    54  	if _, err := tmpFile.Write([]byte(metastoreConfig)); err != nil {
    55  		return "", err
    56  	}
    57  	if err := tmpFile.Close(); err != nil {
    58  		return "", err
    59  	}
    60  	return tmpFile.Name(), nil
    61  }
    62  
    63  func (c *Cluster) metastores() []*Component {
    64  	metastores := make([]*Component, 0, len(c.perTarget["metastore"]))
    65  	for _, compidx := range c.perTarget["metastore"] {
    66  		metastores = append(metastores, c.Components[compidx])
    67  	}
    68  	return metastores
    69  }
    70  
    71  func (c *Cluster) metastoreExpectedLeader() *Component {
    72  	metastores := c.metastores()
    73  	return metastores[len(metastores)-1]
    74  }
    75  
    76  func (c *Cluster) CompactionJobsFinished(ctx context.Context) (float64, error) {
    77  	leader := c.metastoreExpectedLeader()
    78  
    79  	floatCh := make(chan float64, 1)
    80  	check := leader.checkMetrics().
    81  		addRetrieveValue(floatCh, "pyroscope_metastore_compaction_scheduler_queue_completed_jobs_total", "level", "0")
    82  
    83  	if err := check.run(ctx); err != nil {
    84  		return 0, err
    85  	}
    86  	close(floatCh)
    87  
    88  	sum := 0.0
    89  	found := false
    90  	for v := range floatCh {
    91  		found = true
    92  		sum += v
    93  	}
    94  	if !found {
    95  		return 0, fmt.Errorf("no value received")
    96  	}
    97  	return sum, nil
    98  }
    99  
   100  func (c *Cluster) v2Prepare(_ context.Context, memberlistJoin []string) error {
   101  	metastoreLeader := c.metastoreExpectedLeader()
   102  
   103  	for _, comp := range c.Components {
   104  		if err := c.v2PrepareComponent(comp, metastoreLeader); err != nil {
   105  			return err
   106  		}
   107  
   108  		// handle memberlist join
   109  		for _, m := range memberlistJoin {
   110  			comp.flags = append(comp.flags, fmt.Sprintf("-memberlist.join=%s", m))
   111  		}
   112  	}
   113  
   114  	return nil
   115  }
   116  
   117  func (c *Cluster) v2PrepareComponent(comp *Component, metastoreLeader *Component) error {
   118  	dataDir := c.dataDir(comp)
   119  
   120  	comp.cfg.V2 = true
   121  	comp.flags = c.commonFlags(comp)
   122  
   123  	comp.flags = append(comp.flags,
   124  		"-enable-query-backend=true",
   125  		"-write-path=segment-writer",
   126  		"-metastore.min-ready-duration=0",
   127  		fmt.Sprintf("-metastore.address=%s:%d/%s", listenAddr, metastoreLeader.grpcPort, metastoreLeader.nodeName()),
   128  	)
   129  
   130  	if c.debuginfodURL != "" && comp.Target == "query-frontend" {
   131  		comp.flags = append(comp.flags,
   132  			fmt.Sprintf("-symbolizer.debuginfod-url=%s", c.debuginfodURL),
   133  			"-symbolizer.enabled=true",
   134  		)
   135  	}
   136  
   137  	if comp.Target == "segment-writer" {
   138  		comp.flags = append(comp.flags,
   139  			"-segment-writer.num-tokens=1",
   140  			"-segment-writer.min-ready-duration=0",
   141  			"-segment-writer.lifecycler.addr="+listenAddr,
   142  			"-segment-writer.lifecycler.ID="+comp.nodeName(),
   143  			"-segment-writer.heartbeat-period=1s",
   144  		)
   145  	}
   146  
   147  	if comp.Target == "compaction-worker" {
   148  		comp.flags = append(comp.flags,
   149  			"-compaction-worker.job-concurrency=20",
   150  			"-compaction-worker.job-poll-interval=1s",
   151  		)
   152  	}
   153  
   154  	// register query-backends in the frontend and themselves
   155  	if comp.Target == "query-frontend" || comp.Target == "query-backend" {
   156  		for _, compidx := range c.perTarget["query-backend"] {
   157  			comp.flags = append(comp.flags,
   158  				fmt.Sprintf("-query-backend.address=%s:%d", listenAddr, c.Components[compidx].grpcPort),
   159  			)
   160  		}
   161  	}
   162  
   163  	// handle metastore folders and ports
   164  	if comp.Target == "metastore" {
   165  		cfgPath, err := c.metastoreConfig()
   166  		if err != nil {
   167  			return err
   168  		}
   169  		comp.flags = append(comp.flags,
   170  			fmt.Sprint("-config.file=", cfgPath),
   171  			fmt.Sprintf("-metastore.data-dir=%s", dataDir+"../metastore-ephemeral"),
   172  			fmt.Sprintf("-metastore.raft.dir=%s", dataDir+"../metastore-raft"),
   173  			fmt.Sprintf("-metastore.raft.snapshots-dir=%s", dataDir+"../metastore-snapshots"),
   174  			fmt.Sprintf("-metastore.raft.bind-address=%s:%d", listenAddr, comp.raftPort),
   175  			fmt.Sprintf("-metastore.raft.advertise-address=%s:%d", listenAddr, comp.raftPort),
   176  			fmt.Sprintf("-metastore.raft.server-id=%s", comp.nodeName()),
   177  			fmt.Sprintf("-metastore.raft.bootstrap-expect-peers=%d", len(c.perTarget[comp.Target])),
   178  		)
   179  
   180  		// add bootstrap peers
   181  		for _, compidx := range c.perTarget[comp.Target] {
   182  			peer := c.Components[compidx]
   183  			comp.flags = append(comp.flags,
   184  				fmt.Sprintf("-metastore.raft.bootstrap-peers=%s:%d/%s", listenAddr, peer.raftPort, peer.nodeName()),
   185  			)
   186  		}
   187  	}
   188  
   189  	return nil
   190  }
   191  
   192  func (c *Cluster) v2ReadyCheckComponent(ctx context.Context, t *Component) (bool, error) {
   193  	switch t.Target {
   194  	case "metastore":
   195  		return true, t.metastoreReadyCheck(ctx, c.metastores(), c.metastoreExpectedLeader())
   196  	case "distributor":
   197  		return true, t.distributorReadyCheck(ctx, 0, len(c.perTarget["segment-writer"]), len(c.perTarget["distributor"]))
   198  	}
   199  	return false, nil
   200  }
   201  
   202  // for the metastore, we need to check that the first replica is the leader, as this is configured statically as the client for other components.
   203  func (comp *Component) metastoreReadyCheck(ctx context.Context, metastores []*Component, expectedLeader *Component) error {
   204  	expectedPeers := len(metastores)
   205  
   206  	opts := []grpc.DialOption{
   207  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   208  	}
   209  	cc, err := grpc.NewClient(fmt.Sprintf("%s:%d", listenAddr, comp.grpcPort), opts...)
   210  	if err != nil {
   211  		return err
   212  	}
   213  
   214  	client := raftnodepb.NewRaftNodeServiceClient(cc)
   215  
   216  	nodeInfo, err := client.NodeInfo(ctx, &raftnodepb.NodeInfoRequest{})
   217  	if err != nil {
   218  		return err
   219  	}
   220  
   221  	// only ready once all peers are here
   222  	if len(nodeInfo.Node.Peers) != expectedPeers {
   223  		return fmt.Errorf("unexpected peer count: exp=%d actual=%d", expectedPeers, len(nodeInfo.Node.Peers))
   224  	}
   225  
   226  	// only ready once leader is known
   227  	if nodeInfo.Node.LeaderId == "" {
   228  		return fmt.Errorf("leader not known on node %s", comp.nodeName())
   229  	}
   230  
   231  	// exit if we are not the leader
   232  	if nodeInfo.Node.LeaderId != nodeInfo.Node.ServerId {
   233  		return nil
   234  	}
   235  
   236  	// if we are replica 0 we are done as we are already leader
   237  	if comp.replica == expectedPeers-1 {
   238  		return nil
   239  	}
   240  
   241  	// promote last metastore to new leader
   242  	_, err = client.PromoteToLeader(ctx, &raftnodepb.PromoteToLeaderRequest{
   243  		ServerId:    fmt.Sprintf("%s:%d/%s", listenAddr, expectedLeader.raftPort, expectedLeader.nodeName()),
   244  		CurrentTerm: nodeInfo.Node.CurrentTerm,
   245  	})
   246  	return err
   247  }
   248  
   249  func (c *Cluster) GetMetastoreRaftNodeClient() (raftnodepb.RaftNodeServiceClient, error) {
   250  	leader := c.metastoreExpectedLeader()
   251  	opts := []grpc.DialOption{
   252  		grpc.WithTransportCredentials(insecure.NewCredentials()),
   253  	}
   254  	cc, err := grpc.NewClient(fmt.Sprintf("127.0.0.1:%d", leader.grpcPort), opts...)
   255  	if err != nil {
   256  		return nil, err
   257  	}
   258  
   259  	return raftnodepb.NewRaftNodeServiceClient(cc), nil
   260  }
   261  
   262  func (c *Cluster) AddMetastoreWithAutoJoin(ctx context.Context) error {
   263  	leader := c.metastoreExpectedLeader()
   264  
   265  	comp := newComponent("metastore")
   266  	comp.replica = len(c.perTarget["metastore"])
   267  	c.Components = append(c.Components, comp)
   268  	c.perTarget["metastore"] = append(c.perTarget["metastore"], len(c.Components)-1)
   269  
   270  	if err := c.v2PrepareComponent(comp, leader); err != nil {
   271  		return err
   272  	}
   273  	comp.flags = append(comp.flags, "-metastore.raft.auto-join=true")
   274  
   275  	p, err := comp.start(ctx)
   276  	if err != nil {
   277  		return fmt.Errorf("failed to start component: %w", err)
   278  	}
   279  	comp.p = p
   280  
   281  	c.wg.Add(1)
   282  	go func() {
   283  		defer c.wg.Done()
   284  		if err := p.Run(); err != nil {
   285  			fmt.Printf("metastore with auto-join stopped with error: %v\n", err)
   286  		}
   287  	}()
   288  
   289  	return nil
   290  }