github.com/grafana/pyroscope@v1.18.0/pkg/test/integration/cluster/cluster_v2.go (about) 1 package cluster 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 9 "google.golang.org/grpc" 10 "google.golang.org/grpc/credentials/insecure" 11 12 "github.com/grafana/pyroscope/pkg/metastore/raftnode/raftnodepb" 13 ) 14 15 func WithV2() ClusterOption { 16 return func(c *Cluster) { 17 c.v2 = true 18 c.expectedComponents = []string{ 19 "distributor", 20 "distributor", 21 "segment-writer", 22 "segment-writer", 23 "metastore", 24 "metastore", 25 "metastore", 26 "query-frontend", 27 "query-backend", 28 "compaction-worker", 29 } 30 } 31 } 32 33 func (c *Cluster) metastoreConfig() (string, error) { 34 cfgPath := filepath.Join(c.tmpDir, "metastore.yaml") 35 36 // check if the file exists 37 if _, err := os.Stat(cfgPath); err == nil { 38 return cfgPath, nil 39 } else if !os.IsNotExist(err) { 40 return "", err 41 } 42 43 // ensure compaction worker are picking up l0 compaction straight away 44 metastoreConfig := ` 45 metastore: 46 levels: 47 - maxblocks: 20 48 maxage: 2000000000 # 2 seconds 49 ` 50 tmpFile, err := os.Create(cfgPath) 51 if err != nil { 52 return "", err 53 } 54 if _, err := tmpFile.Write([]byte(metastoreConfig)); err != nil { 55 return "", err 56 } 57 if err := tmpFile.Close(); err != nil { 58 return "", err 59 } 60 return tmpFile.Name(), nil 61 } 62 63 func (c *Cluster) metastores() []*Component { 64 metastores := make([]*Component, 0, len(c.perTarget["metastore"])) 65 for _, compidx := range c.perTarget["metastore"] { 66 metastores = append(metastores, c.Components[compidx]) 67 } 68 return metastores 69 } 70 71 func (c *Cluster) metastoreExpectedLeader() *Component { 72 metastores := c.metastores() 73 return metastores[len(metastores)-1] 74 } 75 76 func (c *Cluster) CompactionJobsFinished(ctx context.Context) (float64, error) { 77 leader := c.metastoreExpectedLeader() 78 79 floatCh := make(chan float64, 1) 80 check := leader.checkMetrics(). 81 addRetrieveValue(floatCh, "pyroscope_metastore_compaction_scheduler_queue_completed_jobs_total", "level", "0") 82 83 if err := check.run(ctx); err != nil { 84 return 0, err 85 } 86 close(floatCh) 87 88 sum := 0.0 89 found := false 90 for v := range floatCh { 91 found = true 92 sum += v 93 } 94 if !found { 95 return 0, fmt.Errorf("no value received") 96 } 97 return sum, nil 98 } 99 100 func (c *Cluster) v2Prepare(_ context.Context, memberlistJoin []string) error { 101 metastoreLeader := c.metastoreExpectedLeader() 102 103 for _, comp := range c.Components { 104 if err := c.v2PrepareComponent(comp, metastoreLeader); err != nil { 105 return err 106 } 107 108 // handle memberlist join 109 for _, m := range memberlistJoin { 110 comp.flags = append(comp.flags, fmt.Sprintf("-memberlist.join=%s", m)) 111 } 112 } 113 114 return nil 115 } 116 117 func (c *Cluster) v2PrepareComponent(comp *Component, metastoreLeader *Component) error { 118 dataDir := c.dataDir(comp) 119 120 comp.cfg.V2 = true 121 comp.flags = c.commonFlags(comp) 122 123 comp.flags = append(comp.flags, 124 "-enable-query-backend=true", 125 "-write-path=segment-writer", 126 "-metastore.min-ready-duration=0", 127 fmt.Sprintf("-metastore.address=%s:%d/%s", listenAddr, metastoreLeader.grpcPort, metastoreLeader.nodeName()), 128 ) 129 130 if c.debuginfodURL != "" && comp.Target == "query-frontend" { 131 comp.flags = append(comp.flags, 132 fmt.Sprintf("-symbolizer.debuginfod-url=%s", c.debuginfodURL), 133 "-symbolizer.enabled=true", 134 ) 135 } 136 137 if comp.Target == "segment-writer" { 138 comp.flags = append(comp.flags, 139 "-segment-writer.num-tokens=1", 140 "-segment-writer.min-ready-duration=0", 141 "-segment-writer.lifecycler.addr="+listenAddr, 142 "-segment-writer.lifecycler.ID="+comp.nodeName(), 143 "-segment-writer.heartbeat-period=1s", 144 ) 145 } 146 147 if comp.Target == "compaction-worker" { 148 comp.flags = append(comp.flags, 149 "-compaction-worker.job-concurrency=20", 150 "-compaction-worker.job-poll-interval=1s", 151 ) 152 } 153 154 // register query-backends in the frontend and themselves 155 if comp.Target == "query-frontend" || comp.Target == "query-backend" { 156 for _, compidx := range c.perTarget["query-backend"] { 157 comp.flags = append(comp.flags, 158 fmt.Sprintf("-query-backend.address=%s:%d", listenAddr, c.Components[compidx].grpcPort), 159 ) 160 } 161 } 162 163 // handle metastore folders and ports 164 if comp.Target == "metastore" { 165 cfgPath, err := c.metastoreConfig() 166 if err != nil { 167 return err 168 } 169 comp.flags = append(comp.flags, 170 fmt.Sprint("-config.file=", cfgPath), 171 fmt.Sprintf("-metastore.data-dir=%s", dataDir+"../metastore-ephemeral"), 172 fmt.Sprintf("-metastore.raft.dir=%s", dataDir+"../metastore-raft"), 173 fmt.Sprintf("-metastore.raft.snapshots-dir=%s", dataDir+"../metastore-snapshots"), 174 fmt.Sprintf("-metastore.raft.bind-address=%s:%d", listenAddr, comp.raftPort), 175 fmt.Sprintf("-metastore.raft.advertise-address=%s:%d", listenAddr, comp.raftPort), 176 fmt.Sprintf("-metastore.raft.server-id=%s", comp.nodeName()), 177 fmt.Sprintf("-metastore.raft.bootstrap-expect-peers=%d", len(c.perTarget[comp.Target])), 178 ) 179 180 // add bootstrap peers 181 for _, compidx := range c.perTarget[comp.Target] { 182 peer := c.Components[compidx] 183 comp.flags = append(comp.flags, 184 fmt.Sprintf("-metastore.raft.bootstrap-peers=%s:%d/%s", listenAddr, peer.raftPort, peer.nodeName()), 185 ) 186 } 187 } 188 189 return nil 190 } 191 192 func (c *Cluster) v2ReadyCheckComponent(ctx context.Context, t *Component) (bool, error) { 193 switch t.Target { 194 case "metastore": 195 return true, t.metastoreReadyCheck(ctx, c.metastores(), c.metastoreExpectedLeader()) 196 case "distributor": 197 return true, t.distributorReadyCheck(ctx, 0, len(c.perTarget["segment-writer"]), len(c.perTarget["distributor"])) 198 } 199 return false, nil 200 } 201 202 // for the metastore, we need to check that the first replica is the leader, as this is configured statically as the client for other components. 203 func (comp *Component) metastoreReadyCheck(ctx context.Context, metastores []*Component, expectedLeader *Component) error { 204 expectedPeers := len(metastores) 205 206 opts := []grpc.DialOption{ 207 grpc.WithTransportCredentials(insecure.NewCredentials()), 208 } 209 cc, err := grpc.NewClient(fmt.Sprintf("%s:%d", listenAddr, comp.grpcPort), opts...) 210 if err != nil { 211 return err 212 } 213 214 client := raftnodepb.NewRaftNodeServiceClient(cc) 215 216 nodeInfo, err := client.NodeInfo(ctx, &raftnodepb.NodeInfoRequest{}) 217 if err != nil { 218 return err 219 } 220 221 // only ready once all peers are here 222 if len(nodeInfo.Node.Peers) != expectedPeers { 223 return fmt.Errorf("unexpected peer count: exp=%d actual=%d", expectedPeers, len(nodeInfo.Node.Peers)) 224 } 225 226 // only ready once leader is known 227 if nodeInfo.Node.LeaderId == "" { 228 return fmt.Errorf("leader not known on node %s", comp.nodeName()) 229 } 230 231 // exit if we are not the leader 232 if nodeInfo.Node.LeaderId != nodeInfo.Node.ServerId { 233 return nil 234 } 235 236 // if we are replica 0 we are done as we are already leader 237 if comp.replica == expectedPeers-1 { 238 return nil 239 } 240 241 // promote last metastore to new leader 242 _, err = client.PromoteToLeader(ctx, &raftnodepb.PromoteToLeaderRequest{ 243 ServerId: fmt.Sprintf("%s:%d/%s", listenAddr, expectedLeader.raftPort, expectedLeader.nodeName()), 244 CurrentTerm: nodeInfo.Node.CurrentTerm, 245 }) 246 return err 247 } 248 249 func (c *Cluster) GetMetastoreRaftNodeClient() (raftnodepb.RaftNodeServiceClient, error) { 250 leader := c.metastoreExpectedLeader() 251 opts := []grpc.DialOption{ 252 grpc.WithTransportCredentials(insecure.NewCredentials()), 253 } 254 cc, err := grpc.NewClient(fmt.Sprintf("127.0.0.1:%d", leader.grpcPort), opts...) 255 if err != nil { 256 return nil, err 257 } 258 259 return raftnodepb.NewRaftNodeServiceClient(cc), nil 260 } 261 262 func (c *Cluster) AddMetastoreWithAutoJoin(ctx context.Context) error { 263 leader := c.metastoreExpectedLeader() 264 265 comp := newComponent("metastore") 266 comp.replica = len(c.perTarget["metastore"]) 267 c.Components = append(c.Components, comp) 268 c.perTarget["metastore"] = append(c.perTarget["metastore"], len(c.Components)-1) 269 270 if err := c.v2PrepareComponent(comp, leader); err != nil { 271 return err 272 } 273 comp.flags = append(comp.flags, "-metastore.raft.auto-join=true") 274 275 p, err := comp.start(ctx) 276 if err != nil { 277 return fmt.Errorf("failed to start component: %w", err) 278 } 279 comp.p = p 280 281 c.wg.Add(1) 282 go func() { 283 defer c.wg.Done() 284 if err := p.Run(); err != nil { 285 fmt.Printf("metastore with auto-join stopped with error: %v\n", err) 286 } 287 }() 288 289 return nil 290 }