github.com/matrixorigin/matrixone@v1.2.0/pkg/clusterservice/cluster.go

// Copyright 2023 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package clusterservice

import (
	"context"
	"sync"
	"sync/atomic"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/log"
	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/common/stopper"
	logpb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
	"go.uber.org/zap"
)

// GetMOCluster gets the MO cluster from the process-level runtime.
func GetMOCluster() MOCluster {
	timeout := time.Second * 10
	now := time.Now()
	for {
		v, ok := runtime.ProcessLevelRuntime().GetGlobalVariables(runtime.ClusterService)
		if !ok {
			if time.Since(now) > timeout {
				panic("no mocluster service")
			}
			time.Sleep(time.Second)
			continue
		}
		return v.(MOCluster)
	}
}

// Option is an option for creating the cluster.
type Option func(*cluster)

// WithServices sets the initial cn and tn services.
func WithServices(
	cnServices []metadata.CNService,
	tnServices []metadata.TNService) Option {
	return func(c *cluster) {
		new := c.copyServices()
		new.addCN(cnServices)
		new.addTN(tnServices)
		c.services.Store(new)
	}
}

// WithDisableRefresh disables refreshing from hakeeper.
func WithDisableRefresh() Option {
	return func(c *cluster) {
		c.options.disableRefresh = true
	}
}

type cluster struct {
	logger          *log.MOLogger
	stopper         *stopper.Stopper
	client          ClusterClient
	refreshInterval time.Duration
	forceRefreshC   chan struct{}
	readyOnce       sync.Once
	readyC          chan struct{}
	services        atomic.Pointer[services]
	options         struct {
		disableRefresh bool
	}
}

// NewMOCluster creates a MOCluster backed by a HAKeeperClient. MOCluster
// synchronizes information from HAKeeper and forcibly refreshes the
// information once every refreshInterval.
//
// TODO(fagongzi): extend hakeeper to support event-driven original message changes
func NewMOCluster(
	client ClusterClient,
	refreshInterval time.Duration,
	opts ...Option) MOCluster {
	logger := runtime.ProcessLevelRuntime().Logger().Named("mo-cluster")
	c := &cluster{
		logger:          logger,
		stopper:         stopper.NewStopper("mo-cluster", stopper.WithLogger(logger.RawLogger())),
		client:          client,
		forceRefreshC:   make(chan struct{}, 1),
		readyC:          make(chan struct{}),
		refreshInterval: refreshInterval,
	}

	c.services.Store(&services{})

	for _, opt := range opts {
		opt(c)
	}
	if !c.options.disableRefresh {
		if err := c.stopper.RunTask(c.refreshTask); err != nil {
			panic(err)
		}
	} else {
		c.readyOnce.Do(func() {
			close(c.readyC)
		})
	}
	return c
}
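
// exampleNewTestCluster is an illustrative sketch, not part of the original
// file: it shows one way the options above are typically combined to build a
// cluster that never talks to HAKeeper (e.g. in unit tests). The nil
// ClusterClient is only safe because WithDisableRefresh keeps the refresh task
// from starting and makes ForceRefresh a no-op, so the client is never used.
// It also assumes the process-level runtime has already been initialized,
// since NewMOCluster fetches its logger from there.
func exampleNewTestCluster(
	cns []metadata.CNService,
	tns []metadata.TNService) MOCluster {
	return NewMOCluster(
		nil,       // never used: refresh is disabled via the option below
		time.Hour, // irrelevant without the background refresh task
		WithDisableRefresh(),   // closes readyC immediately, so readers never block
		WithServices(cns, tns), // seed the initial service snapshot
	)
}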

func (c *cluster) GetCNService(selector Selector, apply func(metadata.CNService) bool) {
	c.waitReady()

	s := c.services.Load()
	for _, cn := range s.cn {
		// If the all field is false, the work state of the CN service MUST be
		// working before we do the filter job. If the state is not working, it
		// means that the CN may be marked as draining and is going to be
		// removed, or has already been removed.
		// The state Unknown is allowed here to make many test cases pass, and
		// it does not affect the function.
		if (selector.all || cn.WorkState == metadata.WorkState_Working ||
			cn.WorkState == metadata.WorkState_Unknown) &&
			selector.filterCN(cn) {
			if !apply(cn) {
				return
			}
		}
	}
}

func (c *cluster) GetCNServiceWithoutWorkingState(selector Selector, apply func(metadata.CNService) bool) {
	c.waitReady()

	s := c.services.Load()
	for _, cn := range s.cn {
		if selector.filterCN(cn) {
			if !apply(cn) {
				return
			}
		}
	}
}

func (c *cluster) GetTNService(selector Selector, apply func(metadata.TNService) bool) {
	c.waitReady()

	s := c.services.Load()
	for _, tn := range s.tn {
		if selector.filterTN(tn) {
			if !apply(tn) {
				return
			}
		}
	}
}

func (c *cluster) GetAllTNServices() []metadata.TNService {
	c.waitReady()
	s := c.services.Load()
	return s.tn
}

func (c *cluster) ForceRefresh(sync bool) {
	if c.options.disableRefresh {
		return
	}
	if sync {
		c.refresh()
		return
	}

	select {
	case c.forceRefreshC <- struct{}{}:
	default:
	}
}

func (c *cluster) Close() {
	c.waitReady()
	c.stopper.Stop()
	close(c.forceRefreshC)
}
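
// examplePickOneCN is an illustrative helper, not part of the original file:
// it shows the calling convention of GetCNService above, which invokes apply
// for every CN that passes the work-state check and the selector filter, and
// stops iterating as soon as apply returns false. Note that the call blocks in
// waitReady until the first refresh has completed (or returns immediately when
// refresh is disabled). How the Selector is built is out of scope here.
func examplePickOneCN(c *cluster, selector Selector) (metadata.CNService, bool) {
	var (
		picked metadata.CNService
		found  bool
	)
	c.GetCNService(selector, func(cn metadata.CNService) bool {
		picked = cn
		found = true
		return false // stop after the first matching CN
	})
	return picked, found
}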

// DebugUpdateCNLabel implements the MOCluster interface.
func (c *cluster) DebugUpdateCNLabel(uuid string, kvs map[string][]string) error {
	ctx, cancel := context.WithTimeout(context.TODO(), time.Second*3)
	defer cancel()
	convert := make(map[string]metadata.LabelList)
	for k, v := range kvs {
		convert[k] = metadata.LabelList{Labels: v}
	}
	label := logpb.CNStoreLabel{
		UUID:   uuid,
		Labels: convert,
	}
	proxyClient := c.client.(labelSupportedClient)
	if err := proxyClient.UpdateCNLabel(ctx, label); err != nil {
		return err
	}
	return nil
}

func (c *cluster) DebugUpdateCNWorkState(uuid string, state int) error {
	ctx, cancel := context.WithTimeout(context.TODO(), time.Second*3)
	defer cancel()
	wstate := logpb.CNWorkState{
		UUID:  uuid,
		State: metadata.WorkState(state),
	}
	proxyClient := c.client.(labelSupportedClient)
	if err := proxyClient.UpdateCNWorkState(ctx, wstate); err != nil {
		return err
	}
	return nil
}
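
// exampleRelabelCN is an illustrative sketch, not part of the original file:
// it shows the shape of the kvs argument that DebugUpdateCNLabel converts into
// a logpb.CNStoreLabel (each key maps to a metadata.LabelList). The label keys
// and values below are made up. Note that both debug methods above type-assert
// the client to labelSupportedClient, so they only work with a client that
// supports label and work-state updates.
func exampleRelabelCN(c *cluster, cnUUID string) error {
	return c.DebugUpdateCNLabel(cnUUID, map[string][]string{
		"account": {"tenant-a"},          // hypothetical label key and value
		"role":    {"read", "analytics"}, // a key can carry multiple values
	})
}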

func (c *cluster) RemoveCN(id string) {
	new := c.copyServices()
	values := new.cn[:0]
	for _, s := range new.cn {
		if s.ServiceID != id {
			values = append(values, s)
		}
	}
	new.cn = values
	c.services.Store(new)
}

func (c *cluster) AddCN(s metadata.CNService) {
	new := c.copyServices()
	new.cn = append(new.cn, s)
	c.services.Store(new)
}

func (c *cluster) waitReady() {
	<-c.readyC
}

func (c *cluster) refreshTask(ctx context.Context) {
	c.ForceRefresh(false)

	timer := time.NewTimer(c.refreshInterval)
	defer timer.Stop()

	for {
		select {
		case <-ctx.Done():
			c.logger.Info("refresh cluster details task stopped")
			return
		case <-timer.C:
			c.refresh()
			timer.Reset(c.refreshInterval)
		case <-c.forceRefreshC:
			c.refresh()
		}
	}
}

func (c *cluster) refresh() {
	defer c.logger.LogAction("refresh from hakeeper",
		log.DefaultLogOptions().WithLevel(zap.DebugLevel))()

	ctx, cancel := context.WithTimeout(context.Background(), c.refreshInterval)
	defer cancel()

	details, err := c.client.GetClusterDetails(ctx)
	if err != nil {
		c.logger.Error("failed to refresh cluster details from hakeeper",
			zap.Error(err))
		return
	}

	c.logger.Debug("refresh cluster details from hakeeper",
		zap.Int("cn-count", len(details.CNStores)),
		zap.Int("dn-count", len(details.TNStores)))

	new := &services{}
	for _, cn := range details.CNStores {
		v := newCNService(cn)
		new.addCN([]metadata.CNService{v})
		if c.logger.Enabled(zap.DebugLevel) {
			c.logger.Debug("cn service added", zap.String("cn", v.DebugString()))
		}
	}
	for _, tn := range details.TNStores {
		v := newTNService(tn)
		new.addTN([]metadata.TNService{v})
		if c.logger.Enabled(zap.DebugLevel) {
			c.logger.Debug("dn service added", zap.String("dn", v.DebugString()))
		}
	}
	c.services.Store(new)
	c.readyOnce.Do(func() {
		close(c.readyC)
	})
}

func (c *cluster) copyServices() *services {
	new := &services{}
	old := c.services.Load()
	if old != nil {
		new.addCN(old.cn)
		new.addTN(old.tn)
	}
	return new
}

func newCNService(cn logpb.CNStore) metadata.CNService {
	return metadata.CNService{
		ServiceID:              cn.UUID,
		PipelineServiceAddress: cn.ServiceAddress,
		SQLAddress:             cn.SQLAddress,
		LockServiceAddress:     cn.LockServiceAddress,
		WorkState:              cn.WorkState,
		Labels:                 cn.Labels,
		QueryAddress:           cn.QueryAddress,
	}
}

func newTNService(tn logpb.TNStore) metadata.TNService {
	v := metadata.TNService{
		ServiceID:             tn.UUID,
		TxnServiceAddress:     tn.ServiceAddress,
		LogTailServiceAddress: tn.LogtailServerAddress,
		LockServiceAddress:    tn.LockServiceAddress,
		QueryAddress:          tn.QueryAddress,
	}
	v.Shards = make([]metadata.TNShard, 0, len(tn.Shards))
	for _, s := range tn.Shards {
		v.Shards = append(v.Shards, metadata.TNShard{
			TNShardRecord: metadata.TNShardRecord{ShardID: s.ShardID},
			ReplicaID:     s.ReplicaID,
		})
	}
	return v
}

type services struct {
	cn []metadata.CNService
	tn []metadata.TNService
}

func (s *services) addCN(values []metadata.CNService) {
	s.cn = append(s.cn, values...)
}

func (s *services) addTN(values []metadata.TNService) {
	s.tn = append(s.tn, values...)
}
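
// exampleCountTNShards is an illustrative sketch, not part of the original
// file: it shows that the TNService values built by newTNService above carry
// their shard list, by counting every shard known to the current cluster
// snapshot.
func exampleCountTNShards(c *cluster) int {
	total := 0
	for _, tn := range c.GetAllTNServices() {
		total += len(tn.Shards)
	}
	return total
}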