github.com/matrixorigin/matrixone@v1.2.0/cmd/mo-service/main.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package main 16 17 import ( 18 "context" 19 "errors" 20 "flag" 21 "fmt" 22 "net/http" 23 "os" 24 "os/signal" 25 "path/filepath" 26 struntime "runtime" 27 "strings" 28 "sync" 29 "syscall" 30 "time" 31 _ "time/tzdata" 32 33 "github.com/google/uuid" 34 "go.uber.org/zap" 35 36 "github.com/matrixorigin/matrixone/pkg/catalog" 37 "github.com/matrixorigin/matrixone/pkg/clusterservice" 38 "github.com/matrixorigin/matrixone/pkg/cnservice" 39 "github.com/matrixorigin/matrixone/pkg/cnservice/cnclient" 40 "github.com/matrixorigin/matrixone/pkg/common/moerr" 41 "github.com/matrixorigin/matrixone/pkg/common/runtime" 42 "github.com/matrixorigin/matrixone/pkg/common/stopper" 43 "github.com/matrixorigin/matrixone/pkg/common/system" 44 "github.com/matrixorigin/matrixone/pkg/defines" 45 "github.com/matrixorigin/matrixone/pkg/fileservice" 46 "github.com/matrixorigin/matrixone/pkg/gossip" 47 "github.com/matrixorigin/matrixone/pkg/logservice" 48 "github.com/matrixorigin/matrixone/pkg/logutil" 49 "github.com/matrixorigin/matrixone/pkg/pb/metadata" 50 "github.com/matrixorigin/matrixone/pkg/proxy" 51 qclient "github.com/matrixorigin/matrixone/pkg/queryservice/client" 52 "github.com/matrixorigin/matrixone/pkg/sql/compile" 53 "github.com/matrixorigin/matrixone/pkg/tnservice" 54 "github.com/matrixorigin/matrixone/pkg/udf/pythonservice" 55 "github.com/matrixorigin/matrixone/pkg/util" 56 "github.com/matrixorigin/matrixone/pkg/util/debug/goroutine" 57 "github.com/matrixorigin/matrixone/pkg/util/export" 58 "github.com/matrixorigin/matrixone/pkg/util/export/table" 59 "github.com/matrixorigin/matrixone/pkg/util/metric/mometric" 60 "github.com/matrixorigin/matrixone/pkg/util/profile" 61 "github.com/matrixorigin/matrixone/pkg/util/trace/impl/motrace" 62 ) 63 64 var ( 65 configFile = flag.String("cfg", "", "toml configuration used to start mo-service") 66 launchFile = flag.String("launch", "", "toml configuration used to launch mo cluster") 67 versionFlag = flag.Bool("version", false, "print version information") 68 daemon = flag.Bool("daemon", false, "run mo-service in daemon mode") 69 withProxy = flag.Bool("with-proxy", false, "run mo-service with proxy module started") 70 maxProcessor = flag.Int("max-processor", 0, "set max processor for go runtime") 71 globalEtlFS fileservice.FileService 72 ) 73 74 func main() { 75 if *maxProcessor > 0 { 76 struntime.GOMAXPROCS(*maxProcessor) 77 } 78 79 flag.Parse() 80 maybePrintVersion() 81 maybeRunInDaemonMode() 82 83 uuid.EnableRandPool() 84 85 if *cpuProfilePathFlag != "" { 86 stop := startCPUProfile() 87 defer stop() 88 } 89 if *allocsProfilePathFlag != "" { 90 defer writeAllocsProfile() 91 } 92 if *heapProfilePathFlag != "" { 93 defer writeHeapProfile() 94 } 95 if *httpListenAddr != "" { 96 go func() { 97 http.ListenAndServe(*httpListenAddr, nil) 98 }() 99 } 100 101 ctx := context.Background() 102 shutdownC := make(chan struct{}) 103 104 stopper := stopper.NewStopper("main", stopper.WithLogger(logutil.GetGlobalLogger())) 105 if *launchFile != "" { 106 if err := startCluster(ctx, stopper, shutdownC); err != nil { 107 panic(err) 108 } 109 } else if *configFile != "" { 110 cfg := NewConfig() 111 if err := parseConfigFromFile(*configFile, cfg); err != nil { 112 panic(fmt.Sprintf("failed to parse config from %s, error: %s", *configFile, err.Error())) 113 } 114 if err := startService(ctx, cfg, stopper, shutdownC); err != nil { 115 panic(err) 116 } 117 } else { 118 panic(errors.New("no configuration specified")) 119 } 120 121 waitSignalToStop(stopper, shutdownC) 122 logutil.GetGlobalLogger().Info("Shutdown complete") 123 } 124 125 func waitSignalToStop(stopper *stopper.Stopper, shutdownC chan struct{}) { 126 sigchan := make(chan os.Signal, 1) 127 signal.Notify(sigchan, syscall.SIGTERM, syscall.SIGINT) 128 129 detail := "Starting shutdown..." 130 select { 131 case sig := <-sigchan: 132 detail += "signal: " + sig.String() 133 //dump heap profile before stopping services 134 heapName, _ := uuid.NewV7() 135 heapProfilePath := catalog.BuildProfilePath("heap", heapName.String()) 136 cnservice.SaveProfile(heapProfilePath, profile.HEAP, globalEtlFS) 137 detail += ". heap profile: " + heapProfilePath 138 //dump goroutine before stopping services 139 routineName, _ := uuid.NewV7() 140 routineProfilePath := catalog.BuildProfilePath("routine", routineName.String()) 141 cnservice.SaveProfile(routineProfilePath, profile.GOROUTINE, globalEtlFS) 142 detail += " routine profile: " + routineProfilePath 143 case <-shutdownC: 144 // waiting, give a chance let all log stores and tn stores to get 145 // shutdown cmd from ha keeper 146 time.Sleep(time.Second * 5) 147 detail += "ha keeper issues shutdown command" 148 } 149 150 stopAllDynamicCNServices() 151 152 logutil.GetGlobalLogger().Info(detail) 153 stopper.Stop() 154 if cnProxy != nil { 155 if err := cnProxy.Stop(); err != nil { 156 logutil.GetGlobalLogger().Error("shutdown cn proxy failed", zap.Error(err)) 157 } 158 } 159 } 160 161 func startService( 162 ctx context.Context, 163 cfg *Config, 164 stopper *stopper.Stopper, 165 shutdownC chan struct{}, 166 ) error { 167 if err := cfg.validate(); err != nil { 168 return err 169 } 170 if err := cfg.resolveGossipSeedAddresses(); err != nil { 171 return err 172 } 173 setupProcessLevelRuntime(cfg, stopper) 174 175 setupStatusServer(runtime.ProcessLevelRuntime()) 176 177 goroutine.StartLeakCheck(stopper, cfg.Goroutine) 178 179 st, err := cfg.getServiceType() 180 if err != nil { 181 return err 182 } 183 184 uuid, err := getNodeUUID(ctx, st, cfg) 185 if err != nil { 186 return err 187 } 188 189 var gossipNode *gossip.Node 190 if st == metadata.ServiceType_CN { 191 gossipNode, err = gossip.NewNode(ctx, cfg.CN.UUID) 192 if err != nil { 193 return err 194 } 195 for i := range cfg.FileServices { 196 cfg.FileServices[i].Cache.KeyRouterFactory = gossipNode.DistKeyCacheGetter() 197 cfg.FileServices[i].Cache.QueryClient, err = qclient.NewQueryClient( 198 cfg.CN.UUID, cfg.FileServices[i].Cache.RPC, 199 ) 200 if err != nil { 201 return err 202 } 203 } 204 } 205 206 fs, err := cfg.createFileService(ctx, st, uuid) 207 if err != nil { 208 return err 209 } 210 211 etlFS, err := fileservice.Get[fileservice.FileService](fs, defines.ETLFileServiceName) 212 if err != nil { 213 return err 214 } 215 if err = initTraceMetric(ctx, st, cfg, stopper, etlFS, uuid); err != nil { 216 return err 217 } 218 219 if globalEtlFS == nil { 220 globalEtlFS = etlFS 221 } 222 223 switch st { 224 case metadata.ServiceType_CN: 225 return startCNService(cfg, stopper, fs, gossipNode) 226 case metadata.ServiceType_TN: 227 return startTNService(cfg, stopper, fs, shutdownC) 228 case metadata.ServiceType_LOG: 229 return startLogService(cfg, stopper, fs, shutdownC) 230 case metadata.ServiceType_PROXY: 231 return startProxyService(cfg, stopper) 232 case metadata.ServiceType_PYTHON_UDF: 233 return startPythonUdfService(cfg, stopper) 234 default: 235 panic("unknown service type") 236 } 237 } 238 239 // serviceWG control motrace/mometric quit as last one. 240 var serviceWG sync.WaitGroup 241 242 func startCNService( 243 cfg *Config, 244 stopper *stopper.Stopper, 245 fileService fileservice.FileService, 246 gossipNode *gossip.Node, 247 ) error { 248 // start up system module to do some calculation. 249 system.Run(stopper) 250 251 if err := waitClusterCondition(cfg.HAKeeperClient, waitAnyShardReady); err != nil { 252 return err 253 } 254 serviceWG.Add(1) 255 return stopper.RunNamedTask("cn-service", func(ctx context.Context) { 256 defer serviceWG.Done() 257 cfg.initMetaCache() 258 c := cfg.getCNServiceConfig() 259 commonConfigKVMap, _ := dumpCommonConfig(*cfg) 260 s, err := cnservice.NewService( 261 &c, 262 ctx, 263 fileService, 264 gossipNode, 265 cnservice.WithLogger(logutil.GetGlobalLogger().Named("cn-service").With(zap.String("uuid", cfg.CN.UUID))), 266 cnservice.WithMessageHandle(compile.CnServerMessageHandler), 267 cnservice.WithConfigData(commonConfigKVMap), 268 cnservice.WithTxnTraceData(filepath.Join(cfg.DataDir, c.Txn.Trace.Dir)), 269 ) 270 if err != nil { 271 panic(err) 272 } 273 if err := s.Start(); err != nil { 274 panic(err) 275 } 276 277 <-ctx.Done() 278 // Close the cache client which is used in file service. 279 for _, fs := range cfg.FileServices { 280 if fs.Cache.QueryClient != nil { 281 _ = fs.Cache.QueryClient.Close() 282 } 283 } 284 if err := s.Close(); err != nil { 285 panic(err) 286 } 287 if err := cnclient.CloseCNClient(); err != nil { 288 panic(err) 289 } 290 }) 291 } 292 293 func startTNService( 294 cfg *Config, 295 stopper *stopper.Stopper, 296 fileService fileservice.FileService, 297 shutdownC chan struct{}, 298 ) error { 299 if err := waitClusterCondition(cfg.HAKeeperClient, waitHAKeeperRunning); err != nil { 300 return err 301 } 302 r, err := getRuntime(metadata.ServiceType_TN, cfg, stopper) 303 if err != nil { 304 return err 305 } 306 serviceWG.Add(1) 307 return stopper.RunNamedTask("tn-service", func(ctx context.Context) { 308 defer serviceWG.Done() 309 cfg.initMetaCache() 310 c := cfg.getTNServiceConfig() 311 //notify the tn service it is in the standalone cluster 312 c.InStandalone = cfg.IsStandalone 313 commonConfigKVMap, _ := dumpCommonConfig(*cfg) 314 s, err := tnservice.NewService( 315 &c, 316 r, 317 fileService, 318 shutdownC, 319 tnservice.WithConfigData(commonConfigKVMap)) 320 if err != nil { 321 panic(err) 322 } 323 if err := s.Start(); err != nil { 324 panic(err) 325 } 326 327 <-ctx.Done() 328 if err := s.Close(); err != nil { 329 panic(err) 330 } 331 }) 332 } 333 334 func startLogService( 335 cfg *Config, 336 stopper *stopper.Stopper, 337 fileService fileservice.FileService, 338 shutdownC chan struct{}, 339 ) error { 340 lscfg := cfg.getLogServiceConfig() 341 commonConfigKVMap, _ := dumpCommonConfig(*cfg) 342 s, err := logservice.NewService(lscfg, fileService, 343 shutdownC, 344 logservice.WithRuntime(runtime.ProcessLevelRuntime()), 345 logservice.WithConfigData(commonConfigKVMap)) 346 if err != nil { 347 panic(err) 348 } 349 if err := s.Start(); err != nil { 350 panic(err) 351 } 352 serviceWG.Add(1) 353 return stopper.RunNamedTask("log-service", func(ctx context.Context) { 354 defer serviceWG.Done() 355 if cfg.LogService.BootstrapConfig.BootstrapCluster { 356 logutil.Infof("bootstrapping hakeeper...") 357 if err := s.BootstrapHAKeeper(ctx, cfg.LogService); err != nil { 358 panic(err) 359 } 360 } 361 362 <-ctx.Done() 363 if err := s.Close(); err != nil { 364 panic(err) 365 } 366 }) 367 } 368 369 // startProxyService starts the proxy service. 370 func startProxyService(cfg *Config, stopper *stopper.Stopper) error { 371 if err := waitClusterCondition(cfg.HAKeeperClient, waitHAKeeperRunning); err != nil { 372 return err 373 } 374 serviceWG.Add(1) 375 return stopper.RunNamedTask("proxy-service", func(ctx context.Context) { 376 defer serviceWG.Done() 377 s, err := proxy.NewServer( 378 ctx, 379 cfg.getProxyConfig(), 380 proxy.WithRuntime(runtime.ProcessLevelRuntime()), 381 ) 382 if err != nil { 383 panic(err) 384 } 385 if err := s.Start(); err != nil { 386 panic(err) 387 } 388 <-ctx.Done() 389 if err := s.Close(); err != nil { 390 panic(err) 391 } 392 }) 393 } 394 395 // startPythonUdfService starts the python udf service. 396 func startPythonUdfService(cfg *Config, stopper *stopper.Stopper) error { 397 if err := waitClusterCondition(cfg.HAKeeperClient, waitHAKeeperRunning); err != nil { 398 return err 399 } 400 serviceWG.Add(1) 401 return stopper.RunNamedTask("python-udf-service", func(ctx context.Context) { 402 defer serviceWG.Done() 403 s, err := pythonservice.NewService(cfg.PythonUdfServerConfig) 404 if err != nil { 405 panic(err) 406 } 407 if err := s.Start(); err != nil { 408 panic(err) 409 } 410 <-ctx.Done() 411 if err := s.Close(); err != nil { 412 panic(err) 413 } 414 }) 415 } 416 417 func getNodeUUID(ctx context.Context, st metadata.ServiceType, cfg *Config) (UUID string, err error) { 418 switch st { 419 case metadata.ServiceType_CN: 420 // validate node_uuid 421 var uuidErr error 422 var nodeUUID uuid.UUID 423 if nodeUUID, uuidErr = uuid.Parse(cfg.CN.UUID); uuidErr != nil { 424 nodeUUID, _ = uuid.NewV7() 425 } 426 if err := util.SetUUIDNodeID(ctx, nodeUUID[:]); err != nil { 427 return "", moerr.ConvertPanicError(ctx, err) 428 } 429 UUID = nodeUUID.String() 430 case metadata.ServiceType_TN: 431 UUID = cfg.getTNServiceConfig().UUID 432 case metadata.ServiceType_LOG: 433 UUID = cfg.LogService.UUID 434 case metadata.ServiceType_PYTHON_UDF: 435 UUID = cfg.PythonUdfServerConfig.UUID 436 } 437 UUID = strings.ReplaceAll(UUID, " ", "_") // remove space in UUID for filename 438 return 439 } 440 441 func initTraceMetric(ctx context.Context, st metadata.ServiceType, cfg *Config, stopper *stopper.Stopper, fs fileservice.FileService, UUID string) error { 442 var writerFactory table.WriterFactory 443 var err error 444 var initWG sync.WaitGroup 445 SV := cfg.getObservabilityConfig() 446 447 nodeRole := st.String() 448 if *launchFile != "" { 449 nodeRole = mometric.LaunchMode 450 } 451 452 selector := clusterservice.NewSelector().SelectByLabel(SV.LabelSelector, clusterservice.Contain) 453 runtime.ProcessLevelRuntime().SetGlobalVariables(runtime.BackgroundCNSelector, selector) 454 455 if !SV.DisableTrace || !SV.DisableMetric { 456 writerFactory = export.GetWriterFactory(fs, UUID, nodeRole, !SV.DisableSqlWriter) 457 initWG.Add(1) 458 collector := export.NewMOCollector(ctx, export.WithOBCollectorConfig(&SV.OBCollectorConfig)) 459 stopper.RunNamedTask("trace", func(ctx context.Context) { 460 err, act := motrace.InitWithConfig(ctx, 461 &SV, 462 motrace.WithNode(UUID, nodeRole), 463 motrace.WithBatchProcessor(collector), 464 motrace.WithFSWriterFactory(writerFactory), 465 motrace.WithSQLExecutor(nil), 466 ) 467 initWG.Done() 468 if err != nil { 469 panic(err) 470 } 471 if !act { 472 return 473 } 474 <-ctx.Done() 475 logutil.Info("motrace receive shutdown signal, wait other services shutdown complete.") 476 serviceWG.Wait() 477 logutil.Info("Shutdown service complete.") 478 // flush trace/log/error framework 479 if err = motrace.Shutdown(ctx); err != nil { 480 logutil.Warn("Shutdown trace", logutil.ErrorField(err), logutil.NoReportFiled()) 481 } 482 }) 483 initWG.Wait() 484 } 485 if !SV.DisableMetric || SV.EnableMetricToProm { 486 stopper.RunNamedTask("metric", func(ctx context.Context) { 487 if act := mometric.InitMetric(ctx, nil, &SV, UUID, nodeRole, mometric.WithWriterFactory(writerFactory)); !act { 488 return 489 } 490 <-ctx.Done() 491 mometric.StopMetricSync() 492 }) 493 } 494 if err = export.InitMerge(ctx, &SV); err != nil { 495 return err 496 } 497 return nil 498 } 499 500 func maybeRunInDaemonMode() { 501 if _, isChild := os.LookupEnv("daemon"); *daemon && !isChild { 502 childENV := []string{"daemon=true"} 503 pwd, err := os.Getwd() 504 if err != nil { 505 panic(err) 506 } 507 cpid, err := syscall.ForkExec(os.Args[0], os.Args, &syscall.ProcAttr{ 508 Dir: pwd, 509 Env: append(os.Environ(), childENV...), 510 Sys: &syscall.SysProcAttr{ 511 Setsid: true, 512 }, 513 Files: []uintptr{0, 1, 2}, // print message to the same pty 514 }) 515 if err != nil { 516 panic(err) 517 } 518 logutil.Infof("mo-service is running in daemon mode, child process is %d", cpid) 519 os.Exit(0) 520 } 521 }