github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/worker/metrics.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package worker 15 16 import ( 17 "context" 18 "net" 19 "net/http" 20 "net/http/pprof" 21 "time" 22 23 cpu "github.com/pingcap/tidb-tools/pkg/utils" 24 "github.com/pingcap/tiflow/dm/common" 25 "github.com/pingcap/tiflow/dm/dumpling" 26 "github.com/pingcap/tiflow/dm/loader" 27 "github.com/pingcap/tiflow/dm/pkg/log" 28 "github.com/pingcap/tiflow/dm/relay" 29 "github.com/pingcap/tiflow/dm/syncer/metrics" 30 "github.com/pingcap/tiflow/engine/pkg/promutil" 31 "github.com/pingcap/tiflow/pkg/version" 32 "github.com/prometheus/client_golang/prometheus" 33 "github.com/prometheus/client_golang/prometheus/collectors" 34 "github.com/prometheus/client_golang/prometheus/promhttp" 35 ) 36 37 const ( 38 opErrTypeBeforeOp = "BeforeAnyOp" 39 opErrTypeSourceBound = "SourceBound" 40 opErrTypeRelaySource = "RelaySource" 41 ) 42 43 var ( 44 f = &promutil.PromFactory{} 45 taskState = f.NewGaugeVec( 46 prometheus.GaugeOpts{ 47 Namespace: "dm", 48 Subsystem: "worker", 49 Name: "task_state", 50 Help: "state of task, 0 - invalidStage, 1 - New, 2 - Running, 3 - Paused, 4 - Stopped, 5 - Finished", 51 }, []string{"task", "source_id", "worker"}) 52 53 // opErrCounter cleans on worker close, which is the same time dm-worker exits, so no explicit clean. 54 opErrCounter = f.NewCounterVec( 55 prometheus.CounterOpts{ 56 Namespace: "dm", 57 Subsystem: "worker", 58 Name: "operate_error", 59 Help: "number of different operate error", 60 }, []string{"worker", "type"}) 61 62 cpuUsageGauge = prometheus.NewGauge( 63 prometheus.GaugeOpts{ 64 Namespace: "dm", 65 Subsystem: "worker", 66 Name: "cpu_usage", 67 Help: "the cpu usage of worker", 68 }) 69 ) 70 71 type statusHandler struct{} 72 73 func (h *statusHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { 74 w.Header().Set("Content-Type", "text/plain") 75 text := version.GetRawInfo() 76 _, err := w.Write([]byte(text)) 77 if err != nil && !common.IsErrNetClosing(err) { 78 log.L().Error("fail to write status response", log.ShortError(err)) 79 } 80 } 81 82 // Note: handle error inside the function with returning it. 83 func (s *Server) collectMetrics() { 84 // CPU usage metric 85 cpuUsage := cpu.GetCPUPercentage() 86 cpuUsageGauge.Set(cpuUsage) 87 } 88 89 func (s *Server) runBackgroundJob(ctx context.Context) { 90 ticker := time.NewTicker(time.Second * 10) 91 defer ticker.Stop() 92 93 for { 94 select { 95 case <-ticker.C: 96 s.collectMetrics() 97 98 case <-ctx.Done(): 99 return 100 } 101 } 102 } 103 104 // RegistryMetrics registries metrics for worker. 105 func RegistryMetrics() { 106 registry := prometheus.NewRegistry() 107 registry.MustRegister(prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{})) 108 registry.MustRegister(prometheus.NewGoCollector( 109 collectors.WithGoCollections(collectors.GoRuntimeMemStatsCollection | collectors.GoRuntimeMetricsCollection))) 110 111 registry.MustRegister(cpuUsageGauge) 112 113 registry.MustRegister(taskState) 114 registry.MustRegister(opErrCounter) 115 116 relay.RegisterMetrics(registry) 117 dumpling.RegisterMetrics(registry) 118 loader.RegisterMetrics(registry) 119 metrics.RegisterValidatorMetrics(registry) 120 metrics.DefaultMetricsProxies.RegisterMetrics(registry) 121 prometheus.DefaultGatherer = registry 122 } 123 124 // InitStatus initializes the HTTP status server. 125 func InitStatus(lis net.Listener) { 126 mux := http.NewServeMux() 127 mux.Handle("/status", &statusHandler{}) 128 mux.Handle("/metrics", promhttp.Handler()) 129 130 mux.HandleFunc("/debug/pprof/", pprof.Index) 131 mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) 132 mux.HandleFunc("/debug/pprof/profile", pprof.Profile) 133 mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) 134 mux.HandleFunc("/debug/pprof/trace", pprof.Trace) 135 136 httpS := &http.Server{ 137 Handler: mux, 138 } 139 err := httpS.Serve(lis) 140 if err != nil && !common.IsErrNetClosing(err) && err != http.ErrServerClosed { 141 log.L().Error("status server returned", log.ShortError(err)) 142 } 143 }