github.com/m3db/m3@v1.5.0/src/x/instrument/prom_process_collector.go (about) 1 // Copyright (c) 2019 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package instrument 22 23 import ( 24 "errors" 25 "os" 26 27 "github.com/m3db/prometheus_client_golang/prometheus" 28 procfs "github.com/m3db/prometheus_procfs" 29 ) 30 31 type processCollector struct { 32 collectFn func(chan<- prometheus.Metric) 33 pidFn func() (int, error) 34 reportErrors bool 35 cpuTotal *prometheus.Desc 36 openFDs, maxFDs *prometheus.Desc 37 vsize, maxVsize *prometheus.Desc 38 rss *prometheus.Desc 39 startTime *prometheus.Desc 40 } 41 42 // ProcessCollectorOpts defines the behavior of a process metrics collector 43 // created with NewProcessCollector. 44 type ProcessCollectorOpts struct { 45 // PidFn returns the PID of the process the collector collects metrics 46 // for. It is called upon each collection. By default, the PID of the 47 // current process is used, as determined on construction time by 48 // calling os.Getpid(). 49 PidFn func() (int, error) 50 // If non-empty, each of the collected metrics is prefixed by the 51 // provided string and an underscore ("_"). 52 Namespace string 53 // DisableOpenFDs allows disabling the reporting of open FDs due to 54 // the cost that is required to report the number of file descriptors. 55 DisableOpenFDs bool 56 // If true, any error encountered during collection is reported as an 57 // invalid metric (see NewInvalidMetric). Otherwise, errors are ignored 58 // and the collected metrics will be incomplete. (Possibly, no metrics 59 // will be collected at all.) While that's usually not desired, it is 60 // appropriate for the common "mix-in" of process metrics, where process 61 // metrics are nice to have, but failing to collect them should not 62 // disrupt the collection of the remaining metrics. 63 ReportErrors bool 64 } 65 66 // NewPrometheusProcessCollector returns a collector which exports the current state of 67 // process metrics including CPU, memory and file descriptor usage as well as 68 // the process start time. The detailed behavior is defined by the provided 69 // ProcessCollectorOpts. The zero value of ProcessCollectorOpts creates a 70 // collector for the current process with an empty namespace string and no error 71 // reporting. 72 // 73 // Currently, the collector depends on a Linux-style proc filesystem and 74 // therefore only exports metrics for Linux. 75 // 76 // NB(r): This version of the Prometheus process collector allows skipping emitting 77 // open FDs due to excessive load reporting open FDs with processes with 78 // a large number of open FDs. 79 func NewPrometheusProcessCollector(opts ProcessCollectorOpts) prometheus.Collector { 80 ns := "" 81 if len(opts.Namespace) > 0 { 82 ns = opts.Namespace + "_" 83 } 84 85 c := &processCollector{ 86 reportErrors: opts.ReportErrors, 87 cpuTotal: prometheus.NewDesc( 88 ns+"process_cpu_seconds_total", 89 "Total user and system CPU time spent in seconds.", 90 nil, nil, 91 ), 92 maxFDs: prometheus.NewDesc( 93 ns+"process_max_fds", 94 "Maximum number of open file descriptors.", 95 nil, nil, 96 ), 97 vsize: prometheus.NewDesc( 98 ns+"process_virtual_memory_bytes", 99 "Virtual memory size in bytes.", 100 nil, nil, 101 ), 102 maxVsize: prometheus.NewDesc( 103 ns+"process_virtual_memory_max_bytes", 104 "Maximum amount of virtual memory available in bytes.", 105 nil, nil, 106 ), 107 rss: prometheus.NewDesc( 108 ns+"process_resident_memory_bytes", 109 "Resident memory size in bytes.", 110 nil, nil, 111 ), 112 startTime: prometheus.NewDesc( 113 ns+"process_start_time_seconds", 114 "Start time of the process since unix epoch in seconds.", 115 nil, nil, 116 ), 117 } 118 119 if !opts.DisableOpenFDs { 120 c.openFDs = prometheus.NewDesc( 121 ns+"process_open_fds", 122 "Number of open file descriptors.", 123 nil, nil, 124 ) 125 } 126 127 if opts.PidFn == nil { 128 pid := os.Getpid() 129 c.pidFn = func() (int, error) { return pid, nil } 130 } else { 131 c.pidFn = opts.PidFn 132 } 133 134 // Set up process metric collection if supported by the runtime. 135 if _, err := procfs.NewStat(); err == nil { 136 c.collectFn = c.processCollect 137 } else { 138 c.collectFn = func(ch chan<- prometheus.Metric) { 139 c.reportError(ch, nil, errors.New("process metrics not supported on this platform")) 140 } 141 } 142 143 return c 144 } 145 146 // Describe returns all descriptions of the collector. 147 func (c *processCollector) Describe(ch chan<- *prometheus.Desc) { 148 ch <- c.cpuTotal 149 if c.openFDs != nil { 150 ch <- c.openFDs 151 } 152 ch <- c.maxFDs 153 ch <- c.vsize 154 ch <- c.maxVsize 155 ch <- c.rss 156 ch <- c.startTime 157 } 158 159 // Collect returns the current state of all metrics of the collector. 160 func (c *processCollector) Collect(ch chan<- prometheus.Metric) { 161 c.collectFn(ch) 162 } 163 164 func (c *processCollector) processCollect(ch chan<- prometheus.Metric) { 165 pid, err := c.pidFn() 166 if err != nil { 167 c.reportError(ch, nil, err) 168 return 169 } 170 171 p, err := procfs.NewProc(pid) 172 if err != nil { 173 c.reportError(ch, nil, err) 174 return 175 } 176 177 if stat, err := p.NewStat(); err == nil { 178 ch <- prometheus.MustNewConstMetric(c.cpuTotal, prometheus.CounterValue, stat.CPUTime()) 179 ch <- prometheus.MustNewConstMetric(c.vsize, prometheus.GaugeValue, float64(stat.VirtualMemory())) 180 ch <- prometheus.MustNewConstMetric(c.rss, prometheus.GaugeValue, float64(stat.ResidentMemory())) 181 if startTime, err := stat.StartTime(); err == nil { 182 ch <- prometheus.MustNewConstMetric(c.startTime, prometheus.GaugeValue, startTime) 183 } else { 184 c.reportError(ch, c.startTime, err) 185 } 186 } else { 187 c.reportError(ch, nil, err) 188 } 189 190 if c.openFDs != nil { 191 if fds, err := p.FileDescriptorsLen(); err == nil { 192 ch <- prometheus.MustNewConstMetric(c.openFDs, prometheus.GaugeValue, float64(fds)) 193 } else { 194 c.reportError(ch, c.openFDs, err) 195 } 196 } 197 198 if limits, err := p.NewLimits(); err == nil { 199 ch <- prometheus.MustNewConstMetric(c.maxFDs, prometheus.GaugeValue, float64(limits.OpenFiles)) 200 ch <- prometheus.MustNewConstMetric(c.maxVsize, prometheus.GaugeValue, float64(limits.AddressSpace)) 201 } else { 202 c.reportError(ch, nil, err) 203 } 204 } 205 206 func (c *processCollector) reportError(ch chan<- prometheus.Metric, desc *prometheus.Desc, err error) { 207 if !c.reportErrors { 208 return 209 } 210 if desc == nil { 211 desc = prometheus.NewInvalidDesc(err) 212 } 213 ch <- prometheus.NewInvalidMetric(desc, err) 214 }