github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/instrument/prom_process_collector.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package instrument
    22  
    23  import (
    24  	"errors"
    25  	"os"
    26  
    27  	"github.com/m3db/prometheus_client_golang/prometheus"
    28  	procfs "github.com/m3db/prometheus_procfs"
    29  )
    30  
    31  type processCollector struct {
    32  	collectFn       func(chan<- prometheus.Metric)
    33  	pidFn           func() (int, error)
    34  	reportErrors    bool
    35  	cpuTotal        *prometheus.Desc
    36  	openFDs, maxFDs *prometheus.Desc
    37  	vsize, maxVsize *prometheus.Desc
    38  	rss             *prometheus.Desc
    39  	startTime       *prometheus.Desc
    40  }
    41  
// ProcessCollectorOpts defines the behavior of a process metrics collector
// created with NewPrometheusProcessCollector.
type ProcessCollectorOpts struct {
	// PidFn returns the PID of the process the collector collects metrics
	// for. It is called upon each collection. By default (when PidFn is
	// nil), the PID of the current process is used, as determined at
	// construction time by calling os.Getpid().
	PidFn func() (int, error)
	// Namespace, if non-empty, is prepended to the name of each collected
	// metric, followed by an underscore ("_").
	Namespace string
	// DisableOpenFDs allows disabling the reporting of open FDs due to
	// the cost that is required to report the number of file descriptors.
	DisableOpenFDs bool
	// ReportErrors, if true, causes any error encountered during collection
	// to be reported as an invalid metric (see prometheus.NewInvalidMetric).
	// Otherwise, errors are ignored and the collected metrics will be
	// incomplete. (Possibly, no metrics will be collected at all.) While
	// that's usually not desired, it is appropriate for the common "mix-in"
	// of process metrics, where process metrics are nice to have, but
	// failing to collect them should not disrupt the collection of the
	// remaining metrics.
	ReportErrors bool
}
    65  
    66  // NewPrometheusProcessCollector returns a collector which exports the current state of
    67  // process metrics including CPU, memory and file descriptor usage as well as
    68  // the process start time. The detailed behavior is defined by the provided
    69  // ProcessCollectorOpts. The zero value of ProcessCollectorOpts creates a
    70  // collector for the current process with an empty namespace string and no error
    71  // reporting.
    72  //
    73  // Currently, the collector depends on a Linux-style proc filesystem and
    74  // therefore only exports metrics for Linux.
    75  //
    76  // NB(r): This version of the Prometheus process collector allows skipping emitting
    77  // open FDs due to excessive load reporting open FDs with processes with
    78  // a large number of open FDs.
    79  func NewPrometheusProcessCollector(opts ProcessCollectorOpts) prometheus.Collector {
    80  	ns := ""
    81  	if len(opts.Namespace) > 0 {
    82  		ns = opts.Namespace + "_"
    83  	}
    84  
    85  	c := &processCollector{
    86  		reportErrors: opts.ReportErrors,
    87  		cpuTotal: prometheus.NewDesc(
    88  			ns+"process_cpu_seconds_total",
    89  			"Total user and system CPU time spent in seconds.",
    90  			nil, nil,
    91  		),
    92  		maxFDs: prometheus.NewDesc(
    93  			ns+"process_max_fds",
    94  			"Maximum number of open file descriptors.",
    95  			nil, nil,
    96  		),
    97  		vsize: prometheus.NewDesc(
    98  			ns+"process_virtual_memory_bytes",
    99  			"Virtual memory size in bytes.",
   100  			nil, nil,
   101  		),
   102  		maxVsize: prometheus.NewDesc(
   103  			ns+"process_virtual_memory_max_bytes",
   104  			"Maximum amount of virtual memory available in bytes.",
   105  			nil, nil,
   106  		),
   107  		rss: prometheus.NewDesc(
   108  			ns+"process_resident_memory_bytes",
   109  			"Resident memory size in bytes.",
   110  			nil, nil,
   111  		),
   112  		startTime: prometheus.NewDesc(
   113  			ns+"process_start_time_seconds",
   114  			"Start time of the process since unix epoch in seconds.",
   115  			nil, nil,
   116  		),
   117  	}
   118  
   119  	if !opts.DisableOpenFDs {
   120  		c.openFDs = prometheus.NewDesc(
   121  			ns+"process_open_fds",
   122  			"Number of open file descriptors.",
   123  			nil, nil,
   124  		)
   125  	}
   126  
   127  	if opts.PidFn == nil {
   128  		pid := os.Getpid()
   129  		c.pidFn = func() (int, error) { return pid, nil }
   130  	} else {
   131  		c.pidFn = opts.PidFn
   132  	}
   133  
   134  	// Set up process metric collection if supported by the runtime.
   135  	if _, err := procfs.NewStat(); err == nil {
   136  		c.collectFn = c.processCollect
   137  	} else {
   138  		c.collectFn = func(ch chan<- prometheus.Metric) {
   139  			c.reportError(ch, nil, errors.New("process metrics not supported on this platform"))
   140  		}
   141  	}
   142  
   143  	return c
   144  }
   145  
   146  // Describe returns all descriptions of the collector.
   147  func (c *processCollector) Describe(ch chan<- *prometheus.Desc) {
   148  	ch <- c.cpuTotal
   149  	if c.openFDs != nil {
   150  		ch <- c.openFDs
   151  	}
   152  	ch <- c.maxFDs
   153  	ch <- c.vsize
   154  	ch <- c.maxVsize
   155  	ch <- c.rss
   156  	ch <- c.startTime
   157  }
   158  
   159  // Collect returns the current state of all metrics of the collector.
   160  func (c *processCollector) Collect(ch chan<- prometheus.Metric) {
   161  	c.collectFn(ch)
   162  }
   163  
   164  func (c *processCollector) processCollect(ch chan<- prometheus.Metric) {
   165  	pid, err := c.pidFn()
   166  	if err != nil {
   167  		c.reportError(ch, nil, err)
   168  		return
   169  	}
   170  
   171  	p, err := procfs.NewProc(pid)
   172  	if err != nil {
   173  		c.reportError(ch, nil, err)
   174  		return
   175  	}
   176  
   177  	if stat, err := p.NewStat(); err == nil {
   178  		ch <- prometheus.MustNewConstMetric(c.cpuTotal, prometheus.CounterValue, stat.CPUTime())
   179  		ch <- prometheus.MustNewConstMetric(c.vsize, prometheus.GaugeValue, float64(stat.VirtualMemory()))
   180  		ch <- prometheus.MustNewConstMetric(c.rss, prometheus.GaugeValue, float64(stat.ResidentMemory()))
   181  		if startTime, err := stat.StartTime(); err == nil {
   182  			ch <- prometheus.MustNewConstMetric(c.startTime, prometheus.GaugeValue, startTime)
   183  		} else {
   184  			c.reportError(ch, c.startTime, err)
   185  		}
   186  	} else {
   187  		c.reportError(ch, nil, err)
   188  	}
   189  
   190  	if c.openFDs != nil {
   191  		if fds, err := p.FileDescriptorsLen(); err == nil {
   192  			ch <- prometheus.MustNewConstMetric(c.openFDs, prometheus.GaugeValue, float64(fds))
   193  		} else {
   194  			c.reportError(ch, c.openFDs, err)
   195  		}
   196  	}
   197  
   198  	if limits, err := p.NewLimits(); err == nil {
   199  		ch <- prometheus.MustNewConstMetric(c.maxFDs, prometheus.GaugeValue, float64(limits.OpenFiles))
   200  		ch <- prometheus.MustNewConstMetric(c.maxVsize, prometheus.GaugeValue, float64(limits.AddressSpace))
   201  	} else {
   202  		c.reportError(ch, nil, err)
   203  	}
   204  }
   205  
   206  func (c *processCollector) reportError(ch chan<- prometheus.Metric, desc *prometheus.Desc, err error) {
   207  	if !c.reportErrors {
   208  		return
   209  	}
   210  	if desc == nil {
   211  		desc = prometheus.NewInvalidDesc(err)
   212  	}
   213  	ch <- prometheus.NewInvalidMetric(desc, err)
   214  }