github.com/mackerelio/mackerel-agent-plugins@v0.89.3/mackerel-plugin-jvm/lib/jvm.go (about)

     1  package mpjvm
     2  
     3  import (
     4  	"flag"
     5  	"fmt"
     6  	"os"
     7  	"os/exec"
     8  	"strconv"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/Songmu/timeout"
    13  	mp "github.com/mackerelio/go-mackerel-plugin-helper"
    14  	"github.com/mackerelio/golib/logging"
    15  )
    16  
// logger is the package-level logger used by every function in this file; it
// is obtained from logging.GetLogger under the name "metrics.plugin.jvm".
var logger = logging.GetLogger("metrics.plugin.jvm")
    18  
    19  // JVMPlugin plugin for JVM
    20  type JVMPlugin struct {
    21  	Remote      string
    22  	Lvmid       string
    23  	JstatPath   string
    24  	JinfoPath   string
    25  	JavaName    string
    26  	Tempfile    string
    27  	MetricKey   string
    28  	MetricLabel string
    29  }
    30  
    31  // # jps
    32  // 26547 NettyServer
    33  // 6438 Jps
    34  func fetchLvmidByAppname(appname, target, jpsPath string) (string, error) {
    35  	var (
    36  		stdout     string
    37  		exitStatus *timeout.ExitStatus
    38  		err        error
    39  	)
    40  	if target != "" {
    41  		stdout, _, exitStatus, err = runTimeoutCommand(jpsPath, target)
    42  	} else {
    43  		stdout, _, exitStatus, err = runTimeoutCommand(jpsPath)
    44  	}
    45  
    46  	if err == nil && exitStatus.IsTimedOut() {
    47  		err = fmt.Errorf("jps command timed out")
    48  	}
    49  	if err != nil {
    50  		logger.Errorf("Failed to run exec jps. %s. Please run with the java process user.", err)
    51  		return "", err
    52  	}
    53  
    54  	for _, line := range strings.Split(string(stdout), "\n") {
    55  		words := strings.Split(line, " ")
    56  		if len(words) != 2 {
    57  			continue
    58  		}
    59  		lvmid, name := words[0], words[1]
    60  		if name == appname {
    61  			return lvmid, nil
    62  		}
    63  	}
    64  	return "", fmt.Errorf("cannot get lvmid from %s (please run with the java process user)", appname)
    65  }
    66  
    67  func (m JVMPlugin) fetchJstatMetrics(option string) (map[string]float64, error) {
    68  	vmid := generateVmid(m.Remote, m.Lvmid)
    69  	stdout, _, exitStatus, err := runTimeoutCommand(m.JstatPath, option, vmid)
    70  
    71  	if err == nil && exitStatus.IsTimedOut() {
    72  		err = fmt.Errorf("jstat command timed out")
    73  	}
    74  	if err != nil || exitStatus.GetChildExitCode() != 0 {
    75  		logger.Errorf("Failed to run exec jstat. %s. Please run with the java process user.", err)
    76  		return nil, err
    77  	}
    78  
    79  	lines := strings.Split(string(stdout), "\n")
    80  	if len(lines) < 2 {
    81  		logger.Warningf("Failed to parse output. output has only %d lines.", len(lines))
    82  		return nil, fmt.Errorf("output of jstat command does not have enough lines")
    83  	}
    84  	keys := strings.Fields(lines[0])
    85  	values := strings.Fields(lines[1])
    86  
    87  	stat := make(map[string]float64)
    88  	for i, key := range keys {
    89  		if values[i] == "-" {
    90  			continue
    91  		}
    92  		value, err := strconv.ParseFloat(values[i], 64)
    93  		if err != nil {
    94  			logger.Warningf("Failed to parse value. %s", err)
    95  		}
    96  		stat[key] = value
    97  	}
    98  
    99  	return stat, nil
   100  }
   101  
   102  func (m JVMPlugin) calculateMemorySpaceRate(gcStat map[string]float64) (map[string]float64, error) {
   103  	ret := make(map[string]float64)
   104  	ret["oldSpaceRate"] = gcStat["OU"] / gcStat["OC"] * 100
   105  	ret["newSpaceRate"] = (gcStat["S0U"] + gcStat["S1U"] + gcStat["EU"]) / (gcStat["S0C"] + gcStat["S1C"] + gcStat["EC"]) * 100
   106  
   107  	checkCMSGC, err := m.checkCMSGC()
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  	if checkCMSGC {
   112  		fraction, err := fetchCMSInitiatingOccupancyFraction(m.Lvmid, m.JinfoPath)
   113  		if err != nil {
   114  			return nil, err
   115  		}
   116  		ret["CMSInitiatingOccupancyFraction"] = fraction
   117  	}
   118  
   119  	return ret, nil
   120  }
   121  
   122  func (m JVMPlugin) checkCMSGC() (bool, error) {
   123  	// jinfo does not work on remote
   124  	if m.Remote != "" {
   125  		return false, nil
   126  	}
   127  	stdout, _, exitStatus, err := runTimeoutCommand(m.JinfoPath, "-flag", "UseConcMarkSweepGC", m.Lvmid)
   128  
   129  	if err == nil && exitStatus.IsTimedOut() {
   130  		err = fmt.Errorf("jinfo command timed out")
   131  	}
   132  	if err != nil {
   133  		logger.Errorf("Failed to run exec jinfo. %s. Please run with the java process user.", err)
   134  		return false, err
   135  	}
   136  	return strings.Contains(string(stdout), "+UseConcMarkSweepGC"), nil
   137  }
   138  
   139  func fetchCMSInitiatingOccupancyFraction(lvmid, JinfoPath string) (float64, error) {
   140  	var fraction float64
   141  
   142  	stdout, _, exitStatus, err := runTimeoutCommand(JinfoPath, "-flag", "CMSInitiatingOccupancyFraction", lvmid)
   143  
   144  	if err == nil && exitStatus.IsTimedOut() {
   145  		err = fmt.Errorf("jinfo command timed out")
   146  	}
   147  	if err != nil {
   148  		logger.Errorf("Failed to run exec jinfo. %s. Please run with the java process user.", err)
   149  		return 0.0, err
   150  	}
   151  
   152  	out := strings.Trim(string(stdout), "\n")
   153  	tmp := strings.Split(out, "=")
   154  	fraction, _ = strconv.ParseFloat(tmp[1], 64)
   155  
   156  	return fraction, nil
   157  }
   158  
   159  func mergeStat(dst, src map[string]float64) {
   160  	for k, v := range src {
   161  		dst[k] = v
   162  	}
   163  }
   164  
   165  func runTimeoutCommand(Path string, Args ...string) (string, string, *timeout.ExitStatus, error) {
   166  	var TimeoutDuration = 10 * time.Second
   167  	var TimeoutKillAfter = 5 * time.Second
   168  	tio := &timeout.Timeout{
   169  		Cmd:       exec.Command(Path, Args...),
   170  		Duration:  TimeoutDuration,
   171  		KillAfter: TimeoutKillAfter,
   172  	}
   173  	exitStatus, stdout, stderr, err := tio.Run()
   174  	return stdout, stderr, exitStatus, err
   175  }
   176  
   177  // <Java11>
   178  // # jstat -gc <vmid>
   179  //  S0C    S1C    S0U    S1U      EC       EU        OC         OU       MC     MU    CCSC   CCSU   YGC     YGCT    FGC    FGCT    CGC    CGCT     GCT
   180  // 45184.0 45184.0 45184.0  0.0   361728.0 132414.7  904068.0   679249.5  21248.0 20787.3 2304.0 2105.8     22    8.584   6      2.343   -          -   10.927
   181  
   182  // <Java8> https://docs.oracle.com/javase/8/docs/technotes/tools/unix/jstat.html
   183  // # jstat -gc <vmid>
   184  //  S0C    S1C    S0U    S1U      EC       EU        OC         OU       MC     MU    CCSC   CCSU   YGC     YGCT    FGC    FGCT     GCT
   185  // 1024.0 1024.0  0.0    0.0    8256.0   8256.0   20480.0     453.4    4864.0 2776.2 512.0  300.8       0    0.000   1      0.003    0.003
   186  
   187  // # jstat -gccapacity <vmid>
   188  //  NGCMN    NGCMX     NGC     S0C   S1C       EC      OGCMN      OGCMX       OGC         OC       MCMN     MCMX      MC     CCSMN    CCSMX     CCSC    YGC    FGC
   189  //  10240.0 160384.0  10304.0 1024.0 1024.0   8256.0    20480.0   320896.0    20480.0    20480.0      0.0 1056768.0   4864.0      0.0 1048576.0    512.0      0     1
   190  
   191  // # jstat -gcnew <vmid>
   192  //  S0C    S1C    S0U    S1U   TT MTT  DSS      EC       EU     YGC     YGCT
   193  // 1024.0 1024.0    0.0    0.0 15  15    0.0   8256.0   8256.0      0    0.000
   194  
   195  // <Java7>
   196  // # jstat -gc <vmid>
   197  //  S0C    S1C    S0U    S1U      EC       EU        OC         OU       PC     PU    YGC     YGCT    FGC    FGCT     GCT
   198  // 3584.0 3584.0 2528.0  0.0   692224.0 19062.4  1398272.0   485450.1  72704.0 72611.3   3152   30.229   0      0.000   30.229
   199  
   200  // # jstat -gccapacity  <vmid>
   201  //  NGCMN    NGCMX     NGC     S0C   S1C       EC      OGCMN      OGCMX       OGC         OC      PGCMN    PGCMX     PGC       PC     YGC    FGC
   202  // 699392.0 699392.0 699392.0 4096.0 4096.0 691200.0  1398272.0  1398272.0  1398272.0  1398272.0  21504.0 524288.0  72704.0  72704.0   4212     0
   203  
   204  // # jstat -gcnew  <vmid>
   205  //  S0C    S1C    S0U    S1U   TT MTT  DSS      EC       EU     YGC     YGCT
   206  // 3072.0 3072.0    0.0 2848.0  1  15 3072.0 693248.0 626782.2   3463   33.658
   207  
   208  // FetchMetrics interface for mackerelplugin
   209  func (m JVMPlugin) FetchMetrics() (map[string]interface{}, error) {
   210  	gcStat, err := m.fetchJstatMetrics("-gc")
   211  	if err != nil {
   212  		return nil, err
   213  	}
   214  	gcCapacityStat, err := m.fetchJstatMetrics("-gccapacity")
   215  	if err != nil {
   216  		return nil, err
   217  	}
   218  	gcNewStat, err := m.fetchJstatMetrics("-gcnew")
   219  	if err != nil {
   220  		return nil, err
   221  	}
   222  	gcOldStat, err := m.fetchJstatMetrics("-gcold")
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  	gcSpaceRate, err := m.calculateMemorySpaceRate(gcStat)
   227  	if err != nil {
   228  		return nil, err
   229  	}
   230  
   231  	stat := make(map[string]float64)
   232  	mergeStat(stat, gcStat)
   233  	mergeStat(stat, gcCapacityStat)
   234  	mergeStat(stat, gcNewStat)
   235  	mergeStat(stat, gcOldStat)
   236  	mergeStat(stat, gcSpaceRate)
   237  
   238  	result := make(map[string]interface{})
   239  	for k, v := range stat {
   240  		result[k] = v
   241  	}
   242  	return result, nil
   243  }
   244  
   245  // GraphDefinition interface for mackerelplugin
   246  func (m JVMPlugin) GraphDefinition() map[string]mp.Graphs {
   247  	metricLabel := m.MetricLabel
   248  	if metricLabel == "" {
   249  		metricLabel = m.JavaName
   250  	}
   251  
   252  	javaName := m.MetricKey
   253  	if javaName == "" {
   254  		javaName = m.JavaName
   255  	}
   256  	lowerJavaName := strings.ToLower(javaName)
   257  
   258  	return map[string]mp.Graphs{
   259  		fmt.Sprintf("jvm.%s.gc_events", lowerJavaName): {
   260  			Label: fmt.Sprintf("JVM %s GC events", metricLabel),
   261  			Unit:  "integer",
   262  			Metrics: []mp.Metrics{
   263  				{Name: "YGC", Label: "Young GC event", Diff: true},
   264  				{Name: "FGC", Label: "Full GC event", Diff: true},
   265  				{Name: "CGC", Label: "Concurrent GC event", Diff: true},
   266  			},
   267  		},
   268  		fmt.Sprintf("jvm.%s.gc_time", lowerJavaName): {
   269  			Label: fmt.Sprintf("JVM %s GC time (sec)", metricLabel),
   270  			Unit:  "float",
   271  			Metrics: []mp.Metrics{
   272  				{Name: "YGCT", Label: "Young GC time", Diff: true},
   273  				{Name: "FGCT", Label: "Full GC time", Diff: true},
   274  				{Name: "CGCT", Label: "Concurrent GC time", Diff: true},
   275  			},
   276  		},
   277  		fmt.Sprintf("jvm.%s.gc_time_percentage", lowerJavaName): {
   278  			Label: fmt.Sprintf("JVM %s GC time percentage", metricLabel),
   279  			Unit:  "percentage",
   280  			Metrics: []mp.Metrics{
   281  				// gc_time_percentage is the percentage of gc time to 60 sec.
   282  				{Name: "YGCT", Label: "Young GC time", Diff: true, Scale: (100.0 / 60)},
   283  				{Name: "FGCT", Label: "Full GC time", Diff: true, Scale: (100.0 / 60)},
   284  				{Name: "CGCT", Label: "Concurrent GC time", Diff: true, Scale: (100.0 / 60)},
   285  			},
   286  		},
   287  		fmt.Sprintf("jvm.%s.new_space", lowerJavaName): {
   288  			Label: fmt.Sprintf("JVM %s New Space memory", metricLabel),
   289  			Unit:  "float",
   290  			Metrics: []mp.Metrics{
   291  				{Name: "NGCMX", Label: "New max", Diff: false, Scale: 1024},
   292  				{Name: "NGC", Label: "New current", Diff: false, Scale: 1024},
   293  				{Name: "EU", Label: "Eden used", Diff: false, Scale: 1024},
   294  				{Name: "S0U", Label: "Survivor0 used", Diff: false, Scale: 1024},
   295  				{Name: "S1U", Label: "Survivor1 used", Diff: false, Scale: 1024},
   296  			},
   297  		},
   298  		fmt.Sprintf("jvm.%s.old_space", lowerJavaName): {
   299  			Label: fmt.Sprintf("JVM %s Old Space memory", metricLabel),
   300  			Unit:  "float",
   301  			Metrics: []mp.Metrics{
   302  				{Name: "OGCMX", Label: "Old max", Diff: false, Scale: 1024},
   303  				{Name: "OGC", Label: "Old current", Diff: false, Scale: 1024},
   304  				{Name: "OU", Label: "Old used", Diff: false, Scale: 1024},
   305  			},
   306  		},
   307  		fmt.Sprintf("jvm.%s.perm_space", lowerJavaName): {
   308  			Label: fmt.Sprintf("JVM %s Permanent Space", metricLabel),
   309  			Unit:  "float",
   310  			Metrics: []mp.Metrics{
   311  				{Name: "PGCMX", Label: "Perm max", Diff: false, Scale: 1024},
   312  				{Name: "PGC", Label: "Perm current", Diff: false, Scale: 1024},
   313  				{Name: "PU", Label: "Perm used", Diff: false, Scale: 1024},
   314  			},
   315  		},
   316  		fmt.Sprintf("jvm.%s.metaspace", lowerJavaName): {
   317  			Label: fmt.Sprintf("JVM %s Metaspace", metricLabel),
   318  			Unit:  "float",
   319  			Metrics: []mp.Metrics{
   320  				{Name: "MCMX", Label: "Metaspace capacity max", Diff: false, Scale: 1024},
   321  				{Name: "MCMN", Label: "Metaspace capacity min", Diff: false, Scale: 1024},
   322  				{Name: "MC", Label: "Metaspace capacity", Diff: false, Scale: 1024},
   323  				{Name: "MU", Label: "Metaspace utilization ", Diff: false, Scale: 1024},
   324  				{Name: "CCSC", Label: "Compressed Class Space Capacity", Diff: false, Scale: 1024},
   325  				{Name: "CCSU", Label: "Compressed Class Space Used", Diff: false, Scale: 1024},
   326  			},
   327  		},
   328  		fmt.Sprintf("jvm.%s.memorySpace", lowerJavaName): {
   329  			Label: fmt.Sprintf("JVM %s MemorySpace", metricLabel),
   330  			Unit:  "float",
   331  			Metrics: []mp.Metrics{
   332  				{Name: "oldSpaceRate", Label: "GC Old Memory Space", Diff: false},
   333  				{Name: "newSpaceRate", Label: "GC New Memory Space", Diff: false},
   334  				{Name: "CMSInitiatingOccupancyFraction", Label: "CMS Initiating Occupancy Fraction", Diff: false},
   335  			},
   336  		},
   337  	}
   338  }
   339  
   340  func generateVmid(remote, lvmid string) string {
   341  	if remote != "" {
   342  		if lvmid == "" {
   343  			return remote
   344  		}
   345  		return fmt.Sprintf("%s@%s", lvmid, remote)
   346  	}
   347  	return lvmid
   348  }
   349  
   350  func generateRemote(remote, host string, port int) string {
   351  	if remote == "" {
   352  		if host == "" {
   353  			if port != 0 {
   354  				// for backward compatibility
   355  				return fmt.Sprintf("localhost:%d", port)
   356  			}
   357  			return ""
   358  		}
   359  		if port == 0 {
   360  			return host
   361  		}
   362  		return fmt.Sprintf("%s:%d", host, port)
   363  	}
   364  
   365  	if host != "" || port != 0 {
   366  		logger.Warningf("'-host' and '-port' are ignored, since '-remote' is specified")
   367  	}
   368  	return remote
   369  }
   370  
   371  // Do the plugin
   372  func Do() {
   373  	// Prefer ${JAVA_HOME}/bin if JAVA_HOME presents
   374  	pathBase := "/usr/bin"
   375  	if javaHome := os.Getenv("JAVA_HOME"); javaHome != "" {
   376  		pathBase = javaHome + "/bin"
   377  	}
   378  	optHost := flag.String("host", "", "jps/jstat target hostname [deprecated]")
   379  	optPort := flag.Int("port", 0, "jps/jstat target port [deprecated]")
   380  	optRemote := flag.String("remote", "", "jps/jstat remote target. hostname[:port][/servername]")
   381  	optJstatPath := flag.String("jstatpath", pathBase+"/jstat", "jstat path")
   382  	optJinfoPath := flag.String("jinfopath", pathBase+"/jinfo", "jinfo path")
   383  	optJpsPath := flag.String("jpspath", pathBase+"/jps", "jps path")
   384  	optJavaName := flag.String("javaname", "", "Java app name")
   385  	optPidFile := flag.String("pidfile", "", "pidfile path")
   386  	optTempfile := flag.String("tempfile", "", "Temp file name")
   387  	optMetricKey := flag.String("metric-key", "", "Specifying the Name field in the Graph Definition")
   388  	optMetricLabel := flag.String("metric-label", "", "Specifying the Label field in the Graph Definition")
   389  	flag.Parse()
   390  
   391  	var jvm JVMPlugin
   392  	jvm.JstatPath = *optJstatPath
   393  	jvm.JinfoPath = *optJinfoPath
   394  	jvm.Remote = generateRemote(*optRemote, *optHost, *optPort)
   395  
   396  	if *optJavaName == "" {
   397  		logger.Errorf("javaname is required (if you use 'pidfile' option, 'javaname' is used as just a prefix of graph label)")
   398  		flag.PrintDefaults()
   399  		os.Exit(1)
   400  	}
   401  
   402  	if *optPidFile != "" && jvm.Remote != "" {
   403  		logger.Warningf("both '-pidfile' and '-remote' specified, but '-pidfile' does not work with '-remote' therefore ignored")
   404  	}
   405  
   406  	if *optPidFile == "" || jvm.Remote != "" {
   407  		lvmid, err := fetchLvmidByAppname(*optJavaName, generateVmid(jvm.Remote, ""), *optJpsPath)
   408  		if err != nil {
   409  			logger.Errorf("Failed to fetch lvmid. %s. Please run with the java process user when monitoring local JVM, or set proper 'remote' option when monitorint remote one.", err)
   410  			os.Exit(1)
   411  		}
   412  		jvm.Lvmid = lvmid
   413  	} else {
   414  		// https://docs.oracle.com/javase/7/docs/technotes/tools/share/jps.html
   415  		// `The lvmid is typically, but not necessarily, the operating system's process identifier for the JVM process.`
   416  		pid, err := os.ReadFile(*optPidFile)
   417  		if err != nil {
   418  			logger.Errorf("Failed to load pid. %s", err)
   419  			os.Exit(1)
   420  		}
   421  		jvm.Lvmid = strings.Replace(string(pid), "\n", "", 1)
   422  	}
   423  
   424  	jvm.JavaName = *optJavaName
   425  	jvm.MetricKey = *optMetricKey
   426  	jvm.MetricLabel = *optMetricLabel
   427  
   428  	helper := mp.NewMackerelPlugin(jvm)
   429  	helper.Tempfile = *optTempfile
   430  
   431  	helper.Run()
   432  }