github.com/mackerelio/mackerel-agent-plugins@v0.89.3/mackerel-plugin-jvm/lib/jvm.go (about) 1 package mpjvm 2 3 import ( 4 "flag" 5 "fmt" 6 "os" 7 "os/exec" 8 "strconv" 9 "strings" 10 "time" 11 12 "github.com/Songmu/timeout" 13 mp "github.com/mackerelio/go-mackerel-plugin-helper" 14 "github.com/mackerelio/golib/logging" 15 ) 16 17 var logger = logging.GetLogger("metrics.plugin.jvm") 18 19 // JVMPlugin plugin for JVM 20 type JVMPlugin struct { 21 Remote string 22 Lvmid string 23 JstatPath string 24 JinfoPath string 25 JavaName string 26 Tempfile string 27 MetricKey string 28 MetricLabel string 29 } 30 31 // # jps 32 // 26547 NettyServer 33 // 6438 Jps 34 func fetchLvmidByAppname(appname, target, jpsPath string) (string, error) { 35 var ( 36 stdout string 37 exitStatus *timeout.ExitStatus 38 err error 39 ) 40 if target != "" { 41 stdout, _, exitStatus, err = runTimeoutCommand(jpsPath, target) 42 } else { 43 stdout, _, exitStatus, err = runTimeoutCommand(jpsPath) 44 } 45 46 if err == nil && exitStatus.IsTimedOut() { 47 err = fmt.Errorf("jps command timed out") 48 } 49 if err != nil { 50 logger.Errorf("Failed to run exec jps. %s. Please run with the java process user.", err) 51 return "", err 52 } 53 54 for _, line := range strings.Split(string(stdout), "\n") { 55 words := strings.Split(line, " ") 56 if len(words) != 2 { 57 continue 58 } 59 lvmid, name := words[0], words[1] 60 if name == appname { 61 return lvmid, nil 62 } 63 } 64 return "", fmt.Errorf("cannot get lvmid from %s (please run with the java process user)", appname) 65 } 66 67 func (m JVMPlugin) fetchJstatMetrics(option string) (map[string]float64, error) { 68 vmid := generateVmid(m.Remote, m.Lvmid) 69 stdout, _, exitStatus, err := runTimeoutCommand(m.JstatPath, option, vmid) 70 71 if err == nil && exitStatus.IsTimedOut() { 72 err = fmt.Errorf("jstat command timed out") 73 } 74 if err != nil || exitStatus.GetChildExitCode() != 0 { 75 logger.Errorf("Failed to run exec jstat. %s. Please run with the java process user.", err) 76 return nil, err 77 } 78 79 lines := strings.Split(string(stdout), "\n") 80 if len(lines) < 2 { 81 logger.Warningf("Failed to parse output. output has only %d lines.", len(lines)) 82 return nil, fmt.Errorf("output of jstat command does not have enough lines") 83 } 84 keys := strings.Fields(lines[0]) 85 values := strings.Fields(lines[1]) 86 87 stat := make(map[string]float64) 88 for i, key := range keys { 89 if values[i] == "-" { 90 continue 91 } 92 value, err := strconv.ParseFloat(values[i], 64) 93 if err != nil { 94 logger.Warningf("Failed to parse value. %s", err) 95 } 96 stat[key] = value 97 } 98 99 return stat, nil 100 } 101 102 func (m JVMPlugin) calculateMemorySpaceRate(gcStat map[string]float64) (map[string]float64, error) { 103 ret := make(map[string]float64) 104 ret["oldSpaceRate"] = gcStat["OU"] / gcStat["OC"] * 100 105 ret["newSpaceRate"] = (gcStat["S0U"] + gcStat["S1U"] + gcStat["EU"]) / (gcStat["S0C"] + gcStat["S1C"] + gcStat["EC"]) * 100 106 107 checkCMSGC, err := m.checkCMSGC() 108 if err != nil { 109 return nil, err 110 } 111 if checkCMSGC { 112 fraction, err := fetchCMSInitiatingOccupancyFraction(m.Lvmid, m.JinfoPath) 113 if err != nil { 114 return nil, err 115 } 116 ret["CMSInitiatingOccupancyFraction"] = fraction 117 } 118 119 return ret, nil 120 } 121 122 func (m JVMPlugin) checkCMSGC() (bool, error) { 123 // jinfo does not work on remote 124 if m.Remote != "" { 125 return false, nil 126 } 127 stdout, _, exitStatus, err := runTimeoutCommand(m.JinfoPath, "-flag", "UseConcMarkSweepGC", m.Lvmid) 128 129 if err == nil && exitStatus.IsTimedOut() { 130 err = fmt.Errorf("jinfo command timed out") 131 } 132 if err != nil { 133 logger.Errorf("Failed to run exec jinfo. %s. Please run with the java process user.", err) 134 return false, err 135 } 136 return strings.Contains(string(stdout), "+UseConcMarkSweepGC"), nil 137 } 138 139 func fetchCMSInitiatingOccupancyFraction(lvmid, JinfoPath string) (float64, error) { 140 var fraction float64 141 142 stdout, _, exitStatus, err := runTimeoutCommand(JinfoPath, "-flag", "CMSInitiatingOccupancyFraction", lvmid) 143 144 if err == nil && exitStatus.IsTimedOut() { 145 err = fmt.Errorf("jinfo command timed out") 146 } 147 if err != nil { 148 logger.Errorf("Failed to run exec jinfo. %s. Please run with the java process user.", err) 149 return 0.0, err 150 } 151 152 out := strings.Trim(string(stdout), "\n") 153 tmp := strings.Split(out, "=") 154 fraction, _ = strconv.ParseFloat(tmp[1], 64) 155 156 return fraction, nil 157 } 158 159 func mergeStat(dst, src map[string]float64) { 160 for k, v := range src { 161 dst[k] = v 162 } 163 } 164 165 func runTimeoutCommand(Path string, Args ...string) (string, string, *timeout.ExitStatus, error) { 166 var TimeoutDuration = 10 * time.Second 167 var TimeoutKillAfter = 5 * time.Second 168 tio := &timeout.Timeout{ 169 Cmd: exec.Command(Path, Args...), 170 Duration: TimeoutDuration, 171 KillAfter: TimeoutKillAfter, 172 } 173 exitStatus, stdout, stderr, err := tio.Run() 174 return stdout, stderr, exitStatus, err 175 } 176 177 // <Java11> 178 // # jstat -gc <vmid> 179 // S0C S1C S0U S1U EC EU OC OU MC MU CCSC CCSU YGC YGCT FGC FGCT CGC CGCT GCT 180 // 45184.0 45184.0 45184.0 0.0 361728.0 132414.7 904068.0 679249.5 21248.0 20787.3 2304.0 2105.8 22 8.584 6 2.343 - - 10.927 181 182 // <Java8> https://docs.oracle.com/javase/8/docs/technotes/tools/unix/jstat.html 183 // # jstat -gc <vmid> 184 // S0C S1C S0U S1U EC EU OC OU MC MU CCSC CCSU YGC YGCT FGC FGCT GCT 185 // 1024.0 1024.0 0.0 0.0 8256.0 8256.0 20480.0 453.4 4864.0 2776.2 512.0 300.8 0 0.000 1 0.003 0.003 186 187 // # jstat -gccapacity <vmid> 188 // NGCMN NGCMX NGC S0C S1C EC OGCMN OGCMX OGC OC MCMN MCMX MC CCSMN CCSMX CCSC YGC FGC 189 // 10240.0 160384.0 10304.0 1024.0 1024.0 8256.0 20480.0 320896.0 20480.0 20480.0 0.0 1056768.0 4864.0 0.0 1048576.0 512.0 0 1 190 191 // # jstat -gcnew <vmid> 192 // S0C S1C S0U S1U TT MTT DSS EC EU YGC YGCT 193 // 1024.0 1024.0 0.0 0.0 15 15 0.0 8256.0 8256.0 0 0.000 194 195 // <Java7> 196 // # jstat -gc <vmid> 197 // S0C S1C S0U S1U EC EU OC OU PC PU YGC YGCT FGC FGCT GCT 198 // 3584.0 3584.0 2528.0 0.0 692224.0 19062.4 1398272.0 485450.1 72704.0 72611.3 3152 30.229 0 0.000 30.229 199 200 // # jstat -gccapacity <vmid> 201 // NGCMN NGCMX NGC S0C S1C EC OGCMN OGCMX OGC OC PGCMN PGCMX PGC PC YGC FGC 202 // 699392.0 699392.0 699392.0 4096.0 4096.0 691200.0 1398272.0 1398272.0 1398272.0 1398272.0 21504.0 524288.0 72704.0 72704.0 4212 0 203 204 // # jstat -gcnew <vmid> 205 // S0C S1C S0U S1U TT MTT DSS EC EU YGC YGCT 206 // 3072.0 3072.0 0.0 2848.0 1 15 3072.0 693248.0 626782.2 3463 33.658 207 208 // FetchMetrics interface for mackerelplugin 209 func (m JVMPlugin) FetchMetrics() (map[string]interface{}, error) { 210 gcStat, err := m.fetchJstatMetrics("-gc") 211 if err != nil { 212 return nil, err 213 } 214 gcCapacityStat, err := m.fetchJstatMetrics("-gccapacity") 215 if err != nil { 216 return nil, err 217 } 218 gcNewStat, err := m.fetchJstatMetrics("-gcnew") 219 if err != nil { 220 return nil, err 221 } 222 gcOldStat, err := m.fetchJstatMetrics("-gcold") 223 if err != nil { 224 return nil, err 225 } 226 gcSpaceRate, err := m.calculateMemorySpaceRate(gcStat) 227 if err != nil { 228 return nil, err 229 } 230 231 stat := make(map[string]float64) 232 mergeStat(stat, gcStat) 233 mergeStat(stat, gcCapacityStat) 234 mergeStat(stat, gcNewStat) 235 mergeStat(stat, gcOldStat) 236 mergeStat(stat, gcSpaceRate) 237 238 result := make(map[string]interface{}) 239 for k, v := range stat { 240 result[k] = v 241 } 242 return result, nil 243 } 244 245 // GraphDefinition interface for mackerelplugin 246 func (m JVMPlugin) GraphDefinition() map[string]mp.Graphs { 247 metricLabel := m.MetricLabel 248 if metricLabel == "" { 249 metricLabel = m.JavaName 250 } 251 252 javaName := m.MetricKey 253 if javaName == "" { 254 javaName = m.JavaName 255 } 256 lowerJavaName := strings.ToLower(javaName) 257 258 return map[string]mp.Graphs{ 259 fmt.Sprintf("jvm.%s.gc_events", lowerJavaName): { 260 Label: fmt.Sprintf("JVM %s GC events", metricLabel), 261 Unit: "integer", 262 Metrics: []mp.Metrics{ 263 {Name: "YGC", Label: "Young GC event", Diff: true}, 264 {Name: "FGC", Label: "Full GC event", Diff: true}, 265 {Name: "CGC", Label: "Concurrent GC event", Diff: true}, 266 }, 267 }, 268 fmt.Sprintf("jvm.%s.gc_time", lowerJavaName): { 269 Label: fmt.Sprintf("JVM %s GC time (sec)", metricLabel), 270 Unit: "float", 271 Metrics: []mp.Metrics{ 272 {Name: "YGCT", Label: "Young GC time", Diff: true}, 273 {Name: "FGCT", Label: "Full GC time", Diff: true}, 274 {Name: "CGCT", Label: "Concurrent GC time", Diff: true}, 275 }, 276 }, 277 fmt.Sprintf("jvm.%s.gc_time_percentage", lowerJavaName): { 278 Label: fmt.Sprintf("JVM %s GC time percentage", metricLabel), 279 Unit: "percentage", 280 Metrics: []mp.Metrics{ 281 // gc_time_percentage is the percentage of gc time to 60 sec. 282 {Name: "YGCT", Label: "Young GC time", Diff: true, Scale: (100.0 / 60)}, 283 {Name: "FGCT", Label: "Full GC time", Diff: true, Scale: (100.0 / 60)}, 284 {Name: "CGCT", Label: "Concurrent GC time", Diff: true, Scale: (100.0 / 60)}, 285 }, 286 }, 287 fmt.Sprintf("jvm.%s.new_space", lowerJavaName): { 288 Label: fmt.Sprintf("JVM %s New Space memory", metricLabel), 289 Unit: "float", 290 Metrics: []mp.Metrics{ 291 {Name: "NGCMX", Label: "New max", Diff: false, Scale: 1024}, 292 {Name: "NGC", Label: "New current", Diff: false, Scale: 1024}, 293 {Name: "EU", Label: "Eden used", Diff: false, Scale: 1024}, 294 {Name: "S0U", Label: "Survivor0 used", Diff: false, Scale: 1024}, 295 {Name: "S1U", Label: "Survivor1 used", Diff: false, Scale: 1024}, 296 }, 297 }, 298 fmt.Sprintf("jvm.%s.old_space", lowerJavaName): { 299 Label: fmt.Sprintf("JVM %s Old Space memory", metricLabel), 300 Unit: "float", 301 Metrics: []mp.Metrics{ 302 {Name: "OGCMX", Label: "Old max", Diff: false, Scale: 1024}, 303 {Name: "OGC", Label: "Old current", Diff: false, Scale: 1024}, 304 {Name: "OU", Label: "Old used", Diff: false, Scale: 1024}, 305 }, 306 }, 307 fmt.Sprintf("jvm.%s.perm_space", lowerJavaName): { 308 Label: fmt.Sprintf("JVM %s Permanent Space", metricLabel), 309 Unit: "float", 310 Metrics: []mp.Metrics{ 311 {Name: "PGCMX", Label: "Perm max", Diff: false, Scale: 1024}, 312 {Name: "PGC", Label: "Perm current", Diff: false, Scale: 1024}, 313 {Name: "PU", Label: "Perm used", Diff: false, Scale: 1024}, 314 }, 315 }, 316 fmt.Sprintf("jvm.%s.metaspace", lowerJavaName): { 317 Label: fmt.Sprintf("JVM %s Metaspace", metricLabel), 318 Unit: "float", 319 Metrics: []mp.Metrics{ 320 {Name: "MCMX", Label: "Metaspace capacity max", Diff: false, Scale: 1024}, 321 {Name: "MCMN", Label: "Metaspace capacity min", Diff: false, Scale: 1024}, 322 {Name: "MC", Label: "Metaspace capacity", Diff: false, Scale: 1024}, 323 {Name: "MU", Label: "Metaspace utilization ", Diff: false, Scale: 1024}, 324 {Name: "CCSC", Label: "Compressed Class Space Capacity", Diff: false, Scale: 1024}, 325 {Name: "CCSU", Label: "Compressed Class Space Used", Diff: false, Scale: 1024}, 326 }, 327 }, 328 fmt.Sprintf("jvm.%s.memorySpace", lowerJavaName): { 329 Label: fmt.Sprintf("JVM %s MemorySpace", metricLabel), 330 Unit: "float", 331 Metrics: []mp.Metrics{ 332 {Name: "oldSpaceRate", Label: "GC Old Memory Space", Diff: false}, 333 {Name: "newSpaceRate", Label: "GC New Memory Space", Diff: false}, 334 {Name: "CMSInitiatingOccupancyFraction", Label: "CMS Initiating Occupancy Fraction", Diff: false}, 335 }, 336 }, 337 } 338 } 339 340 func generateVmid(remote, lvmid string) string { 341 if remote != "" { 342 if lvmid == "" { 343 return remote 344 } 345 return fmt.Sprintf("%s@%s", lvmid, remote) 346 } 347 return lvmid 348 } 349 350 func generateRemote(remote, host string, port int) string { 351 if remote == "" { 352 if host == "" { 353 if port != 0 { 354 // for backward compatibility 355 return fmt.Sprintf("localhost:%d", port) 356 } 357 return "" 358 } 359 if port == 0 { 360 return host 361 } 362 return fmt.Sprintf("%s:%d", host, port) 363 } 364 365 if host != "" || port != 0 { 366 logger.Warningf("'-host' and '-port' are ignored, since '-remote' is specified") 367 } 368 return remote 369 } 370 371 // Do the plugin 372 func Do() { 373 // Prefer ${JAVA_HOME}/bin if JAVA_HOME presents 374 pathBase := "/usr/bin" 375 if javaHome := os.Getenv("JAVA_HOME"); javaHome != "" { 376 pathBase = javaHome + "/bin" 377 } 378 optHost := flag.String("host", "", "jps/jstat target hostname [deprecated]") 379 optPort := flag.Int("port", 0, "jps/jstat target port [deprecated]") 380 optRemote := flag.String("remote", "", "jps/jstat remote target. hostname[:port][/servername]") 381 optJstatPath := flag.String("jstatpath", pathBase+"/jstat", "jstat path") 382 optJinfoPath := flag.String("jinfopath", pathBase+"/jinfo", "jinfo path") 383 optJpsPath := flag.String("jpspath", pathBase+"/jps", "jps path") 384 optJavaName := flag.String("javaname", "", "Java app name") 385 optPidFile := flag.String("pidfile", "", "pidfile path") 386 optTempfile := flag.String("tempfile", "", "Temp file name") 387 optMetricKey := flag.String("metric-key", "", "Specifying the Name field in the Graph Definition") 388 optMetricLabel := flag.String("metric-label", "", "Specifying the Label field in the Graph Definition") 389 flag.Parse() 390 391 var jvm JVMPlugin 392 jvm.JstatPath = *optJstatPath 393 jvm.JinfoPath = *optJinfoPath 394 jvm.Remote = generateRemote(*optRemote, *optHost, *optPort) 395 396 if *optJavaName == "" { 397 logger.Errorf("javaname is required (if you use 'pidfile' option, 'javaname' is used as just a prefix of graph label)") 398 flag.PrintDefaults() 399 os.Exit(1) 400 } 401 402 if *optPidFile != "" && jvm.Remote != "" { 403 logger.Warningf("both '-pidfile' and '-remote' specified, but '-pidfile' does not work with '-remote' therefore ignored") 404 } 405 406 if *optPidFile == "" || jvm.Remote != "" { 407 lvmid, err := fetchLvmidByAppname(*optJavaName, generateVmid(jvm.Remote, ""), *optJpsPath) 408 if err != nil { 409 logger.Errorf("Failed to fetch lvmid. %s. Please run with the java process user when monitoring local JVM, or set proper 'remote' option when monitorint remote one.", err) 410 os.Exit(1) 411 } 412 jvm.Lvmid = lvmid 413 } else { 414 // https://docs.oracle.com/javase/7/docs/technotes/tools/share/jps.html 415 // `The lvmid is typically, but not necessarily, the operating system's process identifier for the JVM process.` 416 pid, err := os.ReadFile(*optPidFile) 417 if err != nil { 418 logger.Errorf("Failed to load pid. %s", err) 419 os.Exit(1) 420 } 421 jvm.Lvmid = strings.Replace(string(pid), "\n", "", 1) 422 } 423 424 jvm.JavaName = *optJavaName 425 jvm.MetricKey = *optMetricKey 426 jvm.MetricLabel = *optMetricLabel 427 428 helper := mp.NewMackerelPlugin(jvm) 429 helper.Tempfile = *optTempfile 430 431 helper.Run() 432 }