github.com/simpleiot/simpleiot@v0.18.3/client/metrics.go (about) 1 package client 2 3 import ( 4 "log" 5 "os" 6 "runtime" 7 "strings" 8 "time" 9 10 "github.com/nats-io/nats.go" 11 "github.com/shirou/gopsutil/v3/cpu" 12 "github.com/shirou/gopsutil/v3/disk" 13 "github.com/shirou/gopsutil/v3/host" 14 "github.com/shirou/gopsutil/v3/load" 15 "github.com/shirou/gopsutil/v3/mem" 16 "github.com/shirou/gopsutil/v3/net" 17 "github.com/shirou/gopsutil/v3/process" 18 "github.com/simpleiot/simpleiot/data" 19 ) 20 21 // Metrics represents the config of a metrics node type 22 type Metrics struct { 23 ID string `node:"id"` 24 Parent string `node:"parent"` 25 Description string `point:"description"` 26 Type string `point:"type"` 27 Name string `point:"name"` 28 Period int `point:"period"` 29 } 30 31 // MetricsClient is a SIOT client used to collect system or app metrics 32 type MetricsClient struct { 33 nc *nats.Conn 34 config Metrics 35 stop chan struct{} 36 newPoints chan NewPoints 37 newEdgePoints chan NewPoints 38 } 39 40 // NewMetricsClient ... 41 func NewMetricsClient(nc *nats.Conn, config Metrics) Client { 42 return &MetricsClient{ 43 nc: nc, 44 config: config, 45 stop: make(chan struct{}), 46 newPoints: make(chan NewPoints), 47 newEdgePoints: make(chan NewPoints), 48 } 49 } 50 51 // Run the main logic for this client and blocks until stopped 52 func (m *MetricsClient) Run() error { 53 if m.config.Type == data.PointValueSystem { 54 m.sysStart() 55 } 56 57 checkPeriod := func() { 58 if m.config.Period < 1 { 59 m.config.Period = 120 60 points := data.Points{ 61 {Type: data.PointTypePeriod, Value: float64(m.config.Period)}, 62 } 63 64 err := SendPoints(m.nc, SubjectNodePoints(m.config.ID), points, false) 65 if err != nil { 66 log.Println("Error sending metrics period:", err) 67 } 68 } 69 } 70 71 checkPeriod() 72 73 sampleTicker := time.NewTicker(time.Duration(m.config.Period) * time.Second) 74 75 done: 76 for { 77 select { 78 case <-m.stop: 79 break done 80 81 case <-sampleTicker.C: 82 switch m.config.Type { 83 case data.PointValueSystem: 84 m.sysPeriodic() 85 case data.PointValueApp: 86 m.appPeriodic("") 87 case data.PointValueProcess: 88 m.appPeriodic(m.config.Name) 89 case data.PointValueAllProcesses: 90 m.allProcPeriodic() 91 default: 92 log.Println("Metrics: Must select metric type") 93 } 94 95 case pts := <-m.newPoints: 96 err := data.MergePoints(pts.ID, pts.Points, &m.config) 97 if err != nil { 98 log.Println("error merging new points:", err) 99 } 100 101 for _, p := range pts.Points { 102 switch p.Type { 103 case data.PointTypePeriod: 104 checkPeriod() 105 sampleTicker.Reset(time.Duration(m.config.Period) * 106 time.Second) 107 case data.PointTypeType: 108 if m.config.Type == data.PointValueSystem { 109 m.sysStart() 110 } 111 } 112 } 113 114 case pts := <-m.newEdgePoints: 115 err := data.MergeEdgePoints(pts.ID, pts.Parent, pts.Points, &m.config) 116 if err != nil { 117 log.Println("error merging new points:", err) 118 } 119 120 } 121 } 122 123 return nil 124 } 125 126 // Stop sends a signal to the Run function to exit 127 func (m *MetricsClient) Stop(_ error) { 128 close(m.stop) 129 } 130 131 // Points is called by the Manager when new points for this 132 // node are received. 133 func (m *MetricsClient) Points(nodeID string, points []data.Point) { 134 m.newPoints <- NewPoints{nodeID, "", points} 135 } 136 137 // EdgePoints is called by the Manager when new edge points for this 138 // node are received. 139 func (m *MetricsClient) EdgePoints(nodeID, parentID string, points []data.Point) { 140 m.newEdgePoints <- NewPoints{nodeID, parentID, points} 141 } 142 143 func (m *MetricsClient) sysStart() { 144 now := time.Now() 145 // collect static host stats on startup 146 hostStat, err := host.Info() 147 if err != nil { 148 log.Println("Metrics error:", err) 149 } else { 150 // TODO, only send points if they have changed 151 pts := data.Points{ 152 { 153 Type: data.PointTypeHost, 154 Time: now, 155 Key: data.PointKeyHostname, 156 Text: hostStat.Hostname, 157 }, 158 { 159 Type: data.PointTypeHostBootTime, 160 Time: now, 161 Value: float64(hostStat.BootTime), 162 }, 163 { 164 Type: data.PointTypeHost, 165 Time: now, 166 Key: data.PointKeyOS, 167 Text: hostStat.OS, 168 }, 169 { 170 Type: data.PointTypeHost, 171 Time: now, 172 Key: data.PointKeyPlatform, 173 Text: hostStat.Platform, 174 }, 175 { 176 Type: data.PointTypeHost, 177 Time: now, 178 Key: data.PointKeyPlatformFamily, 179 Text: hostStat.PlatformFamily, 180 }, 181 { 182 Type: data.PointTypeHost, 183 Time: now, 184 Key: data.PointKeyPlatformVersion, 185 Text: hostStat.PlatformVersion, 186 }, 187 { 188 Type: data.PointTypeHost, 189 Time: now, 190 Key: data.PointKeyKernelVersion, 191 Text: hostStat.KernelVersion, 192 }, 193 { 194 Type: data.PointTypeHost, 195 Time: now, 196 Key: data.PointKeyKernelArch, 197 Text: hostStat.KernelArch, 198 }, 199 { 200 Type: data.PointTypeHost, 201 Time: now, 202 Key: data.PointKeyVirtualizationSystem, 203 Text: hostStat.VirtualizationSystem, 204 }, 205 { 206 Type: data.PointTypeHost, 207 Time: now, 208 Key: data.PointKeyVirtualizationRole, 209 Text: hostStat.VirtualizationRole, 210 }, 211 } 212 err = SendNodePoints(m.nc, m.config.ID, pts, false) 213 if err != nil { 214 log.Println("Metrics: error sending points:", err) 215 } 216 } 217 218 vm, err := mem.VirtualMemory() 219 if err != nil { 220 log.Println("Metrics error:", err) 221 } else { 222 pt := data.Point{ 223 Type: data.PointTypeMetricSysMem, 224 Time: now, 225 Key: data.PointKeyTotal, 226 Value: float64(vm.Total), 227 } 228 229 err = SendNodePoint(m.nc, m.config.ID, pt, false) 230 if err != nil { 231 log.Println("Metrics: error sending points:", err) 232 } 233 } 234 235 } 236 237 func (m *MetricsClient) sysPeriodic() { 238 now := time.Now() 239 var pts data.Points 240 241 avg, err := load.Avg() 242 if err != nil { 243 log.Println("Metrics error:", err) 244 } else { 245 pts = append(pts, data.Points{ 246 { 247 Type: data.PointTypeMetricSysLoad, 248 Time: now, 249 Key: "1", 250 Value: avg.Load1, 251 }, 252 { 253 Type: data.PointTypeMetricSysLoad, 254 Time: now, 255 Key: "5", 256 Value: avg.Load5, 257 }, 258 { 259 Type: data.PointTypeMetricSysLoad, 260 Time: now, 261 Key: "15", 262 Value: avg.Load15, 263 }, 264 }...) 265 266 } 267 268 perc, err := cpu.Percent(time.Duration(m.config.Period)*time.Second, false) 269 if err != nil { 270 log.Println("Metrics error:", err) 271 } else { 272 pts = append(pts, data.Point{Type: data.PointTypeMetricSysCPUPercent, 273 Time: now, 274 Value: perc[0], 275 }) 276 } 277 278 vm, err := mem.VirtualMemory() 279 if err != nil { 280 log.Println("Metrics error:", err) 281 } else { 282 pts = append(pts, data.Points{ 283 { 284 Type: data.PointTypeMetricSysMemUsedPercent, 285 Time: now, 286 Value: vm.UsedPercent, 287 }, 288 { 289 Type: data.PointTypeMetricSysMem, 290 Time: now, 291 Key: data.PointKeyAvailable, 292 Value: float64(vm.Available), 293 }, 294 { 295 Type: data.PointTypeMetricSysMem, 296 Time: now, 297 Key: data.PointKeyUsed, 298 Value: float64(vm.Used), 299 }, 300 { 301 Type: data.PointTypeMetricSysMem, 302 Time: now, 303 Key: data.PointKeyFree, 304 Value: float64(vm.Free), 305 }, 306 }...) 307 } 308 309 parts, err := disk.Partitions(false) 310 if err != nil { 311 log.Println("Metrics error:", err) 312 } else { 313 for _, p := range parts { 314 if strings.HasPrefix(p.Mountpoint, "/run/media") { 315 // don't track stats for removable media 316 continue 317 } 318 319 u, err := disk.Usage(p.Mountpoint) 320 if err != nil { 321 log.Println("Error getting disk usage:", err) 322 continue 323 } 324 pts = append(pts, data.Points{ 325 { 326 Time: now, 327 Type: data.PointTypeMetricSysDiskUsedPercent, 328 Key: u.Path, 329 Value: u.UsedPercent, 330 }, 331 }...) 332 } 333 } 334 335 netio, err := net.IOCounters(true) 336 if err != nil { 337 log.Println("Metrics error:", err) 338 } else { 339 for _, io := range netio { 340 pts = append(pts, data.Points{ 341 { 342 Time: now, 343 Type: data.PointTypeMetricSysNetBytesRecv, 344 Key: io.Name, 345 Value: float64(io.BytesRecv), 346 }, 347 { 348 Time: now, 349 Type: data.PointTypeMetricSysNetBytesSent, 350 Key: io.Name, 351 Value: float64(io.BytesSent), 352 }, 353 }...) 354 } 355 356 } 357 358 uptime, err := host.Uptime() 359 if err != nil { 360 log.Println("Metrics error:", err) 361 } else { 362 pts = append(pts, data.Point{ 363 Time: now, 364 Type: data.PointTypeMetricSysUptime, 365 Value: float64(uptime), 366 }) 367 } 368 369 temps, err := host.SensorsTemperatures() 370 if err != nil { 371 log.Println("Error reading sensors:", err) 372 } else { 373 for _, t := range temps { 374 pts = append(pts, data.Points{ 375 { 376 Time: now, 377 Type: data.PointTypeTemperature, 378 Key: t.SensorKey, 379 Value: t.Temperature, 380 }, 381 }...) 382 } 383 } 384 385 err = SendNodePoints(m.nc, m.config.ID, pts, false) 386 if err != nil { 387 log.Println("Metrics: error sending points:", err) 388 } 389 } 390 391 // if procName is "", then collect stats for this app 392 func (m *MetricsClient) appPeriodic(procName string) { 393 now := time.Now() 394 395 if procName == "" { 396 var memStats runtime.MemStats 397 398 runtime.ReadMemStats(&memStats) 399 400 numGoRoutine := runtime.NumGoroutine() 401 402 pts := data.Points{ 403 { 404 Time: now, 405 Type: data.PointTypeMetricAppAlloc, 406 Value: float64(memStats.Alloc), 407 }, 408 { 409 Time: now, 410 Type: data.PointTypeMetricAppNumGoroutine, 411 Value: float64(numGoRoutine), 412 }, 413 } 414 415 err := SendNodePoints(m.nc, m.config.ID, pts, false) 416 if err != nil { 417 log.Println("Metrics: error sending points:", err) 418 } 419 } 420 421 pid := os.Getpid() 422 423 procs, err := process.Processes() 424 if err != nil { 425 log.Println("Metrics error:", err) 426 } else { 427 var accumCPUPerc, accumMemPerc, accumMemRSS float64 428 var procCount int 429 for _, p := range procs { 430 if procName != "" { 431 name, err := p.Name() 432 if err != nil { 433 log.Println("Error getting process name:", err) 434 continue 435 } 436 if name != procName { 437 continue 438 } 439 } else { 440 if p.Pid != int32(pid) { 441 continue 442 } 443 } 444 445 procCount++ 446 447 cpuPerc, err := p.CPUPercent() 448 if err != nil { 449 log.Println("Error getting CPU percent for proc:", err) 450 break 451 } 452 453 accumCPUPerc += cpuPerc 454 455 memPerc, err := p.MemoryPercent() 456 if err != nil { 457 log.Println("Error getting mem percent for proc:", err) 458 break 459 } 460 461 accumMemPerc += float64(memPerc) 462 463 memInfo, err := p.MemoryInfo() 464 if err != nil { 465 log.Println("Error getting mem info:", err) 466 break 467 } 468 469 accumMemRSS += float64(memInfo.RSS) 470 } 471 472 pts := data.Points{ 473 { 474 Time: now, 475 Type: data.PointTypeMetricProcCPUPercent, 476 Value: float64(accumCPUPerc), 477 }, 478 { 479 Time: now, 480 Type: data.PointTypeMetricProcMemPercent, 481 Value: float64(accumMemPerc), 482 }, 483 { 484 Time: now, 485 Type: data.PointTypeMetricProcMemRSS, 486 Value: float64(accumMemRSS), 487 }, 488 } 489 490 if procName != "" { 491 pts = append(pts, data.Point{ 492 Time: now, 493 Type: data.PointTypeCount, 494 Value: float64(procCount), 495 }) 496 } 497 498 err = SendNodePoints(m.nc, m.config.ID, pts, false) 499 if err != nil { 500 log.Println("Metrics: error sending points:", err) 501 } 502 503 } 504 } 505 506 type procMetrics struct { 507 count float64 508 cpu float64 509 mem float64 510 rss float64 511 } 512 513 func (m *MetricsClient) allProcPeriodic() { 514 now := time.Now() 515 516 metrics := make(map[string]procMetrics) 517 518 procs, err := process.Processes() 519 if err != nil { 520 log.Println("Metrics error:", err) 521 } else { 522 for _, p := range procs { 523 name, err := p.Name() 524 if err != nil { 525 log.Println("Error getting process name:", err) 526 continue 527 } 528 529 m := metrics[name] 530 531 m.count++ 532 533 cpuPerc, err := p.CPUPercent() 534 if err != nil { 535 log.Println("Error getting CPU percent for proc:", err) 536 break 537 } 538 539 m.cpu += cpuPerc 540 541 memPerc, err := p.MemoryPercent() 542 if err != nil { 543 log.Println("Error getting mem percent for proc:", err) 544 break 545 } 546 547 m.mem += float64(memPerc) 548 549 memInfo, err := p.MemoryInfo() 550 if err != nil { 551 log.Println("Error getting mem info:", err) 552 break 553 } 554 555 m.rss += float64(memInfo.RSS) 556 557 metrics[name] = m 558 } 559 560 pts := make(data.Points, len(metrics)*4) 561 var i int 562 for k, v := range metrics { 563 pts[i].Time = now 564 pts[i].Key = k 565 pts[i].Type = data.PointTypeMetricProcCPUPercent 566 pts[i].Value = v.cpu 567 i++ 568 569 pts[i].Time = now 570 pts[i].Key = k 571 pts[i].Type = data.PointTypeMetricProcMemPercent 572 pts[i].Value = v.mem 573 i++ 574 575 pts[i].Time = now 576 pts[i].Key = k 577 pts[i].Type = data.PointTypeMetricProcMemRSS 578 pts[i].Value = v.rss 579 i++ 580 581 pts[i].Time = now 582 pts[i].Key = k 583 pts[i].Type = data.PointTypeCount 584 pts[i].Value = v.count 585 i++ 586 } 587 588 err = SendNodePoints(m.nc, m.config.ID, pts, false) 589 if err != nil { 590 log.Println("Metrics: error sending points:", err) 591 } 592 } 593 }