github.com/simpleiot/simpleiot@v0.18.3/client/metrics.go (about)

     1  package client
     2  
     3  import (
     4  	"log"
     5  	"os"
     6  	"runtime"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/nats-io/nats.go"
    11  	"github.com/shirou/gopsutil/v3/cpu"
    12  	"github.com/shirou/gopsutil/v3/disk"
    13  	"github.com/shirou/gopsutil/v3/host"
    14  	"github.com/shirou/gopsutil/v3/load"
    15  	"github.com/shirou/gopsutil/v3/mem"
    16  	"github.com/shirou/gopsutil/v3/net"
    17  	"github.com/shirou/gopsutil/v3/process"
    18  	"github.com/simpleiot/simpleiot/data"
    19  )
    20  
    21  // Metrics represents the config of a metrics node type
    22  type Metrics struct {
    23  	ID          string `node:"id"`
    24  	Parent      string `node:"parent"`
    25  	Description string `point:"description"`
    26  	Type        string `point:"type"`
    27  	Name        string `point:"name"`
    28  	Period      int    `point:"period"`
    29  }
    30  
    31  // MetricsClient is a SIOT client used to collect system or app metrics
    32  type MetricsClient struct {
    33  	nc            *nats.Conn
    34  	config        Metrics
    35  	stop          chan struct{}
    36  	newPoints     chan NewPoints
    37  	newEdgePoints chan NewPoints
    38  }
    39  
    40  // NewMetricsClient ...
    41  func NewMetricsClient(nc *nats.Conn, config Metrics) Client {
    42  	return &MetricsClient{
    43  		nc:            nc,
    44  		config:        config,
    45  		stop:          make(chan struct{}),
    46  		newPoints:     make(chan NewPoints),
    47  		newEdgePoints: make(chan NewPoints),
    48  	}
    49  }
    50  
    51  // Run the main logic for this client and blocks until stopped
    52  func (m *MetricsClient) Run() error {
    53  	if m.config.Type == data.PointValueSystem {
    54  		m.sysStart()
    55  	}
    56  
    57  	checkPeriod := func() {
    58  		if m.config.Period < 1 {
    59  			m.config.Period = 120
    60  			points := data.Points{
    61  				{Type: data.PointTypePeriod, Value: float64(m.config.Period)},
    62  			}
    63  
    64  			err := SendPoints(m.nc, SubjectNodePoints(m.config.ID), points, false)
    65  			if err != nil {
    66  				log.Println("Error sending metrics period:", err)
    67  			}
    68  		}
    69  	}
    70  
    71  	checkPeriod()
    72  
    73  	sampleTicker := time.NewTicker(time.Duration(m.config.Period) * time.Second)
    74  
    75  done:
    76  	for {
    77  		select {
    78  		case <-m.stop:
    79  			break done
    80  
    81  		case <-sampleTicker.C:
    82  			switch m.config.Type {
    83  			case data.PointValueSystem:
    84  				m.sysPeriodic()
    85  			case data.PointValueApp:
    86  				m.appPeriodic("")
    87  			case data.PointValueProcess:
    88  				m.appPeriodic(m.config.Name)
    89  			case data.PointValueAllProcesses:
    90  				m.allProcPeriodic()
    91  			default:
    92  				log.Println("Metrics: Must select metric type")
    93  			}
    94  
    95  		case pts := <-m.newPoints:
    96  			err := data.MergePoints(pts.ID, pts.Points, &m.config)
    97  			if err != nil {
    98  				log.Println("error merging new points:", err)
    99  			}
   100  
   101  			for _, p := range pts.Points {
   102  				switch p.Type {
   103  				case data.PointTypePeriod:
   104  					checkPeriod()
   105  					sampleTicker.Reset(time.Duration(m.config.Period) *
   106  						time.Second)
   107  				case data.PointTypeType:
   108  					if m.config.Type == data.PointValueSystem {
   109  						m.sysStart()
   110  					}
   111  				}
   112  			}
   113  
   114  		case pts := <-m.newEdgePoints:
   115  			err := data.MergeEdgePoints(pts.ID, pts.Parent, pts.Points, &m.config)
   116  			if err != nil {
   117  				log.Println("error merging new points:", err)
   118  			}
   119  
   120  		}
   121  	}
   122  
   123  	return nil
   124  }
   125  
   126  // Stop sends a signal to the Run function to exit
   127  func (m *MetricsClient) Stop(_ error) {
   128  	close(m.stop)
   129  }
   130  
   131  // Points is called by the Manager when new points for this
   132  // node are received.
   133  func (m *MetricsClient) Points(nodeID string, points []data.Point) {
   134  	m.newPoints <- NewPoints{nodeID, "", points}
   135  }
   136  
   137  // EdgePoints is called by the Manager when new edge points for this
   138  // node are received.
   139  func (m *MetricsClient) EdgePoints(nodeID, parentID string, points []data.Point) {
   140  	m.newEdgePoints <- NewPoints{nodeID, parentID, points}
   141  }
   142  
   143  func (m *MetricsClient) sysStart() {
   144  	now := time.Now()
   145  	// collect static host stats on startup
   146  	hostStat, err := host.Info()
   147  	if err != nil {
   148  		log.Println("Metrics error:", err)
   149  	} else {
   150  		// TODO, only send points if they have changed
   151  		pts := data.Points{
   152  			{
   153  				Type: data.PointTypeHost,
   154  				Time: now,
   155  				Key:  data.PointKeyHostname,
   156  				Text: hostStat.Hostname,
   157  			},
   158  			{
   159  				Type:  data.PointTypeHostBootTime,
   160  				Time:  now,
   161  				Value: float64(hostStat.BootTime),
   162  			},
   163  			{
   164  				Type: data.PointTypeHost,
   165  				Time: now,
   166  				Key:  data.PointKeyOS,
   167  				Text: hostStat.OS,
   168  			},
   169  			{
   170  				Type: data.PointTypeHost,
   171  				Time: now,
   172  				Key:  data.PointKeyPlatform,
   173  				Text: hostStat.Platform,
   174  			},
   175  			{
   176  				Type: data.PointTypeHost,
   177  				Time: now,
   178  				Key:  data.PointKeyPlatformFamily,
   179  				Text: hostStat.PlatformFamily,
   180  			},
   181  			{
   182  				Type: data.PointTypeHost,
   183  				Time: now,
   184  				Key:  data.PointKeyPlatformVersion,
   185  				Text: hostStat.PlatformVersion,
   186  			},
   187  			{
   188  				Type: data.PointTypeHost,
   189  				Time: now,
   190  				Key:  data.PointKeyKernelVersion,
   191  				Text: hostStat.KernelVersion,
   192  			},
   193  			{
   194  				Type: data.PointTypeHost,
   195  				Time: now,
   196  				Key:  data.PointKeyKernelArch,
   197  				Text: hostStat.KernelArch,
   198  			},
   199  			{
   200  				Type: data.PointTypeHost,
   201  				Time: now,
   202  				Key:  data.PointKeyVirtualizationSystem,
   203  				Text: hostStat.VirtualizationSystem,
   204  			},
   205  			{
   206  				Type: data.PointTypeHost,
   207  				Time: now,
   208  				Key:  data.PointKeyVirtualizationRole,
   209  				Text: hostStat.VirtualizationRole,
   210  			},
   211  		}
   212  		err = SendNodePoints(m.nc, m.config.ID, pts, false)
   213  		if err != nil {
   214  			log.Println("Metrics: error sending points:", err)
   215  		}
   216  	}
   217  
   218  	vm, err := mem.VirtualMemory()
   219  	if err != nil {
   220  		log.Println("Metrics error:", err)
   221  	} else {
   222  		pt := data.Point{
   223  			Type:  data.PointTypeMetricSysMem,
   224  			Time:  now,
   225  			Key:   data.PointKeyTotal,
   226  			Value: float64(vm.Total),
   227  		}
   228  
   229  		err = SendNodePoint(m.nc, m.config.ID, pt, false)
   230  		if err != nil {
   231  			log.Println("Metrics: error sending points:", err)
   232  		}
   233  	}
   234  
   235  }
   236  
   237  func (m *MetricsClient) sysPeriodic() {
   238  	now := time.Now()
   239  	var pts data.Points
   240  
   241  	avg, err := load.Avg()
   242  	if err != nil {
   243  		log.Println("Metrics error:", err)
   244  	} else {
   245  		pts = append(pts, data.Points{
   246  			{
   247  				Type:  data.PointTypeMetricSysLoad,
   248  				Time:  now,
   249  				Key:   "1",
   250  				Value: avg.Load1,
   251  			},
   252  			{
   253  				Type:  data.PointTypeMetricSysLoad,
   254  				Time:  now,
   255  				Key:   "5",
   256  				Value: avg.Load5,
   257  			},
   258  			{
   259  				Type:  data.PointTypeMetricSysLoad,
   260  				Time:  now,
   261  				Key:   "15",
   262  				Value: avg.Load15,
   263  			},
   264  		}...)
   265  
   266  	}
   267  
   268  	perc, err := cpu.Percent(time.Duration(m.config.Period)*time.Second, false)
   269  	if err != nil {
   270  		log.Println("Metrics error:", err)
   271  	} else {
   272  		pts = append(pts, data.Point{Type: data.PointTypeMetricSysCPUPercent,
   273  			Time:  now,
   274  			Value: perc[0],
   275  		})
   276  	}
   277  
   278  	vm, err := mem.VirtualMemory()
   279  	if err != nil {
   280  		log.Println("Metrics error:", err)
   281  	} else {
   282  		pts = append(pts, data.Points{
   283  			{
   284  				Type:  data.PointTypeMetricSysMemUsedPercent,
   285  				Time:  now,
   286  				Value: vm.UsedPercent,
   287  			},
   288  			{
   289  				Type:  data.PointTypeMetricSysMem,
   290  				Time:  now,
   291  				Key:   data.PointKeyAvailable,
   292  				Value: float64(vm.Available),
   293  			},
   294  			{
   295  				Type:  data.PointTypeMetricSysMem,
   296  				Time:  now,
   297  				Key:   data.PointKeyUsed,
   298  				Value: float64(vm.Used),
   299  			},
   300  			{
   301  				Type:  data.PointTypeMetricSysMem,
   302  				Time:  now,
   303  				Key:   data.PointKeyFree,
   304  				Value: float64(vm.Free),
   305  			},
   306  		}...)
   307  	}
   308  
   309  	parts, err := disk.Partitions(false)
   310  	if err != nil {
   311  		log.Println("Metrics error:", err)
   312  	} else {
   313  		for _, p := range parts {
   314  			if strings.HasPrefix(p.Mountpoint, "/run/media") {
   315  				// don't track stats for removable media
   316  				continue
   317  			}
   318  
   319  			u, err := disk.Usage(p.Mountpoint)
   320  			if err != nil {
   321  				log.Println("Error getting disk usage:", err)
   322  				continue
   323  			}
   324  			pts = append(pts, data.Points{
   325  				{
   326  					Time:  now,
   327  					Type:  data.PointTypeMetricSysDiskUsedPercent,
   328  					Key:   u.Path,
   329  					Value: u.UsedPercent,
   330  				},
   331  			}...)
   332  		}
   333  	}
   334  
   335  	netio, err := net.IOCounters(true)
   336  	if err != nil {
   337  		log.Println("Metrics error:", err)
   338  	} else {
   339  		for _, io := range netio {
   340  			pts = append(pts, data.Points{
   341  				{
   342  					Time:  now,
   343  					Type:  data.PointTypeMetricSysNetBytesRecv,
   344  					Key:   io.Name,
   345  					Value: float64(io.BytesRecv),
   346  				},
   347  				{
   348  					Time:  now,
   349  					Type:  data.PointTypeMetricSysNetBytesSent,
   350  					Key:   io.Name,
   351  					Value: float64(io.BytesSent),
   352  				},
   353  			}...)
   354  		}
   355  
   356  	}
   357  
   358  	uptime, err := host.Uptime()
   359  	if err != nil {
   360  		log.Println("Metrics error:", err)
   361  	} else {
   362  		pts = append(pts, data.Point{
   363  			Time:  now,
   364  			Type:  data.PointTypeMetricSysUptime,
   365  			Value: float64(uptime),
   366  		})
   367  	}
   368  
   369  	temps, err := host.SensorsTemperatures()
   370  	if err != nil {
   371  		log.Println("Error reading sensors:", err)
   372  	} else {
   373  		for _, t := range temps {
   374  			pts = append(pts, data.Points{
   375  				{
   376  					Time:  now,
   377  					Type:  data.PointTypeTemperature,
   378  					Key:   t.SensorKey,
   379  					Value: t.Temperature,
   380  				},
   381  			}...)
   382  		}
   383  	}
   384  
   385  	err = SendNodePoints(m.nc, m.config.ID, pts, false)
   386  	if err != nil {
   387  		log.Println("Metrics: error sending points:", err)
   388  	}
   389  }
   390  
   391  // if procName is "", then collect stats for this app
   392  func (m *MetricsClient) appPeriodic(procName string) {
   393  	now := time.Now()
   394  
   395  	if procName == "" {
   396  		var memStats runtime.MemStats
   397  
   398  		runtime.ReadMemStats(&memStats)
   399  
   400  		numGoRoutine := runtime.NumGoroutine()
   401  
   402  		pts := data.Points{
   403  			{
   404  				Time:  now,
   405  				Type:  data.PointTypeMetricAppAlloc,
   406  				Value: float64(memStats.Alloc),
   407  			},
   408  			{
   409  				Time:  now,
   410  				Type:  data.PointTypeMetricAppNumGoroutine,
   411  				Value: float64(numGoRoutine),
   412  			},
   413  		}
   414  
   415  		err := SendNodePoints(m.nc, m.config.ID, pts, false)
   416  		if err != nil {
   417  			log.Println("Metrics: error sending points:", err)
   418  		}
   419  	}
   420  
   421  	pid := os.Getpid()
   422  
   423  	procs, err := process.Processes()
   424  	if err != nil {
   425  		log.Println("Metrics error:", err)
   426  	} else {
   427  		var accumCPUPerc, accumMemPerc, accumMemRSS float64
   428  		var procCount int
   429  		for _, p := range procs {
   430  			if procName != "" {
   431  				name, err := p.Name()
   432  				if err != nil {
   433  					log.Println("Error getting process name:", err)
   434  					continue
   435  				}
   436  				if name != procName {
   437  					continue
   438  				}
   439  			} else {
   440  				if p.Pid != int32(pid) {
   441  					continue
   442  				}
   443  			}
   444  
   445  			procCount++
   446  
   447  			cpuPerc, err := p.CPUPercent()
   448  			if err != nil {
   449  				log.Println("Error getting CPU percent for proc:", err)
   450  				break
   451  			}
   452  
   453  			accumCPUPerc += cpuPerc
   454  
   455  			memPerc, err := p.MemoryPercent()
   456  			if err != nil {
   457  				log.Println("Error getting mem percent for proc:", err)
   458  				break
   459  			}
   460  
   461  			accumMemPerc += float64(memPerc)
   462  
   463  			memInfo, err := p.MemoryInfo()
   464  			if err != nil {
   465  				log.Println("Error getting mem info:", err)
   466  				break
   467  			}
   468  
   469  			accumMemRSS += float64(memInfo.RSS)
   470  		}
   471  
   472  		pts := data.Points{
   473  			{
   474  				Time:  now,
   475  				Type:  data.PointTypeMetricProcCPUPercent,
   476  				Value: float64(accumCPUPerc),
   477  			},
   478  			{
   479  				Time:  now,
   480  				Type:  data.PointTypeMetricProcMemPercent,
   481  				Value: float64(accumMemPerc),
   482  			},
   483  			{
   484  				Time:  now,
   485  				Type:  data.PointTypeMetricProcMemRSS,
   486  				Value: float64(accumMemRSS),
   487  			},
   488  		}
   489  
   490  		if procName != "" {
   491  			pts = append(pts, data.Point{
   492  				Time:  now,
   493  				Type:  data.PointTypeCount,
   494  				Value: float64(procCount),
   495  			})
   496  		}
   497  
   498  		err = SendNodePoints(m.nc, m.config.ID, pts, false)
   499  		if err != nil {
   500  			log.Println("Metrics: error sending points:", err)
   501  		}
   502  
   503  	}
   504  }
   505  
   506  type procMetrics struct {
   507  	count float64
   508  	cpu   float64
   509  	mem   float64
   510  	rss   float64
   511  }
   512  
   513  func (m *MetricsClient) allProcPeriodic() {
   514  	now := time.Now()
   515  
   516  	metrics := make(map[string]procMetrics)
   517  
   518  	procs, err := process.Processes()
   519  	if err != nil {
   520  		log.Println("Metrics error:", err)
   521  	} else {
   522  		for _, p := range procs {
   523  			name, err := p.Name()
   524  			if err != nil {
   525  				log.Println("Error getting process name:", err)
   526  				continue
   527  			}
   528  
   529  			m := metrics[name]
   530  
   531  			m.count++
   532  
   533  			cpuPerc, err := p.CPUPercent()
   534  			if err != nil {
   535  				log.Println("Error getting CPU percent for proc:", err)
   536  				break
   537  			}
   538  
   539  			m.cpu += cpuPerc
   540  
   541  			memPerc, err := p.MemoryPercent()
   542  			if err != nil {
   543  				log.Println("Error getting mem percent for proc:", err)
   544  				break
   545  			}
   546  
   547  			m.mem += float64(memPerc)
   548  
   549  			memInfo, err := p.MemoryInfo()
   550  			if err != nil {
   551  				log.Println("Error getting mem info:", err)
   552  				break
   553  			}
   554  
   555  			m.rss += float64(memInfo.RSS)
   556  
   557  			metrics[name] = m
   558  		}
   559  
   560  		pts := make(data.Points, len(metrics)*4)
   561  		var i int
   562  		for k, v := range metrics {
   563  			pts[i].Time = now
   564  			pts[i].Key = k
   565  			pts[i].Type = data.PointTypeMetricProcCPUPercent
   566  			pts[i].Value = v.cpu
   567  			i++
   568  
   569  			pts[i].Time = now
   570  			pts[i].Key = k
   571  			pts[i].Type = data.PointTypeMetricProcMemPercent
   572  			pts[i].Value = v.mem
   573  			i++
   574  
   575  			pts[i].Time = now
   576  			pts[i].Key = k
   577  			pts[i].Type = data.PointTypeMetricProcMemRSS
   578  			pts[i].Value = v.rss
   579  			i++
   580  
   581  			pts[i].Time = now
   582  			pts[i].Key = k
   583  			pts[i].Type = data.PointTypeCount
   584  			pts[i].Value = v.count
   585  			i++
   586  		}
   587  
   588  		err = SendNodePoints(m.nc, m.config.ID, pts, false)
   589  		if err != nil {
   590  			log.Println("Metrics: error sending points:", err)
   591  		}
   592  	}
   593  }