github.com/mackerelio/mackerel-agent-plugins@v0.89.3/mackerel-plugin-multicore/lib/multicore.go (about)

     1  package mpmulticore
     2  
     3  import (
     4  	"bufio"
     5  	"encoding/json"
     6  	"errors"
     7  	"flag"
     8  	"fmt"
     9  	"io"
    10  	"log"
    11  	"os"
    12  	"path/filepath"
    13  	"strconv"
    14  	"strings"
    15  	"time"
    16  
    17  	mp "github.com/mackerelio/go-mackerel-plugin"
    18  )
    19  
// graphDef holds the graph definitions this plugin advertises to the agent.
//
// "multicore.cpu.#" describes the stacked per-CPU percentage metrics; the "#"
// segment corresponds to the CPU name that outputCPUUsage puts into each
// metric key (e.g. "multicore.cpu.cpu0.user").
// "multicore.loadavg_per_core" describes the single loadavg5 metric emitted by
// outputLoadavgPerCore.
var graphDef = map[string]mp.Graphs{
	"multicore.cpu.#": {
		Label: "MultiCore CPU",
		Unit:  "percentage",
		Metrics: []mp.Metrics{
			{Name: "guest_nice", Label: "guest_nice", Diff: false, Stacked: true},
			{Name: "guest", Label: "guest", Diff: false, Stacked: true},
			{Name: "steal", Label: "steal", Diff: false, Stacked: true},
			{Name: "softirq", Label: "softirq", Diff: false, Stacked: true},
			{Name: "irq", Label: "irq", Diff: false, Stacked: true},
			{Name: "iowait", Label: "ioWait", Diff: false, Stacked: true},
			{Name: "idle", Label: "idle", Diff: false, Stacked: true},
			{Name: "system", Label: "system", Diff: false, Stacked: true},
			{Name: "nice", Label: "nice", Diff: false, Stacked: true},
			{Name: "user", Label: "user", Diff: false, Stacked: true},
		},
	},
	"multicore.loadavg_per_core": {
		Label: "MultiCore loadavg5 per core",
		Unit:  "float",
		Metrics: []mp.Metrics{
			{Name: "loadavg5", Label: "loadavg5", Diff: false, Stacked: false},
		},
	},
}
    45  
// saveItem is the state persisted as JSON between plugin runs so that the
// next run can compute usage from the difference between two samples.
type saveItem struct {
	// LastTime is the time at which the counters below were sampled.
	LastTime time.Time
	// ProcStatsByCPU maps a CPU name as it appears in /proc/stat (the first
	// field of a per-CPU line) to the counters read for that CPU.
	ProcStatsByCPU map[string]procStats
}
    50  
// procStats holds the counters parsed from one per-CPU line of /proc/stat.
//
// Each counter is a pointer so that a column missing from the line stays nil
// and can be told apart from a counter whose value is zero.
type procStats struct {
	User      *uint64 `json:"user"` // parseProcStat subtracts Guest from this value
	Nice      *uint64 `json:"nice"` // parseProcStat subtracts GuestNice from this value
	System    *uint64 `json:"system"`
	Idle      *uint64 `json:"idle"`
	IoWait    *uint64 `json:"iowait"`
	Irq       *uint64 `json:"irq"`
	SoftIrq   *uint64 `json:"softirq"`
	Steal     *uint64 `json:"steal"`
	Guest     *uint64 `json:"guest"`
	GuestNice *uint64 `json:"guest_nice"`
	// Total is the sum of the parsed columns, with Guest and GuestNice
	// subtracted once each because they are already included in user/nice.
	Total uint64 `json:"total"`
}
    64  
// cpuPercentages is the per-CPU result of calcCPUUsage: each field is the
// share of the corresponding counter in the total delta between two samples,
// expressed as a percentage.
//
// A nil field means the value could not be calculated (calculatePercentage
// returned nil); printValue skips such fields.
type cpuPercentages struct {
	// CPUName is the key of the CPU in the sampled map (e.g. "cpu0").
	CPUName   string
	User      *float64
	Nice      *float64
	System    *float64
	Idle      *float64
	IoWait    *float64
	Irq       *float64
	SoftIrq   *float64
	Steal     *float64
	Guest     *float64
	GuestNice *float64
}
    78  
    79  func parseProcStat(out io.Reader) (map[string]procStats, error) {
    80  	scanner := bufio.NewScanner(out)
    81  	var result = make(map[string]procStats)
    82  	for scanner.Scan() {
    83  		line := scanner.Text()
    84  		if !strings.HasPrefix(line, "cpu") {
    85  			break
    86  		}
    87  
    88  		fields := strings.Fields(line)
    89  		key := fields[0]
    90  		values := fields[1:]
    91  
    92  		// skip total cpu usage
    93  		if key == "cpu" {
    94  			continue
    95  		}
    96  
    97  		var stats procStats
    98  		statPtrs := []**uint64{
    99  			&stats.User,
   100  			&stats.Nice,
   101  			&stats.System,
   102  			&stats.Idle,
   103  			&stats.IoWait,
   104  			&stats.Irq,
   105  			&stats.SoftIrq,
   106  			&stats.Steal,
   107  			&stats.Guest,
   108  			&stats.GuestNice,
   109  		}
   110  
   111  		for i, valStr := range values {
   112  			val, err := strconv.ParseUint(valStr, 10, 64)
   113  			if err != nil {
   114  				return nil, err
   115  			}
   116  			*statPtrs[i] = &val
   117  			stats.Total += val
   118  		}
   119  
   120  		// Since cpustat[CPUTIME_USER] includes cpustat[CPUTIME_GUEST], subtract the duplicated values from total.
   121  		// https://github.com/torvalds/linux/blob/4ec9f7a18/kernel/sched/cputime.c#L151-L158
   122  		if stats.Guest != nil {
   123  			stats.Total -= *stats.Guest
   124  			*stats.User -= *stats.Guest
   125  		}
   126  
   127  		// cpustat[CPUTIME_NICE] includes cpustat[CPUTIME_GUEST_NICE]
   128  		if stats.GuestNice != nil {
   129  			stats.Total -= *stats.GuestNice
   130  			*stats.Nice -= *stats.GuestNice
   131  		}
   132  
   133  		result[key] = stats
   134  	}
   135  	return result, nil
   136  }
   137  
   138  func collectProcStatValues() (map[string]procStats, error) {
   139  	file, err := os.Open("/proc/stat")
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  	defer file.Close()
   144  	return parseProcStat(file)
   145  }
   146  
   147  func saveValues(tempFileName string, values map[string]procStats, now time.Time) error {
   148  	f, err := os.Create(tempFileName)
   149  	if err != nil {
   150  		return err
   151  	}
   152  	defer f.Close()
   153  
   154  	s := saveItem{
   155  		LastTime:       now,
   156  		ProcStatsByCPU: values,
   157  	}
   158  
   159  	encoder := json.NewEncoder(f)
   160  	err = encoder.Encode(s)
   161  	if err != nil {
   162  		return err
   163  	}
   164  
   165  	return nil
   166  }
   167  
   168  func fetchSavedItem(tempFileName string) (*saveItem, error) {
   169  	f, err := os.Open(tempFileName)
   170  	if err != nil {
   171  		if os.IsNotExist(err) {
   172  			return nil, nil
   173  		}
   174  		return nil, err
   175  	}
   176  	defer f.Close()
   177  
   178  	var stat saveItem
   179  	decoder := json.NewDecoder(f)
   180  	err = decoder.Decode(&stat)
   181  	if err != nil {
   182  		return nil, err
   183  	}
   184  	return &stat, nil
   185  }
   186  
   187  func calcCPUUsage(currentValues map[string]procStats, now time.Time, savedItem *saveItem) ([]cpuPercentages, error) {
   188  	if now.Sub(savedItem.LastTime).Seconds() > 600 {
   189  		return nil, errors.New("Too long duration") // nolint
   190  	}
   191  
   192  	var result []cpuPercentages
   193  	for name, current := range currentValues {
   194  		last, ok := savedItem.ProcStatsByCPU[name]
   195  		if !ok {
   196  			continue
   197  		}
   198  		if last.Total > current.Total {
   199  			return nil, errors.New("cpu counter has been reset")
   200  		}
   201  
   202  		user := calculatePercentage(current.User, last.User, current.Total, last.Total)
   203  		nice := calculatePercentage(current.Nice, last.Nice, current.Total, last.Total)
   204  		system := calculatePercentage(current.System, last.System, current.Total, last.Total)
   205  		idle := calculatePercentage(current.Idle, last.Idle, current.Total, last.Total)
   206  		iowait := calculatePercentage(current.IoWait, last.IoWait, current.Total, last.Total)
   207  		irq := calculatePercentage(current.Irq, last.Irq, current.Total, last.Total)
   208  		softirq := calculatePercentage(current.SoftIrq, last.SoftIrq, current.Total, last.Total)
   209  		steal := calculatePercentage(current.Steal, last.Steal, current.Total, last.Total)
   210  		guest := calculatePercentage(current.Guest, last.Guest, current.Total, last.Total)
   211  		// guest_nice available since Linux 2.6.33 (ref: man proc)
   212  		guestNice := calculatePercentage(current.GuestNice, last.GuestNice, current.Total, last.Total)
   213  
   214  		result = append(result, cpuPercentages{
   215  			CPUName:   name,
   216  			User:      user,
   217  			Nice:      nice,
   218  			System:    system,
   219  			Idle:      idle,
   220  			IoWait:    iowait,
   221  			Irq:       irq,
   222  			SoftIrq:   softirq,
   223  			Steal:     steal,
   224  			Guest:     guest,
   225  			GuestNice: guestNice,
   226  		})
   227  	}
   228  
   229  	return result, nil
   230  }
   231  
   232  func calculatePercentage(currentValue *uint64, lastValue *uint64, currentTotal uint64, lastTotal uint64) *float64 {
   233  	if currentValue == nil || lastValue == nil {
   234  		return nil
   235  	}
   236  	ret := float64(*currentValue-*lastValue) / float64(currentTotal-lastTotal) * 100.0
   237  	return &ret
   238  }
   239  
   240  func fetchLoadavg5() (float64, error) {
   241  	contentbytes, err := os.ReadFile("/proc/loadavg")
   242  	if err != nil {
   243  		return 0.0, err
   244  	}
   245  	content := string(contentbytes)
   246  	cols := strings.Fields(content)
   247  
   248  	if len(cols) > 2 {
   249  		f, err := strconv.ParseFloat(cols[1], 64)
   250  		if err != nil {
   251  			return 0.0, err
   252  		}
   253  		return f, nil
   254  	}
   255  	return 0.0, fmt.Errorf("cannot fetch loadavg5")
   256  }
   257  
   258  func printValue(key string, value *float64, time time.Time) {
   259  	if value != nil {
   260  		fmt.Printf("%s\t%f\t%d\n", key, *value, time.Unix())
   261  	}
   262  }
   263  
   264  func outputCPUUsage(cpuUsage []cpuPercentages, now time.Time) {
   265  	for _, u := range cpuUsage {
   266  		printValue("multicore.cpu."+u.CPUName+".user", u.User, now)
   267  		printValue("multicore.cpu."+u.CPUName+".nice", u.Nice, now)
   268  		printValue("multicore.cpu."+u.CPUName+".system", u.System, now)
   269  		printValue("multicore.cpu."+u.CPUName+".idle", u.Idle, now)
   270  		printValue("multicore.cpu."+u.CPUName+".iowait", u.IoWait, now)
   271  		printValue("multicore.cpu."+u.CPUName+".irq", u.Irq, now)
   272  		printValue("multicore.cpu."+u.CPUName+".softirq", u.SoftIrq, now)
   273  		printValue("multicore.cpu."+u.CPUName+".steal", u.Steal, now)
   274  		printValue("multicore.cpu."+u.CPUName+".guest", u.Guest, now)
   275  		printValue("multicore.cpu."+u.CPUName+".guest_nice", u.GuestNice, now)
   276  	}
   277  }
   278  
   279  func outputLoadavgPerCore(loadavgPerCore float64, now time.Time) {
   280  	printValue("multicore.loadavg_per_core.loadavg5", &loadavgPerCore, now)
   281  }
   282  
   283  func outputDefinitions() {
   284  	fmt.Println("# mackerel-agent-plugin")
   285  	var graphs mp.GraphDef
   286  	graphs.Graphs = graphDef
   287  
   288  	b, err := json.Marshal(graphs)
   289  	if err != nil {
   290  		log.Fatalln("OutputDefinitions: ", err)
   291  	}
   292  	fmt.Println(string(b))
   293  }
   294  
   295  func outputMulticore(tempFileName string) {
   296  	now := time.Now()
   297  
   298  	currentValues, err := collectProcStatValues()
   299  	if err != nil {
   300  		log.Fatalln("collectProcStatValues: ", err)
   301  	}
   302  
   303  	savedItem, err := fetchSavedItem(tempFileName)
   304  	if err != nil {
   305  		log.Fatalln("fetchLastValues: ", err)
   306  	}
   307  	err = saveValues(tempFileName, currentValues, now)
   308  	if err != nil {
   309  		log.Fatalln("saveValues: ", err)
   310  	}
   311  
   312  	// maybe first time run
   313  	if savedItem == nil {
   314  		return
   315  	}
   316  
   317  	cpuUsage, err := calcCPUUsage(currentValues, now, savedItem)
   318  	if err != nil {
   319  		log.Fatalln("calcCPUUsage: ", err)
   320  	}
   321  
   322  	loadavg5, err := fetchLoadavg5()
   323  	if err != nil {
   324  		log.Fatalln("fetchLoadavg5: ", err)
   325  	}
   326  	loadPerCPUCount := loadavg5 / (float64(len(cpuUsage)))
   327  
   328  	outputCPUUsage(cpuUsage, now)
   329  	outputLoadavgPerCore(loadPerCPUCount, now)
   330  }
   331  
   332  func generateTempfilePath() string {
   333  	dir := os.Getenv("MACKEREL_PLUGIN_WORKDIR")
   334  	if dir == "" {
   335  		dir = os.TempDir()
   336  	}
   337  	return filepath.Join(dir, "mackerel-plugin-multicore")
   338  }
   339  
   340  // Do the plugin
   341  func Do() {
   342  	var tempFileName string
   343  	optTempfile := flag.String("tempfile", "", "Temp file name")
   344  	flag.Parse()
   345  
   346  	tempFileName = *optTempfile
   347  	if tempFileName == "" {
   348  		tempFileName = generateTempfilePath()
   349  	}
   350  
   351  	if os.Getenv("MACKEREL_AGENT_PLUGIN_META") != "" {
   352  		outputDefinitions()
   353  	} else {
   354  		outputMulticore(tempFileName)
   355  	}
   356  }