github.com/mackerelio/mackerel-agent-plugins@v0.89.3/mackerel-plugin-docker/lib/docker.go (about)

     1  //go:build linux
     2  
     3  package mpdocker
     4  
     5  import (
     6  	"errors"
     7  	"flag"
     8  	"fmt"
     9  	"log"
    10  	"os"
    11  	"regexp"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  
    16  	docker "github.com/fsouza/go-dockerclient"
    17  	mp "github.com/mackerelio/go-mackerel-plugin-helper"
    18  	"golang.org/x/text/cases"
    19  	"golang.org/x/text/language"
    20  )
    21  
// graphdef is the static table of graph definitions that GraphDefinition
// returns. The "#" segment of each key stands in for the per-container name
// that parseStats inserts at the same position when it builds metric keys
// (for example "docker.memory."+name+".rss").
var graphdef = map[string]mp.Graphs{
	// Raw CPU counters; written by parseStats only when UseCPUPercentage is false.
	"docker.cpuacct.#": {
		Label: "Docker CPU",
		Unit:  "integer",
		Metrics: []mp.Metrics{
			{Name: "user", Label: "User", Diff: true, Stacked: true, Type: "uint64"},
			{Name: "system", Label: "System", Diff: true, Stacked: true, Type: "uint64"},
		},
	},
	// CPU percentages; written by addCPUPercentageStats from the previous and
	// current internal CPU counters.
	"docker.cpuacct_percentage.#": {
		Label: "Docker CPU Percentage",
		Unit:  "percentage",
		Metrics: []mp.Metrics{
			{Name: "user", Label: "User", Diff: false, Stacked: true, Type: "float64"},
			{Name: "system", Label: "System", Diff: false, Stacked: true, Type: "float64"},
		},
	},
	// Memory usage (rss and cache), written by parseStats.
	"docker.memory.#": {
		Label: "Docker Memory",
		Unit:  "bytes",
		Metrics: []mp.Metrics{
			{Name: "cache", Label: "Cache", Diff: false, Stacked: true},
			{Name: "rss", Label: "RSS", Diff: false, Stacked: true},
		},
	},
	// Block I/O values taken from BlkioStats.IOQueueRecursive.
	"docker.blkio.io_queued.#": {
		Label: "Docker BlkIO Queued",
		Unit:  "integer",
		Metrics: []mp.Metrics{
			{Name: "read", Label: "Read", Diff: false, Stacked: true},
			{Name: "write", Label: "Write", Diff: false, Stacked: true},
			{Name: "sync", Label: "Sync", Diff: false, Stacked: true},
			{Name: "async", Label: "Async", Diff: false, Stacked: true},
		},
	},
	// Block I/O values taken from BlkioStats.IOServicedRecursive.
	// NOTE(review): Scale 1/60 presumably converts a per-minute difference into
	// a per-second rate to match the "iops" unit — confirm against the helper.
	"docker.blkio.io_serviced.#": {
		Label: "Docker BlkIO IOPS",
		Unit:  "iops",
		Metrics: []mp.Metrics{
			{Name: "read", Label: "Read", Diff: true, Stacked: true, Type: "uint64", Scale: (1.0 / 60.0)},
			{Name: "write", Label: "Write", Diff: true, Stacked: true, Type: "uint64", Scale: (1.0 / 60.0)},
			{Name: "sync", Label: "Sync", Diff: true, Stacked: true, Type: "uint64", Scale: (1.0 / 60.0)},
			{Name: "async", Label: "Async", Diff: true, Stacked: true, Type: "uint64", Scale: (1.0 / 60.0)},
		},
	},
	// Block I/O values taken from BlkioStats.IOServiceBytesRecursive.
	"docker.blkio.io_service_bytes.#": {
		Label: "Docker BlkIO Bytes",
		Unit:  "bytes",
		Metrics: []mp.Metrics{
			{Name: "read", Label: "Read", Diff: true, Stacked: true, Type: "uint64"},
			{Name: "write", Label: "Write", Diff: true, Stacked: true, Type: "uint64"},
			{Name: "sync", Label: "Sync", Diff: true, Stacked: true, Type: "uint64"},
			{Name: "async", Label: "Async", Diff: true, Stacked: true, Type: "uint64"},
		},
	},
	// some other fields also exist in metrics, but they're internal intermediate data
}
    79  
// DockerPlugin is the mackerel plugin for docker. It holds the settings that
// Do fills in from the command-line flags, plus the previously stored metric
// values needed to derive CPU percentages.
type DockerPlugin struct {
	// Host is the Docker endpoint handed to docker.NewClient.
	Host string
	// Tempfile is not read by any method in this file; Do sets the tempfile on
	// the helper rather than on this field.
	Tempfile string
	// Method is the collection method. Do only ever sets "API"; FetchMetrics
	// rejects "File" as no longer supported.
	Method string
	// NameFormat selects how generateName builds a container's metric name.
	NameFormat string
	// Label is the container label key whose value is used as the name when
	// NameFormat is "label".
	Label string
	// lastMetricValues holds the values from the previous run (loaded in Do via
	// helper.FetchLastValues); FetchMetrics uses them for CPU percentages.
	lastMetricValues mp.MetricValues
	// UseCPUPercentage switches between emitting CPU percentages (true) and
	// raw CPU usage counters (false).
	UseCPUPercentage bool
}
    90  
    91  var normalizeMetricRe = regexp.MustCompile(`[^-a-zA-Z0-9_]`)
    92  
    93  func normalizeMetricName(str string) string {
    94  	return normalizeMetricRe.ReplaceAllString(str, "_")
    95  }
    96  
    97  func (m DockerPlugin) listContainer() ([]docker.APIContainers, error) {
    98  	client, _ := docker.NewClient(m.Host)
    99  	containers, err := client.ListContainers(docker.ListContainersOptions{})
   100  	if err != nil {
   101  		return nil, err
   102  	}
   103  	return containers, nil
   104  }
   105  
   106  // FetchMetrics interface for mackerel plugin
   107  func (m DockerPlugin) FetchMetrics() (map[string]interface{}, error) {
   108  	var stats map[string]interface{}
   109  
   110  	if m.Method == "File" {
   111  		return nil, errors.New("no longer supported")
   112  	}
   113  	containers, err := m.listContainer()
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  	stats, err = m.FetchMetricsWithAPI(containers)
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  
   122  	if m.UseCPUPercentage {
   123  		if time.Since(m.lastMetricValues.Timestamp) <= 5*time.Minute {
   124  			addCPUPercentageStats(&stats, m.lastMetricValues.Values)
   125  		}
   126  	}
   127  
   128  	return stats, nil
   129  }
   130  
   131  func (m DockerPlugin) generateName(container docker.APIContainers) string {
   132  	switch m.NameFormat {
   133  	case "name_id":
   134  		return fmt.Sprintf("%s_%s", strings.Replace(container.Names[0], "/", "", 1), container.ID[0:6])
   135  	case "name":
   136  		return strings.Replace(container.Names[0], "/", "", 1)
   137  	case "id":
   138  		return container.ID
   139  	case "image":
   140  		return container.Image
   141  	case "image_id":
   142  		return fmt.Sprintf("%s_%s", container.Image, container.ID[0:6])
   143  	case "image_name":
   144  		return fmt.Sprintf("%s_%s", container.Image, strings.Replace(container.Names[0], "/", "", 1))
   145  	case "label":
   146  		return container.Labels[m.Label]
   147  	}
   148  	return strings.Replace(container.Names[0], "/", "", 1)
   149  }
   150  
   151  // FetchMetricsWithAPI use docker API to fetch metrics
   152  func (m DockerPlugin) FetchMetricsWithAPI(containers []docker.APIContainers) (map[string]interface{}, error) {
   153  	var wg sync.WaitGroup
   154  	var mu sync.Mutex
   155  	res := map[string]interface{}{}
   156  	for _, container := range containers {
   157  		wg.Add(1)
   158  		go func(cont docker.APIContainers) {
   159  			defer wg.Done()
   160  			name := strings.Replace(cont.Names[0], "/", "", 1)
   161  			metricName := normalizeMetricName(m.generateName(cont))
   162  			client, _ := docker.NewClient(m.Host)
   163  			errC := make(chan error, 1)
   164  			statsC := make(chan *docker.Stats)
   165  			done := make(chan bool)
   166  			go func() {
   167  				errC <- client.Stats(docker.StatsOptions{ID: name, Stats: statsC, Stream: false, Done: done, Timeout: time.Duration(20) * time.Second})
   168  				close(errC)
   169  			}()
   170  			var resultStats []*docker.Stats
   171  			for {
   172  				stats, ok := <-statsC
   173  				if !ok {
   174  					break
   175  				}
   176  				resultStats = append(resultStats, stats)
   177  			}
   178  			err := <-errC
   179  			if err != nil {
   180  				log.Fatal(err)
   181  			}
   182  			if len(resultStats) == 0 {
   183  				log.Fatalf("Stats: Expected 1 result. Got %d.", len(resultStats))
   184  			}
   185  			mu.Lock()
   186  			err = m.parseStats(&res, metricName, resultStats[0])
   187  			if err != nil {
   188  				log.Fatal(err)
   189  			}
   190  			mu.Unlock()
   191  		}(container)
   192  	}
   193  	wg.Wait()
   194  	return res, nil
   195  }
   196  
   197  const internalCPUStatPrefix = "docker._internal.cpuacct."
   198  
   199  func (m DockerPlugin) parseStats(stats *map[string]interface{}, name string, result *docker.Stats) error {
   200  	if m.UseCPUPercentage {
   201  		// intermediate data to calc CPU percentage
   202  		(*stats)[internalCPUStatPrefix+name+".user"] = (*result).CPUStats.CPUUsage.UsageInUsermode
   203  		(*stats)[internalCPUStatPrefix+name+".system"] = (*result).CPUStats.CPUUsage.UsageInKernelmode
   204  		(*stats)[internalCPUStatPrefix+name+".host"] = (*result).CPUStats.SystemCPUUsage
   205  
   206  		onlineCPUs := int((*result).CPUStats.OnlineCPUs)
   207  		// if either `CPUStats.OnlineCPUs` or `PerCPUStats.OnlineCPUs` is zero,
   208  		// use the length of CPUUsage.PerCPUUsage for onlineCPUs
   209  		// ref. https://docs.docker.com/engine/api/v1.41/#operation/ContainerStats
   210  		if onlineCPUs == 0 || (*result).PreCPUStats.OnlineCPUs == 0 {
   211  			onlineCPUs = len((*result).CPUStats.CPUUsage.PercpuUsage)
   212  		}
   213  		(*stats)[internalCPUStatPrefix+name+".onlineCPUs"] = onlineCPUs
   214  	} else {
   215  		(*stats)["docker.cpuacct."+name+".user"] = (*result).CPUStats.CPUUsage.UsageInUsermode
   216  		(*stats)["docker.cpuacct."+name+".system"] = (*result).CPUStats.CPUUsage.UsageInKernelmode
   217  	}
   218  
   219  	totalRss := (*result).MemoryStats.Stats.TotalRss
   220  	if totalRss == 0 {
   221  		// use `anon` and `file` for RSS and Cache usage on cgroup2 host
   222  		// ref. https://github.com/google/cadvisor/blob/a9858972e75642c2b1914c8d5428e33e6392c08a/container/libcontainer/handler.go#L799-L800
   223  		(*stats)["docker.memory."+name+".rss"] = (*result).MemoryStats.Stats.Anon
   224  		(*stats)["docker.memory."+name+".cache"] = (*result).MemoryStats.Stats.File
   225  
   226  	} else {
   227  		// use `total_rss` and `total_cache` for RSS and Cache usage on cgroup host
   228  		(*stats)["docker.memory."+name+".rss"] = totalRss
   229  		(*stats)["docker.memory."+name+".cache"] = (*result).MemoryStats.Stats.TotalCache
   230  	}
   231  
   232  	fields := []string{"read", "write", "sync", "async"}
   233  	for _, field := range fields {
   234  		for _, s := range (*result).BlkioStats.IOQueueRecursive {
   235  			if s.Op == cases.Title(language.Und, cases.NoLower).String(field) {
   236  				(*stats)["docker.blkio.io_queued."+name+"."+field] = s.Value
   237  			}
   238  		}
   239  		for _, s := range (*result).BlkioStats.IOServicedRecursive {
   240  			if s.Op == cases.Title(language.Und, cases.NoLower).String(field) {
   241  				(*stats)["docker.blkio.io_serviced."+name+"."+field] = s.Value
   242  			}
   243  		}
   244  		for _, s := range (*result).BlkioStats.IOServiceBytesRecursive {
   245  			if s.Op == cases.Title(language.Und, cases.NoLower).String(field) {
   246  				(*stats)["docker.blkio.io_service_bytes."+name+"."+field] = s.Value
   247  			}
   248  		}
   249  	}
   250  	return nil
   251  }
   252  
   253  func addCPUPercentageStats(stats *map[string]interface{}, lastStat map[string]interface{}) {
   254  	for k, v := range lastStat {
   255  		if !strings.HasPrefix(k, internalCPUStatPrefix) || !strings.HasSuffix(k, ".host") {
   256  			continue
   257  		}
   258  		name := strings.TrimSuffix(strings.TrimPrefix(k, internalCPUStatPrefix), ".host")
   259  		currentHostUsage, ok1 := (*stats)[internalCPUStatPrefix+name+".host"]
   260  		cpuNums, ok2 := (*stats)[internalCPUStatPrefix+name+".onlineCPUs"]
   261  		if !ok1 || !ok2 {
   262  			continue
   263  		}
   264  		hostUsage := float64(currentHostUsage.(uint64) - uint64(v.(float64)))
   265  		cpuNumsInt := cpuNums.(int)
   266  		if hostUsage < 0 {
   267  			continue // counter seems reset
   268  		}
   269  
   270  		currentUserUsage, ok1 := (*stats)[internalCPUStatPrefix+name+".user"]
   271  		prevUserUsage, ok2 := lastStat[internalCPUStatPrefix+name+".user"]
   272  		if ok1 && ok2 {
   273  			currentUserUsageUInt := currentUserUsage.(uint64)
   274  			prevUserUsageUInt := uint64(prevUserUsage.(float64))
   275  			var userUsage float64
   276  			if currentUserUsageUInt >= prevUserUsageUInt {
   277  				userUsage = float64(currentUserUsage.(uint64) - uint64(prevUserUsage.(float64)))
   278  			} else {
   279  				// counter has been reset
   280  				userUsage = float64(currentUserUsageUInt)
   281  			}
   282  			(*stats)["docker.cpuacct_percentage."+name+".user"] = userUsage / hostUsage * 100.0 * float64(cpuNumsInt)
   283  		}
   284  
   285  		currentSystemUsage, ok1 := (*stats)[internalCPUStatPrefix+name+".system"]
   286  		prevSystemUsage, ok2 := lastStat[internalCPUStatPrefix+name+".system"]
   287  		if ok1 && ok2 {
   288  			currentSystemUsageUInt := currentSystemUsage.(uint64)
   289  			prevSystemUsageUInt := uint64(prevSystemUsage.(float64))
   290  			var systemUsage float64
   291  			if currentSystemUsageUInt >= prevSystemUsageUInt {
   292  				systemUsage = float64(currentSystemUsageUInt - prevSystemUsageUInt)
   293  			} else {
   294  				systemUsage = float64(currentSystemUsageUInt)
   295  			}
   296  			(*stats)["docker.cpuacct_percentage."+name+".system"] = systemUsage / hostUsage * 100.0 * float64(cpuNumsInt)
   297  		}
   298  	}
   299  }
   300  
// GraphDefinition implements the mackerel plugin interface by returning the
// package-level graphdef table as is.
func (m DockerPlugin) GraphDefinition() map[string]mp.Graphs {
	return graphdef
}
   305  
   306  // Do the plugin
   307  func Do() {
   308  	candidateNameFormat := []string{"name", "name_id", "id", "image", "image_id", "image_name", "label"}
   309  	setCandidateNameFormat := make(map[string]bool)
   310  	for _, v := range candidateNameFormat {
   311  		setCandidateNameFormat[v] = true
   312  	}
   313  
   314  	optHost := flag.String("host", "unix:///var/run/docker.sock", "Host for socket")
   315  	flag.String("command", "docker", "Command path to docker(deprecated)") // backward compatibility
   316  	optMethod := flag.String("method", "", "Specify the method to collect stats, 'API' or 'File'. If not specified, an appropriate method is chosen.(deprecated)")
   317  	optTempfile := flag.String("tempfile", "", "Temp file name")
   318  	optNameFormat := flag.String("name-format", "name_id", "Set the name format from "+strings.Join(candidateNameFormat, ", "))
   319  	optLabel := flag.String("label", "", "Use the value of the key as name in case that name-format is label.")
   320  	optCPUFormat := flag.String("cpu-format", "", "Specify which CPU metrics format to use, 'percentage' or 'usage'. 'percentage' is default for 'API' method, and is not supported in 'File' method.")
   321  	flag.Parse()
   322  
   323  	var docker DockerPlugin
   324  
   325  	docker.Host = *optHost
   326  	docker.NameFormat = *optNameFormat
   327  	docker.Label = *optLabel
   328  	if !setCandidateNameFormat[docker.NameFormat] {
   329  		log.Fatalf("Name flag should be each of '%s'", strings.Join(candidateNameFormat, ","))
   330  	}
   331  	if docker.NameFormat == "label" && docker.Label == "" {
   332  		log.Fatalf("Label flag should be set when name flag is 'label'.")
   333  	}
   334  
   335  	switch *optMethod {
   336  	case "", "API":
   337  		docker.Method = "API"
   338  	case "File":
   339  		log.Fatalf("'File' method is no longer supported")
   340  	default:
   341  		log.Fatalf("Method should be 'API', 'File' or an empty string.")
   342  	}
   343  
   344  	switch *optCPUFormat {
   345  	case "percentage":
   346  		docker.UseCPUPercentage = true
   347  	case "usage":
   348  		docker.UseCPUPercentage = false
   349  	default:
   350  		docker.UseCPUPercentage = true
   351  	}
   352  
   353  	helper := mp.NewMackerelPlugin(docker)
   354  
   355  	if *optTempfile != "" {
   356  		helper.Tempfile = *optTempfile
   357  	} else {
   358  		helper.SetTempfileByBasename(fmt.Sprintf("mackerel-plugin-docker-%s", normalizeMetricName(*optHost)))
   359  	}
   360  
   361  	if os.Getenv("MACKEREL_AGENT_PLUGIN_META") != "" {
   362  		helper.OutputDefinitions()
   363  	} else {
   364  		docker.lastMetricValues, _ = helper.FetchLastValues()
   365  		helper.Plugin = docker
   366  		helper.OutputValues()
   367  	}
   368  }