github.com/mackerelio/mackerel-agent-plugins@v0.89.3/mackerel-plugin-docker/lib/docker.go (about) 1 //go:build linux 2 3 package mpdocker 4 5 import ( 6 "errors" 7 "flag" 8 "fmt" 9 "log" 10 "os" 11 "regexp" 12 "strings" 13 "sync" 14 "time" 15 16 docker "github.com/fsouza/go-dockerclient" 17 mp "github.com/mackerelio/go-mackerel-plugin-helper" 18 "golang.org/x/text/cases" 19 "golang.org/x/text/language" 20 ) 21 22 var graphdef = map[string]mp.Graphs{ 23 "docker.cpuacct.#": { 24 Label: "Docker CPU", 25 Unit: "integer", 26 Metrics: []mp.Metrics{ 27 {Name: "user", Label: "User", Diff: true, Stacked: true, Type: "uint64"}, 28 {Name: "system", Label: "System", Diff: true, Stacked: true, Type: "uint64"}, 29 }, 30 }, 31 "docker.cpuacct_percentage.#": { 32 Label: "Docker CPU Percentage", 33 Unit: "percentage", 34 Metrics: []mp.Metrics{ 35 {Name: "user", Label: "User", Diff: false, Stacked: true, Type: "float64"}, 36 {Name: "system", Label: "System", Diff: false, Stacked: true, Type: "float64"}, 37 }, 38 }, 39 "docker.memory.#": { 40 Label: "Docker Memory", 41 Unit: "bytes", 42 Metrics: []mp.Metrics{ 43 {Name: "cache", Label: "Cache", Diff: false, Stacked: true}, 44 {Name: "rss", Label: "RSS", Diff: false, Stacked: true}, 45 }, 46 }, 47 "docker.blkio.io_queued.#": { 48 Label: "Docker BlkIO Queued", 49 Unit: "integer", 50 Metrics: []mp.Metrics{ 51 {Name: "read", Label: "Read", Diff: false, Stacked: true}, 52 {Name: "write", Label: "Write", Diff: false, Stacked: true}, 53 {Name: "sync", Label: "Sync", Diff: false, Stacked: true}, 54 {Name: "async", Label: "Async", Diff: false, Stacked: true}, 55 }, 56 }, 57 "docker.blkio.io_serviced.#": { 58 Label: "Docker BlkIO IOPS", 59 Unit: "iops", 60 Metrics: []mp.Metrics{ 61 {Name: "read", Label: "Read", Diff: true, Stacked: true, Type: "uint64", Scale: (1.0 / 60.0)}, 62 {Name: "write", Label: "Write", Diff: true, Stacked: true, Type: "uint64", Scale: (1.0 / 60.0)}, 63 {Name: "sync", Label: "Sync", Diff: true, Stacked: true, Type: "uint64", Scale: (1.0 / 60.0)}, 64 {Name: "async", Label: "Async", Diff: true, Stacked: true, Type: "uint64", Scale: (1.0 / 60.0)}, 65 }, 66 }, 67 "docker.blkio.io_service_bytes.#": { 68 Label: "Docker BlkIO Bytes", 69 Unit: "bytes", 70 Metrics: []mp.Metrics{ 71 {Name: "read", Label: "Read", Diff: true, Stacked: true, Type: "uint64"}, 72 {Name: "write", Label: "Write", Diff: true, Stacked: true, Type: "uint64"}, 73 {Name: "sync", Label: "Sync", Diff: true, Stacked: true, Type: "uint64"}, 74 {Name: "async", Label: "Async", Diff: true, Stacked: true, Type: "uint64"}, 75 }, 76 }, 77 // some other fields also exist in metrics, but they're internal intermediate data 78 } 79 80 // DockerPlugin mackerel plugin for docker 81 type DockerPlugin struct { 82 Host string 83 Tempfile string 84 Method string 85 NameFormat string 86 Label string 87 lastMetricValues mp.MetricValues 88 UseCPUPercentage bool 89 } 90 91 var normalizeMetricRe = regexp.MustCompile(`[^-a-zA-Z0-9_]`) 92 93 func normalizeMetricName(str string) string { 94 return normalizeMetricRe.ReplaceAllString(str, "_") 95 } 96 97 func (m DockerPlugin) listContainer() ([]docker.APIContainers, error) { 98 client, _ := docker.NewClient(m.Host) 99 containers, err := client.ListContainers(docker.ListContainersOptions{}) 100 if err != nil { 101 return nil, err 102 } 103 return containers, nil 104 } 105 106 // FetchMetrics interface for mackerel plugin 107 func (m DockerPlugin) FetchMetrics() (map[string]interface{}, error) { 108 var stats map[string]interface{} 109 110 if m.Method == "File" { 111 return nil, errors.New("no longer supported") 112 } 113 containers, err := m.listContainer() 114 if err != nil { 115 return nil, err 116 } 117 stats, err = m.FetchMetricsWithAPI(containers) 118 if err != nil { 119 return nil, err 120 } 121 122 if m.UseCPUPercentage { 123 if time.Since(m.lastMetricValues.Timestamp) <= 5*time.Minute { 124 addCPUPercentageStats(&stats, m.lastMetricValues.Values) 125 } 126 } 127 128 return stats, nil 129 } 130 131 func (m DockerPlugin) generateName(container docker.APIContainers) string { 132 switch m.NameFormat { 133 case "name_id": 134 return fmt.Sprintf("%s_%s", strings.Replace(container.Names[0], "/", "", 1), container.ID[0:6]) 135 case "name": 136 return strings.Replace(container.Names[0], "/", "", 1) 137 case "id": 138 return container.ID 139 case "image": 140 return container.Image 141 case "image_id": 142 return fmt.Sprintf("%s_%s", container.Image, container.ID[0:6]) 143 case "image_name": 144 return fmt.Sprintf("%s_%s", container.Image, strings.Replace(container.Names[0], "/", "", 1)) 145 case "label": 146 return container.Labels[m.Label] 147 } 148 return strings.Replace(container.Names[0], "/", "", 1) 149 } 150 151 // FetchMetricsWithAPI use docker API to fetch metrics 152 func (m DockerPlugin) FetchMetricsWithAPI(containers []docker.APIContainers) (map[string]interface{}, error) { 153 var wg sync.WaitGroup 154 var mu sync.Mutex 155 res := map[string]interface{}{} 156 for _, container := range containers { 157 wg.Add(1) 158 go func(cont docker.APIContainers) { 159 defer wg.Done() 160 name := strings.Replace(cont.Names[0], "/", "", 1) 161 metricName := normalizeMetricName(m.generateName(cont)) 162 client, _ := docker.NewClient(m.Host) 163 errC := make(chan error, 1) 164 statsC := make(chan *docker.Stats) 165 done := make(chan bool) 166 go func() { 167 errC <- client.Stats(docker.StatsOptions{ID: name, Stats: statsC, Stream: false, Done: done, Timeout: time.Duration(20) * time.Second}) 168 close(errC) 169 }() 170 var resultStats []*docker.Stats 171 for { 172 stats, ok := <-statsC 173 if !ok { 174 break 175 } 176 resultStats = append(resultStats, stats) 177 } 178 err := <-errC 179 if err != nil { 180 log.Fatal(err) 181 } 182 if len(resultStats) == 0 { 183 log.Fatalf("Stats: Expected 1 result. Got %d.", len(resultStats)) 184 } 185 mu.Lock() 186 err = m.parseStats(&res, metricName, resultStats[0]) 187 if err != nil { 188 log.Fatal(err) 189 } 190 mu.Unlock() 191 }(container) 192 } 193 wg.Wait() 194 return res, nil 195 } 196 197 const internalCPUStatPrefix = "docker._internal.cpuacct." 198 199 func (m DockerPlugin) parseStats(stats *map[string]interface{}, name string, result *docker.Stats) error { 200 if m.UseCPUPercentage { 201 // intermediate data to calc CPU percentage 202 (*stats)[internalCPUStatPrefix+name+".user"] = (*result).CPUStats.CPUUsage.UsageInUsermode 203 (*stats)[internalCPUStatPrefix+name+".system"] = (*result).CPUStats.CPUUsage.UsageInKernelmode 204 (*stats)[internalCPUStatPrefix+name+".host"] = (*result).CPUStats.SystemCPUUsage 205 206 onlineCPUs := int((*result).CPUStats.OnlineCPUs) 207 // if either `CPUStats.OnlineCPUs` or `PerCPUStats.OnlineCPUs` is zero, 208 // use the length of CPUUsage.PerCPUUsage for onlineCPUs 209 // ref. https://docs.docker.com/engine/api/v1.41/#operation/ContainerStats 210 if onlineCPUs == 0 || (*result).PreCPUStats.OnlineCPUs == 0 { 211 onlineCPUs = len((*result).CPUStats.CPUUsage.PercpuUsage) 212 } 213 (*stats)[internalCPUStatPrefix+name+".onlineCPUs"] = onlineCPUs 214 } else { 215 (*stats)["docker.cpuacct."+name+".user"] = (*result).CPUStats.CPUUsage.UsageInUsermode 216 (*stats)["docker.cpuacct."+name+".system"] = (*result).CPUStats.CPUUsage.UsageInKernelmode 217 } 218 219 totalRss := (*result).MemoryStats.Stats.TotalRss 220 if totalRss == 0 { 221 // use `anon` and `file` for RSS and Cache usage on cgroup2 host 222 // ref. https://github.com/google/cadvisor/blob/a9858972e75642c2b1914c8d5428e33e6392c08a/container/libcontainer/handler.go#L799-L800 223 (*stats)["docker.memory."+name+".rss"] = (*result).MemoryStats.Stats.Anon 224 (*stats)["docker.memory."+name+".cache"] = (*result).MemoryStats.Stats.File 225 226 } else { 227 // use `total_rss` and `total_cache` for RSS and Cache usage on cgroup host 228 (*stats)["docker.memory."+name+".rss"] = totalRss 229 (*stats)["docker.memory."+name+".cache"] = (*result).MemoryStats.Stats.TotalCache 230 } 231 232 fields := []string{"read", "write", "sync", "async"} 233 for _, field := range fields { 234 for _, s := range (*result).BlkioStats.IOQueueRecursive { 235 if s.Op == cases.Title(language.Und, cases.NoLower).String(field) { 236 (*stats)["docker.blkio.io_queued."+name+"."+field] = s.Value 237 } 238 } 239 for _, s := range (*result).BlkioStats.IOServicedRecursive { 240 if s.Op == cases.Title(language.Und, cases.NoLower).String(field) { 241 (*stats)["docker.blkio.io_serviced."+name+"."+field] = s.Value 242 } 243 } 244 for _, s := range (*result).BlkioStats.IOServiceBytesRecursive { 245 if s.Op == cases.Title(language.Und, cases.NoLower).String(field) { 246 (*stats)["docker.blkio.io_service_bytes."+name+"."+field] = s.Value 247 } 248 } 249 } 250 return nil 251 } 252 253 func addCPUPercentageStats(stats *map[string]interface{}, lastStat map[string]interface{}) { 254 for k, v := range lastStat { 255 if !strings.HasPrefix(k, internalCPUStatPrefix) || !strings.HasSuffix(k, ".host") { 256 continue 257 } 258 name := strings.TrimSuffix(strings.TrimPrefix(k, internalCPUStatPrefix), ".host") 259 currentHostUsage, ok1 := (*stats)[internalCPUStatPrefix+name+".host"] 260 cpuNums, ok2 := (*stats)[internalCPUStatPrefix+name+".onlineCPUs"] 261 if !ok1 || !ok2 { 262 continue 263 } 264 hostUsage := float64(currentHostUsage.(uint64) - uint64(v.(float64))) 265 cpuNumsInt := cpuNums.(int) 266 if hostUsage < 0 { 267 continue // counter seems reset 268 } 269 270 currentUserUsage, ok1 := (*stats)[internalCPUStatPrefix+name+".user"] 271 prevUserUsage, ok2 := lastStat[internalCPUStatPrefix+name+".user"] 272 if ok1 && ok2 { 273 currentUserUsageUInt := currentUserUsage.(uint64) 274 prevUserUsageUInt := uint64(prevUserUsage.(float64)) 275 var userUsage float64 276 if currentUserUsageUInt >= prevUserUsageUInt { 277 userUsage = float64(currentUserUsage.(uint64) - uint64(prevUserUsage.(float64))) 278 } else { 279 // counter has been reset 280 userUsage = float64(currentUserUsageUInt) 281 } 282 (*stats)["docker.cpuacct_percentage."+name+".user"] = userUsage / hostUsage * 100.0 * float64(cpuNumsInt) 283 } 284 285 currentSystemUsage, ok1 := (*stats)[internalCPUStatPrefix+name+".system"] 286 prevSystemUsage, ok2 := lastStat[internalCPUStatPrefix+name+".system"] 287 if ok1 && ok2 { 288 currentSystemUsageUInt := currentSystemUsage.(uint64) 289 prevSystemUsageUInt := uint64(prevSystemUsage.(float64)) 290 var systemUsage float64 291 if currentSystemUsageUInt >= prevSystemUsageUInt { 292 systemUsage = float64(currentSystemUsageUInt - prevSystemUsageUInt) 293 } else { 294 systemUsage = float64(currentSystemUsageUInt) 295 } 296 (*stats)["docker.cpuacct_percentage."+name+".system"] = systemUsage / hostUsage * 100.0 * float64(cpuNumsInt) 297 } 298 } 299 } 300 301 // GraphDefinition interface for mackerel plugin 302 func (m DockerPlugin) GraphDefinition() map[string]mp.Graphs { 303 return graphdef 304 } 305 306 // Do the plugin 307 func Do() { 308 candidateNameFormat := []string{"name", "name_id", "id", "image", "image_id", "image_name", "label"} 309 setCandidateNameFormat := make(map[string]bool) 310 for _, v := range candidateNameFormat { 311 setCandidateNameFormat[v] = true 312 } 313 314 optHost := flag.String("host", "unix:///var/run/docker.sock", "Host for socket") 315 flag.String("command", "docker", "Command path to docker(deprecated)") // backward compatibility 316 optMethod := flag.String("method", "", "Specify the method to collect stats, 'API' or 'File'. If not specified, an appropriate method is chosen.(deprecated)") 317 optTempfile := flag.String("tempfile", "", "Temp file name") 318 optNameFormat := flag.String("name-format", "name_id", "Set the name format from "+strings.Join(candidateNameFormat, ", ")) 319 optLabel := flag.String("label", "", "Use the value of the key as name in case that name-format is label.") 320 optCPUFormat := flag.String("cpu-format", "", "Specify which CPU metrics format to use, 'percentage' or 'usage'. 'percentage' is default for 'API' method, and is not supported in 'File' method.") 321 flag.Parse() 322 323 var docker DockerPlugin 324 325 docker.Host = *optHost 326 docker.NameFormat = *optNameFormat 327 docker.Label = *optLabel 328 if !setCandidateNameFormat[docker.NameFormat] { 329 log.Fatalf("Name flag should be each of '%s'", strings.Join(candidateNameFormat, ",")) 330 } 331 if docker.NameFormat == "label" && docker.Label == "" { 332 log.Fatalf("Label flag should be set when name flag is 'label'.") 333 } 334 335 switch *optMethod { 336 case "", "API": 337 docker.Method = "API" 338 case "File": 339 log.Fatalf("'File' method is no longer supported") 340 default: 341 log.Fatalf("Method should be 'API', 'File' or an empty string.") 342 } 343 344 switch *optCPUFormat { 345 case "percentage": 346 docker.UseCPUPercentage = true 347 case "usage": 348 docker.UseCPUPercentage = false 349 default: 350 docker.UseCPUPercentage = true 351 } 352 353 helper := mp.NewMackerelPlugin(docker) 354 355 if *optTempfile != "" { 356 helper.Tempfile = *optTempfile 357 } else { 358 helper.SetTempfileByBasename(fmt.Sprintf("mackerel-plugin-docker-%s", normalizeMetricName(*optHost))) 359 } 360 361 if os.Getenv("MACKEREL_AGENT_PLUGIN_META") != "" { 362 helper.OutputDefinitions() 363 } else { 364 docker.lastMetricValues, _ = helper.FetchLastValues() 365 helper.Plugin = docker 366 helper.OutputValues() 367 } 368 }