github.com/netdata/go.d.plugin@v0.58.1/modules/docker_engine/collect.go (about)

     1  // SPDX-License-Identifier: GPL-3.0-or-later
     2  
     3  package docker_engine
     4  
     5  import (
     6  	"fmt"
     7  
     8  	"github.com/netdata/go.d.plugin/pkg/prometheus"
     9  	"github.com/netdata/go.d.plugin/pkg/stm"
    10  )
    11  
    12  func isDockerEngineMetrics(pms prometheus.Series) bool {
    13  	return pms.FindByName("engine_daemon_engine_info").Len() > 0
    14  }
    15  
    16  func (de *DockerEngine) collect() (map[string]int64, error) {
    17  	pms, err := de.prom.ScrapeSeries()
    18  	if err != nil {
    19  		return nil, err
    20  	}
    21  
    22  	if !isDockerEngineMetrics(pms) {
    23  		return nil, fmt.Errorf("'%s' returned non docker engine metrics", de.URL)
    24  	}
    25  
    26  	mx := de.collectMetrics(pms)
    27  	return stm.ToMap(mx), nil
    28  }
    29  
    30  func (de *DockerEngine) collectMetrics(pms prometheus.Series) metrics {
    31  	var mx metrics
    32  	collectHealthChecks(&mx, pms)
    33  	collectContainerActions(&mx, pms)
    34  	collectBuilderBuildsFails(&mx, pms)
    35  	if hasContainerStates(pms) {
    36  		de.hasContainerStates = true
    37  		mx.Container.States = &containerStates{}
    38  		collectContainerStates(&mx, pms)
    39  	}
    40  	if isSwarmManager(pms) {
    41  		de.isSwarmManager = true
    42  		mx.SwarmManager = &swarmManager{}
    43  		collectSwarmManager(&mx, pms)
    44  	}
    45  	return mx
    46  }
    47  
    48  func isSwarmManager(pms prometheus.Series) bool {
    49  	return pms.FindByName("swarm_node_manager").Max() == 1
    50  }
    51  
    52  func hasContainerStates(pms prometheus.Series) bool {
    53  	return pms.FindByName("engine_daemon_container_states_containers").Len() > 0
    54  }
    55  
    56  func collectHealthChecks(mx *metrics, raw prometheus.Series) {
    57  	v := raw.FindByName("engine_daemon_health_checks_failed_total").Max()
    58  	mx.HealthChecks.Failed = v
    59  }
    60  
    61  func collectContainerActions(mx *metrics, raw prometheus.Series) {
    62  	for _, metric := range raw.FindByName("engine_daemon_container_actions_seconds_count") {
    63  		action := metric.Labels.Get("action")
    64  		if action == "" {
    65  			continue
    66  		}
    67  
    68  		v := metric.Value
    69  		switch action {
    70  		default:
    71  		case "changes":
    72  			mx.Container.Actions.Changes = v
    73  		case "commit":
    74  			mx.Container.Actions.Commit = v
    75  		case "create":
    76  			mx.Container.Actions.Create = v
    77  		case "delete":
    78  			mx.Container.Actions.Delete = v
    79  		case "start":
    80  			mx.Container.Actions.Start = v
    81  		}
    82  	}
    83  }
    84  
    85  func collectContainerStates(mx *metrics, raw prometheus.Series) {
    86  	for _, metric := range raw.FindByName("engine_daemon_container_states_containers") {
    87  		state := metric.Labels.Get("state")
    88  		if state == "" {
    89  			continue
    90  		}
    91  
    92  		v := metric.Value
    93  		switch state {
    94  		default:
    95  		case "paused":
    96  			mx.Container.States.Paused = v
    97  		case "running":
    98  			mx.Container.States.Running = v
    99  		case "stopped":
   100  			mx.Container.States.Stopped = v
   101  		}
   102  	}
   103  }
   104  
   105  func collectBuilderBuildsFails(mx *metrics, raw prometheus.Series) {
   106  	for _, metric := range raw.FindByName("builder_builds_failed_total") {
   107  		reason := metric.Labels.Get("reason")
   108  		if reason == "" {
   109  			continue
   110  		}
   111  
   112  		v := metric.Value
   113  		switch reason {
   114  		default:
   115  		case "build_canceled":
   116  			mx.Builder.FailsByReason.BuildCanceled = v
   117  		case "build_target_not_reachable_error":
   118  			mx.Builder.FailsByReason.BuildTargetNotReachableError = v
   119  		case "command_not_supported_error":
   120  			mx.Builder.FailsByReason.CommandNotSupportedError = v
   121  		case "dockerfile_empty_error":
   122  			mx.Builder.FailsByReason.DockerfileEmptyError = v
   123  		case "dockerfile_syntax_error":
   124  			mx.Builder.FailsByReason.DockerfileSyntaxError = v
   125  		case "error_processing_commands_error":
   126  			mx.Builder.FailsByReason.ErrorProcessingCommandsError = v
   127  		case "missing_onbuild_arguments_error":
   128  			mx.Builder.FailsByReason.MissingOnbuildArgumentsError = v
   129  		case "unknown_instruction_error":
   130  			mx.Builder.FailsByReason.UnknownInstructionError = v
   131  		}
   132  	}
   133  }
   134  
   135  func collectSwarmManager(mx *metrics, raw prometheus.Series) {
   136  	v := raw.FindByName("swarm_manager_configs_total").Max()
   137  	mx.SwarmManager.Configs = v
   138  
   139  	v = raw.FindByName("swarm_manager_networks_total").Max()
   140  	mx.SwarmManager.Networks = v
   141  
   142  	v = raw.FindByName("swarm_manager_secrets_total").Max()
   143  	mx.SwarmManager.Secrets = v
   144  
   145  	v = raw.FindByName("swarm_manager_services_total").Max()
   146  	mx.SwarmManager.Services = v
   147  
   148  	v = raw.FindByName("swarm_manager_leader").Max()
   149  	mx.SwarmManager.IsLeader = v
   150  
   151  	for _, metric := range raw.FindByName("swarm_manager_nodes") {
   152  		state := metric.Labels.Get("state")
   153  		if state == "" {
   154  			continue
   155  		}
   156  
   157  		v := metric.Value
   158  		switch state {
   159  		default:
   160  		case "disconnected":
   161  			mx.SwarmManager.Nodes.PerState.Disconnected = v
   162  		case "down":
   163  			mx.SwarmManager.Nodes.PerState.Down = v
   164  		case "ready":
   165  			mx.SwarmManager.Nodes.PerState.Ready = v
   166  		case "unknown":
   167  			mx.SwarmManager.Nodes.PerState.Unknown = v
   168  		}
   169  		mx.SwarmManager.Nodes.Total += v
   170  	}
   171  
   172  	for _, metric := range raw.FindByName("swarm_manager_tasks_total") {
   173  		state := metric.Labels.Get("state")
   174  		if state == "" {
   175  			continue
   176  		}
   177  
   178  		v := metric.Value
   179  		switch state {
   180  		default:
   181  		case "accepted":
   182  			mx.SwarmManager.Tasks.PerState.Accepted = v
   183  		case "assigned":
   184  			mx.SwarmManager.Tasks.PerState.Assigned = v
   185  		case "complete":
   186  			mx.SwarmManager.Tasks.PerState.Complete = v
   187  		case "failed":
   188  			mx.SwarmManager.Tasks.PerState.Failed = v
   189  		case "new":
   190  			mx.SwarmManager.Tasks.PerState.New = v
   191  		case "orphaned":
   192  			mx.SwarmManager.Tasks.PerState.Orphaned = v
   193  		case "pending":
   194  			mx.SwarmManager.Tasks.PerState.Pending = v
   195  		case "preparing":
   196  			mx.SwarmManager.Tasks.PerState.Preparing = v
   197  		case "ready":
   198  			mx.SwarmManager.Tasks.PerState.Ready = v
   199  		case "rejected":
   200  			mx.SwarmManager.Tasks.PerState.Rejected = v
   201  		case "remove":
   202  			mx.SwarmManager.Tasks.PerState.Remove = v
   203  		case "running":
   204  			mx.SwarmManager.Tasks.PerState.Running = v
   205  		case "shutdown":
   206  			mx.SwarmManager.Tasks.PerState.Shutdown = v
   207  		case "starting":
   208  			mx.SwarmManager.Tasks.PerState.Starting = v
   209  		}
   210  		mx.SwarmManager.Tasks.Total += v
   211  	}
   212  }