github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/statemetrics/statemetrics.go (about) 1 // Copyright 2016 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package statemetrics 5 6 import ( 7 "github.com/juju/errors" 8 "github.com/juju/loggo" 9 "github.com/prometheus/client_golang/prometheus" 10 ) 11 12 const ( 13 metricsNamespace = "juju_state" 14 15 statusLabel = "status" 16 lifeLabel = "life" 17 disabledLabel = "disabled" 18 deletedLabel = "deleted" 19 controllerAccessLabel = "controller_access" 20 domainLabel = "domain" 21 agentStatusLabel = "agent_status" 22 machineStatusLabel = "machine_status" 23 ) 24 25 var ( 26 machineLabelNames = []string{ 27 agentStatusLabel, 28 lifeLabel, 29 machineStatusLabel, 30 } 31 32 modelLabelNames = []string{ 33 lifeLabel, 34 statusLabel, 35 } 36 37 userLabelNames = []string{ 38 controllerAccessLabel, 39 deletedLabel, 40 disabledLabel, 41 domainLabel, 42 } 43 44 logger = loggo.GetLogger("juju.state.statemetrics") 45 ) 46 47 // Collector is a prometheus.Collector that collects metrics about 48 // the Juju global state. 49 type Collector struct { 50 pool StatePool 51 52 scrapeDuration prometheus.Gauge 53 scrapeErrors prometheus.Gauge 54 55 models *prometheus.GaugeVec 56 machines *prometheus.GaugeVec 57 users *prometheus.GaugeVec 58 } 59 60 // New returns a new Collector. 61 func New(pool StatePool) *Collector { 62 return &Collector{ 63 pool: pool, 64 scrapeDuration: prometheus.NewGauge( 65 prometheus.GaugeOpts{ 66 Namespace: metricsNamespace, 67 Name: "scrape_duration_seconds", 68 Help: "Amount of time taken to collect state metrics.", 69 }, 70 ), 71 scrapeErrors: prometheus.NewGauge( 72 prometheus.GaugeOpts{ 73 Namespace: metricsNamespace, 74 Name: "scrape_errors", 75 Help: "Number of errors observed while collecting state metrics.", 76 }, 77 ), 78 79 models: prometheus.NewGaugeVec( 80 prometheus.GaugeOpts{ 81 Namespace: metricsNamespace, 82 Name: "models", 83 Help: "Number of models in the controller.", 84 }, 85 modelLabelNames, 86 ), 87 machines: prometheus.NewGaugeVec( 88 prometheus.GaugeOpts{ 89 Namespace: metricsNamespace, 90 Name: "machines", 91 Help: "Number of machines managed by the controller.", 92 }, 93 machineLabelNames, 94 ), 95 users: prometheus.NewGaugeVec( 96 prometheus.GaugeOpts{ 97 Namespace: metricsNamespace, 98 Name: "users", 99 Help: "Number of local users in the controller.", 100 }, 101 userLabelNames, 102 ), 103 } 104 } 105 106 // Describe is part of the prometheus.Collector interface. 107 func (c *Collector) Describe(ch chan<- *prometheus.Desc) { 108 c.machines.Describe(ch) 109 c.models.Describe(ch) 110 c.users.Describe(ch) 111 112 c.scrapeErrors.Describe(ch) 113 c.scrapeDuration.Describe(ch) 114 } 115 116 // Collect is part of the prometheus.Collector interface. 117 func (c *Collector) Collect(ch chan<- prometheus.Metric) { 118 timer := prometheus.NewTimer(prometheus.ObserverFunc(c.scrapeDuration.Set)) 119 defer c.scrapeDuration.Collect(ch) 120 defer timer.ObserveDuration() 121 c.scrapeErrors.Set(0) 122 defer c.scrapeErrors.Collect(ch) 123 124 c.machines.Reset() 125 c.models.Reset() 126 c.users.Reset() 127 128 c.updateMetrics() 129 130 c.machines.Collect(ch) 131 c.models.Collect(ch) 132 c.users.Collect(ch) 133 } 134 135 func (c *Collector) updateMetrics() { 136 logger.Tracef("updating state metrics") 137 defer logger.Tracef("updated state metrics") 138 139 st := c.pool.SystemState() 140 modelUUIDs, err := st.AllModelUUIDs() 141 if err != nil { 142 logger.Debugf("error getting models: %v", err) 143 c.scrapeErrors.Inc() 144 } 145 for _, m := range modelUUIDs { 146 c.updateModelMetrics(m) 147 } 148 149 // TODO(axw) AllUsers only returns *local* users. We do not have User 150 // records for external users. To obtain external users, we will need 151 // to get all of the controller and model-level access documents. 152 controllerTag := st.ControllerTag() 153 localUsers, err := st.AllUsers() 154 if err != nil { 155 logger.Debugf("error getting local users: %v", err) 156 c.scrapeErrors.Inc() 157 localUsers = nil 158 } 159 for _, u := range localUsers { 160 userTag := u.UserTag() 161 access, err := st.UserAccess(userTag, controllerTag) 162 if err != nil && !errors.IsNotFound(err) { 163 logger.Debugf("error getting controller user access: %v", err) 164 c.scrapeErrors.Inc() 165 continue 166 } 167 var deleted, disabled string 168 if u.IsDeleted() { 169 deleted = "true" 170 } 171 if u.IsDisabled() { 172 disabled = "true" 173 } 174 c.users.With(prometheus.Labels{ 175 controllerAccessLabel: string(access.Access), 176 deletedLabel: deleted, 177 disabledLabel: disabled, 178 domainLabel: userTag.Domain(), 179 }).Inc() 180 } 181 } 182 183 func (c *Collector) updateModelMetrics(modelUUID string) { 184 model, ph, err := c.pool.GetModel(modelUUID) 185 if err != nil { 186 logger.Debugf("error getting model: %v", err) 187 return 188 } 189 defer ph.Release() 190 191 modelStatus, err := model.Status() 192 if err != nil { 193 if errors.IsNotFound(err) { 194 return // Model removed 195 } 196 c.scrapeErrors.Inc() 197 logger.Debugf("error getting model status: %v", err) 198 return 199 } 200 201 modelTag := model.ModelTag() 202 st, err := c.pool.Get(modelTag.Id()) 203 if err != nil { 204 if errors.IsNotFound(err) { 205 return // Model removed 206 } 207 c.scrapeErrors.Inc() 208 logger.Debugf("error getting model state: %v", err) 209 return 210 } 211 defer st.Release() 212 213 machines, err := st.AllMachines() 214 if err != nil { 215 c.scrapeErrors.Inc() 216 logger.Debugf("error getting machines: %v", err) 217 machines = nil 218 } 219 for _, m := range machines { 220 agentStatus, err := m.Status() 221 if errors.IsNotFound(err) { 222 continue // Machine removed 223 } else if err != nil { 224 c.scrapeErrors.Inc() 225 logger.Debugf("error getting machine status: %v", err) 226 continue 227 } 228 229 machineStatus, err := m.InstanceStatus() 230 if errors.IsNotFound(err) { 231 continue // Machine removed 232 } else if errors.IsNotProvisioned(err) { 233 machineStatus.Status = "" 234 } else if err != nil { 235 c.scrapeErrors.Inc() 236 logger.Debugf("error getting machine status: %v", err) 237 continue 238 } 239 240 c.machines.With(prometheus.Labels{ 241 agentStatusLabel: string(agentStatus.Status), 242 lifeLabel: m.Life().String(), 243 machineStatusLabel: string(machineStatus.Status), 244 }).Inc() 245 } 246 247 c.models.With(prometheus.Labels{ 248 lifeLabel: model.Life().String(), 249 statusLabel: string(modelStatus.Status), 250 }).Inc() 251 }