github.com/pachyderm/pachyderm@v1.13.4/etc/kubernetes-prometheus/manifests-all.yaml (about) 1 # Derived from ./manifests 2 --- 3 apiVersion: v1 4 kind: Namespace 5 metadata: 6 name: monitoring 7 --- 8 apiVersion: rbac.authorization.k8s.io/v1 # RBAC v1; the v1beta1 API is no longer served as of Kubernetes 1.22 9 kind: ClusterRoleBinding 10 metadata: 11 name: prometheus 12 roleRef: 13 apiGroup: rbac.authorization.k8s.io 14 kind: ClusterRole 15 name: prometheus 16 subjects: 17 - kind: ServiceAccount 18 name: prometheus-k8s 19 namespace: monitoring 20 --- 21 apiVersion: rbac.authorization.k8s.io/v1 # RBAC v1; the v1beta1 API is no longer served as of Kubernetes 1.22 22 kind: ClusterRole 23 metadata: 24 name: prometheus 25 rules: 26 - apiGroups: [""] 27 resources: 28 - nodes 29 - nodes/proxy 30 - services 31 - endpoints 32 - pods 33 verbs: ["get", "list", "watch"] 34 - apiGroups: [""] 35 resources: 36 - configmaps 37 verbs: ["get"] 38 - nonResourceURLs: ["/metrics"] 39 verbs: ["get"] 40 --- 41 apiVersion: v1 42 kind: ServiceAccount 43 metadata: 44 name: prometheus-k8s 45 namespace: monitoring 46 --- 47 apiVersion: v1 48 data: 49 default.tmpl: | 50 {{ define "__alertmanager" }}AlertManager{{ end }} 51 {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }} 52 53 {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }} 54 {{ define "__description" }}{{ end }} 55 56 {{ define "__text_alert_list" }}{{ range . }}Labels: 57 {{ range .Labels.SortedPairs }} - {{ .Name }} = {{ .Value }} 58 {{ end }}Annotations: 59 {{ range .Annotations.SortedPairs }} - {{ .Name }} = {{ .Value }} 60 {{ end }}Source: {{ .GeneratorURL }} 61 {{ end }}{{ end }} 62 63 64 {{ define "slack.default.title" }}{{ template "__subject" . }}{{ end }} 65 {{ define "slack.default.username" }}{{ template "__alertmanager" . 
}}{{ end }} 66 {{ define "slack.default.fallback" }}{{ template "slack.default.title" . }} | {{ template "slack.default.titlelink" . }}{{ end }} 67 {{ define "slack.default.pretext" }}{{ end }} 68 {{ define "slack.default.titlelink" }}{{ template "__alertmanagerURL" . }}{{ end }} 69 {{ define "slack.default.iconemoji" }}{{ end }} 70 {{ define "slack.default.iconurl" }}{{ end }} 71 {{ define "slack.default.text" }}{{ end }} 72 73 74 {{ define "hipchat.default.from" }}{{ template "__alertmanager" . }}{{ end }} 75 {{ define "hipchat.default.message" }}{{ template "__subject" . }}{{ end }} 76 77 78 {{ define "pagerduty.default.description" }}{{ template "__subject" . }}{{ end }} 79 {{ define "pagerduty.default.client" }}{{ template "__alertmanager" . }}{{ end }} 80 {{ define "pagerduty.default.clientURL" }}{{ template "__alertmanagerURL" . }}{{ end }} 81 {{ define "pagerduty.default.instances" }}{{ template "__text_alert_list" . }}{{ end }} 82 83 84 {{ define "opsgenie.default.message" }}{{ template "__subject" . }}{{ end }} 85 {{ define "opsgenie.default.description" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} 86 {{ if gt (len .Alerts.Firing) 0 -}} 87 Alerts Firing: 88 {{ template "__text_alert_list" .Alerts.Firing }} 89 {{- end }} 90 {{ if gt (len .Alerts.Resolved) 0 -}} 91 Alerts Resolved: 92 {{ template "__text_alert_list" .Alerts.Resolved }} 93 {{- end }} 94 {{- end }} 95 {{ define "opsgenie.default.source" }}{{ template "__alertmanagerURL" . }}{{ end }} 96 97 98 {{ define "victorops.default.message" }}{{ template "__subject" . }} | {{ template "__alertmanagerURL" . }}{{ end }} 99 {{ define "victorops.default.from" }}{{ template "__alertmanager" . }}{{ end }} 100 101 102 {{ define "email.default.subject" }}{{ template "__subject" . 
}}{{ end }} 103 {{ define "email.default.html" }} 104 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 105 <!-- 106 Style and HTML derived from https://github.com/mailgun/transactional-email-templates 107 108 109 The MIT License (MIT) 110 111 Copyright (c) 2014 Mailgun 112 113 Permission is hereby granted, free of charge, to any person obtaining a copy 114 of this software and associated documentation files (the "Software"), to deal 115 in the Software without restriction, including without limitation the rights 116 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 117 copies of the Software, and to permit persons to whom the Software is 118 furnished to do so, subject to the following conditions: 119 120 The above copyright notice and this permission notice shall be included in all 121 copies or substantial portions of the Software. 122 123 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 124 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 125 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 126 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 127 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 128 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 129 SOFTWARE. 
130 --> 131 <html xmlns="http://www.w3.org/1999/xhtml" xmlns="http://www.w3.org/1999/xhtml" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 132 <head style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 133 <meta name="viewport" content="width=device-width" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> 134 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> 135 <title style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">{{ template "__subject" . }}</title> 136 137 </head> 138 139 <body itemscope="" itemtype="http://schema.org/EmailMessage" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; -webkit-font-smoothing: antialiased; -webkit-text-size-adjust: none; height: 100%; line-height: 1.6em; width: 100% !important; background-color: #f6f6f6; margin: 0; padding: 0;" bgcolor="#f6f6f6"> 140 141 <table style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; width: 100%; background-color: #f6f6f6; margin: 0;" bgcolor="#f6f6f6"> 142 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 143 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td> 144 <td width="600" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; display: block !important; max-width: 600px !important; clear: both 
!important; width: 100% !important; margin: 0 auto; padding: 0;" valign="top"> 145 <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; max-width: 600px; display: block; margin: 0 auto; padding: 0;"> 146 <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; border-radius: 3px; background-color: #fff; margin: 0; border: 1px solid #e9e9e9;" bgcolor="#fff"> 147 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 148 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 16px; vertical-align: top; color: #fff; font-weight: 500; text-align: center; border-radius: 3px 3px 0 0; background-color: #E6522C; margin: 0; padding: 20px;" align="center" bgcolor="#E6522C" valign="top"> 149 {{ .Alerts | len }} alert{{ if gt (len .Alerts) 1 }}s{{ end }} for {{ range .GroupLabels.SortedPairs }} 150 {{ .Name }}={{ .Value }} 151 {{ end }} 152 </td> 153 </tr> 154 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 155 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 10px;" valign="top"> 156 <table width="100%" cellpadding="0" cellspacing="0" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 157 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 158 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> 159 <a href="{{ template "__alertmanagerURL" . 
}}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #FFF; text-decoration: none; line-height: 2em; font-weight: bold; text-align: center; cursor: pointer; display: inline-block; border-radius: 5px; text-transform: capitalize; background-color: #348eda; margin: 0; border-color: #348eda; border-style: solid; border-width: 10px 20px;">View in {{ template "__alertmanager" . }}</a> 160 </td> 161 </tr> 162 {{ if gt (len .Alerts.Firing) 0 }} 163 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 164 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> 165 <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">[{{ .Alerts.Firing | len }}] Firing</strong> 166 </td> 167 </tr> 168 {{ end }} 169 {{ range .Alerts.Firing }} 170 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 171 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> 172 <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> 173 {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} 174 {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 
14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} 175 {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} 176 <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> 177 </td> 178 </tr> 179 {{ end }} 180 181 {{ if gt (len .Alerts.Resolved) 0 }} 182 {{ if gt (len .Alerts.Firing) 0 }} 183 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 184 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> 185 <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> 186 <hr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> 187 <br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> 188 </td> 189 </tr> 190 {{ end }} 191 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 192 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> 193 <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; 
font-size: 14px; margin: 0;">[{{ .Alerts.Resolved | len }}] Resolved</strong> 194 </td> 195 </tr> 196 {{ end }} 197 {{ range .Alerts.Resolved }} 198 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 199 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0; padding: 0 0 20px;" valign="top"> 200 <strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Labels</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> 201 {{ range .Labels.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} 202 {{ if gt (len .Annotations) 0 }}<strong style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;">Annotations</strong><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} 203 {{ range .Annotations.SortedPairs }}{{ .Name }} = {{ .Value }}<br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" />{{ end }} 204 <a href="{{ .GeneratorURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; color: #348eda; text-decoration: underline; margin: 0;">Source</a><br style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;" /> 205 </td> 206 </tr> 207 {{ end }} 208 </table> 209 </td> 210 </tr> 211 </table> 212 213 <div style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; 
width: 100%; clear: both; color: #999; margin: 0; padding: 20px;"> 214 <table width="100%" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 215 <tr style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; margin: 0;"> 216 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; vertical-align: top; text-align: center; color: #999; margin: 0; padding: 0 0 20px;" align="center" valign="top"><a href="{{ .ExternalURL }}" style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 12px; color: #999; text-decoration: underline; margin: 0;">Sent by {{ template "__alertmanager" . }}</a></td> 217 </tr> 218 </table> 219 </div></div> 220 </td> 221 <td style="font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif; box-sizing: border-box; font-size: 14px; vertical-align: top; margin: 0;" valign="top"></td> 222 </tr> 223 </table> 224 225 </body> 226 </html> 227 228 {{ end }} 229 230 {{ define "pushover.default.title" }}{{ template "__subject" . }}{{ end }} 231 {{ define "pushover.default.message" }}{{ .CommonAnnotations.SortedPairs.Values | join " " }} 232 {{ if gt (len .Alerts.Firing) 0 }} 233 Alerts Firing: 234 {{ template "__text_alert_list" .Alerts.Firing }} 235 {{ end }} 236 {{ if gt (len .Alerts.Resolved) 0 }} 237 Alerts Resolved: 238 {{ template "__text_alert_list" .Alerts.Resolved }} 239 {{ end }} 240 {{ end }} 241 {{ define "pushover.default.url" }}{{ template "__alertmanagerURL" . 
}}{{ end }} 242 slack.tmpl: | 243 {{ define "slack.devops.text" }} 244 {{range .Alerts}}{{.Annotations.DESCRIPTION}} 245 {{end}} 246 {{ end }} 247 kind: ConfigMap 248 metadata: 249 creationTimestamp: null 250 name: alertmanager-templates 251 namespace: monitoring 252 --- 253 kind: ConfigMap 254 apiVersion: v1 255 metadata: 256 name: alertmanager 257 namespace: monitoring 258 data: 259 config.yml: |- 260 global: 261 # ResolveTimeout is the time after which an alert is declared resolved 262 # if it has not been updated. 263 resolve_timeout: 5m 264 265 # The smarthost and SMTP sender used for mail notifications. 266 smtp_smarthost: 'smtp.gmail.com:587' 267 smtp_from: 'foo@bar.com' 268 smtp_auth_username: 'foo@bar.com' 269 smtp_auth_password: 'barfoo' 270 271 # The API URL to use for Slack notifications. 272 slack_api_url: 'https://hooks.slack.com/services/some/api/token' 273 274 # The directory from which notification templates are read. 275 templates: 276 - '/etc/alertmanager-templates/*.tmpl' 277 278 # The root route on which each incoming alert enters. 279 route: 280 281 # The labels by which incoming alerts are grouped together. For example, 282 # multiple alerts coming in for cluster=A and alertname=LatencyHigh would 283 # be batched into a single group. 284 285 group_by: ['alertname', 'cluster', 'service'] 286 287 # When a new group of alerts is created by an incoming alert, wait at 288 # least 'group_wait' to send the initial notification. 289 # This ensures that multiple alerts for the same group that start 290 # firing shortly after one another are batched together on the first 291 # notification. 292 293 group_wait: 30s 294 295 # When the first notification was sent, wait 'group_interval' to send a batch 296 # of new alerts that started firing for that group. 297 298 group_interval: 5m 299 300 # If an alert has successfully been sent, wait 'repeat_interval' to 301 # resend it.
302 303 #repeat_interval: 1m 304 repeat_interval: 15m 305 306 # A default receiver 307 308 # If an alert isn't caught by a route, send it to default. 309 receiver: default 310 311 # All the above attributes are inherited by all child routes and can 312 # be overwritten on each. 313 314 # The child route trees. 315 routes: 316 # Send severity=slack alerts to slack. 317 - match: 318 severity: slack 319 receiver: slack_alert 320 # - match: 321 # severity: email 322 # receiver: email_alert 323 324 receivers: 325 - name: 'default' 326 slack_configs: 327 - channel: '#alertmanager-test' 328 text: '<!channel>{{ template "slack.devops.text" . }}' 329 send_resolved: true 330 331 - name: 'slack_alert' 332 slack_configs: 333 - channel: '#alertmanager-test' 334 send_resolved: true 335 --- 336 apiVersion: apps/v1 337 kind: Deployment 338 metadata: 339 name: alertmanager 340 namespace: monitoring 341 spec: 342 replicas: 1 343 selector: 344 matchLabels: 345 app: alertmanager 346 template: 347 metadata: 348 name: alertmanager 349 labels: 350 app: alertmanager 351 spec: 352 containers: 353 - name: alertmanager 354 image: quay.io/prometheus/alertmanager:v0.7.1 355 args: 356 - '-config.file=/etc/alertmanager/config.yml' 357 - '-storage.path=/alertmanager' 358 ports: 359 - name: alertmanager 360 containerPort: 9093 361 volumeMounts: 362 - name: config-volume 363 mountPath: /etc/alertmanager 364 - name: templates-volume 365 mountPath: /etc/alertmanager-templates 366 - name: alertmanager 367 mountPath: /alertmanager 368 volumes: 369 - name: config-volume 370 configMap: 371 name: alertmanager 372 - name: templates-volume 373 configMap: 374 name: alertmanager-templates 375 - name: alertmanager 376 emptyDir: {} 377 --- 378 apiVersion: v1 379 kind: Service 380 metadata: 381 annotations: 382 prometheus.io/scrape: 'true' 383 prometheus.io/path: '/metrics' 384 labels: 385 name: alertmanager 386 name: alertmanager 387 namespace: monitoring 388 spec: 389 selector: 390 app: alertmanager 391 type: 
NodePort 392 ports: 393 - name: alertmanager 394 protocol: TCP 395 port: 9093 396 targetPort: 9093 397 --- 398 apiVersion: apps/v1 399 kind: Deployment 400 metadata: 401 name: grafana-core 402 namespace: monitoring 403 labels: 404 app: grafana 405 component: core 406 spec: 407 replicas: 1 408 selector: 409 matchLabels: 410 app: grafana 411 template: 412 metadata: 413 labels: 414 app: grafana 415 component: core 416 spec: 417 containers: 418 - image: grafana/grafana:4.2.0 419 name: grafana-core 420 imagePullPolicy: IfNotPresent 421 # env: 422 resources: 423 # keep request = limit to keep this container in guaranteed class 424 limits: 425 cpu: 100m 426 memory: 100Mi 427 requests: 428 cpu: 100m 429 memory: 100Mi 430 env: 431 # The following env variables set up basic auth with the default admin user and admin password. 432 - name: GF_AUTH_BASIC_ENABLED 433 value: "true" 434 - name: GF_SECURITY_ADMIN_USER 435 valueFrom: 436 secretKeyRef: 437 name: grafana 438 key: admin-username 439 - name: GF_SECURITY_ADMIN_PASSWORD 440 valueFrom: 441 secretKeyRef: 442 name: grafana 443 key: admin-password 444 - name: GF_AUTH_ANONYMOUS_ENABLED 445 value: "false" 446 # - name: GF_AUTH_ANONYMOUS_ORG_ROLE 447 # value: Admin 448 # does not really work, because of template variables in exported dashboards: 449 # - name: GF_DASHBOARDS_JSON_ENABLED 450 # value: "true" 451 readinessProbe: 452 httpGet: 453 path: /login 454 port: 3000 455 # initialDelaySeconds: 30 456 # timeoutSeconds: 1 457 volumeMounts: 458 - name: grafana-persistent-storage 459 mountPath: /var/lib/grafana 460 volumes: 461 - name: grafana-persistent-storage 462 emptyDir: {} 463 --- 464 apiVersion: v1 465 data: 466 grafana-net-2-dashboard.json: | 467 { 468 "__inputs": [{ 469 "name": "DS_PROMETHEUS", 470 "label": "Prometheus", 471 "description": "", 472 "type": "datasource", 473 "pluginId": "prometheus", 474 "pluginName": "Prometheus" 475 }], 476 "__requires": [{ 477 "type": "panel", 478 "id": "singlestat", 479 "name": 
"Singlestat", 480 "version": "" 481 }, { 482 "type": "panel", 483 "id": "text", 484 "name": "Text", 485 "version": "" 486 }, { 487 "type": "panel", 488 "id": "graph", 489 "name": "Graph", 490 "version": "" 491 }, { 492 "type": "grafana", 493 "id": "grafana", 494 "name": "Grafana", 495 "version": "3.1.0" 496 }, { 497 "type": "datasource", 498 "id": "prometheus", 499 "name": "Prometheus", 500 "version": "1.0.0" 501 }], 502 "id": null, 503 "title": "Prometheus Stats", 504 "tags": [], 505 "style": "dark", 506 "timezone": "browser", 507 "editable": true, 508 "hideControls": true, 509 "sharedCrosshair": false, 510 "rows": [{ 511 "collapse": false, 512 "editable": true, 513 "height": 178, 514 "panels": [{ 515 "cacheTimeout": null, 516 "colorBackground": false, 517 "colorValue": false, 518 "colors": ["rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", "rgba(50, 172, 45, 0.97)"], 519 "datasource": "${DS_PROMETHEUS}", 520 "decimals": 1, 521 "editable": true, 522 "error": false, 523 "format": "s", 524 "id": 5, 525 "interval": null, 526 "links": [], 527 "maxDataPoints": 100, 528 "nullPointMode": "connected", 529 "nullText": null, 530 "postfix": "", 531 "postfixFontSize": "50%", 532 "prefix": "", 533 "prefixFontSize": "50%", 534 "span": 3, 535 "sparkline": { 536 "fillColor": "rgba(31, 118, 189, 0.18)", 537 "full": false, 538 "lineColor": "rgb(31, 120, 193)", 539 "show": false 540 }, 541 "targets": [{ 542 "expr": "(time() - container_start_time_seconds{container_name=\"kube-apiserver\"})", 543 "intervalFactor": 2, 544 "refId": "A", 545 "step": 4 546 }], 547 "thresholds": "", 548 "title": "Uptime", 549 "type": "singlestat", 550 "valueFontSize": "80%", 551 "valueMaps": [{ 552 "op": "=", 553 "text": "N/A", 554 "value": "null" 555 }], 556 "valueName": "current", 557 "mappingTypes": [{ 558 "name": "value to text", 559 "value": 1 560 }, { 561 "name": "range to text", 562 "value": 2 563 }], 564 "rangeMaps": [{ 565 "from": "null", 566 "to": "null", 567 "text": "N/A" 568 }], 569 
"mappingType": 1, 570 "gauge": { 571 "show": false, 572 "minValue": 0, 573 "maxValue": 100, 574 "thresholdMarkers": true, 575 "thresholdLabels": false 576 } 577 }, { 578 "cacheTimeout": null, 579 "colorBackground": false, 580 "colorValue": false, 581 "colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"], 582 "datasource": "${DS_PROMETHEUS}", 583 "editable": true, 584 "error": false, 585 "format": "none", 586 "id": 6, 587 "interval": null, 588 "links": [], 589 "maxDataPoints": 100, 590 "nullPointMode": "connected", 591 "nullText": null, 592 "postfix": "", 593 "postfixFontSize": "50%", 594 "prefix": "", 595 "prefixFontSize": "50%", 596 "span": 3, 597 "sparkline": { 598 "fillColor": "rgba(31, 118, 189, 0.18)", 599 "full": false, 600 "lineColor": "rgb(31, 120, 193)", 601 "show": true 602 }, 603 "targets": [{ 604 "expr": "prometheus_local_storage_memory_series", 605 "intervalFactor": 2, 606 "refId": "A", 607 "step": 4 608 }], 609 "thresholds": "1,5", 610 "title": "Local Storage Memory Series", 611 "type": "singlestat", 612 "valueFontSize": "70%", 613 "valueMaps": [], 614 "valueName": "current", 615 "mappingTypes": [{ 616 "name": "value to text", 617 "value": 1 618 }, { 619 "name": "range to text", 620 "value": 2 621 }], 622 "rangeMaps": [{ 623 "from": "null", 624 "to": "null", 625 "text": "N/A" 626 }], 627 "mappingType": 1, 628 "gauge": { 629 "show": false, 630 "minValue": 0, 631 "maxValue": 100, 632 "thresholdMarkers": true, 633 "thresholdLabels": false 634 } 635 }, { 636 "cacheTimeout": null, 637 "colorBackground": false, 638 "colorValue": true, 639 "colors": ["rgba(50, 172, 45, 0.97)", "rgba(237, 129, 40, 0.89)", "rgba(245, 54, 54, 0.9)"], 640 "datasource": "${DS_PROMETHEUS}", 641 "editable": true, 642 "error": false, 643 "format": "none", 644 "id": 7, 645 "interval": null, 646 "links": [], 647 "maxDataPoints": 100, 648 "nullPointMode": "connected", 649 "nullText": null, 650 "postfix": "", 651 "postfixFontSize": "50%", 652 
"prefix": "", 653 "prefixFontSize": "50%", 654 "span": 3, 655 "sparkline": { 656 "fillColor": "rgba(31, 118, 189, 0.18)", 657 "full": false, 658 "lineColor": "rgb(31, 120, 193)", 659 "show": true 660 }, 661 "targets": [{ 662 "expr": "prometheus_local_storage_indexing_queue_length", 663 "intervalFactor": 2, 664 "refId": "A", 665 "step": 4 666 }], 667 "thresholds": "500,4000", 668 "title": "Internal Storage Queue Length", 669 "type": "singlestat", 670 "valueFontSize": "70%", 671 "valueMaps": [{ 672 "op": "=", 673 "text": "Empty", 674 "value": "0" 675 }], 676 "valueName": "current", 677 "mappingTypes": [{ 678 "name": "value to text", 679 "value": 1 680 }, { 681 "name": "range to text", 682 "value": 2 683 }], 684 "rangeMaps": [{ 685 "from": "null", 686 "to": "null", 687 "text": "N/A" 688 }], 689 "mappingType": 1, 690 "gauge": { 691 "show": false, 692 "minValue": 0, 693 "maxValue": 100, 694 "thresholdMarkers": true, 695 "thresholdLabels": false 696 } 697 }, { 698 "content": "<img src=\"http://prometheus.io/assets/prometheus_logo_grey.svg\" alt=\"Prometheus logo\" style=\"height: 40px;\">\n<span style=\"font-family: 'Open Sans', 'Helvetica Neue', Helvetica; font-size: 25px;vertical-align: text-top;color: #bbbfc2;margin-left: 10px;\">Prometheus</span>\n\n<p style=\"margin-top: 10px;\">You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. 
For more information, check out the <a href=\"http://www.grafana.org/\">Grafana</a> and <a href=\"http://prometheus.io/\">Prometheus</a> projects.</p>", 699 "editable": true, 700 "error": false, 701 "id": 9, 702 "links": [], 703 "mode": "html", 704 "span": 3, 705 "style": {}, 706 "title": "", 707 "transparent": true, 708 "type": "text" 709 }], 710 "title": "New row" 711 }, { 712 "collapse": false, 713 "editable": true, 714 "height": 227, 715 "panels": [{ 716 "aliasColors": { 717 "prometheus": "#C15C17", 718 "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17" 719 }, 720 "bars": false, 721 "datasource": "${DS_PROMETHEUS}", 722 "editable": true, 723 "error": false, 724 "fill": 1, 725 "grid": { 726 "threshold1": null, 727 "threshold1Color": "rgba(216, 200, 27, 0.27)", 728 "threshold2": null, 729 "threshold2Color": "rgba(234, 112, 112, 0.22)" 730 }, 731 "id": 3, 732 "legend": { 733 "avg": false, 734 "current": false, 735 "max": false, 736 "min": false, 737 "show": true, 738 "total": false, 739 "values": false 740 }, 741 "lines": true, 742 "linewidth": 2, 743 "links": [], 744 "nullPointMode": "connected", 745 "percentage": false, 746 "pointradius": 2, 747 "points": false, 748 "renderer": "flot", 749 "seriesOverrides": [], 750 "span": 9, 751 "stack": false, 752 "steppedLine": false, 753 "targets": [{ 754 "expr": "rate(prometheus_local_storage_ingested_samples_total[5m])", 755 "interval": "", 756 "intervalFactor": 2, 757 "legendFormat": "{{job}}", 758 "metric": "", 759 "refId": "A", 760 "step": 2 761 }], 762 "timeFrom": null, 763 "timeShift": null, 764 "title": "Samples ingested (rate-5m)", 765 "tooltip": { 766 "shared": true, 767 "value_type": "cumulative", 768 "ordering": "alphabetical", 769 "msResolution": false 770 }, 771 "type": "graph", 772 "yaxes": [{ 773 "show": true, 774 "min": null, 775 "max": null, 776 "logBase": 1, 777 "format": "short" 778 }, { 779 "show": true, 780 "min": null, 781 "max": null, 782 "logBase": 1, 783 "format": "short" 784 }], 785 
"xaxis": { 786 "show": true 787 } 788 }, { 789 "content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. ", 790 "editable": true, 791 "error": false, 792 "id": 8, 793 "links": [], 794 "mode": "markdown", 795 "span": 2.995914043583536, 796 "style": {}, 797 "title": "", 798 "transparent": true, 799 "type": "text" 800 }], 801 "title": "New row" 802 }, { 803 "collapse": false, 804 "editable": true, 805 "height": "250px", 806 "panels": [{ 807 "aliasColors": { 808 "prometheus": "#F9BA8F", 809 "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" 810 }, 811 "bars": false, 812 "datasource": "${DS_PROMETHEUS}", 813 "editable": true, 814 "error": false, 815 "fill": 1, 816 "grid": { 817 "threshold1": null, 818 "threshold1Color": "rgba(216, 200, 27, 0.27)", 819 "threshold2": null, 820 "threshold2Color": "rgba(234, 112, 112, 0.22)" 821 }, 822 "id": 2, 823 "legend": { 824 "avg": false, 825 "current": false, 826 "max": false, 827 "min": false, 828 "show": true, 829 "total": false, 830 "values": false 831 }, 832 "lines": true, 833 "linewidth": 2, 834 "links": [], 835 "nullPointMode": "connected", 836 "percentage": false, 837 "pointradius": 5, 838 "points": false, 839 "renderer": "flot", 840 "seriesOverrides": [], 841 "span": 5, 842 "stack": false, 843 "steppedLine": false, 844 "targets": [{ 845 "expr": "rate(prometheus_target_interval_length_seconds_count[5m])", 846 "intervalFactor": 2, 847 "legendFormat": "{{job}}", 848 "refId": "A", 849 "step": 2 850 }], 851 "timeFrom": null, 852 "timeShift": null, 853 "title": "Target Scrapes (last 5m)", 854 "tooltip": { 855 "shared": true, 856 "value_type": "cumulative", 857 "ordering": "alphabetical", 858 "msResolution": false 859 }, 860 "type": "graph", 861 "yaxes": [{ 862 "show": 
true, 863 "min": null, 864 "max": null, 865 "logBase": 1, 866 "format": "short" 867 }, { 868 "show": true, 869 "min": null, 870 "max": null, 871 "logBase": 1, 872 "format": "short" 873 }], 874 "xaxis": { 875 "show": true 876 } 877 }, { 878 "aliasColors": {}, 879 "bars": false, 880 "datasource": "${DS_PROMETHEUS}", 881 "editable": true, 882 "error": false, 883 "fill": 1, 884 "grid": { 885 "threshold1": null, 886 "threshold1Color": "rgba(216, 200, 27, 0.27)", 887 "threshold2": null, 888 "threshold2Color": "rgba(234, 112, 112, 0.22)" 889 }, 890 "id": 14, 891 "legend": { 892 "avg": false, 893 "current": false, 894 "max": false, 895 "min": false, 896 "show": true, 897 "total": false, 898 "values": false 899 }, 900 "lines": true, 901 "linewidth": 2, 902 "links": [], 903 "nullPointMode": "connected", 904 "percentage": false, 905 "pointradius": 5, 906 "points": false, 907 "renderer": "flot", 908 "seriesOverrides": [], 909 "span": 4, 910 "stack": false, 911 "steppedLine": false, 912 "targets": [{ 913 "expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", 914 "interval": "", 915 "intervalFactor": 2, 916 "legendFormat": "{{quantile}} ({{interval}})", 917 "metric": "", 918 "refId": "A", 919 "step": 2 920 }], 921 "timeFrom": null, 922 "timeShift": null, 923 "title": "Scrape Duration", 924 "tooltip": { 925 "shared": true, 926 "value_type": "cumulative", 927 "ordering": "alphabetical", 928 "msResolution": false 929 }, 930 "type": "graph", 931 "yaxes": [{ 932 "show": true, 933 "min": null, 934 "max": null, 935 "logBase": 1, 936 "format": "short" 937 }, { 938 "show": true, 939 "min": null, 940 "max": null, 941 "logBase": 1, 942 "format": "short" 943 }], 944 "xaxis": { 945 "show": true 946 } 947 }, { 948 "content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. 
Target scrapes will show how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration will show how long the scrapes are taking, with percentiles available as series. ", 949 "editable": true, 950 "error": false, 951 "id": 11, 952 "links": [], 953 "mode": "markdown", 954 "span": 3, 955 "style": {}, 956 "title": "", 957 "transparent": true, 958 "type": "text" 959 }], 960 "title": "New row" 961 }, { 962 "collapse": false, 963 "editable": true, 964 "height": "250px", 965 "panels": [{ 966 "aliasColors": {}, 967 "bars": false, 968 "datasource": "${DS_PROMETHEUS}", 969 "decimals": null, 970 "editable": true, 971 "error": false, 972 "fill": 1, 973 "grid": { 974 "threshold1": null, 975 "threshold1Color": "rgba(216, 200, 27, 0.27)", 976 "threshold2": null, 977 "threshold2Color": "rgba(234, 112, 112, 0.22)" 978 }, 979 "id": 12, 980 "legend": { 981 "alignAsTable": false, 982 "avg": false, 983 "current": false, 984 "hideEmpty": true, 985 "max": false, 986 "min": false, 987 "show": true, 988 "total": false, 989 "values": false 990 }, 991 "lines": true, 992 "linewidth": 2, 993 "links": [], 994 "nullPointMode": "connected", 995 "percentage": false, 996 "pointradius": 5, 997 "points": false, 998 "renderer": "flot", 999 "seriesOverrides": [], 1000 "span": 9, 1001 "stack": false, 1002 "steppedLine": false, 1003 "targets": [{ 1004 "expr": "prometheus_evaluator_duration_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", 1005 "interval": "", 1006 "intervalFactor": 2, 1007 "legendFormat": "{{quantile}}", 1008 "refId": "A", 1009 "step": 2 1010 }], 1011 "timeFrom": null, 1012 "timeShift": null, 1013 "title": "Rule Eval Duration", 1014 "tooltip": { 1015 "shared": true, 1016 "value_type": "cumulative", 1017 "ordering": "alphabetical", 1018 "msResolution": false 1019 }, 1020 "type": "graph", 1021 "yaxes": [{ 1022 "show": true, 1023 "min": null, 1024 "max": null, 1025 "logBase": 1, 1026 "format": "percentunit", 1027 "label": "" 
1028 }, { 1029 "show": true, 1030 "min": null, 1031 "max": null, 1032 "logBase": 1, 1033 "format": "short" 1034 }], 1035 "xaxis": { 1036 "show": true 1037 } 1038 }, { 1039 "content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.", 1040 "editable": true, 1041 "error": false, 1042 "id": 15, 1043 "links": [], 1044 "mode": "markdown", 1045 "span": 3, 1046 "style": {}, 1047 "title": "", 1048 "transparent": true, 1049 "type": "text" 1050 }], 1051 "title": "New row" 1052 }], 1053 "time": { 1054 "from": "now-5m", 1055 "to": "now" 1056 }, 1057 "timepicker": { 1058 "now": true, 1059 "refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"], 1060 "time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"] 1061 }, 1062 "templating": { 1063 "list": [] 1064 }, 1065 "annotations": { 1066 "list": [] 1067 }, 1068 "refresh": false, 1069 "schemaVersion": 12, 1070 "version": 0, 1071 "links": [{ 1072 "icon": "info", 1073 "tags": [], 1074 "targetBlank": true, 1075 "title": "Grafana Docs", 1076 "tooltip": "", 1077 "type": "link", 1078 "url": "http://www.grafana.org/docs" 1079 }, { 1080 "icon": "info", 1081 "tags": [], 1082 "targetBlank": true, 1083 "title": "Prometheus Docs", 1084 "type": "link", 1085 "url": "http://prometheus.io/docs/introduction/overview/" 1086 }], 1087 "gnetId": 2, 1088 "description": "The official, pre-built Prometheus Stats Dashboard." 
1089 } 1090 grafana-net-737-dashboard.json: | 1091 { 1092 "__inputs": [{ 1093 "name": "DS_PROMETHEUS", 1094 "label": "prometheus", 1095 "description": "", 1096 "type": "datasource", 1097 "pluginId": "prometheus", 1098 "pluginName": "Prometheus" 1099 }], 1100 "__requires": [{ 1101 "type": "panel", 1102 "id": "singlestat", 1103 "name": "Singlestat", 1104 "version": "" 1105 }, { 1106 "type": "panel", 1107 "id": "graph", 1108 "name": "Graph", 1109 "version": "" 1110 }, { 1111 "type": "grafana", 1112 "id": "grafana", 1113 "name": "Grafana", 1114 "version": "3.1.0" 1115 }, { 1116 "type": "datasource", 1117 "id": "prometheus", 1118 "name": "Prometheus", 1119 "version": "1.0.0" 1120 }], 1121 "id": null, 1122 "title": "Kubernetes Pod Resources", 1123 "description": "Shows resource usage of Kubernetes pods.", 1124 "tags": [ 1125 "kubernetes" 1126 ], 1127 "style": "dark", 1128 "timezone": "browser", 1129 "editable": true, 1130 "hideControls": false, 1131 "sharedCrosshair": false, 1132 "rows": [{ 1133 "collapse": false, 1134 "editable": true, 1135 "height": "250px", 1136 "panels": [{ 1137 "cacheTimeout": null, 1138 "colorBackground": false, 1139 "colorValue": true, 1140 "colors": [ 1141 "rgba(50, 172, 45, 0.97)", 1142 "rgba(237, 129, 40, 0.89)", 1143 "rgba(245, 54, 54, 0.9)" 1144 ], 1145 "datasource": "${DS_PROMETHEUS}", 1146 "editable": true, 1147 "error": false, 1148 "format": "percent", 1149 "gauge": { 1150 "maxValue": 100, 1151 "minValue": 0, 1152 "show": true, 1153 "thresholdLabels": false, 1154 "thresholdMarkers": true 1155 }, 1156 "height": "180px", 1157 "id": 4, 1158 "interval": null, 1159 "isNew": true, 1160 "links": [], 1161 "mappingType": 1, 1162 "mappingTypes": [{ 1163 "name": "value to text", 1164 "value": 1 1165 }, { 1166 "name": "range to text", 1167 "value": 2 1168 }], 1169 "maxDataPoints": 100, 1170 "nullPointMode": "connected", 1171 "nullText": null, 1172 "postfix": "", 1173 "postfixFontSize": "50%", 1174 "prefix": "", 1175 "prefixFontSize": "50%", 1176 
"rangeMaps": [{ 1177 "from": "null", 1178 "text": "N/A", 1179 "to": "null" 1180 }], 1181 "span": 4, 1182 "sparkline": { 1183 "fillColor": "rgba(31, 118, 189, 0.18)", 1184 "full": false, 1185 "lineColor": "rgb(31, 120, 193)", 1186 "show": false 1187 }, 1188 "targets": [{ 1189 "expr": "sum (container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum (machine_memory_bytes{instance=~\"^$instance$\"}) * 100", 1190 "interval": "", 1191 "intervalFactor": 2, 1192 "legendFormat": "", 1193 "refId": "A", 1194 "step": 2 1195 }], 1196 "thresholds": "65, 90", 1197 "timeFrom": "1m", 1198 "timeShift": null, 1199 "title": "Memory Working Set", 1200 "transparent": false, 1201 "type": "singlestat", 1202 "valueFontSize": "80%", 1203 "valueMaps": [{ 1204 "op": "=", 1205 "text": "N/A", 1206 "value": "null" 1207 }], 1208 "valueName": "current" 1209 }, { 1210 "cacheTimeout": null, 1211 "colorBackground": false, 1212 "colorValue": true, 1213 "colors": [ 1214 "rgba(50, 172, 45, 0.97)", 1215 "rgba(237, 129, 40, 0.89)", 1216 "rgba(245, 54, 54, 0.9)" 1217 ], 1218 "datasource": "${DS_PROMETHEUS}", 1219 "decimals": 2, 1220 "editable": true, 1221 "error": false, 1222 "format": "percent", 1223 "gauge": { 1224 "maxValue": 100, 1225 "minValue": 0, 1226 "show": true, 1227 "thresholdLabels": false, 1228 "thresholdMarkers": true 1229 }, 1230 "height": "180px", 1231 "id": 6, 1232 "interval": null, 1233 "isNew": true, 1234 "links": [], 1235 "mappingType": 1, 1236 "mappingTypes": [{ 1237 "name": "value to text", 1238 "value": 1 1239 }, { 1240 "name": "range to text", 1241 "value": 2 1242 }], 1243 "maxDataPoints": 100, 1244 "nullPointMode": "connected", 1245 "nullText": null, 1246 "postfix": "", 1247 "postfixFontSize": "50%", 1248 "prefix": "", 1249 "prefixFontSize": "50%", 1250 "rangeMaps": [{ 1251 "from": "null", 1252 "text": "N/A", 1253 "to": "null" 1254 }], 1255 "span": 4, 1256 "sparkline": { 1257 "fillColor": "rgba(31, 118, 189, 0.18)", 1258 "full": false, 1259 "lineColor": "rgb(31, 
120, 193)", 1260 "show": false 1261 }, 1262 "targets": [{ 1263 "expr": "sum(rate(container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m])) / sum (machine_cpu_cores{instance=~\"^$instance$\"}) * 100", 1264 "interval": "10s", 1265 "intervalFactor": 1, 1266 "refId": "A", 1267 "step": 10 1268 }], 1269 "thresholds": "65, 90", 1270 "timeFrom": "1m", 1271 "timeShift": null, 1272 "title": "Cpu Usage", 1273 "type": "singlestat", 1274 "valueFontSize": "80%", 1275 "valueMaps": [{ 1276 "op": "=", 1277 "text": "N/A", 1278 "value": "null" 1279 }], 1280 "valueName": "current" 1281 }, { 1282 "cacheTimeout": null, 1283 "colorBackground": false, 1284 "colorValue": true, 1285 "colors": [ 1286 "rgba(50, 172, 45, 0.97)", 1287 "rgba(237, 129, 40, 0.89)", 1288 "rgba(245, 54, 54, 0.9)" 1289 ], 1290 "datasource": "${DS_PROMETHEUS}", 1291 "decimals": 2, 1292 "editable": true, 1293 "error": false, 1294 "format": "percent", 1295 "gauge": { 1296 "maxValue": 100, 1297 "minValue": 0, 1298 "show": true, 1299 "thresholdLabels": false, 1300 "thresholdMarkers": true 1301 }, 1302 "height": "180px", 1303 "id": 7, 1304 "interval": null, 1305 "isNew": true, 1306 "links": [], 1307 "mappingType": 1, 1308 "mappingTypes": [{ 1309 "name": "value to text", 1310 "value": 1 1311 }, { 1312 "name": "range to text", 1313 "value": 2 1314 }], 1315 "maxDataPoints": 100, 1316 "nullPointMode": "connected", 1317 "nullText": null, 1318 "postfix": "", 1319 "postfixFontSize": "50%", 1320 "prefix": "", 1321 "prefixFontSize": "50%", 1322 "rangeMaps": [{ 1323 "from": "null", 1324 "text": "N/A", 1325 "to": "null" 1326 }], 1327 "span": 4, 1328 "sparkline": { 1329 "fillColor": "rgba(31, 118, 189, 0.18)", 1330 "full": false, 1331 "lineColor": "rgb(31, 120, 193)", 1332 "show": false 1333 }, 1334 "targets": [{ 1335 "expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"}) / sum(container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"}) * 100", 1336 "interval": "10s", 1337 "intervalFactor": 1, 
1338 "legendFormat": "", 1339 "metric": "", 1340 "refId": "A", 1341 "step": 10 1342 }], 1343 "thresholds": "65, 90", 1344 "timeFrom": "1m", 1345 "timeShift": null, 1346 "title": "Filesystem Usage", 1347 "type": "singlestat", 1348 "valueFontSize": "80%", 1349 "valueMaps": [{ 1350 "op": "=", 1351 "text": "N/A", 1352 "value": "null" 1353 }], 1354 "valueName": "current" 1355 }, { 1356 "cacheTimeout": null, 1357 "colorBackground": false, 1358 "colorValue": false, 1359 "colors": [ 1360 "rgba(50, 172, 45, 0.97)", 1361 "rgba(237, 129, 40, 0.89)", 1362 "rgba(245, 54, 54, 0.9)" 1363 ], 1364 "datasource": "${DS_PROMETHEUS}", 1365 "decimals": 2, 1366 "editable": true, 1367 "error": false, 1368 "format": "bytes", 1369 "gauge": { 1370 "maxValue": 100, 1371 "minValue": 0, 1372 "show": false, 1373 "thresholdLabels": false, 1374 "thresholdMarkers": true 1375 }, 1376 "height": "1px", 1377 "hideTimeOverride": true, 1378 "id": 9, 1379 "interval": null, 1380 "isNew": true, 1381 "links": [], 1382 "mappingType": 1, 1383 "mappingTypes": [{ 1384 "name": "value to text", 1385 "value": 1 1386 }, { 1387 "name": "range to text", 1388 "value": 2 1389 }], 1390 "maxDataPoints": 100, 1391 "nullPointMode": "connected", 1392 "nullText": null, 1393 "postfix": "", 1394 "postfixFontSize": "20%", 1395 "prefix": "", 1396 "prefixFontSize": "20%", 1397 "rangeMaps": [{ 1398 "from": "null", 1399 "text": "N/A", 1400 "to": "null" 1401 }], 1402 "span": 2, 1403 "sparkline": { 1404 "fillColor": "rgba(31, 118, 189, 0.18)", 1405 "full": false, 1406 "lineColor": "rgb(31, 120, 193)", 1407 "show": false 1408 }, 1409 "targets": [{ 1410 "expr": "sum(container_memory_working_set_bytes{id=\"/\",instance=~\"^$instance$\"})", 1411 "interval": "10s", 1412 "intervalFactor": 1, 1413 "refId": "A", 1414 "step": 10 1415 }], 1416 "thresholds": "", 1417 "timeFrom": "1m", 1418 "title": "Used", 1419 "type": "singlestat", 1420 "valueFontSize": "50%", 1421 "valueMaps": [{ 1422 "op": "=", 1423 "text": "N/A", 1424 "value": "null" 1425 
}], 1426 "valueName": "current" 1427 }, { 1428 "cacheTimeout": null, 1429 "colorBackground": false, 1430 "colorValue": false, 1431 "colors": [ 1432 "rgba(50, 172, 45, 0.97)", 1433 "rgba(237, 129, 40, 0.89)", 1434 "rgba(245, 54, 54, 0.9)" 1435 ], 1436 "datasource": "${DS_PROMETHEUS}", 1437 "decimals": 2, 1438 "editable": true, 1439 "error": false, 1440 "format": "bytes", 1441 "gauge": { 1442 "maxValue": 100, 1443 "minValue": 0, 1444 "show": false, 1445 "thresholdLabels": false, 1446 "thresholdMarkers": true 1447 }, 1448 "height": "1px", 1449 "hideTimeOverride": true, 1450 "id": 10, 1451 "interval": null, 1452 "isNew": true, 1453 "links": [], 1454 "mappingType": 1, 1455 "mappingTypes": [{ 1456 "name": "value to text", 1457 "value": 1 1458 }, { 1459 "name": "range to text", 1460 "value": 2 1461 }], 1462 "maxDataPoints": 100, 1463 "nullPointMode": "connected", 1464 "nullText": null, 1465 "postfix": "", 1466 "postfixFontSize": "50%", 1467 "prefix": "", 1468 "prefixFontSize": "50%", 1469 "rangeMaps": [{ 1470 "from": "null", 1471 "text": "N/A", 1472 "to": "null" 1473 }], 1474 "span": 2, 1475 "sparkline": { 1476 "fillColor": "rgba(31, 118, 189, 0.18)", 1477 "full": false, 1478 "lineColor": "rgb(31, 120, 193)", 1479 "show": false 1480 }, 1481 "targets": [{ 1482 "expr": "sum (machine_memory_bytes{instance=~\"^$instance$\"})", 1483 "interval": "10s", 1484 "intervalFactor": 1, 1485 "refId": "A", 1486 "step": 10 1487 }], 1488 "thresholds": "", 1489 "timeFrom": "1m", 1490 "title": "Total", 1491 "type": "singlestat", 1492 "valueFontSize": "50%", 1493 "valueMaps": [{ 1494 "op": "=", 1495 "text": "N/A", 1496 "value": "null" 1497 }], 1498 "valueName": "current" 1499 }, { 1500 "cacheTimeout": null, 1501 "colorBackground": false, 1502 "colorValue": false, 1503 "colors": [ 1504 "rgba(50, 172, 45, 0.97)", 1505 "rgba(237, 129, 40, 0.89)", 1506 "rgba(245, 54, 54, 0.9)" 1507 ], 1508 "datasource": "${DS_PROMETHEUS}", 1509 "decimals": 2, 1510 "editable": true, 1511 "error": false, 1512 
"format": "none", 1513 "gauge": { 1514 "maxValue": 100, 1515 "minValue": 0, 1516 "show": false, 1517 "thresholdLabels": false, 1518 "thresholdMarkers": true 1519 }, 1520 "height": "1px", 1521 "hideTimeOverride": true, 1522 "id": 11, 1523 "interval": null, 1524 "isNew": true, 1525 "links": [], 1526 "mappingType": 1, 1527 "mappingTypes": [{ 1528 "name": "value to text", 1529 "value": 1 1530 }, { 1531 "name": "range to text", 1532 "value": 2 1533 }], 1534 "maxDataPoints": 100, 1535 "nullPointMode": "connected", 1536 "nullText": null, 1537 "postfix": " cores", 1538 "postfixFontSize": "30%", 1539 "prefix": "", 1540 "prefixFontSize": "50%", 1541 "rangeMaps": [{ 1542 "from": "null", 1543 "text": "N/A", 1544 "to": "null" 1545 }], 1546 "span": 2, 1547 "sparkline": { 1548 "fillColor": "rgba(31, 118, 189, 0.18)", 1549 "full": false, 1550 "lineColor": "rgb(31, 120, 193)", 1551 "show": false 1552 }, 1553 "targets": [{ 1554 "expr": "sum (rate (container_cpu_usage_seconds_total{id=\"/\",instance=~\"^$instance$\"}[1m]))", 1555 "interval": "10s", 1556 "intervalFactor": 1, 1557 "refId": "A", 1558 "step": 10 1559 }], 1560 "thresholds": "", 1561 "timeFrom": "1m", 1562 "timeShift": null, 1563 "title": "Used", 1564 "type": "singlestat", 1565 "valueFontSize": "50%", 1566 "valueMaps": [{ 1567 "op": "=", 1568 "text": "N/A", 1569 "value": "null" 1570 }], 1571 "valueName": "current" 1572 }, { 1573 "cacheTimeout": null, 1574 "colorBackground": false, 1575 "colorValue": false, 1576 "colors": [ 1577 "rgba(50, 172, 45, 0.97)", 1578 "rgba(237, 129, 40, 0.89)", 1579 "rgba(245, 54, 54, 0.9)" 1580 ], 1581 "datasource": "${DS_PROMETHEUS}", 1582 "decimals": 2, 1583 "editable": true, 1584 "error": false, 1585 "format": "none", 1586 "gauge": { 1587 "maxValue": 100, 1588 "minValue": 0, 1589 "show": false, 1590 "thresholdLabels": false, 1591 "thresholdMarkers": true 1592 }, 1593 "height": "1px", 1594 "hideTimeOverride": true, 1595 "id": 12, 1596 "interval": null, 1597 "isNew": true, 1598 "links": [], 1599 
"mappingType": 1, 1600 "mappingTypes": [{ 1601 "name": "value to text", 1602 "value": 1 1603 }, { 1604 "name": "range to text", 1605 "value": 2 1606 }], 1607 "maxDataPoints": 100, 1608 "nullPointMode": "connected", 1609 "nullText": null, 1610 "postfix": " cores", 1611 "postfixFontSize": "30%", 1612 "prefix": "", 1613 "prefixFontSize": "50%", 1614 "rangeMaps": [{ 1615 "from": "null", 1616 "text": "N/A", 1617 "to": "null" 1618 }], 1619 "span": 2, 1620 "sparkline": { 1621 "fillColor": "rgba(31, 118, 189, 0.18)", 1622 "full": false, 1623 "lineColor": "rgb(31, 120, 193)", 1624 "show": false 1625 }, 1626 "targets": [{ 1627 "expr": "sum (machine_cpu_cores{instance=~\"^$instance$\"})", 1628 "interval": "10s", 1629 "intervalFactor": 1, 1630 "refId": "A", 1631 "step": 10 1632 }], 1633 "thresholds": "", 1634 "timeFrom": "1m", 1635 "title": "Total", 1636 "type": "singlestat", 1637 "valueFontSize": "50%", 1638 "valueMaps": [{ 1639 "op": "=", 1640 "text": "N/A", 1641 "value": "null" 1642 }], 1643 "valueName": "current" 1644 }, { 1645 "cacheTimeout": null, 1646 "colorBackground": false, 1647 "colorValue": false, 1648 "colors": [ 1649 "rgba(50, 172, 45, 0.97)", 1650 "rgba(237, 129, 40, 0.89)", 1651 "rgba(245, 54, 54, 0.9)" 1652 ], 1653 "datasource": "${DS_PROMETHEUS}", 1654 "decimals": 2, 1655 "editable": true, 1656 "error": false, 1657 "format": "bytes", 1658 "gauge": { 1659 "maxValue": 100, 1660 "minValue": 0, 1661 "show": false, 1662 "thresholdLabels": false, 1663 "thresholdMarkers": true 1664 }, 1665 "height": "1px", 1666 "hideTimeOverride": true, 1667 "id": 13, 1668 "interval": null, 1669 "isNew": true, 1670 "links": [], 1671 "mappingType": 1, 1672 "mappingTypes": [{ 1673 "name": "value to text", 1674 "value": 1 1675 }, { 1676 "name": "range to text", 1677 "value": 2 1678 }], 1679 "maxDataPoints": 100, 1680 "nullPointMode": "connected", 1681 "nullText": null, 1682 "postfix": "", 1683 "postfixFontSize": "50%", 1684 "prefix": "", 1685 "prefixFontSize": "50%", 1686 "rangeMaps": 
[{ 1687 "from": "null", 1688 "text": "N/A", 1689 "to": "null" 1690 }], 1691 "span": 2, 1692 "sparkline": { 1693 "fillColor": "rgba(31, 118, 189, 0.18)", 1694 "full": false, 1695 "lineColor": "rgb(31, 120, 193)", 1696 "show": false 1697 }, 1698 "targets": [{ 1699 "expr": "sum(container_fs_usage_bytes{id=\"/\",instance=~\"^$instance$\"})", 1700 "interval": "10s", 1701 "intervalFactor": 1, 1702 "refId": "A", 1703 "step": 10 1704 }], 1705 "thresholds": "", 1706 "timeFrom": "1m", 1707 "title": "Used", 1708 "type": "singlestat", 1709 "valueFontSize": "50%", 1710 "valueMaps": [{ 1711 "op": "=", 1712 "text": "N/A", 1713 "value": "null" 1714 }], 1715 "valueName": "current" 1716 }, { 1717 "cacheTimeout": null, 1718 "colorBackground": false, 1719 "colorValue": false, 1720 "colors": [ 1721 "rgba(50, 172, 45, 0.97)", 1722 "rgba(237, 129, 40, 0.89)", 1723 "rgba(245, 54, 54, 0.9)" 1724 ], 1725 "datasource": "${DS_PROMETHEUS}", 1726 "decimals": 2, 1727 "editable": true, 1728 "error": false, 1729 "format": "bytes", 1730 "gauge": { 1731 "maxValue": 100, 1732 "minValue": 0, 1733 "show": false, 1734 "thresholdLabels": false, 1735 "thresholdMarkers": true 1736 }, 1737 "height": "1px", 1738 "hideTimeOverride": true, 1739 "id": 14, 1740 "interval": null, 1741 "isNew": true, 1742 "links": [], 1743 "mappingType": 1, 1744 "mappingTypes": [{ 1745 "name": "value to text", 1746 "value": 1 1747 }, { 1748 "name": "range to text", 1749 "value": 2 1750 }], 1751 "maxDataPoints": 100, 1752 "nullPointMode": "connected", 1753 "nullText": null, 1754 "postfix": "", 1755 "postfixFontSize": "50%", 1756 "prefix": "", 1757 "prefixFontSize": "50%", 1758 "rangeMaps": [{ 1759 "from": "null", 1760 "text": "N/A", 1761 "to": "null" 1762 }], 1763 "span": 2, 1764 "sparkline": { 1765 "fillColor": "rgba(31, 118, 189, 0.18)", 1766 "full": false, 1767 "lineColor": "rgb(31, 120, 193)", 1768 "show": false 1769 }, 1770 "targets": [{ 1771 "expr": "sum (container_fs_limit_bytes{id=\"/\",instance=~\"^$instance$\"})", 1772 
"interval": "10s", 1773 "intervalFactor": 1, 1774 "refId": "A", 1775 "step": 10 1776 }], 1777 "thresholds": "", 1778 "timeFrom": "1m", 1779 "title": "Total", 1780 "type": "singlestat", 1781 "valueFontSize": "50%", 1782 "valueMaps": [{ 1783 "op": "=", 1784 "text": "N/A", 1785 "value": "null" 1786 }], 1787 "valueName": "current" 1788 }, { 1789 "aliasColors": {}, 1790 "bars": false, 1791 "datasource": "${DS_PROMETHEUS}", 1792 "decimals": 2, 1793 "editable": true, 1794 "error": false, 1795 "fill": 1, 1796 "grid": { 1797 "threshold1": null, 1798 "threshold1Color": "rgba(216, 200, 27, 0.27)", 1799 "threshold2": null, 1800 "threshold2Color": "rgba(234, 112, 112, 0.22)", 1801 "thresholdLine": false 1802 }, 1803 "height": "200px", 1804 "id": 32, 1805 "isNew": true, 1806 "legend": { 1807 "alignAsTable": true, 1808 "avg": true, 1809 "current": true, 1810 "max": false, 1811 "min": false, 1812 "rightSide": true, 1813 "show": true, 1814 "sideWidth": 200, 1815 "sort": "current", 1816 "sortDesc": true, 1817 "total": false, 1818 "values": true 1819 }, 1820 "lines": true, 1821 "linewidth": 2, 1822 "links": [], 1823 "nullPointMode": "connected", 1824 "percentage": false, 1825 "pointradius": 5, 1826 "points": false, 1827 "renderer": "flot", 1828 "seriesOverrides": [], 1829 "span": 12, 1830 "stack": false, 1831 "steppedLine": false, 1832 "targets": [{ 1833 "expr": "sum(rate(container_network_receive_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))", 1834 "interval": "", 1835 "intervalFactor": 2, 1836 "legendFormat": "receive", 1837 "metric": "network", 1838 "refId": "A", 1839 "step": 240 1840 }, { 1841 "expr": "- sum(rate(container_network_transmit_bytes_total{instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m]))", 1842 "interval": "", 1843 "intervalFactor": 2, 1844 "legendFormat": "transmit", 1845 "metric": "network", 1846 "refId": "B", 1847 "step": 240 1848 }], 1849 "timeFrom": null, 1850 "timeShift": null, 1851 "title": "Network", 1852 "tooltip": { 
1853 "msResolution": false, 1854 "shared": true, 1855 "sort": 0, 1856 "value_type": "cumulative" 1857 }, 1858 "transparent": false, 1859 "type": "graph", 1860 "xaxis": { 1861 "show": true 1862 }, 1863 "yaxes": [{ 1864 "format": "Bps", 1865 "label": "transmit / receive", 1866 "logBase": 1, 1867 "max": null, 1868 "min": null, 1869 "show": true 1870 }, { 1871 "format": "Bps", 1872 "label": null, 1873 "logBase": 1, 1874 "max": null, 1875 "min": null, 1876 "show": false 1877 }] 1878 }], 1879 "showTitle": true, 1880 "title": "all pods" 1881 }, { 1882 "collapse": false, 1883 "editable": true, 1884 "height": "250px", 1885 "panels": [{ 1886 "aliasColors": {}, 1887 "bars": false, 1888 "datasource": "${DS_PROMETHEUS}", 1889 "decimals": 3, 1890 "editable": true, 1891 "error": false, 1892 "fill": 0, 1893 "grid": { 1894 "threshold1": null, 1895 "threshold1Color": "rgba(216, 200, 27, 0.27)", 1896 "threshold2": null, 1897 "threshold2Color": "rgba(234, 112, 112, 0.22)" 1898 }, 1899 "height": "", 1900 "id": 17, 1901 "isNew": true, 1902 "legend": { 1903 "alignAsTable": true, 1904 "avg": true, 1905 "current": true, 1906 "hideEmpty": true, 1907 "hideZero": true, 1908 "max": false, 1909 "min": false, 1910 "rightSide": true, 1911 "show": true, 1912 "sideWidth": null, 1913 "sort": "current", 1914 "sortDesc": true, 1915 "total": false, 1916 "values": true 1917 }, 1918 "lines": true, 1919 "linewidth": 2, 1920 "links": [], 1921 "nullPointMode": "connected", 1922 "percentage": false, 1923 "pointradius": 5, 1924 "points": false, 1925 "renderer": "flot", 1926 "seriesOverrides": [], 1927 "span": 12, 1928 "stack": false, 1929 "steppedLine": false, 1930 "targets": [{ 1931 "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)", 1932 "interval": "", 1933 "intervalFactor": 2, 1934 "legendFormat": "{{ pod_name }}", 1935 "metric": "container_cpu", 1936 "refId": "A", 1937 "step": 240 1938 }], 1939 
"timeFrom": null, 1940 "timeShift": null, 1941 "title": "Cpu Usage", 1942 "tooltip": { 1943 "msResolution": true, 1944 "shared": false, 1945 "sort": 2, 1946 "value_type": "cumulative" 1947 }, 1948 "transparent": false, 1949 "type": "graph", 1950 "xaxis": { 1951 "show": true 1952 }, 1953 "yaxes": [{ 1954 "format": "none", 1955 "label": "cores", 1956 "logBase": 1, 1957 "max": null, 1958 "min": null, 1959 "show": true 1960 }, { 1961 "format": "short", 1962 "label": null, 1963 "logBase": 1, 1964 "max": null, 1965 "min": null, 1966 "show": false 1967 }] 1968 }, { 1969 "aliasColors": {}, 1970 "bars": false, 1971 "datasource": "${DS_PROMETHEUS}", 1972 "decimals": 2, 1973 "editable": true, 1974 "error": false, 1975 "fill": 0, 1976 "grid": { 1977 "threshold1": null, 1978 "threshold1Color": "rgba(216, 200, 27, 0.27)", 1979 "threshold2": null, 1980 "threshold2Color": "rgba(234, 112, 112, 0.22)" 1981 }, 1982 "id": 33, 1983 "isNew": true, 1984 "legend": { 1985 "alignAsTable": true, 1986 "avg": true, 1987 "current": true, 1988 "hideEmpty": true, 1989 "hideZero": true, 1990 "max": false, 1991 "min": false, 1992 "rightSide": true, 1993 "show": true, 1994 "sideWidth": null, 1995 "sort": "current", 1996 "sortDesc": true, 1997 "total": false, 1998 "values": true 1999 }, 2000 "lines": true, 2001 "linewidth": 2, 2002 "links": [], 2003 "nullPointMode": "null", 2004 "percentage": false, 2005 "pointradius": 5, 2006 "points": false, 2007 "renderer": "flot", 2008 "seriesOverrides": [], 2009 "span": 12, 2010 "stack": false, 2011 "steppedLine": false, 2012 "targets": [{ 2013 "expr": "sum (container_memory_working_set_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)", 2014 "interval": "", 2015 "intervalFactor": 2, 2016 "legendFormat": "{{ pod_name }}", 2017 "metric": "", 2018 "refId": "A", 2019 "step": 240 2020 }], 2021 "timeFrom": null, 2022 "timeShift": null, 2023 "title": "Memory Working Set", 2024 "tooltip": { 2025 "msResolution": 
false, 2026 "shared": false, 2027 "sort": 2, 2028 "value_type": "cumulative" 2029 }, 2030 "type": "graph", 2031 "xaxis": { 2032 "show": true 2033 }, 2034 "yaxes": [{ 2035 "format": "bytes", 2036 "label": "used", 2037 "logBase": 1, 2038 "max": null, 2039 "min": null, 2040 "show": true 2041 }, { 2042 "format": "short", 2043 "label": null, 2044 "logBase": 1, 2045 "max": null, 2046 "min": null, 2047 "show": false 2048 }] 2049 }, { 2050 "aliasColors": {}, 2051 "bars": false, 2052 "datasource": "${DS_PROMETHEUS}", 2053 "decimals": 2, 2054 "editable": true, 2055 "error": false, 2056 "fill": 1, 2057 "grid": { 2058 "threshold1": null, 2059 "threshold1Color": "rgba(216, 200, 27, 0.27)", 2060 "threshold2": null, 2061 "threshold2Color": "rgba(234, 112, 112, 0.22)" 2062 }, 2063 "id": 16, 2064 "isNew": true, 2065 "legend": { 2066 "alignAsTable": true, 2067 "avg": true, 2068 "current": true, 2069 "hideEmpty": true, 2070 "hideZero": true, 2071 "max": false, 2072 "min": false, 2073 "rightSide": true, 2074 "show": true, 2075 "sideWidth": 200, 2076 "sort": "avg", 2077 "sortDesc": true, 2078 "total": false, 2079 "values": true 2080 }, 2081 "lines": true, 2082 "linewidth": 2, 2083 "links": [], 2084 "nullPointMode": "null", 2085 "percentage": false, 2086 "pointradius": 5, 2087 "points": false, 2088 "renderer": "flot", 2089 "seriesOverrides": [], 2090 "span": 12, 2091 "stack": false, 2092 "steppedLine": false, 2093 "targets": [{ 2094 "expr": "sum (rate (container_network_receive_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)", 2095 "interval": "", 2096 "intervalFactor": 2, 2097 "legendFormat": "{{ pod_name }} < in", 2098 "metric": "network", 2099 "refId": "A", 2100 "step": 240 2101 }, { 2102 "expr": "- sum (rate (container_network_transmit_bytes_total{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}[1m])) by (pod_name)", 2103 "interval": "", 2104 "intervalFactor": 2, 2105 
"legendFormat": "{{ pod_name }} > out", 2106 "metric": "network", 2107 "refId": "B", 2108 "step": 240 2109 }], 2110 "timeFrom": null, 2111 "timeShift": null, 2112 "title": "Network", 2113 "tooltip": { 2114 "msResolution": false, 2115 "shared": false, 2116 "sort": 2, 2117 "value_type": "cumulative" 2118 }, 2119 "type": "graph", 2120 "xaxis": { 2121 "show": true 2122 }, 2123 "yaxes": [{ 2124 "format": "Bps", 2125 "label": "transmit / receive", 2126 "logBase": 1, 2127 "max": null, 2128 "min": null, 2129 "show": true 2130 }, { 2131 "format": "short", 2132 "label": null, 2133 "logBase": 1, 2134 "max": null, 2135 "min": null, 2136 "show": false 2137 }] 2138 }, { 2139 "aliasColors": {}, 2140 "bars": false, 2141 "datasource": "${DS_PROMETHEUS}", 2142 "decimals": 2, 2143 "editable": true, 2144 "error": false, 2145 "fill": 1, 2146 "grid": { 2147 "threshold1": null, 2148 "threshold1Color": "rgba(216, 200, 27, 0.27)", 2149 "threshold2": null, 2150 "threshold2Color": "rgba(234, 112, 112, 0.22)" 2151 }, 2152 "id": 34, 2153 "isNew": true, 2154 "legend": { 2155 "alignAsTable": true, 2156 "avg": true, 2157 "current": true, 2158 "hideEmpty": true, 2159 "hideZero": true, 2160 "max": false, 2161 "min": false, 2162 "rightSide": true, 2163 "show": true, 2164 "sideWidth": 200, 2165 "sort": "current", 2166 "sortDesc": true, 2167 "total": false, 2168 "values": true 2169 }, 2170 "lines": true, 2171 "linewidth": 2, 2172 "links": [], 2173 "nullPointMode": "null", 2174 "percentage": false, 2175 "pointradius": 5, 2176 "points": false, 2177 "renderer": "flot", 2178 "seriesOverrides": [], 2179 "span": 12, 2180 "stack": false, 2181 "steppedLine": false, 2182 "targets": [{ 2183 "expr": "sum(container_fs_usage_bytes{image!=\"\",name=~\"^k8s_.*\",instance=~\"^$instance$\",namespace=~\"^$namespace$\"}) by (pod_name)", 2184 "interval": "", 2185 "intervalFactor": 2, 2186 "legendFormat": "{{ pod_name }}", 2187 "metric": "network", 2188 "refId": "A", 2189 "step": 240 2190 }], 2191 "timeFrom": null, 2192 
"timeShift": null, 2193 "title": "Filesystem", 2194 "tooltip": { 2195 "msResolution": false, 2196 "shared": false, 2197 "sort": 2, 2198 "value_type": "cumulative" 2199 }, 2200 "type": "graph", 2201 "xaxis": { 2202 "show": true 2203 }, 2204 "yaxes": [{ 2205 "format": "bytes", 2206 "label": "used", 2207 "logBase": 1, 2208 "max": null, 2209 "min": null, 2210 "show": true 2211 }, { 2212 "format": "short", 2213 "label": null, 2214 "logBase": 1, 2215 "max": null, 2216 "min": null, 2217 "show": false 2218 }] 2219 }], 2220 "showTitle": true, 2221 "title": "each pod" 2222 }], 2223 "time": { 2224 "from": "now-3d", 2225 "to": "now" 2226 }, 2227 "timepicker": { 2228 "refresh_intervals": [ 2229 "5s", 2230 "10s", 2231 "30s", 2232 "1m", 2233 "5m", 2234 "15m", 2235 "30m", 2236 "1h", 2237 "2h", 2238 "1d" 2239 ], 2240 "time_options": [ 2241 "5m", 2242 "15m", 2243 "1h", 2244 "6h", 2245 "12h", 2246 "24h", 2247 "2d", 2248 "7d", 2249 "30d" 2250 ] 2251 }, 2252 "templating": { 2253 "list": [{ 2254 "allValue": ".*", 2255 "current": {}, 2256 "datasource": "${DS_PROMETHEUS}", 2257 "hide": 0, 2258 "includeAll": true, 2259 "label": "Instance", 2260 "multi": false, 2261 "name": "instance", 2262 "options": [], 2263 "query": "label_values(instance)", 2264 "refresh": 1, 2265 "regex": "", 2266 "type": "query" 2267 }, { 2268 "current": {}, 2269 "datasource": "${DS_PROMETHEUS}", 2270 "hide": 0, 2271 "includeAll": true, 2272 "label": "Namespace", 2273 "multi": true, 2274 "name": "namespace", 2275 "options": [], 2276 "query": "label_values(namespace)", 2277 "refresh": 1, 2278 "regex": "", 2279 "type": "query" 2280 }] 2281 }, 2282 "annotations": { 2283 "list": [] 2284 }, 2285 "refresh": false, 2286 "schemaVersion": 12, 2287 "version": 8, 2288 "links": [], 2289 "gnetId": 737 2290 } 2291 prometheus-datasource.json: | 2292 { 2293 "name": "prometheus", 2294 "type": "prometheus", 2295 "url": "http://prometheus:9090", 2296 "access": "proxy", 2297 "basicAuth": false 2298 } 2299 kind: ConfigMap 2300 metadata: 
# (continued) metadata of the grafana-import-dashboards ConfigMap
  creationTimestamp: null
  name: grafana-import-dashboards
  namespace: monitoring
---
# One-shot Job that loads the files of the grafana-import-dashboards ConfigMap
# into Grafana over its HTTP API: first every *-datasource.json, then every
# *-dashboard.json. An init container blocks until Grafana answers with 200.
apiVersion: batch/v1
kind: Job
metadata:
  name: grafana-import-dashboards
  namespace: monitoring
  labels:
    app: grafana
    component: import-dashboards
spec:
  template:
    metadata:
      name: grafana-import-dashboards
      labels:
        app: grafana
        component: import-dashboards
    spec:
      serviceAccountName: prometheus-k8s
      initContainers:
        - name: wait-for-grafana
          image: giantswarm/tiny-tools
          args:
            - /bin/sh
            - -c
            # Literal (|) scalar so every shell line break is preserved; each
            # statement is also terminated with ';' so `echo` and `sleep` can
            # never be merged into a single command.
            - |
              set -x;
              while [ $(curl -Lsw '%{http_code}' "http://grafana:3000" -o /dev/null) -ne 200 ]; do
                echo '.';
                sleep 15;
              done
      containers:
        - name: grafana-import-dashboards
          image: giantswarm/tiny-tools
          command: ["/bin/sh", "-c"]
          # The ConfigMap is mounted here, so the globs below see its files.
          workingDir: /opt/grafana-import-dashboards
          args:
            # Literal (|) scalar: the backslash continuations below rely on
            # the line breaks being kept exactly as written.
            - |
              for file in *-datasource.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  curl --silent --fail --show-error \
                    --request POST http://${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}@grafana:3000/api/datasources \
                    --header "Content-Type: application/json" \
                    --data-binary "@$file" ;
                  echo "" ;
                fi
              done ;
              for file in *-dashboard.json ; do
                if [ -e "$file" ] ; then
                  echo "importing $file" &&
                  ( echo '{"dashboard":'; \
                    cat "$file"; \
                    echo ',"overwrite":true,"inputs":[{"name":"DS_PROMETHEUS","type":"datasource","pluginId":"prometheus","value":"prometheus"}]}' ) \
                  | jq -c '.' \
                  | curl --silent --fail --show-error \
                    --request POST http://${GF_ADMIN_USER}:${GF_ADMIN_PASSWORD}@grafana:3000/api/dashboards/import \
                    --header "Content-Type: application/json" \
                    --data-binary "@-" ;
                  echo "" ;
                fi
              done
          env:
            # Grafana admin credentials, read from the `grafana` Secret.
            - name: GF_ADMIN_USER
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-username
            - name: GF_ADMIN_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: grafana
                  key: admin-password
          volumeMounts:
            - name: config-volume
              mountPath: /opt/grafana-import-dashboards
      restartPolicy: Never
      volumes:
        - name: config-volume
          configMap:
            name: grafana-import-dashboards
---
# Optional Ingress for Grafana, left disabled.
# apiVersion: extensions/v1beta1
# kind: Ingress
# metadata:
#   name: grafana
#   namespace: monitoring
# spec:
#   rules:
#   - host: <yourchoice>.<cluster-id>.k8s.gigantic.io
#     http:
#       paths:
#       - path: /
#         backend:
#           serviceName: grafana
#           servicePort: 3000
---
# WARNING: both values are base64 for "admin". These are default credentials
# committed in plain sight, and the grafana Service below is of type NodePort;
# replace them before exposing this deployment.
apiVersion: v1
kind: Secret
data:
  admin-password: YWRtaW4=
  admin-username: YWRtaW4=
metadata:
  name: grafana
  namespace: monitoring
type: Opaque
---
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: monitoring
  labels:
    app: grafana
    component: core
spec:
  type: NodePort
  ports:
    - port: 3000
  selector:
    app: grafana
    component: core
---
# Prometheus server configuration. Scrape jobs defined below:
# kubernetes-nodes, kubernetes-endpoints, kubernetes-services,
# kubernetes-pods and kubernetes-cadvisor.
# The kubernetes-cadvisor job previously started its relabel_configs with a
# bare `- action: labelmap` entry (no regex) directly in front of the real
# labelmap rule; that duplicate has been dropped.
apiVersion: v1
data:
  prometheus.yaml: |
    global:
      scrape_interval: 10s
      scrape_timeout: 10s
      evaluation_interval: 10s
    rule_files:
      - "/etc/prometheus-rules/*.rules"
    scrape_configs:

      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L37
      - job_name: 'kubernetes-nodes'
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:10255'
            target_label: __address__

      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L79
      - job_name: 'kubernetes-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: (.+)(?::\d+);(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name

      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L119
      - job_name: 'kubernetes-services'
        metrics_path: /probe
        params:
          module: [http_2xx]
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name

      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L156
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: (.+):(?:\d+);(\d+)
            replacement: ${1}:${2}
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
          - source_labels: [__meta_kubernetes_pod_container_port_number]
            action: keep
            regex: 9\d{3}

      - job_name: 'kubernetes-cadvisor'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: prometheus-core
  namespace: monitoring
---
# Prometheus server. Mounts the prometheus-core ConfigMap at /etc/prometheus
# and the prometheus-rules ConfigMap at /etc/prometheus-rules.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-core
  namespace: monitoring
  labels:
    app: prometheus
    component: core
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      # `app: prometheus` alone is also carried by the pods of the
      # prometheus-node-exporter DaemonSet in this file; `component` keeps the
      # selector restricted to this Deployment's own pods.
      # NOTE: spec.selector is immutable in apps/v1 - an already-deployed
      # prometheus-core must be deleted and re-created to pick this up.
      component: core
  template:
    metadata:
      name: prometheus-main
      labels:
        app: prometheus
        component: core
    spec:
      serviceAccountName: prometheus-k8s
      containers:
        - name: prometheus
          image: prom/prometheus:v1.7.0
          args:
            - '-storage.local.retention=12h'
            - '-storage.local.memory-chunks=500000'
            - '-config.file=/etc/prometheus/prometheus.yaml'
            - '-alertmanager.url=http://alertmanager:9093/'
          ports:
            - name: webui
              containerPort: 9090
          resources:
            requests:
              cpu: 500m
              memory: 500M
            limits:
              cpu: 500m
              memory: 500M
          volumeMounts:
            - name: config-volume
              mountPath: /etc/prometheus
            - name: rules-volume
              mountPath: /etc/prometheus-rules
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-core
        - name: rules-volume
          configMap:
            name: prometheus-rules
---
# kube-state-metrics, listening on container port 8080 and running under the
# kube-state-metrics ServiceAccount.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app: kube-state-metrics
spec:
  replicas: 1
  selector:
    matchLabels:
      app: kube-state-metrics
  template:
    metadata:
      labels:
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: gcr.io/google_containers/kube-state-metrics:v0.5.0
          ports:
            - containerPort: 8080
---
# ---
# apiVersion: rbac.authorization.k8s.io/v1beta1
# kind: ClusterRoleBinding
# metadata:
#   name: kube-state-metrics
# roleRef:
#   apiGroup: rbac.authorization.k8s.io
#   kind: ClusterRole
#   name: kube-state-metrics
# subjects:
# - kind: ServiceAccount
#   name: kube-state-metrics
#   namespace: monitoring
# ---
# apiVersion: rbac.authorization.k8s.io/v1beta1
# kind: ClusterRole
# metadata:
#   name: kube-state-metrics
# rules:
# - apiGroups: [""]
#   resources:
#   - nodes
#   - pods
#   - services
#   - resourcequotas
#   - replicationcontrollers
#   - limitranges
#   verbs: ["list", "watch"]
# - apiGroups: ["apps"]
#   resources:
#   - daemonsets
#   - deployments
#   - replicasets
#   verbs: ["list", "watch"]
# ---
# NOTE(review): the ClusterRole / ClusterRoleBinding for kube-state-metrics
# are commented out here, so nothing in this part of the file grants the
# ServiceAccount below any permissions - confirm that this is intended.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: monitoring
  name: kube-state-metrics
---
# Service in front of kube-state-metrics; the prometheus.io/scrape annotation
# opts it into the annotation-driven `kubernetes-endpoints` scrape job.
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: kube-state-metrics
  labels:
    app: kube-state-metrics
  annotations:
    prometheus.io/scrape: "true"
spec:
  selector:
    app: kube-state-metrics
  ports:
    - name: kube-state-metrics
      protocol: TCP
      port: 8080

---
# Per-node exporter of directory sizes (see the description annotation).
apiVersion: apps/v1
kind: DaemonSet
metadata:
  namespace: monitoring
  name: node-directory-size-metrics
  labels:
    app: node-directory-size-metrics
  annotations:
    description: |
      This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes.
      The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
      The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102` which are the defaults for Prometheus.
      These are scheduled on every node in the Kubernetes cluster.
      To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`.
spec:
  selector:
    matchLabels:
      app: node-directory-size-metrics
  template:
    metadata:
      labels:
        app: node-directory-size-metrics
      annotations:
        prometheus.io/scrape: 'true'
        prometheus.io/port: '9102'
        description: |
          This `Pod` provides metrics in Prometheus format about disk usage on the node.
          The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
          The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus.
          This `Pod` is scheduled on every node in the Kubernetes cluster.
          To choose directories from the node to check just mount them on `read-du` below `/mnt`.
    spec:
      containers:
        - name: read-du
          image: giantswarm/tiny-tools
          imagePullPolicy: Always
          # FIXME: make the du threshold (currently 100M) configurable via an
          # env var.
          command:
            - fish
            - --command
            # Every 300 s: collect `du` output into a temp file, then move it
            # over /tmp/metrics. The temp file is (re)created at the start of
            # each cycle so that `mv` still has a file to move when `du`
            # reports nothing; creating it only once before the loop made the
            # `mv` of later empty cycles fail and left stale metrics in place.
            - |
              while true
                touch /tmp/metrics-temp
                for directory in (du --bytes --separate-dirs --threshold=100M /mnt)
                  echo $directory | read size path
                  echo "node_directory_size_bytes{path=\"$path\"} $size" \
                    >> /tmp/metrics-temp
                end
                mv /tmp/metrics-temp /tmp/metrics
                sleep 300
              end
          volumeMounts:
            - name: host-fs-var
              mountPath: /mnt/var
              readOnly: true
            - name: metrics
              mountPath: /tmp
        - name: caddy
          image: dockermuenster/caddy:0.9.3
          command:
            - "caddy"
            - "-port=9102"
            - "-root=/var/www"
          ports:
            - containerPort: 9102
          volumeMounts:
            # Same volume that read-du mounts at /tmp.
            - name: metrics
              mountPath: /var/www
      volumes:
        - name: host-fs-var
          hostPath:
            path: /var
        - name: metrics
          emptyDir:
            medium: Memory
---
# node-exporter on every node, using the host network and host PID namespace.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app: prometheus
    component: node-exporter
spec:
  selector:
    matchLabels:
      app: prometheus
      # `app: prometheus` alone is also carried by the prometheus-core
      # Deployment's pods; `component` keeps the selector restricted to this
      # DaemonSet's own pods.
      # NOTE: spec.selector is immutable in apps/v1 - an already-deployed
      # DaemonSet must be deleted and re-created to pick this up.
      component: node-exporter
  template:
    metadata:
      name: prometheus-node-exporter
      labels:
        app: prometheus
        component: node-exporter
    spec:
      containers:
        - image: prom/node-exporter:v0.14.0
          name: prometheus-node-exporter
          ports:
            # The port name must be an IANA_SVC_NAME (at most 15 characters).
            - name: prom-node-exp
              containerPort: 9100
              hostPort: 9100
      hostNetwork: true
      hostPID: true
---
# Headless Service over the node-exporter pods, annotated for scraping.
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/scrape: 'true'
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app: prometheus
    component: node-exporter
spec:
  clusterIP: None
  ports:
    - name: prometheus-node-exporter
      port: 9100
      protocol: TCP
  selector:
    app: prometheus
    component: node-exporter
  type: ClusterIP
---
# Alerting rules; the prometheus-core Deployment mounts this ConfigMap at
# /etc/prometheus-rules.
apiVersion: v1
data:
  # NOTE(review): the matcher `name="node-exporter"` needs a `name` label on
  # the node_cpu series; the Services and pods defined in this file only carry
  # `app` / `component` labels - verify that this alert can actually fire.
  cpu-usage.rules: |
    ALERT NodeCPUUsage
      IF (100 - (avg by (instance) (irate(node_cpu{name="node-exporter",mode="idle"}[5m])) * 100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High CPU usage detected",
        DESCRIPTION = "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})"
      }
  instance-availability.rules: |
    ALERT InstanceDown
      IF up == 0
      FOR 1m
      LABELS { severity = "page" }
      ANNOTATIONS {
        summary = "Instance {{ $labels.instance }} down",
        description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
      }
  low-disk-space.rules: |
    ALERT NodeLowRootDisk
      IF ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low root disk space",
        DESCRIPTION = "{{$labels.instance}}: Root disk usage is above 75% (current value is: {{ $value }})"
      }

    ALERT NodeLowDataDisk
      IF ((node_filesystem_size{mountpoint="/data-disk"} - node_filesystem_free{mountpoint="/data-disk"} ) / node_filesystem_size{mountpoint="/data-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low data disk space",
        DESCRIPTION = "{{$labels.instance}}: Data disk usage is above 75% (current value is: {{ $value }})"
      }
  mem-usage.rules: |
    ALERT NodeSwapUsage
      IF (((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Swap usage detected",
        DESCRIPTION = "{{$labels.instance}}: Swap usage is above 75% (current value is: {{ $value }})"
      }

    ALERT NodeMemoryUsage
      IF (((node_memory_MemTotal-node_memory_MemAvailable)/(node_memory_MemTotal)*100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High memory usage detected",
        DESCRIPTION = "{{$labels.instance}}: Memory usage is above 75% (current value is: {{ $value }})"
      }
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: prometheus-rules
  namespace: monitoring
---
# NodePort Service for the Prometheus web UI (port 9090, named `webui`).
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
    component: core
  annotations:
    prometheus.io/scrape: 'true'
spec:
  type: NodePort
  ports:
    - port: 9090
      protocol: TCP
      name: webui
  selector:
    app: prometheus
    component: core