# Monitoring configuration for the machine-controller-manager (MCM), rendered
# into the Helm release namespace as a single ConfigMap.
#
# The ConfigMap carries three data keys:
#   scrape_config        - a Prometheus scrape job for the MCM metrics endpoint
#   alerting_rules       - one Prometheus rule file with an "MCM down" alert
#   dashboard_operators  - a dashboard JSON embedded from the chart's files
#
# NOTE(review): the label extensions.gardener.cloud/configuration=monitoring is
# presumably what lets the monitoring stack discover this ConfigMap; the
# consumer is not visible in this file - confirm.
apiVersion: v1
kind: ConfigMap
metadata:
  name: machine-controller-manager-monitoring-config
  namespace: {{ .Release.Namespace }}
  labels:
    extensions.gardener.cloud/configuration: monitoring
data:
  # Scrape job "machine-controller-manager":
  #  * discovers endpoints only in the release namespace;
  #  * keeps only targets whose service name is "machine-controller-manager"
  #    and whose endpoint port name is "metrics" (the two source labels are
  #    joined with ";" before matching the regex);
  #  * copies every Kubernetes service label onto the target (labelmap) and
  #    exposes the pod name as the "pod" label;
  #  * after scraping, keeps only the metric names in the allow-list regex and
  #    drops everything else (mcm_* counters/gauges/workqueue histograms plus
  #    process_max_fds / process_open_fds).
  # To expose an additional metric, add its name to that allow-list.
  scrape_config: |
    - job_name: machine-controller-manager
      honor_labels: false
      kubernetes_sd_configs:
      - role: endpoints
        namespaces:
          names: [{{ .Release.Namespace }}]
      relabel_configs:
      - source_labels:
        - __meta_kubernetes_service_name
        - __meta_kubernetes_endpoint_port_name
        action: keep
        regex: machine-controller-manager;metrics
      # common metrics
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [ __meta_kubernetes_pod_name ]
        target_label: pod
      metric_relabel_configs:
      - source_labels: [ __name__ ]
        regex: ^(mcm_cloud_api_requests_failed_total|mcm_cloud_api_requests_total|mcm_machine_controller_frozen|mcm_machine_current_status_phase|mcm_machine_deployment_failed_machines|mcm_machine_items_total|mcm_machine_set_failed_machines|mcm_machine_deployment_items_total|mcm_machine_set_items_total|mcm_machine_set_stale_machines_total|mcm_scrape_failure_total|process_max_fds|process_open_fds|mcm_workqueue_adds_total|mcm_workqueue_depth|mcm_workqueue_queue_duration_seconds_bucket|mcm_workqueue_queue_duration_seconds_sum|mcm_workqueue_queue_duration_seconds_count|mcm_workqueue_work_duration_seconds_bucket|mcm_workqueue_work_duration_seconds_sum|mcm_workqueue_work_duration_seconds_count|mcm_workqueue_unfinished_work_seconds|mcm_workqueue_longest_running_processor_seconds|mcm_workqueue_retries_total)$
        action: keep

  # Alerting rules, stored as a nested "file name -> rule file" mapping.
  # MachineControllerManagerDown fires (severity critical) when, for 15 minutes,
  # there is no "up" series with value 1 for the job
  # "machine-controller-manager" - i.e. no instance is being scraped
  # successfully. The job name here must match job_name in scrape_config.
  alerting_rules: |
    machine-controller-manager.rules.yaml: |
      groups:
      - name: machine-controller-manager.rules
        rules:
        - alert: MachineControllerManagerDown
          expr: absent(up{job="machine-controller-manager"} == 1)
          for: 15m
          labels:
            service: machine-controller-manager
            severity: critical
            type: seed
            visibility: operator
          annotations:
            description: There are no running machine controller manager instances. No shoot nodes can be created/maintained.
            summary: Machine controller manager is down.

  # Operator dashboard, stored as a nested "file name -> JSON" mapping.
  # The JSON is read from the chart file "mcm-monitoring-dashboard.json" and
  # every line is indented by 6 spaces so that it lands inside the nested block
  # scalar below; the template action therefore has to start at column 0.
  dashboard_operators: |
    machine-controller-manager-dashboard.json: |-
{{ .Files.Get "mcm-monitoring-dashboard.json" | indent 6 }}