## @section Common parameters
##

versionOverride:

## KubeBlocks container image settings
##
## @param image.registry KubeBlocks image registry
## @param image.repository KubeBlocks image repository
## @param image.pullPolicy KubeBlocks image pull policy
## @param image.tag KubeBlocks image tag (immutable tags are recommended)
## @param image.imagePullSecrets KubeBlocks image pull secrets
## @param image.tools.repository KubeBlocks tools image repository
image:
  registry: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com
  repository: apecloud/kubeblocks
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""
  imagePullSecrets: []
  tools:
    repository: apecloud/kubeblocks-tools
  datascript:
    repository: apecloud/kubeblocks-datascript

## @param replicaCount
##
replicaCount: 1

## @param nameOverride
##
nameOverride: ""

## @param fullnameOverride
##
fullnameOverride: ""


## KubeBlocks RBAC access priority setting
##
## @param rbac.enabled enables or disables KubeBlocks RBAC access priority.
## When enabled, KubeBlocks can ensure resource accessibility for the cluster's
## pods, which it requires to manage clusters efficiently. It defaults to true.
## With RBAC access priority enabled, KubeBlocks holds the following permissions:
##   groups=core,resources=serviceaccounts,verbs=get;list;watch;create;update;patch;delete
##   groups=core,resources=serviceaccounts/status,verbs=get;update;patch
##   groups=core,resources=serviceaccounts/finalizers,verbs=update
##
##   groups=rbac.authorization.k8s.io,resources=rolebindings,verbs=get;list;watch;create;update;patch;delete
##   groups=rbac.authorization.k8s.io,resources=rolebindings/status,verbs=get;update;patch
##   groups=rbac.authorization.k8s.io,resources=rolebindings/finalizers,verbs=update
##
##   groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;watch;create;update;patch;delete
##   groups=rbac.authorization.k8s.io,resources=clusterrolebindings/status,verbs=get;update;patch
##   groups=rbac.authorization.k8s.io,resources=clusterrolebindings/finalizers,verbs=update
##
## If set to false, you must create the service account named
## `cluster.ComponentSpec.ServiceAccountName` and the corresponding (cluster)
## role bindings manually or through the cluster's Helm template, e.g.:
##   helm install mysql apecloud-mysql-cluster
rbac:
  enabled: true
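## With rbac.enabled set to false, a minimal sketch of the objects to create per
## cluster looks like the following; all names are illustrative, and the ClusterRole
## referenced below is assumed to be the pod role shipped by your KubeBlocks release:
##
##   apiVersion: v1
##   kind: ServiceAccount
##   metadata:
##     name: kb-sa-mycluster          # must match cluster.ComponentSpec.ServiceAccountName
##     namespace: default
##   ---
##   apiVersion: rbac.authorization.k8s.io/v1
##   kind: RoleBinding
##   metadata:
##     name: kb-rolebinding-mycluster
##     namespace: default
##   roleRef:
##     apiGroup: rbac.authorization.k8s.io
##     kind: ClusterRole
##     name: kubeblocks-cluster-pod-role   # assumption: verify the role name in your release
##   subjects:
##     - kind: ServiceAccount
##       name: kb-sa-mycluster
##       namespace: default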
## Deployment update strategy.
## Ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy
##
## @param updateStrategy.rollingUpdate
## @param updateStrategy.type
updateStrategy:
  rollingUpdate:
    maxSurge: 1
    maxUnavailable: 40%
  type: RollingUpdate

## Set `hostNetwork` to `true` when you want the KubeBlocks pod to share its host's network namespace.
## Useful for situations such as running a custom CNI on Amazon EKS.
## Update `dnsPolicy` accordingly to suit host network mode.
##
## @param hostNetwork
##
hostNetwork: false

## `dnsPolicy` determines the manner in which DNS resolution happens in the cluster.
## With `hostNetwork: true`, `ClusterFirstWithHostNet` is usually the appropriate `dnsPolicy`.
## For further reference: https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-s-dns-policy.
##
## @param dnsPolicy
##
dnsPolicy: ClusterFirst

## Configure podDisruptionBudget spec settings
##
## @param podDisruptionBudget.minAvailable
## @param podDisruptionBudget.maxUnavailable
podDisruptionBudget:
  # Configures the minimum available pods for KubeBlocks disruptions.
  # Cannot be used if `maxUnavailable` is set.
  minAvailable: 1
  # Configures the maximum unavailable pods for KubeBlocks disruptions.
  # Cannot be used if `minAvailable` is set.
  maxUnavailable:


## Logger settings
##
## @param loggerSettings.developmentMode
## @param loggerSettings.encoder
## @param loggerSettings.level
## @param loggerSettings.timeEncoding
loggerSettings:
  # Development mode defaults (encoder=consoleEncoder, logLevel=Debug, stackTraceLevel=Warn).
  # Production mode defaults (encoder=jsonEncoder, logLevel=Info, stackTraceLevel=Error) (default false).
  developmentMode: false
  # Log encoding (one of 'json' or 'console')
  encoder: console
  # Log level, can be one of 'debug', 'info', 'error', or any integer value > 0
  # which corresponds to custom debug levels of increasing verbosity.
  level:
  # Zap time encoding (one of 'epoch', 'millis', 'nano', 'iso8601', 'rfc3339' or
  # 'rfc3339nano'). Defaults to 'iso8601'.
  timeEncoding: 'iso8601'

## ServiceAccount settings
##
## @param serviceAccount.create
## @param serviceAccount.annotations
## @param serviceAccount.name
serviceAccount:
  # Specifies whether a service account should be created
  create: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

## @param podAnnotations
##
podAnnotations: {}

## Security context settings
##
## @param securityContext.allowPrivilegeEscalation
## @param securityContext.capabilities
securityContext:
  allowPrivilegeEscalation: false
  capabilities:
    drop:
      - ALL

## Pod security context settings
##
## @param podSecurityContext.runAsNonRoot
## @param podSecurityContext.readOnlyRootFilesystem
## @param podSecurityContext.runAsUser
## @param podSecurityContext.fsGroup
## @param podSecurityContext.seccompProfile
podSecurityContext:
  runAsNonRoot: true
  # readOnlyRootFilesystem: true
  # runAsUser: 1000
  # fsGroup: 2000
  # TODO(user): For common cases that do not require escalating privileges
  # it is recommended to ensure that all your Pods/Containers are restrictive.
  # More info: https://kubernetes.io/docs/concepts/security/pod-security-standards/#restricted
  # Please uncomment the following code if your project does NOT have to work on old Kubernetes
  # versions < 1.19 or on vendor versions which do NOT support this field by default (i.e. OpenShift < 4.11).
  # seccompProfile:
  #   type: RuntimeDefault

## Service settings
##
## @param service.type
## @param service.port
## @param service.nodePort
service:
  type: ClusterIP
  port: 9443
  # -- Service node port.
  # Only used if `service.type` is `NodePort`.
  nodePort:


## Metrics serviceMonitor parameters
## Enable this if you're using Prometheus Operator
##
## @param serviceMonitor.enabled
## @param serviceMonitor.port
## @param serviceMonitor.nodePort
serviceMonitor:
  enabled: false
  # The metrics server will be exposed at this port.
  port: 8080
  # Only used if `service.type` is `NodePort`.
  nodePort:

## KubeBlocks pods deployment topologySpreadConstraints settings
##
## @param topologySpreadConstraints
topologySpreadConstraints: []
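## A minimal sketch of spreading KubeBlocks replicas across zones, assuming the
## standard topology.kubernetes.io/zone node label; adjust the selector labels to
## match your release:
##
## topologySpreadConstraints:
##   - maxSkew: 1
##     topologyKey: topology.kubernetes.io/zone
##     whenUnsatisfiable: ScheduleAnyway
##     labelSelector:
##       matchLabels:
##         app.kubernetes.io/name: kubeblocks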
## Resource settings
##
## @param resources.limits
## @param resources.requests
resources: {}
  # We usually recommend not to specify default resources and to leave this as a conscious
  # choice for the user. This also increases chances charts run on environments with little
  # resources, such as Minikube. If you do want to specify resources, uncomment the following
  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
  # TODO(user): Configure the resources accordingly based on the project requirements.
  # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
  # limits:
  #   cpu: 500m
  #   memory: 128Mi
  # requests:
  #   cpu: 10m
  #   memory: 64Mi

## @param priorityClassName
##
priorityClassName:

## Autoscaling settings
##
## @param autoscaling.enabled
## @param autoscaling.minReplicas
## @param autoscaling.maxReplicas
## @param autoscaling.targetCPUUtilizationPercentage
## @param autoscaling.targetMemoryUtilizationPercentage
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80



## @param nodeSelector
##
nodeSelector: {}

## @param tolerations
##
tolerations:
  - key: kb-controller
    operator: Equal
    value: "true"
    effect: NoSchedule


## @param affinity
##
affinity:
  nodeAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        preference:
          matchExpressions:
            - key: kb-controller
              operator: In
              values:
                - "true"

## @param dataPlane -- data plane settings
##
dataPlane:
  tolerations:
    - key: kb-data
      operator: Equal
      value: "true"
      effect: NoSchedule

  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          preference:
            matchExpressions:
              - key: kb-data
                operator: In
                values:
                  - "true"

## AdmissionWebhooks settings
##
## @param admissionWebhooks.enabled
## @param admissionWebhooks.createSelfSignedCert
## @param admissionWebhooks.ignoreReplicasCheck
admissionWebhooks:
  enabled: false
  createSelfSignedCert: true
  ignoreReplicasCheck: false

## Data protection settings
##
## @param dataProtection.enabled - enable the dataProtection controllers for backup functions
## @param dataProtection.gcFrequencySeconds - the frequency of garbage collection
dataProtection:
  enabled: true
  # Customizing the encryption key is strongly recommended.
  # If you do not specify a custom key, the default key will be used.
  # Using the default key can potentially lead to the exposure of database
  # passwords if the 'get/list' permissions on backup CRs are compromised.
  encryptionKey: ""
  gcFrequencySeconds: 3600
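  # A hedged example of supplying your own key at install time; the openssl
  # command is illustrative, and any sufficiently random string works:
  #   helm install kubeblocks ... --set dataProtection.encryptionKey="$(openssl rand -hex 16)"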
  image:
    registry: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com
    repository: apecloud/kubeblocks-dataprotection
    pullPolicy: IfNotPresent
    # Overrides the image tag whose default is the chart appVersion.
    tag: ""
    imagePullSecrets: []
    datasafed:
      repository: apecloud/datasafed
      tag: "0.0.3"

## BackupRepo settings
##
## @param backupRepo.create - creates a backup repo during installation
## @param backupRepo.default - set the created repo as the default
## @param backupRepo.accessMethod - the access method for the backup repo, options: [Mount, Tool]
## @param backupRepo.storageProvider - the storage provider used by the repo, options: [s3, oss, minio]
## @param backupRepo.pvReclaimPolicy - the PV reclaim policy, options: [Retain, Delete]
## @param backupRepo.volumeCapacity - the capacity for creating PVC
## @param backupRepo.config - a key-value map containing the settings required by the storage provider
## @param backupRepo.secrets - a key-value map containing the secret values required by the storage provider
backupRepo:
  create: false
  default: true
  accessMethod: Tool
  storageProvider: ""
  pvReclaimPolicy: "Retain"
  volumeCapacity: ""
  config:
    bucket: ""
    endpoint: ""
    region: ""
  secrets:
    accessKeyId: ""
    secretAccessKey: ""
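## A minimal sketch of a repo backed by an in-cluster MinIO, assuming the minio
## storage provider; bucket, endpoint, and credentials are illustrative placeholders:
##
## backupRepo:
##   create: true
##   storageProvider: minio
##   config:
##     bucket: kb-backups
##     endpoint: http://minio.kb-system.svc.cluster.local:9000
##   secrets:
##     accessKeyId: <ACCESS_KEY>
##     secretAccessKey: <SECRET_KEY>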
## Addon controller settings; this requires the cluster-admin ClusterRole.
##
## @param addonController.enabled
## @param addonController.jobTTL - the addon job time-to-live period, as a time.Duration-parseable string.
##   Defaults to "5m" if not provided.
## @param addonController.jobImagePullPolicy - addon install job image pull policy.
addonController:
  enabled: true
  jobTTL: "5m"
  jobImagePullPolicy: IfNotPresent


## @param keepAddons - keep Addon CR objects when deleting this chart.
keepAddons: false

## @param addonChartLocationBase - location base for KubeBlocks official addons' charts, e.g. for releases in an air-gapped environment.
##   If the URL has the prefix "file://", KubeBlocks will use the Helm charts copied from the addonChartsImage.
##
addonChartLocationBase: file://

## @param addonChartsImage - addon charts image, used to copy Helm charts to the addon job container.
## @param addonChartsImage.chartsPath - the Helm charts path in the addon charts image.
addonChartsImage:
  registry: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com
  repository: apecloud/kubeblocks-charts
  pullPolicy: IfNotPresent
  tag: ""
  chartsPath: /charts

## @param addonHelmInstallOptions - addon helm install options.
addonHelmInstallOptions:
  - "--atomic"
  - "--cleanup-on-fail"
  - "--wait"
  - "--insecure-skip-tls-verify"
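## For an air-gapped install you would typically point addonChartLocationBase at an
## internal chart mirror instead of the default "file://"; a hedged example (the URL
## is illustrative):
##   helm upgrade --install kubeblocks ... \
##     --set addonChartLocationBase=https://charts.internal.example.com/kubeblocks
## With the "file://" prefix, the charts baked into addonChartsImage are used instead.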
## Prometheus Addon
##
prometheus:
  ## If false, the prometheus sub-chart will not be installed
  ##
  enabled: false

  alertmanager:
    ## If false, alertmanager will not be installed
    ##
    enabled: true

    ## alertmanager container image
    ##
    image:
      repository: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/alertmanager
      tag: v0.24.0

    ## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.alertmanager.configMapOverrideName}}
    ## Defining configMapOverrideName will cause templates/alertmanager-configmap.yaml
    ## to NOT generate a ConfigMap resource
    ##
    configMapOverrideName: "alertmanager-config"

    ## Node tolerations for alertmanager scheduling to nodes with taints
    ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
    ##
    tolerations:
      - key: kb-controller
        operator: Equal
        value: "true"
        effect: NoSchedule

    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            preference:
              matchExpressions:
                - key: kb-controller
                  operator: In
                  values:
                    - "true"

    persistentVolume:
      ## If true, alertmanager will create/use a Persistent Volume Claim
      ## If false, use emptyDir
      ##
      enabled: false

      ## alertmanager data Persistent Volume size
      ##
      size: 1Gi

      ## alertmanager data Persistent Volume Storage Class
      ## If defined, storageClassName: <storageClass>
      ## If set to "-", storageClassName: "", which disables dynamic provisioning
      ## If undefined (the default) or set to null, no storageClassName spec is
      ## set, choosing the default provisioner. (gp2 on AWS, standard on
      ## GKE, AWS & OpenStack)
      ##
      # storageClass: "-"

    ## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
    ##
    replicaCount: 1

    statefulSet:
      ## If true, use a StatefulSet instead of a Deployment for pod management.
      ## This allows scaling to more than 1 replica.
      ##
      enabled: true

      ## Alertmanager headless service to use for the statefulset
      ##
      headless:
        ## Enable peer mesh service endpoints to enable the HA Alertmanager
        ## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md
        enableMeshPeer: true

    ## alertmanager resource requests and limits
    ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
    ##
    resources: {}
      # limits:
      #   cpu: 10m
      #   memory: 32Mi
      # requests:
      #   cpu: 10m
      #   memory: 32Mi

    ## Security context to be added to alertmanager pods
    ##
    securityContext:
      runAsUser: 0
      runAsNonRoot: false
      runAsGroup: 65534
      fsGroup: 65534

    containerSecurityContext:
      allowPrivilegeEscalation: false

    ingress:
      ## If true, alertmanager Ingress will be created
      ##
      enabled: false

      # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName
      # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress
      # ingressClassName: nginx

      ## alertmanager Ingress annotations
      ##
      annotations: {}
        # kubernetes.io/ingress.class: nginx
        # kubernetes.io/tls-acme: 'true'

      ## alertmanager Ingress additional labels
      ##
      extraLabels: {}

      ## alertmanager Ingress hostnames with optional path
      ## Must be provided if Ingress is enabled
      ##
      hosts: []
        # - alertmanager.domain.com
        # - domain.com/alertmanager

      path: /

      # pathType is only for k8s >= 1.18
      pathType: Prefix

      ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
      extraPaths: []
        # - path: /*
        #   backend:
        #     serviceName: ssl-redirect
        #     servicePort: use-annotation

      ## alertmanager Ingress TLS configuration
      ## Secrets must be manually created in the namespace
      ##
      tls: []
        # - secretName: prometheus-alerts-tls
        #   hosts:
        #     - alertmanager.domain.com

    service:
      annotations: {}
      labels: {}
      clusterIP: ""

      ## Enable peer mesh service endpoints to enable the HA Alertmanager
      ## Ref: https://github.com/prometheus/alertmanager/blob/master/README.md
      # enableMeshPeer: true

      ## List of IP addresses at which the alertmanager service is available
      ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
      ##
      externalIPs: []

      loadBalancerIP: ""
      loadBalancerSourceRanges: []
      servicePort: 80
      # nodePort: 30000
      sessionAffinity: None
      type: ClusterIP


  kubeStateMetrics:
    ## If false, the kube-state-metrics sub-chart will not be installed
    ##
    enabled: false

  nodeExporter:
    ## If false, node-exporter will not be installed
    ##
    enabled: false

    ## node-exporter container image
    ##
    image:
      repository: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/node-exporter
      tag: v1.3.1

  configmapReload:
    prometheus:
      ## configmap-reload container image
      ##
      image:
        repository: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/configmap-reload
        tag: v0.5.0
    alertmanager:
      ## configmap-reload container image
      ##
      image:
        repository: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/configmap-reload
        tag: v0.5.0

  server:
    ## If false, the Prometheus server will not be installed
    ##
    enabled: true

    ## Prometheus server container image
    ##
    image:
      repository: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/prometheus
      tag: v2.44.0

    global:
      ## How frequently to scrape targets by default
      ##
      scrape_interval: 15s
      ## How long until a scrape request times out
      ##
      scrape_timeout: 10s
      ## How frequently to evaluate rules
      ##
      evaluation_interval: 15s

    ## Additional Prometheus server container flags
    ##
    extraFlags:
      - web.enable-lifecycle
      - web.enable-remote-write-receiver

    ## Additional Prometheus server container arguments
    ##
    extraArgs:
      log.level: info
      storage.tsdb.min-block-duration: 30m
      enable-feature: memory-snapshot-on-shutdown
      storage.tsdb.retention.size: 10GB

    ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write
    ##
    remoteWrite: []
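    ## A minimal sketch of forwarding samples to an external TSDB over
    ## remote_write; the URL and credentials are placeholders:
    ##
    ## remoteWrite:
    ##   - url: https://metrics.example.com/api/v1/write
    ##     basic_auth:
    ##       username: prometheus
    ##       password: <PASSWORD>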
    ## Prefix used to register routes, overriding externalUrl route.
    ## Useful for proxies that rewrite URLs.
    ##
    routePrefix: /

    ## Node tolerations for server scheduling to nodes with taints
    ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
    ##
    tolerations:
      - key: kb-controller
        operator: Equal
        value: "true"
        effect: NoSchedule

    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            preference:
              matchExpressions:
                - key: kb-controller
                  operator: In
                  values:
                    - "true"

    persistentVolume:
      ## If true, Prometheus server will create/use a Persistent Volume Claim
      ## If false, use emptyDir
      ##
      enabled: false

      ## Prometheus server data Persistent Volume size
      ##
      size: 20Gi

      ## Prometheus server data Persistent Volume Storage Class
      ## If defined, storageClassName: <storageClass>
      ## If set to "-", storageClassName: "", which disables dynamic provisioning
      ## If undefined (the default) or set to null, no storageClassName spec is
      ## set, choosing the default provisioner. (gp2 on AWS, standard on
      ## GKE, AWS & OpenStack)
      ##
      # storageClass: "-"

    ## Use a StatefulSet if replicaCount needs to be greater than 1 (see below)
    ##
    replicaCount: 1

    statefulSet:
      ## If true, use a StatefulSet instead of a Deployment for pod management.
      ## This allows scaling to more than 1 replica.
      ##
      enabled: true

    ## Prometheus server resource requests and limits
    ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
    ##
    resources: {}
      # limits:
      #   cpu: 500m
      #   memory: 512Mi
      # requests:
      #   cpu: 500m
      #   memory: 512Mi

    ## Prometheus data retention period (default if not specified is 15 days)
    ##
    retention: "2d"

    ## Security context to be added to server pods
    ##
    securityContext:
      runAsUser: 0
      runAsNonRoot: false
      runAsGroup: 65534
      fsGroup: 65534

    containerSecurityContext:
      allowPrivilegeEscalation: false

    service:
      ## If false, no Service will be created for the Prometheus server
      ##
      enabled: true

      annotations: {}
      labels: {}
      clusterIP: ""

      ## List of IP addresses at which the Prometheus server service is available
      ## Ref: https://kubernetes.io/docs/user-guide/services/#external-ips
      ##
      externalIPs: []

      loadBalancerIP: ""
      loadBalancerSourceRanges: []
      servicePort: 80
      sessionAffinity: None
      type: ClusterIP

      ## Enable gRPC port on service to allow auto discovery with thanos-querier
      gRPC:
        enabled: false
        servicePort: 10901
        # nodePort: 10901

      ## If using a StatefulSet (statefulSet.enabled=true), configure the
      ## service to connect to a specific replica to have a consistent view
      ## of the data.
      statefulsetReplica:
        enabled: false
        replica: 0

    ingress:
      ## If true, Prometheus server Ingress will be created
      ##
      enabled: false

      # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName
      # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress
      # ingressClassName: nginx

      ## Prometheus server Ingress annotations
      ##
      annotations: {}
        # kubernetes.io/ingress.class: nginx
        # kubernetes.io/tls-acme: 'true'

      ## Prometheus server Ingress additional labels
      ##
      extraLabels: {}

      ## Prometheus server Ingress hostnames with optional path
      ## Must be provided if Ingress is enabled
      ##
      hosts: []
        # - prometheus.domain.com
        # - domain.com/prometheus

      path: /

      # pathType is only for k8s >= 1.18
      pathType: Prefix

      ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
      extraPaths: []
        # - path: /*
        #   backend:
        #     serviceName: ssl-redirect
        #     servicePort: use-annotation

      ## Prometheus server Ingress TLS configuration
      ## Secrets must be manually created in the namespace
      ##
      tls: []
        # - secretName: prometheus-server-tls
        #   hosts:
        #     - prometheus.domain.com




  ## AlertManager ConfigMap Entries
  ## NOTE: Please review these carefully, as the thresholds and behavior may not meet
  ## your SLOs or labels.
  ##
  alertmanagerFiles:
    alertmanager.yml:
      global: {}

      receivers:
        - name: default-receiver

      route:
        receiver: default-receiver
        group_wait: 5s
        group_interval: 30s
        repeat_interval: 10m
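  ## A hedged sketch of wiring the default receiver to a webhook; the URL is a
  ## placeholder, and a real setup would point at your notification gateway,
  ## e.g. the alertmanager-webhook-adaptor service configured later in this file:
  ##
  ## alertmanagerFiles:
  ##   alertmanager.yml:
  ##     receivers:
  ##       - name: default-receiver
  ##         webhook_configs:
  ##           - url: http://my-webhook-adaptor:5001/api/v1/notify
  ##     route:
  ##       receiver: default-receiver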
  ## Sample prometheus rules/alerts
  ## NOTE: Please review these carefully, as the thresholds and behavior may not meet
  ## your SLOs or labels.
  ##
  ruleFiles:
    kubelet_alert_rules.yml: |
      groups:
        - name: KubeletSummary
          rules:
            - alert: ContainerCpuUsageWarning
              expr: 'rate(container_cpu_time_seconds_total[2m]) / container_cpu_limit * 100 > 70'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'Container CPU usage is high (> 70%)'
                description: 'Container CPU usage is {{ $value | printf "%.2f" }} percent. (pod: {{ $labels.k8s_pod_name }}, container: {{ $labels.k8s_container_name }})'

            - alert: ContainerCpuUsageCritical
              expr: 'rate(container_cpu_time_seconds_total[2m]) / container_cpu_limit * 100 > 90'
              for: 1m
              labels:
                severity: critical
              annotations:
                summary: 'Container CPU usage is very high (> 90%)'
                description: 'Container CPU usage is {{ $value | printf "%.2f" }} percent. (pod: {{ $labels.k8s_pod_name }}, container: {{ $labels.k8s_container_name }})'

            - alert: ContainerMemoryUsage
              expr: 'container_memory_working_set_bytes / container_memory_limit_bytes * 100 > 90'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'Container Memory usage is high (> 90%)'
                description: 'Container Memory usage is {{ $value | printf "%.2f" }} percent. (pod: {{ $labels.k8s_pod_name }}, container: {{ $labels.k8s_container_name }})'

            - alert: ContainerMemoryUsagePredict
              expr: 'predict_linear(container_memory_working_set_bytes[15m], 30*60) - container_memory_limit_bytes > 0'
              for: 5m
              labels:
                severity: critical
              annotations:
                summary: 'Container Memory usage may exceed the limit 30 minutes from now'
                description: 'Container Memory usage may exceed the limit 30 minutes from now; the predicted value is {{ $value | humanize1024 }}. (pod: {{ $labels.k8s_pod_name }}, container: {{ $labels.k8s_container_name }})'

            - alert: ContainerVolumeUsage
              expr: '(k8s_volume_capacity_bytes - k8s_volume_available_bytes) / k8s_volume_capacity_bytes * 100 > 90'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'Volume usage is high (> 90%)'
                description: 'Volume usage is {{ $value | printf "%.2f" }} percent. (pod: {{ $labels.k8s_pod_name }}, volume: {{ $labels.k8s_volume_name }})'

    mysql_alert_rules.yml: |
      groups:
        - name: MysqldExporter
          rules:
            - alert: MysqlDown
              expr: 'max_over_time(mysql_up[1m]) == 0'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'MySQL is down'
                description: 'MySQL is down. (instance: {{ $labels.pod }})'

            - alert: MysqlRestarted
              expr: 'mysql_global_status_uptime < 60'
              for: 0m
              labels:
                severity: info
              annotations:
                summary: 'MySQL has just been restarted (< 60s)'
                description: 'MySQL was restarted {{ $value | printf "%.1f" }} seconds ago. (instance: {{ $labels.pod }})'

            - alert: MysqlTooManyConnections
              expr: 'sum(max_over_time(mysql_global_status_threads_connected[1m]) / mysql_global_variables_max_connections) BY (namespace,app_kubernetes_io_instance,pod) * 100 > 80'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'MySQL has too many connections (> 80%)'
                description: '{{ $value | printf "%.2f" }} percent of MySQL connections are in use. (instance: {{ $labels.pod }})'

            - alert: MysqlConnectionErrors
              expr: 'sum(increase(mysql_global_status_connection_errors_total[1m])) BY (namespace,app_kubernetes_io_instance,pod) > 0'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'MySQL connection errors'
                description: 'MySQL has connection errors and the value is {{ $value | printf "%.2f" }}. (instance: {{ $labels.pod }})'

            - alert: MysqlHighThreadsRunning
              expr: 'sum(max_over_time(mysql_global_status_threads_running[1m]) / mysql_global_variables_max_connections) BY (namespace,app_kubernetes_io_instance,pod) * 100 > 60'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'MySQL high threads running (> 60%)'
                description: '{{ $value | printf "%.2f" }} percent of MySQL connections are in running state. (instance: {{ $labels.pod }})'

            - alert: MysqlSlowQueries
              expr: 'sum(increase(mysql_global_status_slow_queries[1m])) BY (namespace,app_kubernetes_io_instance,pod) > 0'
              for: 2m
              labels:
                severity: info
              annotations:
                summary: 'MySQL slow queries'
                description: 'MySQL server has {{ $value | printf "%.2f" }} slow queries. (instance: {{ $labels.pod }})'
            - alert: MysqlInnodbLogWaits
              expr: 'sum(rate(mysql_global_status_innodb_log_waits[5m])) BY (namespace,app_kubernetes_io_instance,pod) > 10'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'MySQL InnoDB log waits (> 10)'
                description: 'MySQL InnoDB log writes are stalling and the value is {{ $value | printf "%.2f" }}. (instance: {{ $labels.pod }})'

            - alert: MysqlInnodbBufferPoolHits
              expr: 'sum(rate(mysql_global_status_innodb_buffer_pool_reads[5m]) / rate(mysql_global_status_innodb_buffer_pool_read_requests[5m])) BY (namespace,app_kubernetes_io_instance,pod) * 100 > 5'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'MySQL InnoDB high read requests rate hitting disk (> 5%)'
                description: 'High number of logical reads that InnoDB could not satisfy from the buffer pool and had to read directly from disk. The value is {{ $value | printf "%.2f" }} percent. (instance: {{ $labels.pod }})'

    postgresql_alert_rules.yml: |
      groups:
        - name: PostgreSQLExporter
          rules:
            - alert: PostgreSQLDown
              expr: 'max_over_time(pg_up[1m]) == 0'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'PostgreSQL is down'
                description: 'PostgreSQL is down. (instance: {{ $labels.pod }})'

            - alert: PostgreSQLRestarted
              expr: 'time() - pg_postmaster_start_time_seconds < 60'
              for: 0m
              labels:
                severity: info
              annotations:
                summary: 'PostgreSQL has just been restarted (< 60s)'
                description: 'PostgreSQL was restarted {{ $value | printf "%.1f" }} seconds ago. (instance: {{ $labels.pod }})'

            - alert: PostgreSQLExporterError
              expr: 'pg_exporter_last_scrape_error > 0'
              for: 0m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL exporter scrape error'
                description: 'PostgreSQL exporter has {{ $value | printf "%.2f" }} scrape errors. A query may be buggy in query.yaml. (instance: {{ $labels.pod }})'

            - alert: PostgreSQLTooManySlowQueries
              expr: |
                max by(namespace,app_kubernetes_io_instance,pod,datname) (
                  max_over_time(pg_stat_activity_max_tx_duration{datname!~"template.*"}[2m])
                ) > 60
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL database has a high number of slow queries'
                description: 'PostgreSQL database has slow queries and the value is {{ $value | printf "%.2f" }}. (instance: {{ $labels.pod }}, database: {{ $labels.datname }})'

            - alert: PostgreSQLTooManyConnections
              expr: |
                sum by (namespace,app_kubernetes_io_instance,pod) (pg_stat_activity_count{datname!~"template.*"})
                > on(namespace,app_kubernetes_io_instance,pod)
                (pg_settings_max_connections - pg_settings_superuser_reserved_connections) * 0.8
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL too many connections (> 80%)'
                description: 'PostgreSQL has too many connections and the value is {{ $value | printf "%.2f" }} percent. (instance: {{ $labels.pod }})'

            - alert: PostgreSQLDeadLocks
              expr: 'increase(pg_stat_database_deadlocks_total{datname!~"template.*", datname!=""}[2m]) > 5'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL database has deadlocks (> 5)'
                description: 'PostgreSQL database has {{ $value | printf "%.2f" }} deadlocks. (instance: {{ $labels.pod }}, database: {{ $labels.datname }})'
            - alert: PostgreSQLHighRollbackRate
              expr: |
                rate(pg_stat_database_xact_rollback_total{datname!~"template.*", datname!=""}[2m])
                /
                rate(pg_stat_database_xact_commit_total{datname!~"template.*", datname!=""}[2m])
                > 0.1
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL database has a high rollback rate (> 10%)'
                description: 'The ratio of transactions being aborted compared to committed is {{ $value | printf "%.2f"}} percent. (instance: {{ $labels.pod }}, database: {{ $labels.datname }})'

            - alert: PostgreSQLTooManyLocksAcquired
              expr: |
                sum by (namespace,app_kubernetes_io_instance,pod) (pg_locks_count)
                / on(namespace,app_kubernetes_io_instance,pod)
                (pg_settings_max_locks_per_transaction * pg_settings_max_connections)
                > 0.2
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL has too many locks acquired (> 20%)'
                description: 'Too many locks acquired on the database and the value is {{ $value | printf "%.2f" }} percent. (instance: {{ $labels.pod }})'

            - alert: PostgreSQLCacheHitRatio
              expr: |
                avg by (namespace,app_kubernetes_io_instance,pod,datname) (
                  rate(pg_stat_database_blks_hit_total{datname!~"template.*", datname!=""}[2m])
                  /
                  (
                    rate(
                      pg_stat_database_blks_hit_total{datname!~"template.*", datname!=""}[2m]
                    )
                    +
                    rate(
                      pg_stat_database_blks_read_total{datname!~"template.*", datname!=""}[2m]
                    )
                  )
                ) < 0.9
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL database has a low cache hit rate (< 90%)'
                description: 'Low cache hit rate and the value is {{ $value | printf "%.2f" }} percent. (instance: {{ $labels.pod }}, database: {{ $labels.datname }})'

            - alert: PostgreSQLMaxWriteBufferReached
              expr: 'rate(pg_stat_bgwriter_maxwritten_clean_total[2m]) > 0'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL write buffers reached max'
                description: 'The PostgreSQL background writer stopped cleaning because it hit the max-written limit; the value is {{ $value | printf "%.2f" }}. (instance: {{ $labels.pod }})'

            - alert: PostgreSQLHighWALFilesArchiveErrorRate
              expr: |
                rate(pg_stat_archiver_failed_count_total[2m])
                / (
                  rate(pg_stat_archiver_archived_count_total[2m]) + rate(pg_stat_archiver_failed_count_total[2m])
                ) > 0.1
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL has a high error rate in the WAL files archiver (> 10%)'
                description: 'PostgreSQL has a high error rate in the WAL files archiver and the value is {{ $value | printf "%.2f" }} percent. (instance: {{ $labels.pod }})'

            - alert: PostgreSQLTableNotAutoVacuumed
              expr: |
                (pg_stat_user_tables_last_autovacuum > 0)
                and
                (time() - pg_stat_user_tables_last_autovacuum)
                > 24 * 60 * 60 * 10
              for: 0m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL table in database has not been auto-vacuumed for 10 days'
                description: 'Table {{ $labels.relname }} in database has not been auto-vacuumed for 10 days. (instance: {{ $labels.pod }}, database: {{ $labels.datname }})'
            - alert: PostgreSQLTableNotAutoAnalyzed
              expr: |
                (pg_stat_user_tables_last_autoanalyze > 0)
                and
                (time() - pg_stat_user_tables_last_autoanalyze)
                > 24 * 60 * 60 * 10
              for: 0m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL table in database has not been auto-analyzed for 10 days'
                description: 'Table {{ $labels.relname }} in database has not been auto-analyzed for 10 days. (instance: {{ $labels.pod }}, database: {{ $labels.datname }})'

            - alert: PostgreSQLTableTooManyDeadTuples
              expr: |
                (pg_stat_user_tables_n_dead_tup > 10000)
                /
                (pg_stat_user_tables_n_live_tup + pg_stat_user_tables_n_dead_tup)
                >= 0.1
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'PostgreSQL table in database has too many dead tuples (> 10%)'
                description: 'The dead-tuple count of table {{ $labels.relname }} in database is too large and the value is {{ $value | printf "%.2f" }} percent. (instance: {{ $labels.pod }}, database: {{ $labels.datname }})'

    redis_alert_rules.yml: |
      groups:
        - name: RedisExporter
          rules:
            - alert: RedisDown
              expr: 'redis_up == 0'
              for: 5m
              labels:
                severity: critical
              annotations:
                summary: 'Redis is down'
                description: 'Redis is down. (instance: {{ $labels.pod }})'

            - alert: RedisCPUHigh
              expr: '(rate(redis_cpu_sys_seconds_total[1m]) + rate(redis_cpu_user_seconds_total[1m])) * 100 > 80'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'Out of CPU (> 80%)'
                description: 'Redis is running out of CPU and the value is {{ $value | printf "%.2f" }} percent. (instance: {{ $labels.pod }})'

            - alert: RedisMemoryHigh
              expr: '(redis_memory_max_bytes == 0 or redis_memory_used_bytes * 100 / redis_memory_max_bytes) > 90'
              for: 5m
              labels:
                severity: warning
              annotations:
                summary: 'Out of memory (> 90%)'
                description: 'Redis is running out of memory and the value is {{ $value | printf "%.2f" }} percent. (instance: {{ $labels.pod }})'

            - alert: RedisTooManyConnections
              expr: 'redis_connected_clients * 100 / redis_config_maxclients > 80'
              for: 1m
              labels:
                severity: warning
              annotations:
                summary: 'Redis has too many connections (> 80%)'
                description: 'Redis has too many connections and the value is {{ $value | printf "%.2f" }} percent. (instance: {{ $labels.pod }})'

            - alert: RedisRejectedConnections
              expr: 'increase(redis_rejected_connections_total[1m]) > 0'
              for: 5m
              labels:
                severity: error
              annotations:
                summary: 'Redis has rejected connections'
                description: '{{ $value | printf "%.2f" }} connections to Redis have been rejected. (instance: {{ $labels.pod }})'

            - alert: RedisKeyEviction
              expr: 'increase(redis_evicted_keys_total[5m]) > 0'
              for: 1s
              labels:
                severity: error
              annotations:
                summary: 'Redis has evicted keys'
                description: 'Redis has evicted keys in the last 5 minutes and the value is {{ $value | printf "%.2f" }}. (instance: {{ $labels.pod }})'

            - alert: RedisMissingMaster
              expr: 'count by (app_kubernetes_io_instance) (redis_instance_info{role="master"}) < 1'
              for: 30s
              labels:
                severity: critical
              annotations:
                summary: 'Redis missing master'
                description: 'Redis cluster has no node marked as master.'
            - alert: RedisDisconnectedSlaves
              expr: 'count without (instance, job) (redis_connected_slaves) - sum without (instance, job) (redis_connected_slaves) - 1 > 1'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'Redis disconnected slaves'
                description: 'Redis is not replicating to all slaves. Consider reviewing the Redis replication status. (instance: {{ $labels.pod }})'

            - alert: RedisReplicationBroken
              expr: 'delta(redis_connected_slaves[1m]) < 0'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'Redis replication broken'
                description: 'Redis instance lost a slave. (instance: {{ $labels.pod }})'

    mongodb_alert_rules.yml: |-
      groups:
        - name: MongodbExporter
          rules:
            - alert: MongodbDown
              expr: 'max_over_time(mongodb_up[1m]) == 0'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'MongoDB is down'
                description: 'MongoDB instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}'

            - alert: MongodbRestarted
              expr: 'mongodb_instance_uptime_seconds < 60'
              for: 0m
              labels:
                severity: info
              annotations:
                summary: 'Mongodb has just been restarted (< 60s)'
                description: 'Mongodb was restarted {{ $value | printf "%.1f" }} seconds ago\n LABELS = {{ $labels }}'

            - alert: MongodbReplicaMemberUnhealthy
              expr: 'max_over_time(mongodb_rs_members_health[1m]) == 0'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'Mongodb replica member is unhealthy'
                description: 'MongoDB replica member is not healthy\n VALUE = {{ $value }}\n LABELS = {{ $labels }}'

            - alert: MongodbReplicationLag
              expr: '(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (pod) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"}) / 1000 > 10'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'MongoDB replication lag (> 10s)'
                description: 'Mongodb replication lag is more than 10s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}'

            - alert: MongodbReplicationHeadroom
              expr: 'sum(avg(mongodb_mongod_replset_oplog_head_timestamp - mongodb_mongod_replset_oplog_tail_timestamp)) - sum(avg(mongodb_rs_members_optimeDate{member_state="PRIMARY"} - on (pod) group_right mongodb_rs_members_optimeDate{member_state="SECONDARY"})) <= 0'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'MongoDB replication headroom (< 0)'
                description: 'MongoDB replication headroom is <= 0\n VALUE = {{ $value }}\n LABELS = {{ $labels }}'

            - alert: MongodbNumberCursorsOpen
              expr: 'mongodb_ss_metrics_cursor_open{csr_type="total"} > 10 * 1000'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'MongoDB opened cursors num (> 10k)'
                description: 'Too many cursors opened by MongoDB for clients (> 10k)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}'

            - alert: MongodbCursorsTimeouts
              expr: 'increase(mongodb_ss_metrics_cursor_timedOut[1m]) > 100'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'MongoDB cursors timeouts (> 100/minute)'
                description: 'Too many cursors are timing out (> 100/minute)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}'

            - alert: MongodbTooManyConnections
              expr: 'avg by(pod) (rate(mongodb_ss_connections{conn_type="current"}[1m])) / avg by(pod) (sum (mongodb_ss_connections) by(pod)) * 100 > 80'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: 'MongoDB too many connections (> 80%)'
                description: 'Too many connections (> 80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}'

            - alert: MongodbVirtualMemoryUsage
              expr: '(sum(mongodb_ss_mem_virtual) BY (pod) / sum(mongodb_ss_mem_resident) BY (pod)) > 100'
              for: 2m
              labels:
                severity: warning
              annotations:
                summary: MongoDB virtual memory usage high
                description: "High memory usage: the quotient of (mem_virtual / mem_resident) is more than 100\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

    kafka_alert_rules.yml: |-
      groups:
        - name: KafkaExporter
          rules:
            - alert: KafkaTopicsReplicas
              expr: 'sum(kafka_topic_partition_in_sync_replica) by (topic) < 3'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'Kafka topics replicas (instance {{ $labels.app_kubernetes_io_instance }})'
                description: 'Kafka topic in-sync partition\n VALUE = {{ $value }}\n LABELS = {{ $labels }}'
            - alert: KafkaConsumersGroup
              expr: 'sum(kafka_consumergroup_lag) by (consumergroup) > 50'
              for: 1m
              labels:
                severity: critical
              annotations:
                summary: 'Kafka consumers group (instance {{ $labels.app_kubernetes_io_instance }})'
                description: 'Kafka consumers group\n VALUE = {{ $value }}\n LABELS = {{ $labels }}'
            - alert: KafkaBrokerDown
              expr: 'kafka_brokers < 3'
              for: 0m
              labels:
                severity: critical
              annotations:
                summary: 'Kafka broker *{{ $labels.app_kubernetes_io_instance }}* alert status'
                description: 'One of the Kafka brokers in *{{ $labels.app_kubernetes_io_instance }}* is down.'
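    ## Additional rule files can be added as further entries here; a minimal
    ## hedged sketch (the file name is illustrative, and the file must also be
    ## listed under serverFiles."prometheus.yml".rule_files to be loaded):
    ##
    ## my_alert_rules.yml: |
    ##   groups:
    ##     - name: MyRules
    ##       rules:
    ##         - alert: AlwaysFiring
    ##           expr: 'vector(1)'
    ##           labels:
    ##             severity: info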
  serverFiles:
    prometheus.yml:
      rule_files:
        - /etc/config/recording_rules.yml
        - /etc/config/alerting_rules.yml
        - /etc/config/kubelet_alert_rules.yml
        - /etc/config/mysql_alert_rules.yml
        - /etc/config/postgresql_alert_rules.yml
        - /etc/config/redis_alert_rules.yml
        - /etc/config/kafka_alert_rules.yml
        - /etc/config/mongodb_alert_rules.yml

      scrape_configs:
        - job_name: prometheus
          static_configs:
            - targets:
                - localhost:9090

        # Scrape config for kubeblocks managed service endpoints.
        #
        # The relabeling allows the actual service scrape endpoint to be configured
        # via the following annotations:
        #
        # * `monitor.kubeblocks.io/scrape`: Only scrape services that have a value of
        #   `true`.
        # * `monitor.kubeblocks.io/scheme`: If the metrics endpoint is secured, you will need
        #   to set this to `https` and most likely set the `tls_config` of the scrape config.
        # * `monitor.kubeblocks.io/path`: If the metrics path is not `/metrics`, override it here.
        # * `monitor.kubeblocks.io/port`: If the metrics are exposed on a different port from the
        #   service, set this appropriately.
        # * `monitor.kubeblocks.io/param_<parameter>`: If the metrics endpoint uses parameters,
        #   you can set any parameter this way.
        - job_name: 'kubeblocks-service'
          honor_labels: true

          kubernetes_sd_configs:
            - role: endpoints

          relabel_configs:
            - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_managed_by]
              action: keep
              regex: kubeblocks
            - source_labels: [__meta_kubernetes_service_label_monitor_kubeblocks_io_managed_by]
              action: drop
              regex: agamotto
            - source_labels: [__meta_kubernetes_service_annotation_monitor_kubeblocks_io_scrape]
              action: keep
              regex: true
            - source_labels: [__meta_kubernetes_service_annotation_monitor_kubeblocks_io_scheme]
              action: replace
              target_label: __scheme__
              regex: (https?)
            - source_labels: [__meta_kubernetes_service_annotation_monitor_kubeblocks_io_path]
              action: replace
              target_label: __metrics_path__
              regex: (.+)
            - source_labels: [__address__, __meta_kubernetes_service_annotation_monitor_kubeblocks_io_port]
              action: replace
              target_label: __address__
              regex: (.+?)(?::\d+)?;(\d+)
              replacement: $1:$2
            - action: labelmap
              regex: __meta_kubernetes_service_annotation_monitor_kubeblocks_io_param_(.+)
              replacement: __param_$1
            - action: labelmap
              regex: __meta_kubernetes_service_label_(.+)
            - source_labels: [__meta_kubernetes_namespace]
              action: replace
              target_label: namespace
            - source_labels: [__meta_kubernetes_service_name]
              action: replace
              target_label: service
            - source_labels: [__meta_kubernetes_pod_node_name]
              action: replace
              target_label: node
            - source_labels: [__meta_kubernetes_pod_name]
              action: replace
              target_label: pod
            - source_labels: [__meta_kubernetes_pod_phase]
              regex: Pending|Succeeded|Failed|Completed
              action: drop
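        # A hedged sketch of a Service that the relabeling above would pick up;
        # every name, label, and port below is illustrative:
        #
        #   apiVersion: v1
        #   kind: Service
        #   metadata:
        #     name: mycluster-mysql-metrics
        #     labels:
        #       app.kubernetes.io/managed-by: kubeblocks
        #     annotations:
        #       monitor.kubeblocks.io/scrape: "true"
        #       monitor.kubeblocks.io/scheme: http
        #       monitor.kubeblocks.io/path: /metrics
        #       monitor.kubeblocks.io/port: "9104"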
        - job_name: 'kubeblocks-agamotto'
          honor_labels: true

          kubernetes_sd_configs:
            - role: endpoints

          relabel_configs:
            - source_labels: [__meta_kubernetes_service_label_monitor_kubeblocks_io_managed_by]
              action: keep
              regex: agamotto
            - source_labels: [__meta_kubernetes_service_annotation_monitor_kubeblocks_io_scrape]
              action: keep
              regex: true
            - source_labels: [__meta_kubernetes_service_annotation_monitor_kubeblocks_io_scheme]
              action: replace
              target_label: __scheme__
              regex: (https?)
            - source_labels: [__meta_kubernetes_service_annotation_monitor_kubeblocks_io_path]
              action: replace
              target_label: __metrics_path__
              regex: (.+)
            - source_labels: [__address__, __meta_kubernetes_service_annotation_monitor_kubeblocks_io_port]
              action: replace
              target_label: __address__
              regex: (.+?)(?::\d+)?;(\d+)
              replacement: $1:$2
            - action: labelmap
              regex: __meta_kubernetes_service_annotation_monitor_kubeblocks_io_param_(.+)
              replacement: __param_$1
            - source_labels: [__meta_kubernetes_pod_phase]
              regex: Pending|Succeeded|Failed|Completed
              action: drop

  pushgateway:
    ## If false, pushgateway will not be installed
    ##
    enabled: false

## loki settings for kubeblocks
loki:
  enabled: false
  singleBinary:
    replicas: 1
  monitoring:
    lokiCanary:
      enabled: false
    selfMonitoring:
      enabled: false
      grafanaAgent:
        installOperator: false
    dashboards:
      enabled: false
    rules:
      enabled: false
    serviceMonitor:
      enabled: false
  test:
    enabled: false
  loki:
    auth_enabled: false
    commonConfig:
      replication_factor: 1
    storage:
      type: filesystem
    podSecurityContext:
      runAsNonRoot: false
      runAsUser: 0
    limits_config:
      max_query_lookback: 72h
      retention_period: 72h
    compactor:
      working_directory: /var/loki/retention
      shared_store: filesystem
      compaction_interval: 10m
      retention_enabled: true
      retention_delete_delay: 2h
      retention_delete_worker_count: 150
      delete_request_cancel_period: 2h


grafana:
  ## If false, the grafana sub-chart will not be installed
  ##
  enabled: false

  rbac:
    pspEnabled: false

  replicas: 1

  image:
    repository: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/grafana
    # Overrides the Grafana image tag whose default is the chart appVersion
    tag: 9.2.4

  ## Grafana server resource requests and limits
  ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/
  ##
  resources: {}
    # limits:
    #   cpu: 100m
    #   memory: 128Mi
    # requests:
    #   cpu: 100m
    #   memory: 128Mi

  ## Node tolerations for grafana scheduling to nodes with taints
  ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/
  ##
  tolerations:
    - key: kb-controller
      operator: Equal
      value: "true"
      effect: NoSchedule

  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          preference:
            matchExpressions:
              - key: kb-controller
                operator: In
                values:
                  - "true"

  ## Timezone for the default dashboards
  ## Other options are: browser, or a specific timezone, e.g. Europe/Luxembourg
  ##
  defaultDashboardsTimezone:

  adminUser: admin
  adminPassword: kubeblocks

  sidecar:
    image:
      repository: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/k8s-sidecar
      tag: 1.19.2

    dashboards:
      enabled: true
      label: grafana_dashboard
      labelValue: "1"
      searchNamespace: ALL
      resource: configmap

    datasources:
      enabled: true
      label: grafana_datasource
      labelValue: "1"
      searchNamespace: ALL
      resource: configmap

      defaultDatasourceEnabled: true
      uid: prometheus

      skipReload: false
      initDatasources: true

  testFramework:
    enabled: false

  grafana.ini:
    # Basic auth is enabled by default and works with the built-in Grafana user/password authentication system and LDAP authentication integration.
    auth.basic:
      enabled: false

    auth.anonymous:
      enabled: true
      # Hide the Grafana version text from the footer and help tooltip for unauthenticated users (default: false)
      hide_version: true

  ingress:
    enabled: false
    # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName
    # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress
    # ingressClassName: nginx
    # Values can be templated
    annotations: {}
      # kubernetes.io/ingress.class: nginx
      # kubernetes.io/tls-acme: "true"
    labels: {}
    path: /

    # pathType is only for k8s >= 1.18
    pathType: Prefix

    hosts:
      - chart-example.local
    ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services.
    extraPaths: []
      # - path: /*
      #   backend:
      #     serviceName: ssl-redirect
      #     servicePort: use-annotation
      ## Or for k8s > 1.19
      # - path: /*
      #   pathType: Prefix
      #   backend:
      #     service:
      #       name: ssl-redirect
      #       port:
      #         name: use-annotation


    tls: []
      # - secretName: chart-example-tls
      #   hosts:
      #     - chart-example.local

  ## Expose the grafana service to be accessed from outside the cluster (LoadBalancer service),
  ## or access it from within the cluster (ClusterIP service). Set the service type and the port to serve it.
  ## ref: http://kubernetes.io/docs/user-guide/services/
  ##
  service:
    enabled: true
    type: ClusterIP
    port: 80
    targetPort: 3000
    # targetPort: 4181 To be used with a proxy extraContainer
    ## Service annotations. Can be templated.
    annotations: {}
    labels: {}
    portName: service
    # Adds the appProtocol field to the service. This allows working with Istio protocol selection, e.g. "http" or "tcp".
    appProtocol: ""


### snapshot-controller settings
### ref: https://artifacthub.io/packages/helm/piraeus-charts/snapshot-controller#configuration
###
snapshot-controller:
  ## @param snapshot-controller.enabled -- Enable the snapshot-controller chart.
  ##
  enabled: true
  ## @param snapshot-controller.replicaCount -- Number of replicas to deploy.
  ##
  replicaCount: 1
  ## snapshot-controller image settings, for easy access by CN users.
  ## @param snapshot-controller.image.repository -- Repository to pull the image from.
  ##
  image:
    repository: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com/apecloud/snapshot-controller
    tag: v6.2.1

  tolerations:
    - key: kb-controller
      operator: Equal
      value: "true"
      effect: NoSchedule

  volumeSnapshotClasses:
    - name: default-vsc
      driver: hostpath.csi.k8s.io
      deletionPolicy: Delete
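  ## The default class above uses the hostpath CSI driver; on a cloud CSI driver
  ## you would swap the driver name, e.g. (illustrative, for the AWS EBS CSI driver):
  ##
  ## volumeSnapshotClasses:
  ##   - name: default-vsc
  ##     driver: ebs.csi.aws.com
  ##     deletionPolicy: Delete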
  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          preference:
            matchExpressions:
              - key: kb-controller
                operator: In
                values:
                  - "true"

kubeblocks-csi-driver:
  enabled: false


cloudProvider:
  ## cloudProvider secret settings
  ## @param cloudProvider.accessKey -- S3 Access Key.
  ## @param cloudProvider.secretKey -- S3 Secret Key.
  ## @param cloudProvider.region -- S3 region.
  ## @param cloudProvider.name -- cloud provider name, options: [aws, aliyun].
  ## @param cloudProvider.bucket -- S3 Bucket.
  accessKey: ""
  secretKey: ""
  region: ""
  name: ""
  bucket: ""

## csi-s3 settings
## ref: https://artifacthub.io/packages/helm/cloudve/csi-s3#configuration
##
csi-s3:
  ## @param csi-s3.enabled -- Enable the csi-s3 chart.
  ##
  enabled: false

alertmanager-webhook-adaptor:
  ## Linked with prometheus.enabled
  ##
  # enabled: false

  ## webhook-adaptor container image
  ##
  image:
    registry: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com

  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          preference:
            matchExpressions:
              - key: kb-controller
                operator: In
                values:
                  - "true"

  ## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.configMapOverrideName}}
  ##
  configMapOverrideName: "config"

  ## webhook-adaptor ConfigMap entries
  configFiles:
    config.yaml: {}

csi-hostpath-driver:
  ## @param csi-hostpath-driver.enabled -- Enable the csi-hostpath-driver chart.
  ##
  enabled: false
  ## csi-hostpath-driver storageClass settings
  ## @param csi-hostpath-driver.storageClass.create -- Specifies whether the storage class should be created.
  ## @param csi-hostpath-driver.storageClass.default -- Specifies whether the storage class should be set as default after creation.
  ##
  storageClass:
    create: true
    default: true

aws-load-balancer-controller:
  clusterName: ""
  enabled: false
  replicaCount: 1
  tolerations:
    - key: kb-controller
      operator: Equal
      value: "true"
      effect: NoSchedule
  serviceAccount:
    create: true
    name: kubeblocks-service-account-aws-load-balancer-controller
  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
        - weight: 100
          preference:
            matchExpressions:
              - key: kb-controller
                operator: In
                values:
                  - "true"

## k8s cluster feature gates, ref: https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/
enabledAlphaFeatureGates:
  ## @param enabledAlphaFeatureGates.recoverVolumeExpansionFailure -- Specifies whether the RecoverVolumeExpansionFailure feature gate is enabled in the k8s cluster.
  ##
  recoverVolumeExpansionFailure: false


agamotto:
  enabled: false
  image:
    registry: infracreate-registry.cn-zhangjiakou.cr.aliyuncs.com


provider: ""  # can be "aws", "gcp", "aliyun", "tencentCloud", "huaweiCloud", "azure"
validProviders:
  - "aws"
  - "gcp"
  - "aliyun"
  - "tencentCloud"
  - "huaweiCloud"
  - "azure"
  - ""

## @section KubeBlocks default storageClass parameters for cloud providers.
storageClass:
  ## @param storageClass.name -- Specifies the name of the default storage class.
  ## If the name is not specified and KubeBlocks is deployed in a cloud, a default name will be generated.
  ##
  name: ""
  ## @param storageClass.create -- Specifies whether the storage class should be created. If storageClass.name is not
  ## specified or generated, this value will be ignored.
  ##
  create: true
  mountOptions:
    - noatime
    - nobarrier
  provider:
    aws:
      volumeType: gp3
      fsType: xfs
    gcp:
      volumeType: pd-balanced
      fsType: xfs
    aliyun:
      volumeType: cloud_essd
      fsType: xfs
    azure:
      volumeType: managed
      fsType: xfs
    tencentCloud:
      volumeType: CLOUD_SSD
    huaweiCloud:  # Huawei Cloud
      volumeType: SSD
      fsType: ext4

external-dns:
  enabled: false
  domain: kubeblocks.io
  tolerations:
    - key: kb-controller
      operator: Equal
      value: "true"
      effect: NoSchedule

developMode: false
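## A hedged end-to-end example of overriding common values at install time; the
## chart reference and namespace are illustrative:
##   helm install kubeblocks kubeblocks/kubeblocks \
##     --namespace kb-system --create-namespace \
##     --set image.registry=docker.io \
##     --set prometheus.enabled=true \
##     --set grafana.enabled=true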