k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/cluster/addons/fluentd-gcp/fluentd-gcp-configmap-old.yaml (about)

     1  # This ConfigMap is used to ingest logs against old resources like
     2  # "gke_container" and "gce_instance" when $LOGGING_STACKDRIVER_RESOURCE_TYPES is
     3  # set to "old".
     4  # When $LOGGING_STACKDRIVER_RESOURCE_TYPES is set to "new", the ConfigMap in
     5  # fluentd-gcp-configmap.yaml will be used for ingesting logs against new
     6  # resources like "k8s_container" and "k8s_node".
     7  kind: ConfigMap
     8  apiVersion: v1
     9  data:
          # Each key below holds one fluentd configuration fragment, stored as a
          # literal block scalar with the trailing newline stripped (|-).
    10    containers.input.conf: |-
    11      # This configuration file for Fluentd is used
    12      # to watch changes to Docker log files that live in the
    13      # directory /var/lib/docker/containers/ and are symbolically
    14      # linked to from the /var/log/containers directory using names that capture the
    15      # pod name and container name. These logs are then submitted to
    16      # Google Cloud Logging which assumes the installation of the cloud-logging plug-in.
    17      #
    18      # Example
    19      # =======
    20      # A line in the Docker log file might look like this JSON:
    21      #
    22      # {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
    23      #  "stream":"stderr",
    24      #   "time":"2014-09-25T21:15:03.499185026Z"}
    25      #
    26      # The original tag is derived from the log file's location.
    27      # For example a Docker container's logs might be in the directory:
    28      #  /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
    29      # and in the file:
    30      #  997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
    31      # where 997599971ee6... is the Docker ID of the running container.
    32      # The Kubernetes kubelet makes a symbolic link to this file on the host
    33      # machine in the /var/log/containers directory which includes the pod name,
    34      # the namespace name and the Kubernetes container name:
    35      #    synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
    36      #    ->
    37      #    /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
    38      # The /var/log directory on the host is mapped to the /var/log directory in the container
    39      # running this instance of Fluentd and we end up collecting the file:
    40      #   /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
    41      # This results in the tag:
    42      #  var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
    43      # where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
    44      # namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
    45      # the container ID.
    46      # The record reformer is used to discard the var.log.containers prefix and
    47      # the Docker container ID suffix and "kubernetes." is pre-pended giving the tag:
    48      #   kubernetes.synthetic-logger-0.25lps-pod_default_synth-lgr
    49      # Tag is then parsed by google_cloud plugin and translated to the metadata,
    50      # visible in the log viewer
    51  
    52      # Json Log Example:
    53      # {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
    54      # CRI Log Example:
    55      # 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here
    56      <source>
              # Tails every file matching /var/log/containers/*.log and tags each
              # record after the file it was read from (see the tag format below).
    57        @type tail
    58        path /var/log/containers/*.log
    59        pos_file /var/log/gcp-containers.log.pos
    60        # Tags at this point are in the format of:
    61        # reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
    62        tag reform.*
    63        read_from_head true
              # Two line formats are accepted: the JSON form and the CRI text form
              # shown in the examples above this stanza.
    64        <parse>
    65          @type multi_format
                # JSON lines; the event time is taken from the "time" field.
    66          <pattern>
    67            format json
    68            time_key time
    69            time_format %Y-%m-%dT%H:%M:%S.%NZ
    70          </pattern>
                # CRI-style lines: <time> <stream> <field> <log>. The third field is
                # matched by [^ ]* but not captured.
    71          <pattern>
    72            format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
    73            time_format %Y-%m-%dT%H:%M:%S.%N%:z
    74          </pattern>
    75        </parse>
    76      </source>
    77  
    78      <filter reform.**>
              # Re-parses the "log" field (key_name) of container records with a
              # glog-style pattern, extracting severity, time, pid and source and
              # putting the remaining text back into "log".
    79        @type parser
    80        format /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<log>.*)/
    81        reserve_data true
              # Lines that do not match the pattern are not reported: parse-error
              # logging is suppressed and invalid records are not sent to the
              # error stream.
    82        suppress_parse_error_log true
    83        emit_invalid_record_to_error false
    84        key_name log
    85      </filter>
    86  
    87      <match reform.**>
              # Rewrites the tag of every reform.** record. The Ruby expression
              # takes tag_suffix[4], splits it on '-', drops the last piece and
              # rejoins the rest with '-'; the result is prefixed with
              # "raw.kubernetes.".
              # NOTE(review): presumably tag_suffix[4] is the tag from its fifth
              # dot-separated part onward, so the dropped piece is
              # "<CONTAINER_ID>.log" - confirm against record_reformer docs.
    88        @type record_reformer
    89        enable_ruby true
    90        # Tags at this point are in the format of:
    91        # 'raw.kubernetes.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>'.
    92        tag raw.kubernetes.${tag_suffix[4].split('-')[0..-2].join('-')}
    93      </match>
    94  
    95      # Detect exceptions in the log output and forward them as one log entry.
    96      <match raw.kubernetes.**>
    97        @type detect_exceptions
    98  
              # Strip the leading "raw" tag part on output, so records leave this
              # stanza tagged kubernetes.*.
    99        remove_tag_prefix raw
              # Record fields the plugin reads: "log" for the message text and
              # "stream" for the stream name.
   100        message log
   101        stream stream
              # Limits on a buffered multi-line entry: flush interval, maximum
              # bytes and maximum lines.
   102        multiline_flush_interval 5
   103        max_bytes 500000
   104        max_lines 1000
   105      </match>
   106    system.input.conf: |-
   107      # Example:
   108      # Dec 21 23:17:22 gke-foo-1-1-4b5cbd14-node-4eoj startupscript: Finished running startup script /var/run/google.startup.script
   109      <source>
              # Tails /var/log/startupscript.log, parses each line as syslog and
              # tags the records "startupscript". The read position is kept in
              # pos_file.
   110        @type tail
   111        format syslog
   112        path /var/log/startupscript.log
   113        pos_file /var/log/gcp-startupscript.log.pos
   114        tag startupscript
   115      </source>
   116  
   117      # Examples:
   118      # time="2016-02-04T06:51:03.053580605Z" level=info msg="GET /containers/json"
   119      # time="2016-02-04T07:53:57.505612354Z" level=error msg="HTTP Error" err="No such image: -f" statusCode=404
   120      # TODO(random-liu): Remove this after cri container runtime rolls out.
   121      <source>
              # Tails /var/log/docker.log and tags the records "docker".
              # The pattern captures the quoted time, the level (as severity),
              # the quoted msg (as message) and, when present, the optional
              # err="..." (as error) and statusCode=<digits> (as status_code)
              # fields. Quoted values are matched with [^"]* so each capture
              # stops at its own closing quote.
   122        @type tail
   123        format /^time="(?<time>[^"]*)" level=(?<severity>[^ ]*) msg="(?<message>[^"]*)"( err="(?<error>[^"]*)")?( statusCode=(?<status_code>\d+))?/
   124        path /var/log/docker.log
   125        pos_file /var/log/gcp-docker.log.pos
   126        tag docker
   127      </source>
   128  
   129      # Example:
   130      # 2016/02/04 06:52:38 filePurge: successfully removed file /var/etcd/data/member/wal/00000000000006d0-00000000010a23d1.wal
   131      <source>
              # Tails /var/log/etcd.log and emits every line, unparsed, under the
              # tag "etcd".
   132        @type tail
   133        # Not parsing this, because it doesn't have anything particularly useful to
   134        # parse out of it (like severities).
   135        format none
   136        path /var/log/etcd.log
   137        pos_file /var/log/gcp-etcd.log.pos
   138        tag etcd
   139      </source>
   140  
   141      # Multi-line parsing is required for all the kube logs because very large log
   142      # statements, such as those that include entire object bodies, get split into
   143      # multiple lines by glog.
   144  
   145      # Example:
   146      # I0204 07:32:30.020537    3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
   147      <source>
              # Tails /var/log/kubelet.log as multi-line glog output, tagged
              # "kubelet".
   148        @type tail
   149        format multiline
   150        multiline_flush_interval 5s
              # A new entry starts on a line beginning with one word character
              # followed by four digits (e.g. "I0204" in the example above).
   151        format_firstline /^\w\d{4}/
              # Captures severity, time, pid, source (up to the closing ']') and
              # the message text.
   152        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
              # The captured time is month+day, a space, then the clock time with
              # fractional seconds; the pattern carries no year.
   153        time_format %m%d %H:%M:%S.%N
   154        path /var/log/kubelet.log
   155        pos_file /var/log/gcp-kubelet.log.pos
   156        tag kubelet
   157      </source>
   158  
   159      # Example:
   160      # I1118 21:26:53.975789       6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
   161      <source>
              # Tails /var/log/kube-proxy.log as multi-line glog output, tagged
              # "kube-proxy".
   162        @type tail
   163        format multiline
   164        multiline_flush_interval 5s
              # A new entry starts on a line beginning with one word character
              # followed by four digits (e.g. "I1118" in the example above).
   165        format_firstline /^\w\d{4}/
              # Captures severity, time, pid, source (up to the closing ']') and
              # the message text.
   166        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
              # Month+day, a space, then clock time with fractional seconds; the
              # pattern carries no year.
   167        time_format %m%d %H:%M:%S.%N
   168        path /var/log/kube-proxy.log
   169        pos_file /var/log/gcp-kube-proxy.log.pos
   170        tag kube-proxy
   171      </source>
   172  
   173      # Example:
   174      # I0204 07:00:19.604280       5 handlers.go:131] GET /api/v1/nodes: (1.624207ms) 200 [[kube-controller-manager/v1.1.3 (linux/amd64) kubernetes/6a81b50] 127.0.0.1:38266]
   175      <source>
              # Tails /var/log/kube-apiserver.log as multi-line glog output, tagged
              # "kube-apiserver".
   176        @type tail
   177        format multiline
   178        multiline_flush_interval 5s
              # A new entry starts on a line beginning with one word character
              # followed by four digits (e.g. "I0204" in the example above).
   179        format_firstline /^\w\d{4}/
              # Captures severity, time, pid, source (up to the closing ']') and
              # the message text.
   180        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
              # Month+day, a space, then clock time with fractional seconds; the
              # pattern carries no year.
   181        time_format %m%d %H:%M:%S.%N
   182        path /var/log/kube-apiserver.log
   183        pos_file /var/log/gcp-kube-apiserver.log.pos
   184        tag kube-apiserver
   185      </source>
   186  
   187      # Example:
   188      # I0204 06:55:31.872680       5 servicecontroller.go:277] LB already exists and doesn't need update for service kube-system/kube-ui
   189      <source>
              # Tails /var/log/kube-controller-manager.log as multi-line glog
              # output, tagged "kube-controller-manager".
   190        @type tail
   191        format multiline
   192        multiline_flush_interval 5s
              # A new entry starts on a line beginning with one word character
              # followed by four digits (e.g. "I0204" in the example above).
   193        format_firstline /^\w\d{4}/
              # Captures severity, time, pid, source (up to the closing ']') and
              # the message text.
   194        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
              # Month+day, a space, then clock time with fractional seconds; the
              # pattern carries no year.
   195        time_format %m%d %H:%M:%S.%N
   196        path /var/log/kube-controller-manager.log
   197        pos_file /var/log/gcp-kube-controller-manager.log.pos
   198        tag kube-controller-manager
   199      </source>
   200  
   201      # Example:
   202      # W0204 06:49:18.239674       7 reflector.go:245] pkg/scheduler/factory/factory.go:193: watch of *api.Service ended with: 401: The event in requested index is outdated and cleared (the requested history has been cleared [2578313/2577886]) [2579312]
   203      <source>
              # Tails /var/log/kube-scheduler.log as multi-line glog output, tagged
              # "kube-scheduler".
   204        @type tail
   205        format multiline
   206        multiline_flush_interval 5s
              # A new entry starts on a line beginning with one word character
              # followed by four digits (e.g. "W0204" in the example above).
   207        format_firstline /^\w\d{4}/
              # Captures severity, time, pid, source (up to the closing ']') and
              # the message text.
   208        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
              # Month+day, a space, then clock time with fractional seconds; the
              # pattern carries no year.
   209        time_format %m%d %H:%M:%S.%N
   210        path /var/log/kube-scheduler.log
   211        pos_file /var/log/gcp-kube-scheduler.log.pos
   212        tag kube-scheduler
   213      </source>
   214  
   215      # Example:
   216      # I0603 15:31:05.793605       6 cluster_manager.go:230] Reading config from path /etc/gce.conf
   217      <source>
              # Tails /var/log/glbc.log as multi-line glog output, tagged "glbc".
   218        @type tail
   219        format multiline
   220        multiline_flush_interval 5s
              # A new entry starts on a line beginning with one word character
              # followed by four digits (e.g. "I0603" in the example above).
   221        format_firstline /^\w\d{4}/
              # Captures severity, time, pid, source (up to the closing ']') and
              # the message text.
   222        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
              # Month+day, a space, then clock time with fractional seconds; the
              # pattern carries no year.
   223        time_format %m%d %H:%M:%S.%N
   224        path /var/log/glbc.log
   225        pos_file /var/log/gcp-glbc.log.pos
   226        tag glbc
   227      </source>
   228  
   229      # Example:
   230      # I0603 15:31:05.793605       6 cluster_manager.go:230] Reading config from path /etc/gce.conf
   231      <source>
              # Tails /var/log/cluster-autoscaler.log as multi-line glog output,
              # tagged "cluster-autoscaler".
   232        @type tail
   233        format multiline
   234        multiline_flush_interval 5s
              # A new entry starts on a line beginning with one word character
              # followed by four digits (e.g. "I0603" in the example above).
   235        format_firstline /^\w\d{4}/
              # Captures severity, time, pid, source (up to the closing ']') and
              # the message text.
   236        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
              # Month+day, a space, then clock time with fractional seconds; the
              # pattern carries no year.
   237        time_format %m%d %H:%M:%S.%N
   238        path /var/log/cluster-autoscaler.log
   239        pos_file /var/log/gcp-cluster-autoscaler.log.pos
   240        tag cluster-autoscaler
   241      </source>
   242  
   243      # Logs from systemd-journal for interesting services.
   244      # TODO(random-liu): Keep this for compatibility, remove this after
   245      # cri container runtime rolls out.
   246      <source>
              # Reads journal entries whose _SYSTEMD_UNIT is docker.service and
              # tags them "docker", the same tag used by the /var/log/docker.log
              # tail source.
   247        @type systemd
   248        filters [{ "_SYSTEMD_UNIT": "docker.service" }]
   249        pos_file /var/log/gcp-journald-docker.pos
   250        read_from_head true
   251        tag docker
   252      </source>
   253  
   254      <source>
              # Reads journal entries for the container runtime's systemd unit and
              # tags them "container-runtime".
              # NOTE(review): {{ fluentd_container_runtime_service }} is a template
              # placeholder; presumably it is substituted with the runtime's
              # service name before this ConfigMap is applied - confirm.
   255        @type systemd
   256        filters [{ "_SYSTEMD_UNIT": "{{ fluentd_container_runtime_service }}.service" }]
   257        pos_file /var/log/gcp-journald-container-runtime.pos
   258        read_from_head true
   259        tag container-runtime
   260      </source>
   261  
   262      <source>
              # Reads journal entries whose _SYSTEMD_UNIT is kubelet.service and
              # tags them "kubelet", the same tag used by the /var/log/kubelet.log
              # tail source.
   263        @type systemd
   264        filters [{ "_SYSTEMD_UNIT": "kubelet.service" }]
   265        pos_file /var/log/gcp-journald-kubelet.pos
   266        read_from_head true
   267        tag kubelet
   268      </source>
   269  
   270      <source>
              # Reads journal entries whose _SYSTEMD_UNIT is
              # node-problem-detector.service and tags them
              # "node-problem-detector".
   271        @type systemd
   272        filters [{ "_SYSTEMD_UNIT": "node-problem-detector.service" }]
   273        pos_file /var/log/gcp-journald-node-problem-detector.pos
   274        read_from_head true
   275        tag node-problem-detector
   276      </source>
   277  
   278      # BEGIN_NODE_JOURNAL
   279      # Whether to include node-journal or not is determined when starting the
   280      # cluster. It is not changed when the cluster is already running.
   281      <source>
              # Journal source with no "filters" directive, so it is not restricted
              # to a single unit. Records are tagged "node-journal"; the grep
              # filter on that tag excludes the units that have dedicated sources.
   282        @type systemd
   283        pos_file /var/log/gcp-journald.pos
   284        read_from_head true
   285        tag node-journal
   286      </source>
   287  
   288      <filter node-journal>
              # Excludes node-journal records whose _SYSTEMD_UNIT is exactly one of
              # docker, the templated container runtime, kubelet or
              # node-problem-detector (each with the ".service" suffix); those
              # units are collected by their own systemd sources.
   289        @type grep
   290        <exclude>
   291          key _SYSTEMD_UNIT
   292          pattern ^(docker|{{ fluentd_container_runtime_service }}|kubelet|node-problem-detector)\.service$
   293        </exclude>
   294      </filter>
   295      # END_NODE_JOURNAL
   296    monitoring.conf: |-
   297      # This source is used to acquire approximate process start timestamp,
   298      # whose purpose is explained before the corresponding output plugin.
   299      <source>
              # Runs `date +%s` through /bin/sh and stores the command output under
              # the key process_start_timestamp, tagged "process_start".
              # NOTE(review): no run_interval is set here; presumably the command
              # runs only once, at startup - confirm against the exec input
              # plugin's defaults.
   300        @type exec
   301        command /bin/sh -c 'date +%s'
   302        tag process_start
   303        time_format %Y-%m-%d %H:%M:%S
   304        keys process_start_timestamp
   305      </source>
   306  
   307      # This filter is used to convert process start timestamp to integer
   308      # value for correct ingestion in the prometheus output plugin.
   309      <filter process_start>
              # Overwrites process_start_timestamp with the result of calling
              # .to_i on its current value.
              # NOTE(review): auto_typecast presumably keeps the expanded value as
              # an integer instead of a string - confirm against the
              # record_transformer docs.
   310        @type record_transformer
   311        enable_ruby true
   312        auto_typecast true
   313        <record>
   314          process_start_timestamp ${record["process_start_timestamp"].to_i}
   315        </record>
   316      </filter>
   317    output.conf: |-
   318      # This match is placed before the all-matching output to provide metric
   319      # exporter with a process start timestamp for correct exporting of
   320      # cumulative metrics to Stackdriver.
   321      <match process_start>
              # Consumes process_start records and exposes the value of their
              # process_start_timestamp field as the gauge
              # process_start_time_seconds.
   322        @type prometheus
   323  
   324        <metric>
   325          type gauge
   326          name process_start_time_seconds
   327          desc Timestamp of the process start in seconds
   328          key process_start_timestamp
   329        </metric>
   330      </match>
   331  
   332      # This filter counts the log entries read by fluentd
   333      # before they are processed by the output plugin. This in turn makes it
   334      # possible to monitor the number of log entries that were read but never
   335      # sent, e.g. because the liveness probe removed the buffer.
   336      <filter **>
              # Applies to every tag (**) and maintains the counter
              # logging_entry_count.
              # NOTE(review): the metric has no "key"; presumably the counter is
              # incremented by one per record - confirm against the prometheus
              # plugin docs.
   337        @type prometheus
   338        <metric>
   339          type counter
   340          name logging_entry_count
   341          desc Total number of log entries generated by either application containers or system components
   342        </metric>
   343      </filter>
   344  
   345      # TODO(instrumentation): Reconsider this workaround later.
   346      # Trim the entries which exceed slightly less than 100KB, to avoid
   347      # dropping them. It is a necessity, because Stackdriver only supports
   348      # entries that are up to 100KB in size.
   349      <filter kubernetes.**>
              # Rewrites the "log" field of container records: when its length
              # exceeds 100000 it becomes "[Trimmed]" + a leading slice of the
              # original + "..."; otherwise it is left unchanged.
              # NOTE(review): 0..100000 is an inclusive Ruby range, so the slice
              # keeps 100001 characters, and String#length counts characters, not
              # bytes - confirm this is within the intended size budget.
              # NOTE(review): the expression calls .length on record['log']
              # directly, so it assumes every kubernetes.** record has a "log"
              # field - confirm.
   350        @type record_transformer
   351        enable_ruby true
   352        <record>
   353          log ${record['log'].length > 100000 ? "[Trimmed]#{record['log'][0..100000]}..." : record['log']}
   354        </record>
   355      </filter>
   356  
   357      # Do not collect fluentd's own logs to avoid infinite loops.
   358      <match fluent.**>
              # Records tagged fluent.** are matched here, ahead of the catch-all
              # <match **> output, and handed to the null output.
   359        @type null
   360      </match>
   361  
   362      # We use 2 output stanzas - one to handle the container logs and one to handle
   363      # the node daemon logs, the latter of which explicitly sends its logs to the
   364      # compute.googleapis.com service rather than container.googleapis.com to keep
   365      # them separate since most users don't care about the node logs.
   366      <match kubernetes.**>
              # Output for container logs (tags under "kubernetes."). It is listed
              # before the catch-all <match **> stanza, which handles the rest.
   367        @type google_cloud
   368  
   369        # Try to detect JSON formatted log entries.
   370        detect_json true
   371        # Collect metrics in Prometheus registry about plugin activity.
   372        enable_monitoring true
   373        monitoring_type prometheus
   374        # Allow log entries from multiple containers to be sent in the same request.
   375        split_logs_by_tag false
   376        # Set the buffer type to file to improve the reliability and reduce the memory consumption
   377        buffer_type file
              # Location of this output's file buffer; the system-log output uses a
              # different buffer_path.
   378        buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
   379        # Set queue_full action to block because we want to pause gracefully
   380        # in case of the off-the-limits load instead of throwing an exception
   381        buffer_queue_full_action block
   382        # Set the chunk limit conservatively to avoid exceeding the recommended
   383        # chunk size of 5MB per write request.
   384        buffer_chunk_limit 1M
   385        # Cap the combined memory usage of this buffer and the one below to
   386        # 1MiB/chunk * (6 + 2) chunks = 8 MiB
   387        buffer_queue_limit 6
   388        # Never wait more than 5 seconds before flushing logs in the non-error case.
   389        flush_interval 5s
   390        # Never wait longer than 30 seconds between retries.
   391        max_retry_wait 30
   392        # Disable the limit on the number of retries (retry forever).
   393        disable_retry_limit
   394        # Use multiple threads for processing.
   395        num_threads 2
              # NOTE(review): presumably selects gRPC as the transport to the
              # logging API - confirm against the google_cloud plugin docs.
   396        use_grpc true
   397      </match>
   398  
   399      # Keep a smaller buffer here since these logs are less important than the user's
   400      # container logs.
   401      <match **>
              # Catch-all output: receives every record not consumed by an earlier
              # <match> stanza. Settings mirror the container-log output except
              # for detect_subservice, buffer_path and a smaller
              # buffer_queue_limit.
   402        @type google_cloud
   403  
   404        detect_json true
   405        enable_monitoring true
   406        monitoring_type prometheus
   407        # Allow entries from multiple system logs to be sent in the same request.
   408        split_logs_by_tag false
   409        detect_subservice false
   410        buffer_type file
              # Separate file buffer from the container-log output.
   411        buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
   412        buffer_queue_full_action block
   413        buffer_chunk_limit 1M
              # 2 chunks here versus 6 for the container-log output.
   414        buffer_queue_limit 2
   415        flush_interval 5s
   416        max_retry_wait 30
              # Bare flag with no value, as in the container-log output where it is
              # described as retrying forever.
   417        disable_retry_limit
   418        num_threads 2
   419        use_grpc true
   420      </match>
   421  metadata:
          # The ConfigMap is named fluentd-gcp-config-old-v1.2.5 and lives in the
          # kube-system namespace.
          # NOTE(review): the name ends in a version suffix (v1.2.5); presumably it
          # is bumped whenever the data above changes - confirm.
   422    name: fluentd-gcp-config-old-v1.2.5
   423    namespace: kube-system
   424    labels:
   425      addonmanager.kubernetes.io/mode: Reconcile