k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/cluster/addons/fluentd-gcp/fluentd-gcp-configmap.yaml (about)

     1  # This ConfigMap is used to ingest logs against new resources like
     2  # "k8s_container" and "k8s_node" when $LOGGING_STACKDRIVER_RESOURCE_TYPES is set
     3  # to "new".
     4  # When $LOGGING_STACKDRIVER_RESOURCE_TYPES is set to "old", the ConfigMap in
     5  # fluentd-gcp-configmap-old.yaml will be used for ingesting logs against old
     6  # resources like "gke_container" and "gce_instance".
     7  kind: ConfigMap
     8  apiVersion: v1
     9  data:
    10    containers.input.conf: |-
    11      # This configuration file for Fluentd is used
    12      # to watch changes to Docker log files that live in the
    13      # directory /var/lib/docker/containers/ and are symbolically
    14      # linked to from the /var/log/containers directory using names that capture the
    15      # pod name and container name. These logs are then submitted to
    16      # Google Cloud Logging which assumes the installation of the cloud-logging plug-in.
    17      #
    18      # Example
    19      # =======
    20      # A line in the Docker log file might look like this JSON:
    21      #
    22      # {"log":"2014/09/25 21:15:03 Got request with path wombat\\n",
    23      #  "stream":"stderr",
    24      #   "time":"2014-09-25T21:15:03.499185026Z"}
    25      #
    26      # The original tag is derived from the log file's location.
    27      # For example a Docker container's logs might be in the directory:
    28      #  /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b
    29      # and in the file:
    30      #  997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
    31      # where 997599971ee6... is the Docker ID of the running container.
    32      # The Kubernetes kubelet makes a symbolic link to this file on the host
    33      # machine in the /var/log/containers directory which includes the pod name,
    34      # the namespace name and the Kubernetes container name:
    35      #    synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
    36      #    ->
    37      #    /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log
    38      # The /var/log directory on the host is mapped to the /var/log directory in the container
    39      # running this instance of Fluentd and we end up collecting the file:
    40      #   /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
    41      # This results in the tag:
    42      #  var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log
    43      # where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the
    44      # namespace name, 'synth-lgr' is the container name and '997599971ee6..' is
    45      # the container ID.
    46      # The record reformer is used to extract pod_name, namespace_name and
    47      # container_name from the tag and set them in a local_resource_id in the
    48      # format of:
    49      # 'k8s_container.<NAMESPACE_NAME>.<POD_NAME>.<CONTAINER_NAME>'.
    50      # The reformer also changes the tags to 'stderr' or 'stdout' based on the
    51      # value of 'stream'.
    52      # local_resource_id is later used by google_cloud plugin to determine the
    53      # monitored resource to ingest logs against.
    54  
    55      # Json Log Example:
    56      # {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"}
    57      # CRI Log Example:
    58      # 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here
    59      <source>
    60        @type tail
    61        path /var/log/containers/*.log
    62        pos_file /var/log/gcp-containers.log.pos
    63        # Tags at this point are in the format of:
    64        # reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log
    65        tag reform.*
    66        read_from_head true
    67        <parse>
    68          @type multi_format
    69          <pattern>
    70            format json
    71            time_key time
    72            time_format %Y-%m-%dT%H:%M:%S.%NZ
    73          </pattern>
    74          <pattern>
    75            format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
    76            time_format %Y-%m-%dT%H:%M:%S.%N%:z
    77          </pattern>
    78        </parse>
    79      </source>
    80  
    81      <filter reform.**>
    82        @type parser
    83        format /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<log>.*)/
    84        reserve_data true
    85        suppress_parse_error_log true
    86        emit_invalid_record_to_error false
    87        key_name log
    88      </filter>
    89  
    90      <match reform.**>
    91        @type record_reformer
    92        enable_ruby true
    93        <record>
    94          # Extract local_resource_id from tag for 'k8s_container' monitored
    95          # resource. The format is:
    96          # 'k8s_container.<namespace_name>.<pod_name>.<container_name>'.
    97          "logging.googleapis.com/local_resource_id" ${"k8s_container.#{tag_suffix[4].rpartition('.')[0].split('_')[1]}.#{tag_suffix[4].rpartition('.')[0].split('_')[0]}.#{tag_suffix[4].rpartition('.')[0].split('_')[2].rpartition('-')[0]}"}
    98          # Rename the field 'log' to a more generic field 'message'. This way the
    99          # fluent-plugin-google-cloud knows to flatten the field as textPayload
   100          # instead of jsonPayload after extracting 'time', 'severity' and
   101          # 'stream' from the record.
   102          message ${record['log']}
   103          # If 'severity' is not set, assume stderr is ERROR and stdout is INFO.
   104          severity ${record['severity'] || if record['stream'] == 'stderr' then 'ERROR' else 'INFO' end}
   105        </record>
   106        tag ${if record['stream'] == 'stderr' then 'raw.stderr' else 'raw.stdout' end}
   107        remove_keys stream,log
   108      </match>
   109  
   110      # Detect exceptions in the log output and forward them as one log entry.
   111      <match {raw.stderr,raw.stdout}>
   112        @type detect_exceptions
   113  
   114        remove_tag_prefix raw
   115        message message
   116        stream "logging.googleapis.com/local_resource_id"
   117        multiline_flush_interval 5
   118        max_bytes 500000
   119        max_lines 1000
   120      </match>
   121    system.input.conf: |-
   122      # Example:
   123      # Dec 21 23:17:22 gke-foo-1-1-4b5cbd14-node-4eoj startupscript: Finished running startup script /var/run/google.startup.script
   124      <source>
   125        @type tail
   126        format syslog
   127        path /var/log/startupscript.log
   128        pos_file /var/log/gcp-startupscript.log.pos
   129        tag startupscript
   130      </source>
   131  
   132      # Examples (optional err/statusCode are captured as 'error'/'status_code'):
   133      # time="2016-02-04T06:51:03.053580605Z" level=info msg="GET /containers/json"
   134      # time="2016-02-04T07:53:57.505612354Z" level=error msg="HTTP Error" err="No such image: -f" statusCode=404
   135      # TODO(random-liu): Remove this after cri container runtime rolls out.
   136      <source>
   137        @type tail
   138        format /^time="(?<time>[^"]*)" level=(?<severity>[^ ]*) msg="(?<message>[^"]*)"( err="(?<error>[^"]*)")?( statusCode=(?<status_code>\d+))?/
   139        path /var/log/docker.log
   140        pos_file /var/log/gcp-docker.log.pos
   141        tag docker
   142      </source>
   143  
   144      # Example:
   145      # 2016/02/04 06:52:38 filePurge: successfully removed file /var/etcd/data/member/wal/00000000000006d0-00000000010a23d1.wal
   146      <source>
   147        @type tail
   148        # Not parsing this, because it doesn't have anything particularly useful to
   149        # parse out of it (like severities).
   150        format none
   151        path /var/log/etcd.log
   152        pos_file /var/log/gcp-etcd.log.pos
   153        tag etcd
   154      </source>
   155  
   156      # Multi-line parsing is required for all the kube logs because very large log
   157      # statements, such as those that include entire object bodies, get split into
   158      # multiple lines by glog.
   159  
   160      # Example:
   161      # I0204 07:32:30.020537    3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537]
   162      <source>
   163        @type tail
   164        format multiline
   165        multiline_flush_interval 5s
   166        format_firstline /^\w\d{4}/
   167        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   168        time_format %m%d %H:%M:%S.%N
   169        path /var/log/kubelet.log
   170        pos_file /var/log/gcp-kubelet.log.pos
   171        tag kubelet
   172      </source>
   173  
   174      # Example:
   175      # I1118 21:26:53.975789       6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed
   176      <source>
   177        @type tail
   178        format multiline
   179        multiline_flush_interval 5s
   180        format_firstline /^\w\d{4}/
   181        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   182        time_format %m%d %H:%M:%S.%N
   183        path /var/log/kube-proxy.log
   184        pos_file /var/log/gcp-kube-proxy.log.pos
   185        tag kube-proxy
   186      </source>
   187  
   188      # Example:
   189      # I0204 07:00:19.604280       5 handlers.go:131] GET /api/v1/nodes: (1.624207ms) 200 [[kube-controller-manager/v1.1.3 (linux/amd64) kubernetes/6a81b50] 127.0.0.1:38266]
   190      <source>
   191        @type tail
   192        format multiline
   193        multiline_flush_interval 5s
   194        format_firstline /^\w\d{4}/
   195        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   196        time_format %m%d %H:%M:%S.%N
   197        path /var/log/kube-apiserver.log
   198        pos_file /var/log/gcp-kube-apiserver.log.pos
   199        tag kube-apiserver
   200      </source>
   201  
   202      # Example:
   203      # I0204 06:55:31.872680       5 servicecontroller.go:277] LB already exists and doesn't need update for service kube-system/kube-ui
   204      <source>
   205        @type tail
   206        format multiline
   207        multiline_flush_interval 5s
   208        format_firstline /^\w\d{4}/
   209        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   210        time_format %m%d %H:%M:%S.%N
   211        path /var/log/kube-controller-manager.log
   212        pos_file /var/log/gcp-kube-controller-manager.log.pos
   213        tag kube-controller-manager
   214      </source>
   215  
   216      # Example:
   217      # W0204 06:49:18.239674       7 reflector.go:245] pkg/scheduler/factory/factory.go:193: watch of *api.Service ended with: 401: The event in requested index is outdated and cleared (the requested history has been cleared [2578313/2577886]) [2579312]
   218      <source>
   219        @type tail
   220        format multiline
   221        multiline_flush_interval 5s
   222        format_firstline /^\w\d{4}/
   223        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   224        time_format %m%d %H:%M:%S.%N
   225        path /var/log/kube-scheduler.log
   226        pos_file /var/log/gcp-kube-scheduler.log.pos
   227        tag kube-scheduler
   228      </source>
   229  
   230      # Example:
   231      # I0603 15:31:05.793605       6 cluster_manager.go:230] Reading config from path /etc/gce.conf
   232      <source>
   233        @type tail
   234        format multiline
   235        multiline_flush_interval 5s
   236        format_firstline /^\w\d{4}/
   237        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   238        time_format %m%d %H:%M:%S.%N
   239        path /var/log/glbc.log
   240        pos_file /var/log/gcp-glbc.log.pos
   241        tag glbc
   242      </source>
   243  
   244      # Example:
   245      # I0603 15:31:05.793605       6 cluster_manager.go:230] Reading config from path /etc/gce.conf
   246      <source>
   247        @type tail
   248        format multiline
   249        multiline_flush_interval 5s
   250        format_firstline /^\w\d{4}/
   251        format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/
   252        time_format %m%d %H:%M:%S.%N
   253        path /var/log/cluster-autoscaler.log
   254        pos_file /var/log/gcp-cluster-autoscaler.log.pos
   255        tag cluster-autoscaler
   256      </source>
   257  
   258      # Logs from systemd-journal for interesting services.
   259      # TODO(random-liu): Keep this for compatibility, remove this after
   260      # cri container runtime rolls out.
   261      <source>
   262        @type systemd
   263        filters [{ "_SYSTEMD_UNIT": "docker.service" }]
   264        pos_file /var/log/gcp-journald-docker.pos
   265        read_from_head true
   266        tag docker
   267      </source>
   268  
   269      <source>
   270        @type systemd
   271        filters [{ "_SYSTEMD_UNIT": "{{ fluentd_container_runtime_service }}.service" }]
   272        pos_file /var/log/gcp-journald-container-runtime.pos
   273        read_from_head true
   274        tag container-runtime
   275      </source>
   276  
   277      <source>
   278        @type systemd
   279        filters [{ "_SYSTEMD_UNIT": "kubelet.service" }]
   280        pos_file /var/log/gcp-journald-kubelet.pos
   281        read_from_head true
   282        tag kubelet
   283      </source>
   284  
   285      <source>
   286        @type systemd
   287        filters [{ "_SYSTEMD_UNIT": "node-problem-detector.service" }]
   288        pos_file /var/log/gcp-journald-node-problem-detector.pos
   289        read_from_head true
   290        tag node-problem-detector
   291      </source>
   292  
   293      # BEGIN_NODE_JOURNAL
   294      # Whether to include node-journal or not is determined when starting the
   295      # cluster. It is not changed when the cluster is already running.
   296      <source>
   297        @type systemd
   298        pos_file /var/log/gcp-journald.pos
   299        read_from_head true
   300        tag node-journal
   301      </source>
   302  
   303      <filter node-journal>
   304        @type grep
   305        <exclude>
   306          key _SYSTEMD_UNIT
   307          pattern ^(docker|{{ fluentd_container_runtime_service }}|kubelet|node-problem-detector)\.service$
   308        </exclude>
   309      </filter>
   310      # END_NODE_JOURNAL
   311    monitoring.conf: |-
   312      # This source is used to acquire an approximate process start timestamp,
   313      # whose purpose is explained before the corresponding output plugin.
   314      <source>
   315        @type exec
   316        command /bin/sh -c 'date +%s'
   317        tag process_start
   318        time_format %Y-%m-%d %H:%M:%S
   319        keys process_start_timestamp
   320      </source>
   321  
   322      # This filter is used to convert the process start timestamp to an integer
   323      # value for correct ingestion by the prometheus output plugin.
   324      <filter process_start>
   325        @type record_transformer
   326        enable_ruby true
   327        auto_typecast true
   328        <record>
   329          process_start_timestamp ${record["process_start_timestamp"].to_i}
   330        </record>
   331      </filter>
   332    output.conf: |-
   333      # This match is placed before the all-matching output to provide metric
   334      # exporter with a process start timestamp for correct exporting of
   335      # cumulative metrics to Stackdriver.
   336      <match process_start>
   337        @type prometheus
   338  
   339        <metric>
   340          type gauge
   341          name process_start_time_seconds
   342          desc Timestamp of the process start in seconds
   343          key process_start_timestamp
   344        </metric>
   345      </match>
   346  
   347      # This filter counts the number of log entries read by fluentd before
   348      # they are processed by the output plugin. This in turn makes it possible
   349      # to monitor the number of log entries that were read but never sent,
   350      # e.g. because the liveness probe removed the buffer.
   351      <filter **>
   352        @type prometheus
   353        <metric>
   354          type counter
   355          name logging_entry_count
   356          desc Total number of log entries generated by either application containers or system components
   357        </metric>
   358      </filter>
   359  
   360      # This section is exclusive for k8s_container logs. Those come with
   361      # 'stderr'/'stdout' tags.
   362      # TODO(instrumentation): Reconsider this workaround later.
   363      # Trim messages longer than 100000 characters (slightly less than 100KB)
   364      # to their first 100000 characters, to avoid dropping them. This is
   365      # necessary because Stackdriver only supports entries of up to 100KB.
   366      <filter {stderr,stdout}>
   367        @type record_transformer
   368        enable_ruby true
   369        <record>
   370          message ${record['message'].length > 100000 ? "[Trimmed]#{record['message'][0...100000]}..." : record['message']}
   371        </record>
   372      </filter>
   373  
   374      # Do not collect fluentd's own logs to avoid infinite loops.
   375      <match fluent.**>
   376        @type null
   377      </match>
   378  
   379      # Add a unique insertId to each log entry that doesn't already have it.
   380      # This helps guarantee the order and prevent log duplication.
   381      <filter **>
   382        @type add_insert_ids
   383      </filter>
   384  
   385      # This section is exclusive for k8s_container logs. These logs come with
   386      # 'stderr'/'stdout' tags.
   387      # We use a separate output stanza for 'k8s_node' logs with a smaller buffer
   388      # because node logs are less important than user's container logs.
   389      <match {stderr,stdout}>
   390        @type google_cloud
   391  
   392        # Try to detect JSON formatted log entries.
   393        detect_json true
   394        # Collect metrics in Prometheus registry about plugin activity.
   395        enable_monitoring true
   396        monitoring_type prometheus
   397        # Allow log entries from multiple containers to be sent in the same request.
   398        split_logs_by_tag false
   399        # Set the buffer type to file to improve the reliability and reduce the memory consumption
   400        buffer_type file
   401        buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer
   402        # Set queue_full action to block because we want to pause gracefully
   403        # under excessive load instead of throwing an exception.
   404        buffer_queue_full_action block
   405        # Set the chunk limit conservatively to avoid exceeding the recommended
   406        # chunk size of 5MB per write request.
   407        buffer_chunk_limit 512k
   408        # Cap the combined memory usage of this buffer and the one below to
   409        # 512KiB/chunk * (6 + 2) chunks = 4 MiB
   410        buffer_queue_limit 6
   411        # Never wait more than 5 seconds before flushing logs in the non-error case.
   412        flush_interval 5s
   413        # Never wait longer than 30 seconds between retries.
   414        max_retry_wait 30
   415        # Disable the limit on the number of retries (retry forever).
   416        disable_retry_limit
   417        # Use multiple threads for processing.
   418        num_threads 2
   419        use_grpc true
   420        # Skip timestamp adjustment as this is in a controlled environment with
   421        # known timestamp format. This helps with CPU usage.
   422        adjust_invalid_timestamps false
   423      </match>
   424  
   425      # Attach local_resource_id for 'k8s_node' monitored resource.
   426      <filter **>
   427        @type record_transformer
   428        enable_ruby true
   429        <record>
   430          "logging.googleapis.com/local_resource_id" ${"k8s_node.#{ENV['NODE_NAME']}"}
   431        </record>
   432      </filter>
   433  
   434      # This section is exclusive for 'k8s_node' logs. These logs come with tags
   435      # that are neither 'stderr' nor 'stdout'.
   436      # We use a separate output stanza for 'k8s_container' logs with a larger
   437      # buffer because user's container logs are more important than node logs.
   438      <match **>
   439        @type google_cloud
   440  
   441        detect_json true
   442        enable_monitoring true
   443        monitoring_type prometheus
   444        # Allow entries from multiple system logs to be sent in the same request.
   445        split_logs_by_tag false
   446        detect_subservice false
   447        buffer_type file
   448        buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer
   449        buffer_queue_full_action block
   450        buffer_chunk_limit 512k
   451        buffer_queue_limit 2
   452        flush_interval 5s
   453        max_retry_wait 30
   454        disable_retry_limit
   455        num_threads 2
   456        use_grpc true
   457        # Skip timestamp adjustment as this is in a controlled environment with
   458        # known timestamp format. This helps with CPU usage.
   459        adjust_invalid_timestamps false
   460      </match>
   461  metadata:
   462    name: fluentd-gcp-config-v1.2.5
   463    namespace: kube-system
   464    labels:
   465      addonmanager.kubernetes.io/mode: Reconcile