k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/cluster/addons/fluentd-gcp/fluentd-gcp-configmap-old.yaml (about) 1 # This ConfigMap is used to ingest logs against old resources like 2 # "gke_container" and "gce_instance" when $LOGGING_STACKDRIVER_RESOURCE_TYPES is 3 # set to "old". 4 # When $LOGGING_STACKDRIVER_RESOURCE_TYPES is set to "new", the ConfigMap in 5 # fluentd-gcp-configmap.yaml will be used for ingesting logs against new 6 # resources like "k8s_container" and "k8s_node". 7 kind: ConfigMap 8 apiVersion: v1 9 data: 10 containers.input.conf: |- 11 # This configuration file for Fluentd is used 12 # to watch changes to Docker log files that live in the 13 # directory /var/lib/docker/containers/ and are symbolically 14 # linked to from the /var/log/containers directory using names that capture the 15 # pod name and container name. These logs are then submitted to 16 # Google Cloud Logging which assumes the installation of the cloud-logging plug-in. 17 # 18 # Example 19 # ======= 20 # A line in the Docker log file might look like this JSON: 21 # 22 # {"log":"2014/09/25 21:15:03 Got request with path wombat\\n", 23 # "stream":"stderr", 24 # "time":"2014-09-25T21:15:03.499185026Z"} 25 # 26 # The original tag is derived from the log file's location. 27 # For example a Docker container's logs might be in the directory: 28 # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b 29 # and in the file: 30 # 997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log 31 # where 997599971ee6... is the Docker ID of the running container. 
32 # The Kubernetes kubelet makes a symbolic link to this file on the host 33 # machine in the /var/log/containers directory which includes the pod name, 34 # the namespace name and the Kubernetes container name: 35 # synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log 36 # -> 37 # /var/lib/docker/containers/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b/997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b-json.log 38 # The /var/log directory on the host is mapped to the /var/log directory in the container 39 # running this instance of Fluentd and we end up collecting the file: 40 # /var/log/containers/synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log 41 # This results in the tag: 42 # reform.var.log.containers.synthetic-logger-0.25lps-pod_default_synth-lgr-997599971ee6366d4a5920d25b79286ad45ff37a74494f262e3bc98d909d0a7b.log 43 # where 'synthetic-logger-0.25lps-pod' is the pod name, 'default' is the 44 # namespace name, 'synth-lgr' is the container name and '997599971ee6..' is 45 # the container ID. 46 # The record reformer is used to discard the reform.var.log.containers prefix and 47 # the Docker container ID suffix and "kubernetes."
is pre-pended giving the tag: 48 # kubernetes.synthetic-logger-0.25lps-pod_default_synth-lgr 49 # Tag is then parsed by google_cloud plugin and translated to the metadata, 50 # visible in the log viewer 51 52 # Json Log Example: 53 # {"log":"[info:2016-02-16T16:04:05.930-08:00] Some log text here\n","stream":"stdout","time":"2016-02-17T00:04:05.931087621Z"} 54 # CRI Log Example: 55 # 2016-02-17T00:04:05.931087621Z stdout F [info:2016-02-16T16:04:05.930-08:00] Some log text here 56 <source> 57 @type tail 58 path /var/log/containers/*.log 59 pos_file /var/log/gcp-containers.log.pos 60 # Tags at this point are in the format of: 61 # reform.var.log.containers.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>-<CONTAINER_ID>.log 62 tag reform.* 63 read_from_head true 64 <parse> 65 @type multi_format 66 <pattern> 67 format json 68 time_key time 69 time_format %Y-%m-%dT%H:%M:%S.%NZ 70 </pattern> 71 <pattern> 72 format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/ 73 time_format %Y-%m-%dT%H:%M:%S.%N%:z 74 </pattern> 75 </parse> 76 </source> 77 78 <filter reform.**> 79 @type parser 80 format /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<log>.*)/ 81 reserve_data true 82 suppress_parse_error_log true 83 emit_invalid_record_to_error false 84 key_name log 85 </filter> 86 87 <match reform.**> 88 @type record_reformer 89 enable_ruby true 90 # Tags at this point are in the format of: 91 # 'raw.kubernetes.<POD_NAME>_<NAMESPACE_NAME>_<CONTAINER_NAME>'. 92 tag raw.kubernetes.${tag_suffix[4].split('-')[0..-2].join('-')} 93 </match> 94 95 # Detect exceptions in the log output and forward them as one log entry. 
96 <match raw.kubernetes.**> 97 @type detect_exceptions 98 99 remove_tag_prefix raw 100 message log 101 stream stream 102 multiline_flush_interval 5 103 max_bytes 500000 104 max_lines 1000 105 </match> 106 system.input.conf: |- 107 # Example: 108 # Dec 21 23:17:22 gke-foo-1-1-4b5cbd14-node-4eoj startupscript: Finished running startup script /var/run/google.startup.script 109 <source> 110 @type tail 111 format syslog 112 path /var/log/startupscript.log 113 pos_file /var/log/gcp-startupscript.log.pos 114 tag startupscript 115 </source> 116 117 # Examples (the optional err= and statusCode= fields are captured as "error" and "status_code"): 118 # time="2016-02-04T06:51:03.053580605Z" level=info msg="GET /containers/json" 119 # time="2016-02-04T07:53:57.505612354Z" level=error msg="HTTP Error" err="No such image: -f" statusCode=404 120 # TODO(random-liu): Remove this after cri container runtime rolls out. 121 <source> 122 @type tail 123 format /^time="(?<time>[^"]*)" level=(?<severity>[^ ]*) msg="(?<message>[^"]*)"( err="(?<error>[^"]*)")?( statusCode=(?<status_code>\d+))?/ 124 path /var/log/docker.log 125 pos_file /var/log/gcp-docker.log.pos 126 tag docker 127 </source> 128 129 # Example: 130 # 2016/02/04 06:52:38 filePurge: successfully removed file /var/etcd/data/member/wal/00000000000006d0-00000000010a23d1.wal 131 <source> 132 @type tail 133 # Not parsing this, because it doesn't have anything particularly useful to 134 # parse out of it (like severities). 135 format none 136 path /var/log/etcd.log 137 pos_file /var/log/gcp-etcd.log.pos 138 tag etcd 139 </source> 140 141 # Multi-line parsing is required for all the kube logs because very large log 142 # statements, such as those that include entire object bodies, get split into 143 # multiple lines by glog.
144 145 # Example: 146 # I0204 07:32:30.020537 3368 server.go:1048] POST /stats/container/: (13.972191ms) 200 [[Go-http-client/1.1] 10.244.1.3:40537] 147 <source> 148 @type tail 149 format multiline 150 multiline_flush_interval 5s 151 format_firstline /^\w\d{4}/ 152 format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/ 153 time_format %m%d %H:%M:%S.%N 154 path /var/log/kubelet.log 155 pos_file /var/log/gcp-kubelet.log.pos 156 tag kubelet 157 </source> 158 159 # Example: 160 # I1118 21:26:53.975789 6 proxier.go:1096] Port "nodePort for kube-system/default-http-backend:http" (:31429/tcp) was open before and is still needed 161 <source> 162 @type tail 163 format multiline 164 multiline_flush_interval 5s 165 format_firstline /^\w\d{4}/ 166 format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/ 167 time_format %m%d %H:%M:%S.%N 168 path /var/log/kube-proxy.log 169 pos_file /var/log/gcp-kube-proxy.log.pos 170 tag kube-proxy 171 </source> 172 173 # Example: 174 # I0204 07:00:19.604280 5 handlers.go:131] GET /api/v1/nodes: (1.624207ms) 200 [[kube-controller-manager/v1.1.3 (linux/amd64) kubernetes/6a81b50] 127.0.0.1:38266] 175 <source> 176 @type tail 177 format multiline 178 multiline_flush_interval 5s 179 format_firstline /^\w\d{4}/ 180 format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/ 181 time_format %m%d %H:%M:%S.%N 182 path /var/log/kube-apiserver.log 183 pos_file /var/log/gcp-kube-apiserver.log.pos 184 tag kube-apiserver 185 </source> 186 187 # Example: 188 # I0204 06:55:31.872680 5 servicecontroller.go:277] LB already exists and doesn't need update for service kube-system/kube-ui 189 <source> 190 @type tail 191 format multiline 192 multiline_flush_interval 5s 193 format_firstline /^\w\d{4}/ 194 format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/ 195 time_format %m%d %H:%M:%S.%N 196 
path /var/log/kube-controller-manager.log 197 pos_file /var/log/gcp-kube-controller-manager.log.pos 198 tag kube-controller-manager 199 </source> 200 201 # Example: 202 # W0204 06:49:18.239674 7 reflector.go:245] pkg/scheduler/factory/factory.go:193: watch of *api.Service ended with: 401: The event in requested index is outdated and cleared (the requested history has been cleared [2578313/2577886]) [2579312] 203 <source> 204 @type tail 205 format multiline 206 multiline_flush_interval 5s 207 format_firstline /^\w\d{4}/ 208 format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/ 209 time_format %m%d %H:%M:%S.%N 210 path /var/log/kube-scheduler.log 211 pos_file /var/log/gcp-kube-scheduler.log.pos 212 tag kube-scheduler 213 </source> 214 215 # Example: 216 # I0603 15:31:05.793605 6 cluster_manager.go:230] Reading config from path /etc/gce.conf 217 <source> 218 @type tail 219 format multiline 220 multiline_flush_interval 5s 221 format_firstline /^\w\d{4}/ 222 format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/ 223 time_format %m%d %H:%M:%S.%N 224 path /var/log/glbc.log 225 pos_file /var/log/gcp-glbc.log.pos 226 tag glbc 227 </source> 228 229 # Example: 230 # I0603 15:31:05.793605 6 cluster_manager.go:230] Reading config from path /etc/gce.conf 231 <source> 232 @type tail 233 format multiline 234 multiline_flush_interval 5s 235 format_firstline /^\w\d{4}/ 236 format1 /^(?<severity>\w)(?<time>\d{4} [^\s]*)\s+(?<pid>\d+)\s+(?<source>[^ \]]+)\] (?<message>.*)/ 237 time_format %m%d %H:%M:%S.%N 238 path /var/log/cluster-autoscaler.log 239 pos_file /var/log/gcp-cluster-autoscaler.log.pos 240 tag cluster-autoscaler 241 </source> 242 243 # Logs from systemd-journal for interesting services. 244 # TODO(random-liu): Keep this for compatibility, remove this after 245 # cri container runtime rolls out. 
246 <source> 247 @type systemd 248 filters [{ "_SYSTEMD_UNIT": "docker.service" }] 249 pos_file /var/log/gcp-journald-docker.pos 250 read_from_head true 251 tag docker 252 </source> 253 254 <source> 255 @type systemd 256 filters [{ "_SYSTEMD_UNIT": "{{ fluentd_container_runtime_service }}.service" }] 257 pos_file /var/log/gcp-journald-container-runtime.pos 258 read_from_head true 259 tag container-runtime 260 </source> 261 262 <source> 263 @type systemd 264 filters [{ "_SYSTEMD_UNIT": "kubelet.service" }] 265 pos_file /var/log/gcp-journald-kubelet.pos 266 read_from_head true 267 tag kubelet 268 </source> 269 270 <source> 271 @type systemd 272 filters [{ "_SYSTEMD_UNIT": "node-problem-detector.service" }] 273 pos_file /var/log/gcp-journald-node-problem-detector.pos 274 read_from_head true 275 tag node-problem-detector 276 </source> 277 278 # BEGIN_NODE_JOURNAL 279 # Whether to include node-journal or not is determined when starting the 280 # cluster. It is not changed when the cluster is already running. 281 <source> 282 @type systemd 283 pos_file /var/log/gcp-journald.pos 284 read_from_head true 285 tag node-journal 286 </source> 287 288 <filter node-journal> 289 @type grep 290 <exclude> 291 key _SYSTEMD_UNIT 292 pattern ^(docker|{{ fluentd_container_runtime_service }}|kubelet|node-problem-detector)\.service$ 293 </exclude> 294 </filter> 295 # END_NODE_JOURNAL 296 monitoring.conf: |- 297 # This source is used to acquire an approximate process start timestamp, 298 # whose purpose is explained before the corresponding output plugin. 299 <source> 300 @type exec 301 command /bin/sh -c 'date +%s' 302 tag process_start 303 time_format %Y-%m-%d %H:%M:%S 304 keys process_start_timestamp 305 </source> 306 307 # This filter is used to convert the process start timestamp to an integer 308 # value for correct ingestion in the prometheus output plugin.
309 <filter process_start> 310 @type record_transformer 311 enable_ruby true 312 auto_typecast true 313 <record> 314 process_start_timestamp ${record["process_start_timestamp"].to_i} 315 </record> 316 </filter> 317 output.conf: |- 318 # This match is placed before the all-matching output to provide the metric 319 # exporter with a process start timestamp for correct exporting of 320 # cumulative metrics to Stackdriver. 321 <match process_start> 322 @type prometheus 323 324 <metric> 325 type gauge 326 name process_start_time_seconds 327 desc Timestamp of the process start in seconds 328 key process_start_timestamp 329 </metric> 330 </match> 331 332 # This filter counts the number of log entries read by fluentd 333 # before they are processed by the output plugin. This in turn makes it possible to 334 # monitor the number of log entries that were read but never sent, e.g. 335 # because of the liveness probe removing the buffer. 336 <filter **> 337 @type prometheus 338 <metric> 339 type counter 340 name logging_entry_count 341 desc Total number of log entries generated by either application containers or system components 342 </metric> 343 </filter> 344 345 # TODO(instrumentation): Reconsider this workaround later. 346 # Trim the entries which exceed slightly less than 100KB, to avoid 347 # dropping them. It is a necessity, because Stackdriver only supports 348 # entries that are up to 100KB in size. 349 <filter kubernetes.**> 350 @type record_transformer 351 enable_ruby true 352 <record> 353 log ${record['log'].length > 100000 ? "[Trimmed]#{record['log'][0..100000]}..." : record['log']} 354 </record> 355 </filter> 356 357 # Do not collect fluentd's own logs to avoid infinite loops.
358 <match fluent.**> 359 @type null 360 </match> 361 362 # We use 2 output stanzas - one to handle the container logs and one to handle 363 # the node daemon logs, the latter of which explicitly sends its logs to the 364 # compute.googleapis.com service rather than container.googleapis.com to keep 365 # them separate since most users don't care about the node logs. 366 <match kubernetes.**> 367 @type google_cloud 368 369 # Try to detect JSON formatted log entries. 370 detect_json true 371 # Collect metrics in Prometheus registry about plugin activity. 372 enable_monitoring true 373 monitoring_type prometheus 374 # Allow log entries from multiple containers to be sent in the same request. 375 split_logs_by_tag false 376 # Set the buffer type to file to improve the reliability and reduce the memory consumption 377 buffer_type file 378 buffer_path /var/log/fluentd-buffers/kubernetes.containers.buffer 379 # Set queue_full action to block because we want to pause gracefully 380 # in case of the off-the-limits load instead of throwing an exception 381 buffer_queue_full_action block 382 # Set the chunk limit conservatively to avoid exceeding the recommended 383 # chunk size of 5MB per write request. 384 buffer_chunk_limit 1M 385 # Cap the combined memory usage of this buffer and the one below to 386 # 1MiB/chunk * (6 + 2) chunks = 8 MiB 387 buffer_queue_limit 6 388 # Never wait more than 5 seconds before flushing logs in the non-error case. 389 flush_interval 5s 390 # Never wait longer than 30 seconds between retries. 391 max_retry_wait 30 392 # Disable the limit on the number of retries (retry forever). 393 disable_retry_limit 394 # Use multiple threads for processing. 395 num_threads 2 396 use_grpc true 397 </match> 398 399 # Keep a smaller buffer here since these logs are less important than the user's 400 # container logs. 
401 <match **> 402 @type google_cloud 403 404 detect_json true 405 enable_monitoring true 406 monitoring_type prometheus 407 # Allow entries from multiple system logs to be sent in the same request. 408 split_logs_by_tag false 409 detect_subservice false 410 buffer_type file 411 buffer_path /var/log/fluentd-buffers/kubernetes.system.buffer 412 buffer_queue_full_action block 413 buffer_chunk_limit 1M 414 buffer_queue_limit 2 415 flush_interval 5s 416 max_retry_wait 30 417 disable_retry_limit 418 num_threads 2 419 use_grpc true 420 </match> 421 metadata: 422 name: fluentd-gcp-config-old-v1.2.5 423 namespace: kube-system 424 labels: 425 addonmanager.kubernetes.io/mode: Reconcile