Remove excess line breaks from s3 log files (fluent-bit s3 output plugin)

Viewed 70 times

I am using fluent-bit s3 output plugin to upload Kubernetes pod logs to s3. I see excessive line breaks in s3 log files as below:

2024-01-24 10:03:34.510 [65b0e07526a14752251fdf7a2e309f58] INFO [Log] Extensibility.TimingLoggingInterceptor [55]: ContentAdminGrpcService/UserGroupUserGetList clientRef=f0d7efaa-a608-472d-885a-4619945a2b03 == {15} ms

2024-01-24 10:03:34.607 [65b0e07526a14752251fdf7a2e309f58] INFO [Log] Extensibility.TimingLoggingInterceptor [49]: ContentAdminGrpcService/UserGroupBatchUserGetList clientRef=f0d7efaa-a608-472d-885a-4619945a2b03 == {59} ms

2024-01-24 10:03:34.657 [65b0e07526a14752251fdf7a2e309f58] INFO [Log] Extensibility.TimingLoggingInterceptor [49]: ContentAdminGrpcService/UserGroupUserGetList clientRef=f0d7efaa-a608-472d-885a-4619945a2b03 == {15} ms

The same logs in /var/log/containers/ look like below:

{"log":"2024-01-24 10:03:34.510 [65b0e07526a14752251fdf7a2e309f58] INFO [Log] Extensibility.TimingLoggingInterceptor [55]: ContentAdminGrpcService/UserGroupUserGetList clientRef=f0d7efaa-a608-472d-885a-4619945a2b03 == {15} ms\n","stream":"stdout"}
{"log":"2024-01-24 10:03:34.607 [65b0e07526a14752251fdf7a2e309f58] INFO [Log] Extensibility.TimingLoggingInterceptor [49]: ContentAdminGrpcService/UserGroupBatchUserGetList clientRef=f0d7efaa-a608-472d-885a-4619945a2b03 == {59} ms\n","stream":"stdout"}
{"log":"2024-01-24 10:03:34.657 [65b0e07526a14752251fdf7a2e309f58] INFO [Log] Extensibility.TimingLoggingInterceptor [49]: ContentAdminGrpcService/UserGroupUserGetList clientRef=f0d7efaa-a608-472d-885a-4619945a2b03 == {15} ms\n","stream":"stdout"}

How can I remove that \n while uploading log files to s3?

My fluent-bit config:

[SERVICE]
    HTTP_Server  On
    HTTP_Listen  0.0.0.0
    HTTP_PORT    2020
    Health_Check On
    HC_Errors_Count 5
    HC_Retry_Failure_Count 5
    HC_Period 5

    Parsers_File /fluent-bit/parsers/parsers.conf
    Parsers_File /fluent-bit/etc/parser_extra.conf
    Parsers_File /fluent-bit/etc/parsers_multiline.conf
[INPUT]
    Name              tail
    Tag               kube.*
    Path              /var/log/containers/*.log
    DB                /var/log/flb_kube.db
    multiline.parser  docker, cri
    Mem_Buf_Limit     5MB
    Skip_Long_Lines   On
    Refresh_Interval  10
    Exclude_Path /var/log/containers/*aws-for-fluent-bit*,/var/log/containers/*cluster-autoscaler*,/var/log/containers/*kube-proxy*,/var/log/containers/*external-dns*,/var/log/containers/*ebs-csi-controller*,/var/log/containers/*istio-ingressgateway*,/var/log/containers/*istiod*,/var/log/containers/*istio-proxy*,/var/log/containers/*istio-init*,/var/log/containers/*kube-system*
    Buffer_Chunk_Size 512k
    Buffer_Max_Size 512k

[FILTER]
    Name  multiline
    Match kube.*
    multiline.key_content log
    multiline.parser  multiline_logs
[FILTER]
    Name  kubernetes
    Match kube.*
    Kube_URL  https://kubernetes.default.svc.cluster.local:443
    Merge_Log On
    Keep_Log  Off
    K8S-Logging.Parser  On
    K8S-Logging.Exclude On
    Buffer_Size 0
    Labels  On
    Annotations Off
[FILTER]
    Name  lua
    Match kube.*
    code function append_tag(tag, timestamp, record) new_record = record if (new_record["kubernetes"]["labels"]["app.kubernetes.io/component-name"] ~= nil) then new_record["tag"] = "all_logs." .. tag .. "." .. new_record["kubernetes"]["labels"]["app.kubernetes.io/component-name"] end return 1, timestamp, new_record end
    call append_tag
[FILTER]
    Name rewrite_tag
    Match kube.*
    rule $log ^.*$ $tag true
[FILTER]
    Name            nest
    Match           kube.*
    Operation       lift
    Nested_under    kubernetes
    Add_prefix      kubernetes.
[FILTER]
    Name            nest
    Match           kube.*
    Operation       lift
    Nested_under    kubernetes.labels
    Add_prefix      kubernetes.labels.
[FILTER]
    Name  modify
    Match kube.*
    Condition Key_exists kubernetes.labels.app.kubernetes.io/component-name
    Rename  kubernetes.labels.app.kubernetes.io/component-name facility
[FILTER]
    Name  modify
    Match kube.*
    Condition Key_Does_Not_Exist kubernetes.labels.app.kubernetes.io/component-name
    Rename  kubernetes.container_name facility
[FILTER]
    Name record_modifier
    Match kube.*
    Remove_key kubernetes.container_hash
    Remove_key kubernetes.pod_id
    Remove_key kubernetes.docker_id
    Remove_key kubernetes.labels.pod-template-hash
    Remove_key kubernetes.labels.security.istio.io/tlsMode
    Remove_key kubernetes.labels.service.istio.io/canonical-revision
    Remove_key kubernetes.labels.service.istio.io/canonical-name
[FILTER]
    Name  parser
    Match kube.*
    key_name  log
    parser logs_parser
    reserve_data  true
[FILTER]
    Name  parser
    Match kube.*
    key_name  log
    parser  logs2_parser
    reserve_data  true
[FILTER]
    Name  grep
    Match kube.*
    regex log_lvl (WARN|ERROR)
[OUTPUT]
    name  s3
    match all_logs.*
    bucket  logs-bucket
    region  us-east-1
    total_file_size 10M
    upload_timeout  60m
    log_key log
    content_type  text/plain
    static_file_path  true
    s3_key_format /ApplicationsLogs/$TAG[7]/%Y/%m/%d/$TAG[7]_%Y%m%d_%H%M%S.log

I tried using `Merge_Log_Trim On` in the Kubernetes filter, but it didn't help.

UPDATE

Solved it with the Lua filter below, which strips the trailing newline from the `log` field before the records reach the S3 output:

  [FILTER]
      Name lua
      Match all_logs.*
      code  function remove_last_newline(tag, timestamp, record) local log_message = record["log"] if log_message then record["log"] = log_message:gsub("\n$", "") end return 1, timestamp, record end
      call remove_last_newline
0

There are 0 best solutions below