Logstash performance issue with filter and influxdb

I currently have a logstash config that is written as such, and I've noticed something interesting. The throughput of this filter is about 400 messages/sec, which is awfully slow.

input {
        # Consume JMX metrics from the "kafka-jmx" topic; 10 consumer
        # threads pull from the broker in parallel.
        kafka {
                bootstrap_servers => "kafka1.cloud.com:9092"
                topics => ["kafka-jmx"]
                consumer_threads => 10
        }
}
filter {
        # Parse the JSON payload shipped on the Kafka "message" field.
        json {
                source => "message"
        }
        # Extract metric dimensions from metric_path. Each pattern is anchored
        # with ^ — unanchored patterns made of %{DATA}/%{GREEDYDATA} force grok
        # to retry the match at every character offset on failure, which is a
        # well-known throughput killer. Patterns are tried in order, most
        # specific first. (KTOPIC/KPARTITION come from patterns_dir.)
        grok {
                patterns_dir => "/home/ec2-user/logstash-5.2.0/bin/patterns/"
                match => {"metric_path" => [
                        "^%{DATA:kafka_host}\.%{DATA:kafka_metric_group}:type=%{DATA:kafka_metric_type},name=%{WORD:kafka_metric_name},topic=%{KTOPIC:kafka_topic},partition=%{KPARTITION:topic_partition}\.%{GREEDYDATA:attr_type}",
                        "^%{DATA:kafka_host}\.%{DATA:kafka_metric_group}:type=%{DATA:kafka_metric_type},name=%{WORD:kafka_metric_name},topic=%{KTOPIC:kafka_topic}\.%{GREEDYDATA:attr_type}",
                        "^%{DATA:kafka_host}\.%{DATA:kafka_metric_group}:type=%{DATA:kafka_metric_type},name=%{GREEDYDATA:kafka_metric_name}\.%{GREEDYDATA:attr_type}"
                ]}
        }
        # InfluxDB wants the point time as epoch milliseconds in "time".
        ruby {
                code => "event.set('time', event.get('@timestamp').to_f * 1000 )"
        }
        mutate {
                # Raw payload is no longer needed once parsed.
                remove_field => ["message"]
                convert => {
                        "time" => "integer"
                        "metric_value_number" => "integer"
                }
        }
}

output {
        # Write each event as a point in the "jmx" measurement of sdp_metrics;
        # with use_event_fields_for_data_points, every remaining event field
        # becomes an InfluxDB field, and the names in send_as_tags become tags.
        influxdb {
                host => "ip-address"
                port => 9050
                db => "sdp_metrics"
                measurement => "jmx"
                retention_policy => "one_week"
                # Honour the "time" field set by the ruby filter instead of
                # the write-arrival time.
                allow_time_override => "true"
                exclude_fields => ["@timestamp", "@version"]
                # Points are buffered and written 100 per HTTP request.
                # NOTE(review): at this message rate a small batch size means
                # many synchronous HTTP round-trips — likely the bottleneck.
                flush_size => 100
                use_event_fields_for_data_points => "true"
                coerce_values => {"metric_value_number" => "integer"}
                send_as_tags => [ "kafka_host", "kafka_metric_group", "kafka_metric_type", "kafka_metric_name", "attr_type" ]

                }
      }

I run the filter like this:

LS_HEAP_SIZE=16g LS_JAVA_OPTS='-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=9999' ./logstash -f jmx_influx.conf -w 60 -b 350

I've noticed that if I change the output to just output { stdout { } }, the throughput increases to 2k/sec. If I then also remove the filter, it goes to 5k/sec. How can I understand why my throughput drops from 2k/sec to 400/sec when I switch to the influxdb output?

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.