Logstash elasticsearch-filter module return the wrong document

Hi, I'm trying to run a logstash ingestion pipeline that is able to enrich an event by querying an Elasticsearch node

But so far I can't get it to work 100%; here is what I did:

My Logstash pipeline looks like this:

input {
    http {
        port => 5140
	    ecs_compatibility => disabled
    }
}

filter {

    mutate {
        add_field => {
            "field" => "%{[headers][field]}"
            "data" => "%{[headers][data]}"
            "size" => "%{[headers][size]}"
        }
    }

    mutate {
        remove_field => [ "headers" ]
        remove_field => [ "message" ]
        remove_field => [ "host" ]
        remove_field => [ "@version" ]
    }

    elasticsearch {
        hosts => [ "localhost:9200" ]
        user => [ "logstash" ]
        index => "test_index"
        password => "${logstash.password}"
        query => "field:%{[field]} and data:%{[data]}"
        sort => "@timestamp:desc"
        fields => {
            "field" => "previousField"
            "data" => "previousData"
            "size" => "previousSize"
        }
        periodic_flush => "true"
   }

   ruby {
        code => '
        if event.get("size") == event.get("previousSize")
            event.set("sizeChangeDetected", "0")
        else
            event.set("sizeChangeDetected", "1")
        end
        '
   }
}

output {
    stdout {}
    elasticsearch {
        hosts => [ "localhost:9200" ]
        user => [ "logstash" ]
        index => "test_index"
        password => "${logstash.password}"
        index => "test_index"
    }
}

And now my stdout from logstash :

{
                 "event" => {
        "original" => ""
    },
                  "data" => "\\First line in \\document \\with weird char",
          "previousData" => "\\Fourth \\line \\in \\document",
    "sizeChangeDetected" => "1",
                  "size" => "154",
          "previousSize" => "147",
            "@timestamp" => 2022-05-20T16:56:32.092007Z,
         "previousField" => "field_4",
                 "field" => "field_1"
}
{
                 "event" => {
        "original" => ""
    },
                  "data" => "\\Second \\line \\in \\document",
          "previousData" => "\\Fourth \\line \\in \\document",
    "sizeChangeDetected" => "1",
                  "size" => "2457",
          "previousSize" => "147",
            "@timestamp" => 2022-05-20T16:56:32.129535Z,
         "previousField" => "field_4",
                 "field" => "field_2"
}
{
                 "event" => {
        "original" => ""
    },
                  "data" => "\\Third \\line \\in \\document",
          "previousData" => "\\Fourth \\line \\in \\document",
    "sizeChangeDetected" => "1",
                  "size" => "7854",
          "previousSize" => "147",
            "@timestamp" => 2022-05-20T16:56:32.156779Z,
         "previousField" => "field_4",
                 "field" => "field_3"
}
{
                 "event" => {
        "original" => ""
    },
                  "data" => "\\Fourth \\line \\in \\document",
          "previousData" => "\\Fourth \\line \\in \\document",
    "sizeChangeDetected" => "0",
                  "size" => "147",
          "previousSize" => "147",
            "@timestamp" => 2022-05-20T16:56:32.203754Z,
         "previousField" => "field_4",
                 "field" => "field_4"
}

Edit: here is the template behind the index, although I don't think it causes the problem:

{
    "template":{
        "settings":{
            "number_of_shards": 1,
            "number_of_replicas": 0
        },
        "mappings": {
            "properties": {
                "field": { 
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword", "ignore_above": 256 
                        }
                    }
                },
                "size": { 
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword", "ignore_above": 256 
                        }
                    }
                },
                "data": { 
                    "type": "text",
                    "fields": {
                        "keyword": {
                            "type": "keyword", "ignore_above": 256 
                        }
                    }
                },
                "TimeStamp": { "type": "date" }
            }
        }
    },
    "index_patterns": [
        "test_index*"
    ],
    "composed_of": []
}

As you can see, the request goes to Elasticsearch, but the answer is not the right one; I suspect characters like the backslash are the cause of the problem.

Has anyone else had this use case and can point me in the right direction? :slight_smile: thanks

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.