Problem with tags field in Elasticsearch

Hi all,

I have an issue when I take input from a file and enrich it with the jdbc_streaming filter.

This is the configuration I used:

input 
{
file {
  path => "/home/sgpl/ss"
  type => "end"
  sincedb_path => "/tmp/titanickk"
   start_position => "beginning"
   codec => "json"
    }

}
filter
{

jdbc_streaming {
      jdbc_driver_library => "/usr/share/java/mysql-connector-java-5.1.38.jar"
      jdbc_driver_class => "com.mysql.jdbc.Driver"
      jdbc_connection_string => "jdbc:mysql://xxxxxxxxxxxxxx:3306/xxxxxxxx"
      jdbc_user => "root"
      jdbc_password => "access"
      statement => "SELECT * from InstancesNew WHERE PrivateIP=:host"
      parameters => { "host" => "[tags][node_host]"}      
      target => "Instance"
  }
mutate {
add_field => { "status" => "%{[Instance][0][Status]}" "launchtime" => "%{[Instance][0][LaunchTime]}" "architecture" => "%{[Instance][0][Architecture]}" "instanceid" => "%{[Instance][0][InstanceId]}" "publicip" => "%{[Instance][0][PublicIP]}" "subnetid" => "%{[Instance][0][SubnetId]}" "securitygroup_groupname" => "%{[Instance][0][SecurityGroup_GroupName]}" "securitygroup_groupid" => "%{[Instance][0][SecurityGroup_GroupID]}" "monitoringstate" => "%{[Instance][0][MonitoringState]}" "keyname" => "%{[Instance][0][KeyName]}" "volumeid" => "%{[Instance][0][VolumeID]}" "vpcid" => "%{[Instance][0][VpcId]}" "ownerid" => "%{[Instance][0][OwnerId]}" "privateip" => "%{[Instance][0][PrivateIP]}" "imageid" => "%{[Instance][0][ImageId]}" "region" => "%{[Instance][0][Region]}" "instancetype" => "%{[Instance][0][InstanceType]}" "instancetags" => "%{[Instance][0][InstanceTags]}" }
}
#prune {
#  blacklist_names => ["^Instance$"]
#}
}
output
{
stdout { codec => rubydebug }
}

And the output looks like this:

    "securitygroup_groupname" => "xxxxxxx",
            "monitoringstate" => "disabled",
                    "imageid" => "xxxxxxxxx",
                   "Instance" => [
        [0] {
                             "Status" => "running",
                         "LaunchTime" => "2017-11-07T02:46:59.000Z",
                       "Architecture" => "x86_64",
                         "InstanceId" => "xxxxxxx",
                           "PublicIP" => "None",
            "SecurityGroup_GroupName" => "xxxxxx",
              "SecurityGroup_GroupID" => "xxxxxxxx",
                           "SubnetId" => "xxxxxxxxx",
                    "MonitoringState" => "disabled",
                            "KeyName" => "xxxxxxxxxx",
                           "VolumeID" => "xxxxxxx, xxxxxxxxxxx",
                              "VpcId" => "xxxxxxxxxxx",
                            "OwnerId" => "xxxxxxxxxxxxxx",
                          "PrivateIP" => "100.10.2.198",
                            "ImageId" => "xxxxxxxx",
                             "Region" => "us-east-1b",
                       "InstanceType" => "m4.xlarge",
                       "InstanceTags" => "{Key: Name, Value: sfty-es-node1}, {Key: Automation, Value: 9AM-7PM EST}, {Key: Organization, Value: Log Analytics}, {Key: ServerDescription, Value: ES Node}"
        }
    ],
                   "publicip" => "None",
                       "type" => "end",
                       "path" => "/home/sgpl/ss",
                   "@version" => "1",
                       "host" => "elasticsearch_masternode",
               "instancetype" => "m4.xlarge",
                    "keyname" => "xxxxxxxxxx",
                  "timestamp" => 1512902821,
               "architecture" => "x86_64",
                 "launchtime" => "2017-11-07T02:46:59.000Z",
                   "subnetid" => "xxxxxxxxxxxxxx",
                  "privateip" => "100.10.2.198",
                    "ownerid" => "xxxxxxxxxxxxxx",
                       "tags" => [
        [ 0] [
            [0] "cluster_name",
            [1] "aws_cluster"
        ],
        [ 1] [
            [0] "resourcegroup",
            [1] " "
        ],
        [ 2] [
            [0] "os",
            [1] "linux"
        ],
        [ 3] [
            [0] "loc_region",
            [1] " "
        ],
        [ 4] [
            [0] "loc_dc",
            [1] " "
        ],
        [ 5] [
            [0] "node_name",
            [1] "node_1"
        ],
        [ 6] [
            [0] "resourcevendor",
            [1] " "
        ],
        [ 7] [
            [0] "retrieval_type",
            [1] "agent"
        ],
        [ 8] [
            [0] "instance_id",
            [1] " "
        ],
        [ 9] [
            [0] "host",
            [1] "ip-100.10.2.198"
        ],
        [10] [
            [0] "volumeid",
            [1] " "
        ],
        [11] [
            [0] "node_host",
            [1] "100.10.2.198"
        ],
        [12] [
            [0] "node_id",
            [1] "CAP8ur"
        ]
    ],
                 "@timestamp" => 2017-12-11T10:46:10.100Z,
                 "instanceid" => "XXXXXXXXXXX",
                      "vpcid" => "xxxxxxxxxxxxx",
                       "name" => "elasticsearch_http",
                   "volumeid" => "xxxxxxxxxx, xxxxxxxxxxx",
               "instancetags" => "{Key: Name, Value: sfty-es-node1}, {Key: Automation, Value: 9AM-7PM EST}, {Key: Organization, Value: Log Analytics}, {Key: ServerDescription, Value: ES Node}",
                     "fields" => {
        "total_opened" => 82011,
        "current_open" => 84
    },
                     "region" => "us-east-1b",
      "securitygroup_groupid" => "xxxxxxxxx",
                     "status" => "running"
}

In this output you can see that the tags field is split into multiple arrays, but I am expecting it in this form:

    "tags" => [
       [0] {
            "resourcegroup" => " ",
              "instance_id" => " ",
                       "os" => "linux",
               "loc_region" => " ",
                     "host" => "ip-100.10.2.198",
                   "loc_dc" => " ",
           "resourcevendor" => " ",
                 "volumeid" => " ",
           "retrieval_type" => "agent"
       },

How would I do that?
Here is a sample input:

{"@timestamp":"2017-12-11T10:46:10.100Z","name":"elasticsearch_http","@version":"1","fields":{"total_opened":82011,"current_open":84},"tags":{"cluster_name":"aws_cluster","resourcegroup":" ","os":"linux","loc_region":" ","loc_dc":" ","node_name":"node_1","resourcevendor":" ","retrieval_type":"agent","instance_id":" ","host":"ip-100-100-2-198","volumeid":" ","node_host":"100.10.2.198","node_id":"CAP8ur"},"timestamp":1512902821}

Thank you.

Moved to Logstash forum

The tags field is a bit special in that it, by definition, is a list of strings. Unless you can change the contents of the JSON file and rename the tags field I suspect you have to use a json filter to deserialize the JSON string into a subfield, rename the tags subfield, then move all fields into the top level with a ruby filter.

1 Like

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.