Logstash filter mutate remove_field inside of an array


(Anthony) #1

I was trying to filter out all references to "volumes.lun-mapping-list" in my JSON http_poller input. This field repeats multiple times, but its data is not valuable, and it is an array nested inside another array, so ES doesn't like that at all when indexing.
Here is the filter I was trying...

# Filter as originally posted; the poster reports that it does not remove the field.
filter {
        mutate {
                # NOTE(review): mutate runs before json here, so if the payload is still raw
                # text in "message" there is no [volumes] field to remove yet.
                # NOTE(review): the trailing 0 sits outside the brackets; an array index is
                # normally written as its own [0] segment of the field reference.
                remove_field => ["[volumes][lun-mapping-list]0"]
                }
        json {
                source => "message"
               }   
}

The filter above does NOT work...

Here is a snippet of the input:

{
"params": {
    "id-property": "vol-id"
}, 
"volumes": [
    {
        "small-io-alerts": "disabled", 
        "last-refreshed-from-obj-name": null, 
        "obj-severity": "information", 
        "rd-bw": "13", 
        "iops": "50", 
        "replication-wr-bw-kbps": 0, 
        "qos-effective-bw": null, 
        "lb-size": 512, 
        "qos-exceeded-iops": "0", 
        "unaligned-rd-iops": "0", 
        "vaai-tp-alerts": "enabled", 
        "unaligned-io-alerts": "disabled", 
        "qos-exceeded-bw": "0", 
        "unique-physical-space": "0", 
        "tag-list": [], 
        "unaligned-io-ratio": "54", 
        "lun-mapping-list": [
            [
                [
                    "2c571a17371c48c5a96f1d3d8fe7c952", 
                    "dell_r640_esxi_cluster", 
                    2
                ], 
                [
                    "3635e0f1a92e4868a4df1cac9f6630ae", 
                    "Default", 
                    1
                ], 
                1
            ]
        ], 
        "wr-iops": "46", 
        "name": "xio-ds1", 
        "acc-num-of-unaligned-wr": "1852209230", 
        "related-consistency-groups": [], 
        "acc-num-of-wr": "3076892180", 
        "acc-size-of-rd": "28256380889", 
        "created-by-app": "xms", 
        "vol-id": [
            "d49282122bcb45b3a976c409686f9094", 
            "xio-ds1", 
            1
        ], 
        "snapgrp-index": 1, 
        "snapgrp-copy-efficiency": "0", 
        "small-io-ratio-level": "ok", 
        "alignment-offset": 0, 
        "snapshot-type": "regular", 
        "index": 1, 
        "small-rd-bw": "8", 
        "naa-name": "514f0c501fc00001", 
        "acc-num-of-small-rd": "424465044", 
        "xms-id": [
            "82682656e5d5408fbc675c046bf57a84", 
            "xms", 
            1
        ], 
        "small-wr-bw": "98", 
        "qos-implicit-obj-list": [], 
        "bw": "726", 
        "manager-guid": null, 
        "avg-latency": "529", 
        "snapgrp-name": "", 
        "certainty": "ok", 
        "vol-type": "regular", 
        "small-io-ratio": "60", 
        "unaligned-wr-iops": "32", 
        "performance-data-collection": "inherit_cluster_mode", 
        "qos-effective-max-bw": "0", 
        "application-type": "", 
        "wr-bw": "713", 
        "ancestor-vol-name": null, 
        "small-iops": "29", 
        "wr-latency": "579", 
        "unaligned-rd-bw": "4", 
        "num-of-dest-snaps": 0, 
        "acc-size-of-wr": "50490389189", 
        "href": 
 "https://10.237.33.100/api/json/v3/types/volumes/d49282122bcb45b3a976c409686f9094", 
        "num-of-lun-mappings": 1, 
        "acc-num-of-small-wr": "1822018810", 
        "guid": "d49282122bcb45b3a976c409686f9094", 
        "unaligned-io-ratio-level": "ok", 
        "acc-num-of-rd": "628415657", 
        "data-reduction-ratio": "0", 
        "qos-enabled-mode": "", 
        "management-locked": false, 
        "snapgrp-id": [
            "815f66c07b484a428c456b9fe53a68a6", 
            "", 
            1
        ], 
        "ancestor-vol-id": [], 
        "last-refresh-from-obj-id": null, 
        "acc-num-of-unaligned-rd": "168918349", 
        "unaligned-iops": "32", 
        "calculate-savings-timestamp": "", 
        "sys-name": "norcalXtrem", 
        "sys-id": [
            "bb4a8998e6fc4457ac7b23979cd15be4", 
            "norcalXtrem", 
            1
        ], 
        "rd-latency": "124", 
        "replication-rd-iops-16kb": 0, 
        "vol-access": "write_access", 
        "unaligned-wr-bw": "660", 
        "replication-wr-iops-16kb": 0, 
        "host-accessible-size": "0", 
        "dest-snap-list": [], 
        "snapgrp-unique-physical-space": "0", 
        "sys-index": 1, 
        "logical-space-in-use": "1628867560", 
        "snapset-list": [], 
        "created-by-external-client": "", 
        "created-from-volume": "", 
        "creation-time": "2017-06-19 20:26:29", 
        "snapgrp-drr": "0", 
        "last-refresh-time": "1497903989184", 
        "unaligned-bw": "664", 
        "replication-rd-bw-kbps": 0, 
        "small-rd-iops": "4", 
        "vol-size": "2147483648", 
        "small-wr-iops": "25", 
        "rd-iops": "4", 
        "qos-policy-id": [], 
        "qos-effective-burst-bw": "0", 
        "small-bw": "106"
    },

Thanks in advance!


Logstash to Elastic search, Could not index event to Elasticsearch , "reason"=>"mapper [] of different type, current_type [long], merged_type [text]
(Paris Mermigkas) #2

You should try reversing the filtering order: first decode the event as JSON, and then remove the field by referencing its path.
While the event is still plaintext (before it is decoded as JSON), [volumes][lun-mapping-list] is just a substring and not an actual field, so Logstash doesn't have anything to remove anyway.

# Decode the JSON first so that the nested fields exist, then remove the unwanted one.
filter {
	json {
		source => "message"
	}
	mutate {
		# NOTE(review): volumes is an array, so this path likely needs an index
		# segment such as [volumes][0] before it matches anything.
		remove_field => ["[volumes][lun-mapping-list]"]
	}
}

(Anthony) #3

I tried that string, but it's still trying to send the field to Elasticsearch...

I was hoping to just remove the field to clear the error.

This is from the Logstash logs:

 Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>nil, :_index=>"norcalx1", :_type=>"doc", :routing=>nil}, #<LogStash::Event:0x508b5dfa>], :response=>{"index"=>{"_index"=>"norcalx1", "_type"=>"doc", "_id"=>"k8k282gBpI0vWukte0YL", "status"=>400, "error"=>{"type"=>"illegal_argument_exception", "reason"=>"mapper [volumes.lun-mapping-list] of different type, current_type [long], merged_type [text]"}}}}

#4

volumes is an array. Try

mutate { remove_field => [ "[volumes][0][lun-mapping-list]" ] }

(Anthony) #5

same error :frowning:

So I am trying to remove lun-mapping-list, which is an array itself, and volumes is an array too.

So I even tried

mutate { remove_field => [ "[volumes][0][lun-mapping-list][0]" ] }

All of them produce the same error:

Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>nil, 
:_index=>"norcalx1", :_type=>"doc", :routing=>nil}, #<LogStash::Event:0x2b494f22>],         
:response=>{"index"=>{"_index"=>"norcalx1", "_type"=>"doc",
 "_id"=>"n8lV82gBpI0vWuktnkYJ", "status"=>400, "error"=> 
  {"type"=>"illegal_argument_exception", "reason"=>"mapper [volumes.lun-mapping-list] of 
   different type, current_type [long], merged_type [text]"}}}}

I believe it must be a formatting issue. Does remove_field even work when trying to remove entire arrays? lun-mapping-list is an array in itself too. I'm not sure if there is supposed to be some sort of wildcard. Just trying to work it out.


#6

That will remove the lun-mapping-list object from the first entry in the volumes array. It makes no difference whether lun-mapping-list is an array or not.

Can you add a stdout output

output { stdout { codec => rubydebug } }

and see if lun-mapping-list appears in the output?


(Anthony) #7

Badger, thanks for helping; I'm just trying to work through this.

How do I check the output other than by looking at the index in Elasticsearch? "GET /norcalx1"?

here is my conf for this one.

# Pipeline as posted: polls the /types/volumes endpoint over HTTP, removes
# lun-mapping-list from the first volumes entry, and writes each event to both
# stdout and Elasticsearch.
input {
  http_poller {
    urls => {
   norcalxio => {
    # Supports all options supported by ruby's Manticore HTTP client
    method => get
    user => "xxxxx"
    password => "xxxxxx"
    url => "https://10.237.33.100/api/json/v3/types/volumes?full=1"
    headers => {
      Accept => "application/json"
    }
     }
    }
   request_timeout => 60
    # Supports "cron", "every", "at" and "in" schedules by rufus scheduler
    # This cron expression fires every minute.
   schedule => { cron => "* * * * * UTC"}
   # The response body is decoded as JSON by the codec.
   codec => "json"
   # A hash of request metadata info (timing, response headers, etc.) will be sent here
   metadata_target => "http_poller_metadata"
   cacert => "/etc/logstash/norcalx1.cer"
 }
  }

filter {
    # NOTE(review): the input already uses codec => "json", so there may be no
    # "message" field left for this filter to parse; confirm it is still needed.
    json {
            source => "message"
    }

    # Removes lun-mapping-list from the first entry (index 0) of the volumes
    # array only; any further entries keep the field.
    mutate {
            remove_field => [ "[volumes][0][lun-mapping-list]" ]
    }

}

output {
  # Print every event in rubydebug format so the final event structure can be inspected.
  stdout {
       codec => rubydebug
}
  # Index every event into the norcalx1 index.
  elasticsearch {
        hosts => ["http://10.237.33.168:9200"]
       action => "index"
        index => "norcalx1"
  }
    }

#8

That will dump a copy of the message to stdout. If you cannot see stdout of the logstash process you could send it to a file

    output { file { path => "/tmp/rubydebug.txt" codec => rubydebug } }

(Anthony) #10

so i think its working..

What is happening is that the field shows up multiple times in the JSON. I might need to duplicate the remove_field entries so that all of those arrays get removed.

i think we made progress, THANKS BADGER!


#11

If the volumes array has multiple entries (which it probably does sometimes, otherwise there would be little reason to make it an array), then you could either add enough remove_field entries to cover all of them, or you could use a ruby filter to iterate over the array

    # Strip "lun-mapping-list" from every entry of the "volumes" array, however
    # many entries the poll returned.
    ruby {
        code => '
            volumes = event.get("volumes")
            # Only act when "volumes" really is an array; a missing field or an
            # unexpected shape is left untouched instead of raising.
            if volumes.is_a?(Array)
                # Entries that are not hashes have no key to drop and pass through as-is.
                volumes.each { |volume| volume.delete("lun-mapping-list") if volume.is_a?(Hash) }
                # Write the array back so the change is stored on the event.
                event.set("volumes", volumes)
            end
        '
    }

(Anthony) #12

Cool, thanks, I will keep this for future reference. Since I had multiple fields, I just built up a few filters like the one below. I didn't catch that ES was putting 0, 1, 2 for each different array nested under volumes. I was only able to see that once you recommended running it via the command line; then I was able to see the full structure of the import.

    # Remove lun-mapping-list from the first eight entries of the volumes array.
    # Entries beyond index 7 are not covered by this fixed list.
    mutate {
            remove_field => [
                    "[volumes][0][lun-mapping-list]",
                    "[volumes][1][lun-mapping-list]",
                    "[volumes][2][lun-mapping-list]",
                    "[volumes][3][lun-mapping-list]",
                    "[volumes][4][lun-mapping-list]",
                    "[volumes][5][lun-mapping-list]",
                    "[volumes][6][lun-mapping-list]",
                    "[volumes][7][lun-mapping-list]"
            ]
    }

(system) closed #13

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.