Filtering csv headers with different lengths

I have two slightly different CSV headers I would like to catch under a filter:

ACTION,DISCRIMINATOR,CHECKPOINT_ID,INSTALLATION_ID,NUMERIC_VALUE,STRING_VALUE,COMPONENT_NAME,TRANSACTION_ID,STATISTIC_DURATION,STATISTIC_NAME,ASSOCIATED_NAME,SINGLE_LATENCY,AVERAGE_LATENCY,MAX_LATENCY,MIN_LATENCY,TPS,MAX_LATENCY_TRANSACTION_ID,CREATED_ON_DATE,TOTAL_TRANSACTIONS,PROCESS_NAME,SLA1,SLA2,EXCEPTION_COUNT,CATEGORY,P95,P99,P999,P9999,P99999

and

ACTION,DISCRIMINATOR,CHECKPOINT_ID,INSTALLATION_ID,NUMERIC_VALUE,STRING_VALUE,COMPONENT_NAME,TRANSACTION_ID,STATISTIC_DURATION,STATISTIC_NAME,ASSOCIATED_NAME,SINGLE_LATENCY,AVERAGE_LATENCY,MAX_LATENCY,MIN_LATENCY,TPS,MAX_LATENCY_TRANSACTION_ID,CREATED_ON_DATE,TOTAL_TRANSACTIONS,PROCESS_NAME,SLA1,SLA2,EXCEPTION_COUNT,CATEGORY

To do this I have the following filter:

filter {
    if [fields][target-index] == "openet-fw-stats" {
        # Parse both header formats with ONE grok filter holding an array of
        # patterns: grok tries them in order and stops at the first match
        # (break_on_match defaults to true). With two separate grok blocks,
        # an event in the older 24-column format fails the first grok, gets
        # tagged _grokparsefailure, and is then dropped below even though the
        # second grok parsed it successfully. The longer (29-column) pattern
        # must come first: the shorter pattern is an unanchored prefix of it
        # and would otherwise match new-format lines too, silently losing the
        # P95..P99999 columns.
        # NOTE(review): a csv filter with an explicit "columns" list would be
        # a more natural fit for this data and avoid grok entirely — worth
        # considering.
        grok {
            match => { "message" => [
                "%{WORD:ACTION},%{WORD:DISCRIMINATOR},%{DATA:CHECKPOINT_ID},%{DATA:INSTALLATION_ID},%{INT:NUMERIC_VALUE},%{DATA:STRING_VALUE},%{DATA:COMPONENT_NAME},%{DATA:TRANSACTION_ID},%{INT:STATISTIC_DURATION},%{DATA:STATISTIC_NAME},%{DATA:ASSOCIATED_NAME},%{INT:SINGLE_LATENCY},%{INT:AVERAGE_LATENCY},%{INT:MAX_LATENCY},%{INT:MIN_LATENCY},%{INT:TPS},%{DATA:MAX_LATENCY_TRANSACTION_ID},%{INT:CREATED_ON_DATE},%{INT:TOTAL_TRANSACTIONS},%{DATA:PROCESS_NAME},%{INT:SLA1},%{INT:SLA2},%{INT:EXCEPTION_COUNT},%{WORD:CATEGORY},%{INT:P95},%{INT:P99},%{INT:P999},%{INT:P9999},%{INT:P99999}",
                "%{WORD:ACTION},%{WORD:DISCRIMINATOR},%{DATA:CHECKPOINT_ID},%{DATA:INSTALLATION_ID},%{INT:NUMERIC_VALUE},%{DATA:STRING_VALUE},%{DATA:COMPONENT_NAME},%{DATA:TRANSACTION_ID},%{INT:STATISTIC_DURATION},%{DATA:STATISTIC_NAME},%{DATA:ASSOCIATED_NAME},%{INT:SINGLE_LATENCY},%{INT:AVERAGE_LATENCY},%{INT:MAX_LATENCY},%{INT:MIN_LATENCY},%{INT:TPS},%{DATA:MAX_LATENCY_TRANSACTION_ID},%{INT:CREATED_ON_DATE},%{INT:TOTAL_TRANSACTIONS},%{DATA:PROCESS_NAME},%{INT:SLA1},%{INT:SLA2},%{INT:EXCEPTION_COUNT},%{WORD:CATEGORY}"
            ] }
        }
        # Discard anything (e.g. the header line itself) matching neither format.
        if "_grokparsefailure" in [tags] {
            drop {}
        } else {
            # CREATED_ON_DATE is epoch milliseconds (still a string here);
            # the first 10 characters are the epoch seconds, used to build a
            # yyyy.MM.dd date string, presumably for index naming — confirm
            # against the output section.
            ruby {
                code => "event.set('FW_DATE',Time.at(event.get('CREATED_ON_DATE')[0..9].to_i).strftime('%Y.%m.%d'))"
            }
            if "_rubyexception" in [tags] {
                drop {}
            } else {
                mutate {
                    # remove_field silently ignores fields that are absent, so
                    # listing P95..P99999 is safe even for old-format events
                    # that never produced them.
                    # NOTE: mutate applies its operations in a fixed internal
                    # order (replace runs before convert and gsub), not the
                    # order they are written here.
                    remove_field => ["ACTION","DISCRIMINATOR","CHECKPOINT_ID","STRING_VALUE","P95","P99","P999","P9999","P99999"]
                    gsub => [
                        "ASSOCIATED_NAME", "\"", "",
                        "COMPONENT_NAME", "\"", "",
                        "STATISTIC_NAME", "\"", "",
                        "INSTALLATION_ID", "\"", "",
                        "PROCESS_NAME", "[\",]", "",
                        "PROCESS_NAME", "^[0-9]*", ""
                    ]
                    convert => {
                        "CREATED_ON_DATE" => "integer"
                        "NUMERIC_VALUE" => "integer"
                        "STATISTIC_DURATION" => "integer"
                        "SINGLE_LATENCY" => "integer"
                        "AVERAGE_LATENCY" => "integer"
                        "MAX_LATENCY" => "integer"
                        "MIN_LATENCY" => "integer"
                        "TPS" => "integer"
                        "TOTAL_TRANSACTIONS" => "integer"
                        "SLA1" => "integer"
                        "SLA2" => "integer"
                        "EXCEPTION_COUNT" => "integer"
                    }
                    replace => {
                        "INSTALLATION_ID" => "%{[host][name]}"
                    }
                }
                # CREATED_ON_DATE is epoch milliseconds, hence UNIX_MS.
                date {
                    match => ["CREATED_ON_DATE", "UNIX_MS"]
                    target => "@timestamp"
                }
            }
        }
    }
}

I am wondering if this is the right way to do it? In particular, the remove_field section lists the P95–P99999 fields, which won't exist for events matched by the second (older-format) grok pattern.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.