Load CSV data into Elasticsearch using Logstash

Hello,

I'm trying to import a CSV file into Elasticsearch. When I run Logstash with my config file I get this:

Sending Logstash logs to C:/elastic_stack/logstash-7.6.2/logs which is now configured via log4j2.properties
[2020-04-13T20:17:24,421][WARN ][logstash.config.source.multilocal] Ignoring the 'pipelines.yml' file because modules or command line options are specified
[2020-04-13T20:17:24,552][INFO ][logstash.runner          ] Starting Logstash {"logstash.version"=>"7.6.2"}
[2020-04-13T20:17:27,285][INFO ][org.reflections.Reflections] Reflections took 41 ms to scan 1 urls, producing 20 keys and 40 values
[2020-04-13T20:17:28,281][WARN ][logstash.outputs.elasticsearch] You are using a deprecated config setting "document_type" set in elasticsearch. Deprecated settings will continue to work, but are scheduled for removal from logstash in the future. Document types are being deprecated in Elasticsearch 6.0, and removed entirely in 7.0. You should avoid this feature If you have any questions about this, please visit the #logstash channel on freenode irc. {:name=>"document_type", :plugin=><LogStash::Outputs::ElasticSearch action=>"index", index=>"data-index-1", id=>"f50f3d3c35597f0f88ef02b43e31857d7d737980bdf7205aff772cc7d03e2787", hosts=>[http://localhost:9200/], document_type=>"data", enable_metric=>true, codec=><LogStash::Codecs::Plain id=>"plain_20cd215d-eab9-4a36-b521-77461257297a", enable_metric=>true, charset=>"UTF-8">, workers=>1, manage_template=>true, template_name=>"logstash", template_overwrite=>false, doc_as_upsert=>false, script_type=>"inline", script_lang=>"painless", script_var_name=>"event", scripted_upsert=>false, retry_initial_interval=>2, retry_max_interval=>64, retry_on_conflict=>1, ilm_enabled=>"auto", ilm_rollover_alias=>"logstash", ilm_pattern=>"{now/d}-000001", ilm_policy=>"logstash-policy", ssl_certificate_verification=>true, sniffing=>false, sniffing_delay=>5, timeout=>60, pool_max=>1000, pool_max_per_route=>100, resurrect_delay=>5, validate_after_inactivity=>10000, http_compression=>false>}
[2020-04-13T20:17:30,403][INFO ][logstash.outputs.elasticsearch][main] Elasticsearch pool URLs updated {:changes=>{:removed=>[], :added=>[http://localhost:9200/]}}
[2020-04-13T20:17:30,634][WARN ][logstash.outputs.elasticsearch][main] Restored connection to ES instance {:url=>"http://localhost:9200/"}
[2020-04-13T20:17:30,703][INFO ][logstash.outputs.elasticsearch][main] ES Output version determined {:es_version=>7}
[2020-04-13T20:17:30,723][WARN ][logstash.outputs.elasticsearch][main] Detected a 6.x and above cluster: the `type` event field won't be used to determine the document _type {:es_version=>7}
[2020-04-13T20:17:30,800][INFO ][logstash.outputs.elasticsearch][main] New Elasticsearch output {:class=>"LogStash::Outputs::ElasticSearch", :hosts=>["http://localhost:9200/"]}
[2020-04-13T20:17:30,888][INFO ][logstash.outputs.elasticsearch][main] Using default mapping template
[2020-04-13T20:17:30,905][ERROR][logstash.javapipeline    ][main] Pipeline aborted due to error {:pipeline_id=>"main", :exception=>#<LogStash::ConfigurationError: translation missing: en.logstash.agent.configuration.invalid_plugin_register>, :backtrace=>["C:/elastic_stack/logstash-7.6.2/vendor/bundle/jruby/2.5.0/gems/logstash-filter-mutate-3.5.0/lib/logstash/filters/mutate.rb:222:in `block in register'", "org/jruby/RubyHash.java:1428:in `each'", "C:/elastic_stack/logstash-7.6.2/vendor/bundle/jruby/2.5.0/gems/logstash-filter-mutate-3.5.0/lib/logstash/filters/mutate.rb:220:in `register'", "org/logstash/config/ir/compiler/AbstractFilterDelegatorExt.java:56:in `register'", "C:/elastic_stack/logstash-7.6.2/logstash-core/lib/logstash/java_pipeline.rb:200:in `block in register_plugins'", "org/jruby/RubyArray.java:1814:in `each'", "C:/elastic_stack/logstash-7.6.2/logstash-core/lib/logstash/java_pipeline.rb:199:in `register_plugins'", "C:/elastic_stack/logstash-7.6.2/logstash-core/lib/logstash/java_pipeline.rb:502:in `maybe_setup_out_plugins'", "C:/elastic_stack/logstash-7.6.2/logstash-core/lib/logstash/java_pipeline.rb:212:in `start_workers'", "C:/elastic_stack/logstash-7.6.2/logstash-core/lib/logstash/java_pipeline.rb:154:in `run'", "C:/elastic_stack/logstash-7.6.2/logstash-core/lib/logstash/java_pipeline.rb:109:in `block in start'"], "pipeline.sources"=>["C:/Users/Asus/Dropbox/PFE_part2/data_logstash_configuration.conf"], :thread=>"#<Thread:0x7921b860 run>"}
[2020-04-13T20:17:30,950][ERROR][logstash.agent           ] Failed to execute action {:id=>:main, :action_type=>LogStash::ConvergeResult::FailedAction, :message=>"Could not execute action: PipelineAction::Create<main>, action_result: false", :backtrace=>nil}
[2020-04-13T20:17:31,025][INFO ][logstash.outputs.elasticsearch][main] Attempting to install template {:manage_template=>{"index_patterns"=>"logstash-*", "version"=>60001, "settings"=>{"index.refresh_interval"=>"5s", "number_of_shards"=>1}, "mappings"=>{"dynamic_templates"=>[{"message_field"=>{"path_match"=>"message", "match_mapping_type"=>"string", "mapping"=>{"type"=>"text", "norms"=>false}}}, {"string_fields"=>{"match"=>"*", "match_mapping_type"=>"string", "mapping"=>{"type"=>"text", "norms"=>false, "fields"=>{"keyword"=>{"type"=>"keyword", "ignore_above"=>256}}}}}], "properties"=>{"@timestamp"=>{"type"=>"date"}, "@version"=>{"type"=>"keyword"}, "geoip"=>{"dynamic"=>true, "properties"=>{"ip"=>{"type"=>"ip"}, "location"=>{"type"=>"geo_point"}, "latitude"=>{"type"=>"half_float"}, "longitude"=>{"type"=>"half_float"}}}}}}}
[2020-04-13T20:17:31,469][INFO ][logstash.agent           ] Successfully started Logstash API endpoint {:port=>9600}
[2020-04-13T20:17:36,558][INFO ][logstash.runner          ] Logstash shut down.

This is my conf file:

input {
  file {
    path => "C:\Users\Asus\Dropbox\PFE_part2\MOOV_ALEPE_Data.csv"
    start_position => "beginning"
  }
}

filter {
  csv {
    columns => [
      "Message",
      "Time",
      "Distance",
      "Longitude",
      "Latitude",
      "NemoEvent_GPRS_DataConnectionSuccess_DAC",
      "NemoEvent_GPRS_DataConnectionAttempt_DAA",
      "NemoEvent_GPRS_DataDisconnect_DAD"
    ]
    separator => ","
  }
  mutate { convert => ["Longitude", "half_float"] }
  mutate { convert => ["Latitude", "half_float"] }
  mutate { convert => ["NemoEvent_GPRS_DataConnectionSuccess_DAC", "integer"] }
  mutate { convert => ["NemoEvent_GPRS_DataConnectionAttempt_DAA", "integer"] }
  mutate { convert => ["NemoEvent_GPRS_DataDisconnect_DAD", "integer"] }
}

output {
  elasticsearch {
    action => "index"
    hosts => ["http://localhost:9200/"]
    index => "data-index-1"
    document_type => "data"
  }
  stdout { }
}

Could you help me please?

mutate+convert does not support half_float; use "float" instead.
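
For reference, a minimal sketch of what that change looks like in the filter block (assuming only the two coordinate conversions need touching; the mutate filter's convert accepts types such as integer, float, string and boolean, but not the Elasticsearch-only half_float mapping type):

  # convert the coordinate columns to plain floats instead of half_float
  mutate { convert => ["Longitude", "float"] }
  mutate { convert => ["Latitude", "float"] }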

@Badger

I did as you said and I got this:

C:\elastic_stack\logstash-7.6.2>.\bin\logstash -f C:\Users\Asus\Dropbox\PFE_part2\data_logstash_configuration.conf
Sending Logstash logs to C:/elastic_stack/logstash-7.6.2/logs which is now configured via log4j2.properties
[2020-04-13T22:41:21,664][WARN ][logstash.config.source.multilocal] Ignoring the 'pipelines.yml' file because modules or command line options are specified
[2020-04-13T22:41:21,802][INFO ][logstash.runner          ] Starting Logstash {"logstash.version"=>"7.6.2"}
[2020-04-13T22:41:24,570][INFO ][org.reflections.Reflections] Reflections took 36 ms to scan 1 urls, producing 20 keys and 40 values
[2020-04-13T22:41:25,846][WARN ][logstash.outputs.elasticsearch] You are using a deprecated config setting "document_type" set in elasticsearch. Deprecated settings will continue to work, but are scheduled for removal from logstash in the future. Document types are being deprecated in Elasticsearch 6.0, and removed entirely in 7.0. You should avoid this feature If you have any questions about this, please visit the #logstash channel on freenode irc. {:name=>"document_type", :plugin=><LogStash::Outputs::ElasticSearch action=>"index", index=>"data-index-1", id=>"714f8942886bc8ee8f05196307de91c7c1ed2b2c43f266d908e544d5f2f3c907", hosts=>[http://localhost:9200/], document_type=>"data", enable_metric=>true, codec=><LogStash::Codecs::Plain id=>"plain_d28de08d-b3ce-4cf6-aa40-fcce73c426cc", enable_metric=>true, charset=>"UTF-8">, workers=>1, manage_template=>true, template_name=>"logstash", template_overwrite=>false, doc_as_upsert=>false, script_type=>"inline", script_lang=>"painless", script_var_name=>"event", scripted_upsert=>false, retry_initial_interval=>2, retry_max_interval=>64, retry_on_conflict=>1, ilm_enabled=>"auto", ilm_rollover_alias=>"logstash", ilm_pattern=>"{now/d}-000001", ilm_policy=>"logstash-policy", ssl_certificate_verification=>true, sniffing=>false, sniffing_delay=>5, timeout=>60, pool_max=>1000, pool_max_per_route=>100, resurrect_delay=>5, validate_after_inactivity=>10000, http_compression=>false>}
[2020-04-13T22:41:28,258][INFO ][logstash.outputs.elasticsearch][main] Elasticsearch pool URLs updated {:changes=>{:removed=>[], :added=>[http://localhost:9200/]}}
[2020-04-13T22:41:28,482][WARN ][logstash.outputs.elasticsearch][main] Restored connection to ES instance {:url=>"http://localhost:9200/"}
[2020-04-13T22:41:28,533][INFO ][logstash.outputs.elasticsearch][main] ES Output version determined {:es_version=>7}
[2020-04-13T22:41:28,539][WARN ][logstash.outputs.elasticsearch][main] Detected a 6.x and above cluster: the `type` event field won't be used to determine the document _type {:es_version=>7}
[2020-04-13T22:41:28,607][INFO ][logstash.outputs.elasticsearch][main] New Elasticsearch output {:class=>"LogStash::Outputs::ElasticSearch", :hosts=>["http://localhost:9200/"]}
[2020-04-13T22:41:28,707][INFO ][logstash.outputs.elasticsearch][main] Using default mapping template
[2020-04-13T22:41:28,798][WARN ][org.logstash.instrument.metrics.gauge.LazyDelegatingGauge][main] A gauge metric of an unknown type (org.jruby.specialized.RubyArrayOneObject) has been created for key: cluster_uuids. This may result in invalid serialization.  It is recommended to log an issue to the responsible developer/development team.
[2020-04-13T22:41:28,831][INFO ][logstash.outputs.elasticsearch][main] Attempting to install template {:manage_template=>{"index_patterns"=>"logstash-*", "version"=>60001, "settings"=>{"index.refresh_interval"=>"5s", "number_of_shards"=>1}, "mappings"=>{"dynamic_templates"=>[{"message_field"=>{"path_match"=>"message", "match_mapping_type"=>"string", "mapping"=>{"type"=>"text", "norms"=>false}}}, {"string_fields"=>{"match"=>"*", "match_mapping_type"=>"string", "mapping"=>{"type"=>"text", "norms"=>false, "fields"=>{"keyword"=>{"type"=>"keyword", "ignore_above"=>256}}}}}], "properties"=>{"@timestamp"=>{"type"=>"date"}, "@version"=>{"type"=>"keyword"}, "geoip"=>{"dynamic"=>true, "properties"=>{"ip"=>{"type"=>"ip"}, "location"=>{"type"=>"geo_point"}, "latitude"=>{"type"=>"half_float"}, "longitude"=>{"type"=>"half_float"}}}}}}}
[2020-04-13T22:41:28,838][INFO ][logstash.javapipeline    ][main] Starting pipeline {:pipeline_id=>"main", "pipeline.workers"=>4, "pipeline.batch.size"=>125, "pipeline.batch.delay"=>50, "pipeline.max_inflight"=>500, "pipeline.sources"=>["C:/Users/Asus/Dropbox/PFE_part2/data_logstash_configuration.conf"], :thread=>"#<Thread:0x36d2d519 run>"}
[2020-04-13T22:41:31,805][INFO ][logstash.inputs.file     ][main] No sincedb_path set, generating one based on the "path" setting {:sincedb_path=>"C:/elastic_stack/logstash-7.6.2/data/plugins/inputs/file/.sincedb_b8ef3178f418bf1c1406ddd19443ab7a", :path=>["C:\\Users\\Asus\\Dropbox\\PFE_part2\\MOOV_ALEPE_Data.csv"]}
[2020-04-13T22:41:31,847][INFO ][logstash.javapipeline    ][main] Pipeline started {"pipeline.id"=>"main"}
[2020-04-13T22:41:31,930][INFO ][filewatch.observingtail  ][main] START, creating Discoverer, Watch with file and sincedb collections
[2020-04-13T22:41:31,960][INFO ][logstash.agent           ] Pipelines running {:count=>1, :running_pipelines=>[:main], :non_running_pipelines=>[]}
[2020-04-13T22:41:32,366][INFO ][logstash.agent           ] Successfully started Logstash API endpoint {:port=>9600}

OK, so it starts OK now. The other change you need to make is to change the backslashes in the path option of the file input to forward slashes.
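
As a sketch, the file input with forward slashes would look like the following (the commented sincedb_path line is an optional assumption on my part, not part of the fix; on Windows it points the sincedb at NUL so the file is re-read on every test run):

input {
  file {
    path => "C:/Users/Asus/Dropbox/PFE_part2/MOOV_ALEPE_Data.csv"
    start_position => "beginning"
    # optional while testing: don't remember how far the file has been read
    # sincedb_path => "NUL"
  }
}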

@Badger

It works, thank you, but I don't understand why I can't find the other fields ("Latitude", "NemoEvent_GPRS_DataConnectionSuccess_DAC", "NemoEvent_GPRS_DataConnectionAttempt_DAA", "NemoEvent_GPRS_DataDisconnect_DAD") in Kibana when I use:

GET /data-index-1?pretty
{
  "query" : {
    "match_all" : {}
  }
}

I only get "Message", "Time", "Distance" and "Longitude":

 {
  "data-index-1" : {
    "aliases" : { },
    "mappings" : {
      "properties" : {
        "@timestamp" : {
          "type" : "date"
        },
        "@version" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "Distance" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "Longitude" : {
          "type" : "float"
        },
        "Message" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "Time" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "host" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "message" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "path" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    },
    "settings" : {
      "index" : {
        "creation_date" : "1586821638734",
        "number_of_shards" : "1",
        "number_of_replicas" : "1",
        "uuid" : "H4ZccR5BSFWyeD-aKzCgTA",
        "version" : {
          "created" : "7060299"
        },
        "provided_name" : "data-index-1"
      }
    }
  }
}
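
(A side note, as an assumption about what was intended: GET /data-index-1 is the get-index API, so it returns the aliases, mappings and settings shown above and ignores the query body. To see the indexed documents themselves you would search the index instead, for example:

GET /data-index-1/_search
{
  "query" : {
    "match_all" : {}
  }
}

which returns the stored _source of each document, so any field that was actually parsed out of the CSV would show up there.)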
