Multiple csv files in data folder and one column in common to relate the data between files.
created logstash conf file and running manually and logstash gets shutdown.
Sample data:
File1:sample_orders.csv
id,product,quantity
2,Apple,5
1,Banana,10
3,Orange,3
File2: sample_users.csv
1,John Doe,johndoe@example.com
2,Jane Smith,janesmith@example.com
3,Robert Johnson,robertjohnson@example.com
Logstash conf file: nw_combined.conf
input {
file {
path => [
"/data/sample_orders.csv",
"/data/sample_users.csv"
]
start_position => "beginning"
sincedb_path => "/dev/null"
codec => plain {
charset => "UTF-8"
}
}
}
filter {
if [source] =~ /sample_orders\.csv/ {
csv {
separator => ","
skip_header => true
columns => ["id", "product", "quantity"]
}
mutate {
convert => {
"id" => "integer"
"quantity" => "integer"
}
}
ruby {
code => "
event.set('map', {}) unless event.get('map')
event.get('map')['file1'] ||= {}
event.get('map')['file1'][event.get('id')] ||= {}
event.get('map')['file1'][event.get('id')]['product'] = event.get('product')
event.get('map')['file1'][event.get('id')]['quantity'] = event.get('quantity')
event.cancel()
"
}
}
if [source] =~ /sample_users\.csv/ {
csv {
separator => ","
skip_header => true
columns => ["id", "name", "email"]
}
ruby {
code => "
event.set('map', {}) unless event.get('map')
event.get('map')['file2'] ||= {}
event.get('map')['file2'][event.get('id')] ||= {}
event.get('map')['file2'][event.get('id')]['name'] = event.get('name')
event.get('map')['file2'][event.get('id')]['email'] = event.get('email')
event.cancel()
"
}
}
ruby {
code => "
event.get('map')&.each do |file, data|
data.each do |id, fields|
event = LogStash::Event.new(fields)
event.set('id', id)
event.set('source', file)
event.remove('@timestamp')
event.remove('@version')
event.tag('aggregated')
yield event
end
end
"
}
}
output {
if 'aggregated' in [tags] {
stdout {
codec => rubydebug
}
}
}
error:
OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release.
WARNING: Could not find logstash.yml which is typically located in $LS_HOME/config or /etc/logstash. You can specify the path using --path.settings. Continuing using the defaults
Could not find log4j2 configuration at path /usr/share/logstash/config/log4j2.properties. Using default config which logs errors to the console
[INFO ] 2023-07-19 13:48:19.807 [main] runner - Starting Logstash {"logstash.version"=>"7.17.11", "jruby.version"=>"jruby 9.2.20.1 (2.5.8) 2021-11-30 2a2962fbd1 OpenJDK 64-Bit Server VM 11.0.19+7 on 11.0.19+7 +indy +jit [linux-x86_64]"}
[INFO ] 2023-07-19 13:48:19.870 [main] runner - JVM bootstrap flags: [-Xms1g, -Xmx1g, -XX:+UseConcMarkSweepGC, -XX:CMSInitiatingOccupancyFraction=75, -XX:+UseCMSInitiatingOccupancyOnly, -Djava.awt.headless=true, -Dfile.encoding=UTF-8, -Djdk.io.File.enableADS=true, -Djruby.compile.invokedynamic=true, -Djruby.jit.threshold=0, -Djruby.regexp.interruptible=true, -XX:+HeapDumpOnOutOfMemoryError, -Djava.security.egd=file:/dev/urandom, -Dlog4j2.isThreadContextMapInheritable=true]
[WARN ] 2023-07-19 13:48:21.249 [LogStash::Runner] multilocal - Ignoring the 'pipelines.yml' file because modules or command line options are specified
[INFO ] 2023-07-19 13:48:28.546 [Api Webserver] agent - Successfully started Logstash API endpoint {:port=>9600, :ssl_enabled=>false}
[ERROR] 2023-07-19 13:48:31.752 [Converge PipelineAction::Create<main>] agent - Failed to execute action {:action=>LogStash::PipelineAction::Create/pipeline_id:main, :exception=>"LogStash::ConfigurationError", :message=>"Expected one of [ \\t\\r\\n], \"#\", \"=>\" at line 21, column 8 (byte 399) after filter {\n csv {\n separator => \",\"\n columns => [\"id\", \"product\", \"quantity\"]\n skip_header => true\n if ", :backtrace=>["/usr/share/logstash/logstash-core/lib/logstash/compiler.rb:32:in `compile_imperative'", "org/logstash/execution/AbstractPipelineExt.java:189:in `initialize'", "org/logstash/execution/JavaBasePipelineExt.java:72:in `initialize'", "/usr/share/logstash/logstash-core/lib/logstash/java_pipeline.rb:48:in `initialize'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline_action/create.rb:52:in `execute'", "/usr/share/logstash/logstash-core/lib/logstash/agent.rb:392:in `block in converge_state'"]}
[INFO ] 2023-07-19 13:48:32.058 [LogStash::Runner] runner - Logstash shut down.
Checked for any configuration issue but there is no issue.
Thanks
Vinod