I have a huge postgres database with 20 million rows and i want to transfer it to elasticsearch via logstash . I followed the advice mentioned here and I test it for a simple database with 300 rows and all things worked fine but when i tested it for my main database i allways cross with error:
nargess@nargess-Surface-Book:/usr/share/logstash/bin$ sudo ./logstash -w 1 -f students.conf --path.data /usr/share/logstash/data/students/ --path.settings /etc/logstash
Sending Logstash's logs to /var/log/logstash which is now configured via log4j2.properties
java.lang.OutOfMemoryError: Java heap space
Dumping heap to java_pid3453.hprof ...
Heap dump file created [13385912484 bytes in 53.304 secs]
Exception in thread "Ruby-0-Thread-11: /usr/share/logstash/vendor/bundle/jruby/1.9/gems/puma-2.16.0-java/lib/puma/thread_pool.rb:216" java.lang.ArrayIndexOutOfBoundsException: -1
at org.jruby.runtime.ThreadContext.popRubyClass(ThreadContext.java:729)
at org.jruby.runtime.ThreadContext.postYield(ThreadContext.java:1292)
at org.jruby.runtime.ContextAwareBlockBody.post(ContextAwareBlockBody.java:29)
at org.jruby.runtime.Interpreted19Block.yield(Interpreted19Block.java:198)
at org.jruby.runtime.Interpreted19Block.call(Interpreted19Block.java:125)
at org.jruby.runtime.Block.call(Block.java:101)
at org.jruby.RubyProc.call(RubyProc.java:300)
at org.jruby.RubyProc.call(RubyProc.java:230)
at org.jruby.internal.runtime.RubyRunnable.run(RubyRunnable.java:103)
at java.lang.Thread.run(Thread.java:748)
The signal INT is in use by the JVM and will not work correctly on this platform
Error: Your application used more memory than the safety cap of 12G.
Specify -J-Xmx####m to increase it (#### = cap size in MB).
Specify -w for full OutOfMemoryError stack trace
Although I go to file /etc/logstash/jvm.options and set -Xms256m
-Xmx12000m, but I have had these errors yet. I have 13g memory free. how can i send my data to Elasticsearch with this memory ? this is the student-index.json that i use in elasticsearch
{
"aliases": {},
"warmers": {},
"mappings": {
"tab_students_dfe": {
"properties": {
"stcode": {
"type": "text"
},
"voroodi": {
"type": "integer"
},
"name": {
"type": "text"
},
"family": {
"type": "text"
},
"namp": {
"type": "text"
},
"lastupdate": {
"type": "date"
},
"picture": {
"type": "text"
},
"uniquename": {
"type": "text"
}
}
}
},
"settings": {
"index": {
"number_of_shards": "5",
"number_of_replicas": "1"
}
}
}
then i try to insert this index in Elasticsearch by
curl -XPUT --header "Content-Type: application/json"
http://localhost:9200/students -d @postgres-index.json
and next, this is my configuration fil in /usr/shar/logstash/bin/students.conf file
input {
jdbc {
jdbc_connection_string => "jdbc:postgresql://localhost:5432/postgres"
jdbc_user => "postgres"
jdbc_password => "postgres"
The path to downloaded jdbc driver
jdbc_driver_library => "./postgresql-42.2.1.jar"
jdbc_driver_class => "org.postgresql.Driver"
The path to the file containing the query
statement => "select * from students"
}
}
filter {
aggregate {
task_id => "%{stcode}"
code => "
map['stcode'] = event.get('stcode')
map['voroodi'] = event.get('voroodi')
map['name'] = event.get('name')
map['family'] = event.get('family')
map['namp'] = event.get('namp')
map['uniquename'] = event.get('uniquename')
event.cancel()
"
push_previous_map_as_event => true
timeout => 5
}
}
output {
elasticsearch {
document_id => "%{stcode}"
document_type => "postgres"
index => "students"
codec => "json"
hosts => ["127.0.0.1:9200"]
}
}
Thank you for your help