Here is how I am doing the indexing:
A. To fetch ~150 million records:
# Register the JDBC river "blk_jdbc_river" that loads rows of table `sohil`
# into index "blks" as documents of type "blk".
#
# Notes:
# - The JSON payload is passed inside single quotes, so the double quotes
#   around the SQL column aliases must be JSON-escaped (\"); unescaped they
#   terminate the "statement" string and the body is not valid JSON.
# - "type_mapping" is keyed by the document type ("blk"), not the index name,
#   so that the mapping describes the type this river actually indexes.
# - NOTE(review): the key "maxconcurrrentbulkactions" is kept exactly as
#   written; confirm the spelling against the river version in use.
curl -XPUT 'http://XXX.X.X.XXX:9200/_river/blk_jdbc_river/_meta' -d '{
  "type": "jdbc",
  "jdbc": {
    "strategy": "simple",
    "autocommit": true,
    "driver": "com.mysql.jdbc.Driver",
    "url": "jdbc:mysql://localhost:3306/KA",
    "user": "",
    "password": "**",
    "sql": [
      {
        "statement": "select id as \"_id\", p_id as \"pid\", s_id as \"sid\", d_name as \"dname\" from sohil"
      }
    ],
    "maxbulkactions": 5000,
    "maxconcurrrentbulkactions": 1,
    "index": "blks",
    "type": "blk",
    "type_mapping": {
      "blk": {
        "properties": {
          "_id": {"type": "long", "store": "yes"},
          "dname": {"type": "string", "store": "yes", "index": "not_analyzed"},
          "pid": {"type": "long", "store": "yes"},
          "sid": {"type": "long", "store": "yes"}
        }
      }
    }
  }
}'
B. To fetch the delta on every run (frequency: every 6 hours):
# Register the scheduled JDBC river "update_blk_jdbc_river" that indexes the
# delta (rows of `sohil` with id > XXX) into index "blks", type "blk".
#
# Notes:
# - "schedule" is a Quartz cron expression with the fields
#   second minute hour day-of-month month day-of-week.
#   "0 0 0/6 * * ?" fires at 00:00, 06:00, 12:00 and 18:00, i.e. every
#   6 hours. The previous value "0 /6 * * * ?" put the increment in the
#   minutes field, which does not match the intended 6-hour frequency.
# - The double quotes around the SQL column aliases are JSON-escaped (\");
#   unescaped they terminate the "statement" string and break the payload.
# - "type_mapping" is keyed by the document type ("blk"), not the index name.
# - XXX in the WHERE clause is a placeholder for the last id already indexed.
# - NOTE(review): the key "maxconcurrrentbulkactions" is kept exactly as
#   written; confirm the spelling against the river version in use.
curl -XPUT 'http://XXX.X.X.XXX:9200/_river/update_blk_jdbc_river/_meta' -d '{
  "type": "jdbc",
  "jdbc": {
    "strategy": "simple",
    "autocommit": true,
    "schedule": "0 0 0/6 * * ?",
    "driver": "com.mysql.jdbc.Driver",
    "url": "jdbc:mysql://localhost:3306/KA",
    "user": "",
    "password": "***",
    "maxbulkactions": 5000,
    "maxconcurrrentbulkactions": 1,
    "sql": [
      {
        "statement": "select id as \"_id\", p_id as \"pid\", s_id as \"sid\", d_name as \"dname\" from sohil where id > XXX"
      }
    ],
    "index": "blks",
    "type": "blk",
    "type_mapping": {
      "blk": {
        "properties": {
          "_id": {"type": "long", "store": "yes"},
          "dname": {"type": "string", "store": "yes", "index": "not_analyzed"},
          "pid": {"type": "long", "store": "yes"},
          "sid": {"type": "long", "store": "yes"}
        }
      }
    }
  }
}'
Please let me know if I am doing anything wrong.