Is there a way to use the clean_run parameter of the jdbc input plugin to rebuild an index once a day, while still applying periodic updates to the same index throughout the day? clean_run doesn't work the way I expected: it seems to reset the sql_last_start variable for the entire file rather than for each jdbc input individually. This is my logstash.conf file:
The idea was to have the rebuild run during the 6-7 hour and the updates run during every other hour. However, Logstash is pulling the entire data set on the update runs instead of a diff based on the modified date. I'm using :sql_last_run in my query.
input {
  # Two jdbc inputs feed the same "rcaproperty" event type from the same
  # statement file: a once-a-day rebuild and a frequent incremental update.
  #
  # Each input gets its own last_run_metadata_path. Without it, both inputs
  # fall back to the plugin's single default state file, so clean_run on the
  # rebuild input wipes the last-run state the update input depends on and the
  # update query ends up pulling the whole data set.
  # The paths below are relative, like the other paths in this file; point
  # them at any location the Logstash process can write to.
  #
  # NOTE(review): jdbc_driver_library is "sqljdbc42.jar" in the rebuild input
  # and "/sqljdbc42.jar" in the update input -- confirm which path is correct.

  # Rebuild Property Index Data
  jdbc {
    jdbc_driver_library => "sqljdbc42.jar"
    jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
    jdbc_connection_string => "jdbc:sqlserver://127.0.0.1;databaseName=db"
    jdbc_user => "user"
    jdbc_password => "pass"
    statement_filepath => "propertyQuery.sql"
    type => "rcaproperty"
    # Discard any saved last-run state so the query starts from scratch.
    # NOTE(review): clean_run appears to be applied when the plugin starts,
    # not on every scheduled firing -- confirm that later daily runs still
    # perform a full rebuild; a dedicated statement without the last-run
    # filter would be the more reliable way to force one.
    clean_run => true
    # State file private to the rebuild input.
    last_run_metadata_path => ".logstash_jdbc_last_run_rcaproperty_rebuild"
    # Run the rebuild once a day, at minute 0 of hour 9 in the scheduler's
    # time zone (intended as 6:00am local time -- TODO confirm the offset).
    schedule => "0 09 * * *"
  }

  # Update Property Index Data
  jdbc {
    jdbc_driver_library => "/sqljdbc42.jar"
    jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
    jdbc_connection_string => "jdbc:sqlserver://127.0.0.1;databaseName=db"
    jdbc_user => "user"
    jdbc_password => "pass"
    statement_filepath => "propertyQuery.sql"
    type => "rcaproperty"
    # State file private to the update input, so the rebuild input's
    # clean_run can never reset the incremental high-water mark.
    last_run_metadata_path => ".logstash_jdbc_last_run_rcaproperty_update"
    # Update every ten minutes, skipping hour 9, which is reserved for the
    # rebuild input above.
    schedule => "*/10 0-8,10-23 * * *"
  }
}
filter {
  # Post-processing for property index events only.
  if [type] == "rcaproperty" {
    # Turn the pipe-delimited "altaddresses" string into an array.
    mutate {
      split => { "altaddresses" => "|" }
    }

    # Trim whitespace from "altaddresses" after the split. This stays in its
    # own mutate block so that it runs after the split above rather than in
    # whatever order a single mutate applies its operations.
    mutate {
      strip => ["altaddresses"]
    }

    # Build a "lat,lon" geocode field only when both coordinates are present.
    if [lat_dbl] and [lon_dbl] {
      mutate {
        add_field => { "geocode" => "%{lat_dbl},%{lon_dbl}" }
      }
    }
  }
}
output {
  # Ship property index events to Elasticsearch.
  if [type] == "rcaproperty" {
    elasticsearch {
      hosts => ["localhost:9200"]
      # Index name carries a date suffix rendered from the YYYY.MM.dd pattern.
      index => "rcasearchindex_dev_%{+YYYY.MM.dd}"
      # Name under which the index template is registered.
      template_name => "rcasearchindex_template"
      # Use the property id as the document id, so a re-sent property
      # replaces its existing document in the same index.
      document_id => "%{property_id}"
    }
  }
}