I have been using logstash to load a couple of millions of data into Elasticsearch from SQL server. However, every time I load the data, I am short of few thousands of records.
input {
jdbc {
jdbc_driver_library => "F:\MG_ES\elasticsearch-5.4.3\elasticsearch-5.4.3\lib\sqljdbc42.jar"
jdbc_driver_class => "com.microsoft.sqlserver.jdbc.SQLServerDriver"
jdbc_connection_string => "******"
jdbc_user => "*******"
jdbc_password => "*******"
statement => "select data_time_id,serialnumber,productnumber,hwgdid,MONITORING_SERIALNUMBER,MONITORING_PRODUCTNUMBER,ISNULL(region,'Region Not Available') as Region,ISNULL(country,'Country Not Available') as Country,softwaretitle, softwareversion,(CASE WHEN SOFTWAREVERSION LIKE '7.%' THEN 'V7.x' WHEN SOFTWAREVERSION LIKE '%05.%' THEN 'V5.x' WHEN SOFTWAREVERSION LIKE '3.%' THEN 'V3.x' WHEN SOFTWAREVERSION LIKE '4.%' THEN 'V4.x' ELSE 'No SW Version' END ) AS SWVersionGroup,(CASE WHEN IS_HOST=1 THEN 'CMS' WHEN IS_HOST=2 THEN 'CMS-MONITORED ENDPOINT' ELSE 'OTHER ENDPOINT' END) AS CMS,case when unique_device_id=1 then 'Duplicate' when unique_device_id=2 then 'unique' else 'Unknown' End as Unique_device,customer_name,systemmodel,AMID_L2,AMID_L4,First_Connection_DATE_ID as Device_install_date,SALESPARTNER_LOCATIONID,SUPPORTPARTNER_LOCATIONID,CHANNELPARTNER,last_monitored_date,remote_support_technology,SADB_REGSTATUS,ISVMware from ISEE_PUB.Production.IRS_FCT_V5DEVICES fct left join Production.IRS_DIM_CUSTOMER cust on fct.customer_id=cust.customer_id left join [ISEE_PUB].[Production].[IRS_DIM_JUNK] hw on fct.JUNK_ID=hw.JUNK_ID left join [ISEE_PUB].[Production].[IRS_DIM_host] host on fct.is_host_id=host.is_host_id left join Production.IRS_DIM_LAST_MONITORED mon on fct.MonitoredDate_ID=mon.MonitoredDate_ID left join Production.IRS_DIM_BU_HW bu on fct.bu_hw_id=bu.bu_hw_id left join Production.IRS_DIM_SUPERREGION sup on fct.superregion_id=sup.superregion_id left join dbo.GEO_BOE_Country boe ON sup.Country_Code = boe.[Iso Country Code] left join Production.IRS_DIM_REMOTE_SUPPORT_TECHNOLOGY rst on rst.registration_id=fct.registration_id left join Production.IRS_DIM_DEPLOYMENTTYPE dep on dep.DEPLOYMENTTYPE_ID=fct.DEPLOYMENTTYPE_ID left join Production.IRS_DIM_DATE dt on dt.DATE_ID=fct.DATE_ID left join Production.IRS_DIM_SALESPARTNER sales on sales.SALESPARTNER_ID=fct.SALESPARTNER_ID left join Production.IRS_DIM_SUPPORTPARTNER supp on supp.SUPPORTPARTNER_ID=fct.SUPPORTPARTNER_ID left join Production.IRS_DIM_CHANNELPARTNER ch on ch.channelpartner_id=fct.CHANNELPARTNER_ID left join Production.IRS_DIM_ISVMware vm on vm.ISVMware_ID=fct.ISVMware_ID where ISNULL(unique_device_id,2) = 2 and IS_HOST<>1 and fct.isvmware_id=0 and sadb_regstatus=1 and data_time_id in (20180131)"
jdbc_paging_enabled => "true"
jdbc_page_size => "500000"
}
}
filter {
mutate {
remove_field => [ "message", "path", "score", "@version", "host" ]
}
#mutate { convert => ["REGCONNTYPEID","integer"]}
#mutate { convert => ["data_time_id","integer"]}
}
output {
elasticsearch {
hosts => "localhost"
index => "sql_elk_fct_data"
document_type => "devices"
document_id => "%{hwgdid}"
}
#stdout { codec => rubydebug { metadata=> true } }
}
The query results 2027203 records in Database, however, I can see only the below count in Kibana -
I got 2298 record less.
hwgdid is the unique and not null field in the query.
What could be the reason?