I am trying to build search functionality for a website, where there are different assets
whose names, descriptions, tags, categories, etc. can be searched for. I am using the AWS Elasticsearch service (Elasticsearch version 7.4), and Logstash (version 7.6.2) is running on an EC2 instance. I want to put edge n-gram tokenizers on specific fields, so I create the index with the appropriate mappings before I start running Logstash. But the documents are not being indexed into Elasticsearch. If I don't create the index at all, then the data gets uploaded. Please help me out. Thanks
Logstash input and output:
input {
  jdbc {
    jdbc_driver_library => "/usr/share/java/mysql-connector-java-5.1.41-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => ""
    jdbc_user => ""
    jdbc_password => ""
    # Incremental pull: :sql_last_value is maintained by the jdbc input
    # between scheduled runs, so only assets touched since the last run
    # (directly, or via their popularity report) are re-indexed.
    #
    # NOTE(review): the LEFT JOINs to os_asset_report AND the metatag tables
    # both fan out rows per asset, so the popularity COUNT below is inflated
    # by the number of matching metatags. COUNT(DISTINCT <report PK>) would
    # be correct -- confirm os_asset_report's key column and adjust.
    statement => "
      SELECT os_assets.*,
             GROUP_CONCAT(DISTINCT os_metatags.slug) AS tags,
             GROUP_CONCAT(DISTINCT os_categories.slug) AS categories,
             COUNT(os_asset_report.asset_rep_views) AS popularity,
             MAX(os_asset_report.asset_rep_updated_on) AS popularity_updated_on,
             GROUP_CONCAT(DISTINCT os_metatags.title) AS tags_ss,
             GROUP_CONCAT(DISTINCT os_categories.title) AS categories_ss
      FROM os_assets
      LEFT JOIN os_asset_report ON os_assets.asset_id = os_asset_report.asset_rep_asset_id
      LEFT JOIN os_asset_metatags ON os_assets.asset_id = os_asset_metatags.asset_id
      LEFT JOIN os_metatags ON os_metatags.id = os_asset_metatags.metatag_id
      LEFT JOIN os_categories ON os_metatags.category_id = os_categories.id
      GROUP BY os_assets.asset_id
      HAVING asset_updated_date > :sql_last_value OR popularity_updated_on > :sql_last_value;
    "
    # Run every minute; 'type' is used by the output section to route events.
    schedule => "* * * * *"
    type => "assets_prod"
  }
}
output {
  if [type] == "assets_prod" {
    amazon_es {
      document_id => "%{asset_id}"
      # Do NOT set document_type here. The index is pre-created with an
      # ES 7.x typeless mapping, so its only type is "_doc". Sending
      # documents as type "doc" makes Elasticsearch reject every write with
      # "Rejecting mapping update ... the final mapping would have more than
      # one type [_doc, doc]" -- which is why indexing only worked when the
      # index did not exist yet (ES then auto-created it with type "doc").
      # Omitting document_type lets the plugin default to "_doc".
      index => "assets"
      hosts => [""]
      region => ""
      aws_access_key_id => ''
      aws_secret_access_key => ''
    }
  }
}
Elasticsearch settings and mappings used to create the index `assets`:
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "autocomplete": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 15,
          "token_chars": ["letter"]
        }
      },
      "analyzer": {
        "autocomplete": {
          "tokenizer": "autocomplete",
          "filter": ["lowercase"]
        },
        "autocomplete_search": {
          "tokenizer": "lowercase"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "asset_name": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      },
      "asset_description": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      },
      "asset_author": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      },
      "asset_category": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      },
      "tags_ss": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      },
      "categories_ss": {
        "type": "text",
        "analyzer": "autocomplete",
        "search_analyzer": "autocomplete_search"
      },
      "asset_updated_date": {
        "type": "date"
      }
    }
  }
}
Please let me know if there is any problem in my code, or if there is a better way to do this. Thanks in advance!