Imagine I have an index "article" containing 100 million documents. Its template data and mapping file are linked below. My question is: does splitting the search into a separate "query" request and a separate "aggs" request (query1 below) and then executing them together with the _bulk API perform better than executing a single combined DSL request (query2 below)? (Elasticsearch version 7.x)
mappings:
https://github.com/ZhangDi-d/elasticsearch-demo/blob/main/index_article_mappings.md
template data:
https://github.com/ZhangDi-d/elasticsearch-demo/blob/main/index_article_template_data.md
query1:
This consists of two requests: a "query" DSL and an "aggs" DSL.
GET /article/_search
{
  "from": 0,
  "size": 1000,
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            { "term": { "hidden": { "value": "false" } } },
            { "match": { "new_author_last_name": "Smith" } },
            { "range": { "view_cnt": { "gte": 1 } } },
            {
              "bool": {
                "should": [
                  {
                    "simple_query_string": {
                      "query": "i",
                      "fields": ["content"]
                    }
                  }
                ]
              }
            }
          ],
          "must_not": [
            { "match": { "author_first_name": "Danny" } }
          ]
        }
      },
      "functions": [
        {
          "filter": { "match_all": { "boost": 1 } },
          "field_value_factor": {
            "field": "follower_num",
            "factor": 1,
            "missing": 22.5,
            "modifier": "none"
          }
        }
      ]
    }
  },
  "_source": false,
  "sort": [
    { "_score": { "order": "desc" } },
    { "follower_num": { "order": "desc", "missing": 0 } },
    { "view_cnt": { "order": "desc", "missing": 0 } }
  ],
  "collapse": { "field": "userID" }
}
GET /article/_search
{
  "size": 0,
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            { "term": { "hidden": { "value": "false" } } },
            { "match": { "new_author_last_name": "Smith" } },
            { "range": { "view_cnt": { "gte": 1 } } },
            {
              "bool": {
                "should": [
                  {
                    "simple_query_string": {
                      "query": "i",
                      "fields": ["content"]
                    }
                  }
                ]
              }
            }
          ],
          "must_not": [
            { "match": { "author_first_name": "Danny" } }
          ]
        }
      },
      "functions": [
        {
          "filter": { "match_all": { "boost": 1 } },
          "field_value_factor": {
            "field": "follower_num",
            "factor": 1,
            "missing": 22.5,
            "modifier": "none"
          }
        }
      ]
    }
  },
  "_source": false,
  "aggs": {
    "userID_agg": {
      "terms": {
        "field": "userID",
        "size": 100
      }
    }
  }
}
query2:
GET /article/_search
{
  "from": 0,
  "size": 1000,
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            { "term": { "hidden": { "value": "false" } } },
            { "match": { "new_author_last_name": "Smith" } },
            { "range": { "view_cnt": { "gte": 1 } } },
            {
              "bool": {
                "should": [
                  {
                    "simple_query_string": {
                      "query": "i",
                      "fields": ["content"]
                    }
                  }
                ]
              }
            }
          ],
          "must_not": [
            { "match": { "author_first_name": "Danny" } }
          ]
        }
      },
      "functions": [
        {
          "filter": { "match_all": { "boost": 1 } },
          "field_value_factor": {
            "field": "follower_num",
            "factor": 1,
            "missing": 22.5,
            "modifier": "none"
          }
        }
      ]
    }
  },
  "_source": false,
  "sort": [
    { "_score": { "order": "desc" } },
    { "follower_num": { "order": "desc", "missing": 0 } },
    { "view_cnt": { "order": "desc", "missing": 0 } }
  ],
  "aggs": {
    "userID_agg": {
      "terms": {
        "field": "userID",
        "size": 100
      }
    }
  },
  "collapse": { "field": "userID" }
}