Hi,
I have an index with 700k+ documents in it. I am paging through the
documents to display 25 at a time by using size/from in my query JSON.
At the start of the index, I get fantastic performance (~30ms) but by
"from" : 100000 the response time has increased to 1s. At "from" :
150000 it is 6s and so on to "from" : 700000 taking 32s.
I have one index across 5 shards served by two nodes.
Can I optimize this index to even out the performance? Or is there a
better way of paging through the data?
Thanks,
Hugh
The mapping of the index is:
{
"commenttab" : {
"properties" : {
"transaction_time" : {
"format" : "dateOptionalTime",
"type" : "date"
},
"message_text" : {
"type" : "string"
},
"store" : {
"index" : "not_analyzed",
"type" : "string"
},
"cat1_id" : {
"type" : "long"
},
"transaction" : {
"type" : "long"
},
"conversation_type_id" : {
"type" : "long"
},
"conversation_start_time" : {
"format" : "dateOptionalTime",
"type" : "date"
},
"conversation_type_name" : {
"index" : "not_analyzed",
"type" : "string"
},
"cat1" : {
"index" : "not_analyzed",
"type" : "string"
},
"cat2" : {
"index" : "not_analyzed",
"type" : "string"
},
"rdm" : {
"index" : "not_analyzed",
"type" : "string"
},
"conversation" : {
"type" : "string"
},
"year" : {
"type" : "integer"
},
"message_type_id" : {
"type" : "long"
},
"conversation_finish_time" : {
"format" : "dateOptionalTime",
"type" : "date"
},
"message_time" : {
"format" : "dateOptionalTime",
"type" : "date"
},
"reason_for_call" : {
"type" : "string"
},
"conversation_hidden" : {
"type" : "boolean"
},
"question1" : {
"type" : "double"
},
"period" : {
"type" : "string"
},
"question2" : {
"type" : "double"
},
"cat2_id" : {
"type" : "long"
},
"question3" : {
"type" : "double"
},
"message" : {
"type" : "long"
},
"rod" : {
"index" : "not_analyzed",
"type" : "string"
},
"message_type" : {
"type" : "string"
},
"week" : {
"type" : "string"
}
}
}
The query is:
{
"from": 700000,
"size": 25,
"sort": [
{
"message_time": "desc"
}
],
"fields": [
"message_time",
"message_text",
"store",
"rdm",
"rod",
"question1",
"question2",
"question3",
"cat1",
"cat2"
],
"query": {
"matchAll": {}
}
}