Hi everybody.
We have a large index, "contacts", whose size is about 3.5 GB (I mean "primary_size") and which contains 6,448,782 documents. We have performance problems with particular queries: their execution time is more than 5 seconds.
Our index is configured with 3 shards and 2 replicas. It runs on 3 EC2 m1.large servers.
None of the fields in the indexed documents are analyzed; _source and _all are disabled.
Each document has 2 big fields: "fields" and "reverse_fields". The latter is a reversed version of "fields"; it is designed to match words from the end.
Here is part of the "fields" mapping:
...
"fields": {
"type": "object",
"dynamic" : False,
"properties" : {
.....
"city": {
"type": "string",
"index": "not_analyzed",
"omit_term_freq_and_positions": "true"
},
"state": {
"type": "string",
"index": "not_analyzed",
"omit_term_freq_and_positions": "true"
},
"zip": {
"type": "string",
"index": "not_analyzed",
"omit_term_freq_and_positions": "true"
},
....
}
}
.....
}
We are looking for ways to speed up our "contain" query over all document fields. The first 2 filter terms (company_id and is_visible) match ~43k documents.
Example of the query:
es_q = {'sort':
[{'_score': 'desc'}], 'query': {'filtered': {'filter': {'and': {
'filters': [{'term': {'company_id': '4b619ddffa5bd81b71000002'}}, {'term': {'is_visible': True}}, {
'or': [{'prefix': {'fields.last_name': 'alexander'}}, {'prefix': {'reverse_fields.last_name': 'rednaxela'}},
{'prefix': {'fields.twitter.profile': 'alexander'}},
{'prefix': {'reverse_fields.twitter.profile': 'rednaxela'}},
{'prefix': {'fields.twitter.user_name': 'alexander'}},
{'prefix': {'reverse_fields.twitter.user_name': 'rednaxela'}},
{'prefix': {'fields.twitter.user_id': 'Alexander'}},
{'prefix': {'reverse_fields.twitter.user_id': 'rednaxelA'}},
{'prefix': {'fields.linkedin.profile': 'alexander'}},
{'prefix': {'reverse_fields.linkedin.profile': 'rednaxela'}},
{'prefix': {'fields.linkedin.user_name': 'alexander'}},
{'prefix': {'reverse_fields.linkedin.user_name': 'rednaxela'}},
{'prefix': {'fields.linkedin.user_id': 'Alexander'}},
{'prefix': {'reverse_fields.linkedin.user_id': 'rednaxelA'}}, {'prefix': {'fields.street': 'alexander'}}
, {'prefix': {'reverse_fields.street': 'rednaxela'}},
{'prefix': {'fields.skype.profile': 'alexander'}},
{'prefix': {'reverse_fields.skype.profile': 'rednaxela'}},
{'prefix': {'fields.skype.user_name': 'alexander'}},
{'prefix': {'reverse_fields.skype.user_name': 'rednaxela'}},
{'prefix': {'fields.skype.user_id': 'Alexander'}},
{'prefix': {'reverse_fields.skype.user_id': 'rednaxelA'}}, {'prefix': {'fields.city': 'alexander'}},
{'prefix': {'reverse_fields.city': 'rednaxela'}}, {'prefix': {'fields.first_name': 'alexander'}},
{'prefix': {'reverse_fields.first_name': 'rednaxela'}}, {'prefix': {'fields.zip': 'alexander'}},
{'prefix': {'reverse_fields.zip': 'rednaxela'}}, {'prefix': {'fields.title': 'alexander'}},
{'prefix': {'reverse_fields.title': 'rednaxela'}}, {'prefix': {'fields.state': 'alexander'}},
{'prefix': {'reverse_fields.state': 'rednaxela'}}, {'prefix': {'fields.leadSource': 'alexander'}},
{'prefix': {'reverse_fields.leadSource': 'rednaxela'}}, {'prefix': {'fields.company_name': 'alexander'}}
, {'prefix': {'reverse_fields.company_name': 'rednaxela'}},
{'prefix': {'fields.department': 'alexander'}}, {'prefix': {'reverse_fields.department': 'rednaxela'}},
{'prefix': {'fields.email.profile': 'alexander'}}, {'prefix':
{
'reverse_fields.email.profile': 'rednaxela'}}
, {'prefix': {'fields.email.user_name': 'alexander'}},
{'prefix': {'reverse_fields.email.user_name': 'rednaxela'}},
{'prefix': {'fields.email.user_id': 'Alexander'}},
{'prefix': {'reverse_fields.email.user_id': 'rednaxelA'}},
{'prefix': {'fields.website': 'alexander'}}, {'prefix': {'reverse_fields.website': 'rednaxela'}}, {
'prefix': {'fields.description': 'alexander'}}, {'prefix': {'reverse_fields.description': 'rednaxela'}},
{
'prefix': {'fields.accountNumber': 'alexander'}},
{'prefix': {'reverse_fields.accountNumber': 'rednaxela'}}, {
'prefix': {'fields.assistant': 'alexander'}}, {'prefix': {'reverse_fields.assistant': 'rednaxela'}}, {
'prefix': {'fields.phone': 'alexander'}}, {'prefix': {'reverse_fields.phone': 'rednaxela'}}, {
'prefix': {'fields.facebook.profile': 'alexander'}},
{'prefix': {'reverse_fields.facebook.profile': 'rednaxela'}}, {
'prefix': {'fields.facebook.user_name': 'alexander'}}, {
'prefix': {'reverse_fields.facebook.user_name': 'rednaxela'}}, {
'prefix': {'fields.facebook.user_id': 'Alexander'}},
{'prefix': {'reverse_fields.facebook.user_id': 'rednaxelA'}}, {
'prefix': {'fields.leadType': 'alexander'}}, {'prefix': {'reverse_fields.leadType': 'rednaxela'}}, {
'prefix': {'fields.dates': 'alexander'}}, {'prefix': {'reverse_fields.dates': 'rednaxela'}}, {
'prefix': {'fields.name': 'alexander'}}, {'prefix': {'reverse_fields.name': 'rednaxela'}}, {
'prefix': {'fields.country': 'alexander'}}, {'prefix': {'reverse_fields.country': 'rednaxela'}}, {
'prefix': {'fields.assistantPhone': 'alexander'}}, {
'prefix': {'reverse_fields.assistantPhone': 'rednaxela'}}]}]}}, 'query': {'bool': {'must': [{
'dis_max': {'tie_breaker': 0.7,
'queries': [{'constant_score': {'filter': {'term': {'fields.first_name': 'alexander'}}, 'boost': 20.0}},
{'constant_score': {'filter': {'prefix': {'fields.first_name': 'alexander'}}, 'boost': 11.0}}, {
'constant_score': {'filter': {'prefix': {'reverse_fields.first_name': 'rednaxela'}},
'boost': 11.0}},
{'constant_score': {'filter': {'term': {'fields.last_name': 'alexander'}}, 'boost': 40.0}},
{'constant_score': {'filter': {'prefix': {'fields.last_name': 'alexander'}}, 'boost': 17.0}}, {
'constant_score': {'filter': {'prefix': {'reverse_fields.last_name': 'rednaxela'}},
'boost': 17.0}},
{'constant_score': {'filter': {'term': {'fields.name': 'alexander'}}, 'boost': 18.0}},
{'constant_score': {'filter': {'prefix': {'fields.name': 'alexander'}}, 'boost': 5.0}},
{'constant_score': {'filter': {'prefix': {'reverse_fields.name': 'rednaxela'}}, 'boost': 5.0}},
{'match_all': {}}]}}], 'should': []}}}}, 'explain': False}
Any help would be appreciated.
-- Michael Korbakov