I use Elasticsearch 6.2.4. I read indices from a remote Elasticsearch cluster (version 2.4), copy them to my local Elasticsearch (6.2.4), define the mapping for each index, and finally run a composite aggregation. The pseudocode looks like this:
For idx in indices:
    Copy_index()
    Define_mapping()
    Composite_agg()
The mapping looks like this:
# Explicit field mapping for a copied index, keyed by `document_type`
# (a variable defined outside this snippet).
mymapping = {"mappings": {document_type: {
"properties": {
"aantal": {"type": "integer"},
"aantal_cilinders": {"type": "integer"},
"aantal_deuren": {"type": "integer"},
"aantal_zitplaatsen": {"type": "integer"},
"abtest_device": {"type": "keyword"},
"abtest_test_nr": {"type": "integer"},
"bouwjaar": {"type": "integer"},
"bouwmaand": {"type": "integer"},
"brandstof": {"type": "keyword"},
"cilinderinhoud": {"type": "integer"},
"co2_uitstoot": {"type": "integer"},
"datum": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
"energielabel": {"type": "integer"},
"kilometerstand": {"type": "integer"},
"kleur": {"type": "keyword"},
# NOTE(review): the line below looks like an elision of further field
# definitions by the author; it is not valid Python as written.
....
}}}}
I use a composite aggregation. A simplified query is shown below:
# Search body: no hits are returned ("size": 0); only documents whose
# "type" term is "zoekopdrachten" are aggregated.
q_z = {
    "size": 0,
    "query": {"term": {"type": "zoekopdrachten"}},
    "aggs": {
        "by_user": {
            # Composite aggregation with a single terms source on
            # "UserId.keyword", requesting up to 1000 buckets.
            "composite": {
                "size": 1000,
                "sources": [
                    {"UserId.keyword": {"terms": {"field": "UserId.keyword"}}},
                ],
            },
            # Sub-aggregations computed inside every composite bucket.
            "aggs": {
                # stats sub-aggregations
                "zk_date_stats": {"stats": {"field": "datum"}},
                "zk_bouwjaar_max_stats": {"stats": {"field": "bouwjaar_max"}},
                "zk_bouwjaar_min_stats": {"stats": {"field": "bouwjaar_min"}},
                "zk_prijs_min_stats": {"stats": {"field": "prijs_min"}},
                "zk_prijs_max_stats": {"stats": {"field": "prijs_max"}},
                # terms sub-aggregations
                "zk_brandstof_count": {"terms": {"field": "brandstof"}},
                "zk_kleur_count": {"terms": {"field": "kleur"}},
            },
        },
    },
}
# Run the query body `q_z` against index `idx` with doc type "kliks";
# `es` and `idx` are defined outside this snippet.
responses = es.search(index=idx, doc_type="kliks", body=q_z)
The above procedure works on one index but fails on the other indices. When I remove some fields from the query (such as brandstof and kleur), the query runs. The Python script returns the following error:
Traceback (most recent call last):
File "/Applications/PyCharm CE.app/Contents/helpers/pydev/_pydevd_bundle/pydevd_exec.py", line 3, in Exec
exec exp in global_vars, local_vars
File "<input>", line 1, in <module>
File "/Users/Y/conda_env/lib/python2.7/site-packages/elasticsearch/client/utils.py", line 76, in _wrapped
return func(*args, params=params, **kwargs)
File "/Users/Y/conda_env/lib/python2.7/site-packages/elasticsearch/client/__init__.py", line 636, in search
doc_type, '_search'), params=params, body=body)
File "/Users/Y/conda_env/lib/python2.7/site-packages/elasticsearch/transport.py", line 314, in perform_request
status, headers_response, data = connection.perform_request(method, url, params, body, headers=headers, ignore=ignore, timeout=timeout)
File "/Users/Y/conda_env/lib/python2.7/site-packages/elasticsearch/connection/http_urllib3.py", line 163, in perform_request
self._raise_error(response.status, raw_data)
File "/Users/Y/conda_env/lib/python2.7/site-packages/elasticsearch/connection/base.py", line 125, in _raise_error
raise HTTP_EXCEPTIONS.get(status_code, TransportError)(status_code, error_message, additional_info)
TransportError: TransportError(500, u'search_phase_execution_exception', u'Failed to build aggregation [by_user]')
When I run the same query in the Dev Tools console, it shows the following error:
{
"error": {
"root_cause": [
{
"type": "aggregation_execution_exception",
"reason": "Failed to build aggregation [by_user]"
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "logstash-2017.02.04",
"node": "GxDkQRl5QuuUvJbUjN-Aig",
"reason": {
"type": "aggregation_execution_exception",
"reason": "Failed to build aggregation [by_user]",
"caused_by": {
"type": "e_o_f_exception",
"reason": "read past EOF: MMapIndexInput(path=\"/Users/Y.Norouzzadeh/Documents/server/elasticsearch-6.2.4/data/nodes/0/indices/e-AigpRbSnaSHxHtSlgohw/0/index/_16.cfs\") [slice=_16_Lucene70_0.dvd] [slice=docs]"
}
}
}
]
},
"status": 500
}