Hello,
For more than two years I've been working on a project that involves continually collecting and storing tweets.
In the early months the system was incredibly fast, but after that it started getting slower and slower...
My team and I have concluded that, even though our queries use very restrictive filters (such as a very small range on the tweet's creation date), the more our index grows, the slower it gets, even for simple queries.
Today our cluster is made of 4 computers with a total of 100GB of RAM and 10TB of storage.
Its size is: 764Gi (1.50Ti)
It contains 1 415 348 474 (2 910 677 195) documents.
Here is the mapping:
{
"state":"open",
"settings":{
"index":{
"creation_date":"1464706655747",
"legacy":{
"routing":{
"hash":{
"type":"org.elasticsearch.cluster.routing.DjbHashFunction"
},
"use_type":"false"
}
},
"number_of_shards":"12",
"number_of_replicas":"1",
"uuid":"zmybcfvkS-uiXCusUGn-xg",
"version":{
"created":"1070599",
"upgraded":"2040199"
}
}
},
"mappings":{
"status":{
"_timestamp":{
},
"properties":{
"hashtags":{
"type":"string"
},
"created_at":{
"format":"epoch_millis||dateOptionalTime",
"type":"date"
},
"language":{
"index":"not_analyzed",
"type":"string"
},
"in_reply":{
"properties":{
"status_id":{
"index":"not_analyzed",
"type":"string"
},
"user_id":{
"index":"not_analyzed",
"type":"string"
},
"user_screen_name":{
"index":"not_analyzed",
"type":"string"
}
}
},
"urls":{
"properties":{
"expand_url":{
"index":"not_analyzed",
"type":"string"
},
"url":{
"index":"not_analyzed",
"type":"string"
}
}
},
"nperceptioncnn":{
"type":"double"
},
"location":{
"properties":{
"lon":{
"type":"double"
},
"lat":{
"type":"double"
}
}
},
"text":{
"type":"string"
},
"ngender":{
"index":"not_analyzed",
"type":"string"
},
"nperception":{
"type":"byte"
},
"user_mentions":{
"properties":{
"screen_name":{
"index":"not_analyzed",
"type":"string"
},
"id":{
"index":"not_analyzed",
"type":"string"
}
}
},
"user":{
"screen_name":{
"index":"not_analyzed",
"type":"string"
},
"id":{
"index":"not_analyzed",
"type":"string"
}
}
}
}
}
},
"aliases":[
"twitter2"
]
}
Did we reach the right conclusion? How can we solve this?
Thank you.