Get bulk data from elasticsearch

I am using the scroll API to fetch data from Elasticsearch. The index holds 400,000 records, and fetching all of them takes about 2 minutes. How can I reduce this time from minutes to seconds?
I have attached my code. Please provide suggestions. I want to fetch 400000 records within seconds.

    // Export every document of "entity_fact" that matches queryELKPart1 by paging
    // through the results with the scroll API, then release the scroll context.
    //
    // NOTE(review): nothing in this snippet closes `rest`; whoever owns the client
    // should call rest.close() once it is no longer needed.
    RestClient rest = RestClient.builder(new HttpHost("172.21.153.176", 9200, "http")).build();
    RestHighLevelClient restClient = new RestHighLevelClient(rest);

    // Keep-alive that is re-sent with every scroll request.
    final Scroll scroll = new Scroll(TimeValue.timeValueHours(1L));

    SearchRequest searchRequest = new SearchRequest("entity_fact");
    searchRequest.scroll(scroll);

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    WrapperQueryBuilder qb = QueryBuilders.wrapperQuery(queryELKPart1);
    // Number of hits returned per round trip.
    searchSourceBuilder.size(10000);
    searchSourceBuilder.query(qb);
    // NOTE(review): if the order of the hits is irrelevant, the Elasticsearch scroll
    // documentation recommends sorting by "_doc" as the cheapest order - confirm
    // before adding, since it changes the order in which hits arrive.
    searchRequest.source(searchSourceBuilder);

    // First page; this call also opens the scroll context on the cluster.
    SearchResponse searchResponse = restClient.search(searchRequest);
    String scrollId = searchResponse.getScrollId();
    SearchHit[] searchHits = searchResponse.getHits().getHits();

    ClearScrollResponse clearScrollResponse;
    boolean succeeded;
    try {
        // Process the current page, then fetch the next one, until a page is empty.
        while (searchHits != null && searchHits.length > 0) {
            for (SearchHit hit : searchHits) {
                Map<String, Object> dataMap = hit.getSource();
                innerList = new ArrayList<>();

                // String.valueOf(...) yields the text "null" for an absent field.
                String elkEntityName = String.valueOf(dataMap.get("entity_name"));
                String elkSector = String.valueOf(dataMap.get("sector"));
                String elkRegion = String.valueOf(dataMap.get("region"));
                String elkCountryHq = String.valueOf(dataMap.get("country_hq"));
                String elkProduct = String.valueOf(dataMap.get("product"));
                String elkGapTox = String.valueOf(dataMap.get("gap_tox"));
                String elkRank = String.valueOf(dataMap.get("rank"));
                String elkRegionHq = String.valueOf(dataMap.get("region_hq"));
                String elkThisLevel = "L1";

                // Numeric fields go through toString() so the parse works whatever
                // numeric/string type the source map holds; an absent field throws
                // NullPointerException here, exactly as before.
                int timeId = Integer.parseInt(dataMap.get("time_id").toString());
                int elkEntityId = Integer.parseInt(dataMap.get("entity_id").toString());
                double elkWallet = Double.parseDouble(dataMap.get("wallet").toString());
                double elkRevenue = Double.parseDouble(dataMap.get("revenue").toString());
                double elkSow = Double.parseDouble(dataMap.get("sow").toString());
                double elkGap = Double.parseDouble(dataMap.get("gap").toString());
            }

            // Ask for the next page using the most recent scroll id.
            SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
            scrollRequest.scroll(scroll);
            searchResponse = restClient.searchScroll(scrollRequest);
            scrollId = searchResponse.getScrollId();
            searchHits = searchResponse.getHits().getHits();
        }
    } finally {
        // Release the scroll context even when paging fails part-way; otherwise it
        // stays open on the cluster until the keep-alive above expires.
        ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
        clearScrollRequest.addScrollId(scrollId);
        clearScrollResponse = restClient.clearScroll(clearScrollRequest);
        succeeded = clearScrollResponse.isSucceeded();
    }

What does your data and query look like? What is the specification of your Elasticsearch cluster?

GET /entity_fact/_search
{
  "query": {
    "bool": {
      "filter": [
        {
          "terms": {
            "product_id": [39,1,13,6,2,9,12,14,20,24,29,33,37,38,7,8,3,4,5,10,11,15,16,17,18,19,125,21,22,23,25,26,27,28,30,31,32,34,35,36]
          }
        },
        {
          "terms": {
            "sector_id": [1,2,3,4,6,7]
          }
        },
        {
          "terms": {
            "wallet_location_id": [1,2,3,4,5,6]
          }
        },
        {
          "terms": {
            "time_id": ["20121","20111"]
          }
        },
        {
          "match_phrase": {
            "client_id": "262"
          }
        },
        {
          "match_phrase": {
            "gap_tox_id": "1"
          }
        },
        {
          "match_phrase": {
            "data_version_id": "1"
          }
        },
        {
          "range": {
            "@timestamp": {
              "gte": 1483209000000,
              "lte": 1511334650586,
              "format": "epoch_millis"
            }
          }
        }
      ]
    }
  },
  "_source": {
    "includes": ["entity_name","sector","region","country_hq","product","time_id","wallet","revenue","sow","gap_tox","rank","gap","entity_id","region_hq"]
  }
}

What does disk I/O and iowait look like when you are running the query? What type of storage do you have? What is the specification/size of your cluster?

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.