Efficient pagination over composite aggregation in Java

Hi,
I'm trying to paginate over a composite aggregation in Java. I have some working code, but it doesn't seem to be very efficient, as it re-builds the query each time. Is there a way to re-use the query and just change the "afterKey" parameter?
Here is the code that I tried:

    /**
     * Pages through the composite aggregation by calling {@code getProfiles}
     * repeatedly, feeding the after-key of each page into the next request,
     * until a page comes back with no buckets. Prints the starting and final
     * after-key and the total elapsed time.
     *
     * @param args unused
     * @throws IOException if a search request or closing the client fails
     */
    public static void main(String[] args) throws IOException {
    	Instant before = Instant.now();
    	int searchAfterKey = -1;
    	// try-with-resources so the client (and its I/O threads) is released even
    	// when a search request throws part-way through the pagination loop.
    	try (RestHighLevelClient client = new RestHighLevelClient(
    			RestClient.builder(new HttpHost("localhost", 9200, "http"),
    							   new HttpHost("localhost", 9201, "http")))) {
    		System.out.println("After key start: " + searchAfterKey);
    		boolean emptyBuckets = false;

    		while (!emptyBuckets) {
    			Map<Object, Object> responseMap = getProfiles(client, searchAfterKey);
    			emptyBuckets = (Boolean) responseMap.get("emptyBuckets");
    			if (!emptyBuckets) {
    				// "afterKeyVal" is only present when the page contained buckets.
    				searchAfterKey = (Integer) responseMap.get("afterKeyVal");
    			}
    		}

    		System.out.println("After key end: " + searchAfterKey);
    	}
    	Instant after = Instant.now();
    	long delta = Duration.between(before, after).toMillis();
    	System.out.println("Time taken: " + delta + " ms.");
    }

    	/**
    	 * Fetches one page of the "by_id" composite aggregation from the
    	 * "from_dynamo" index, starting after the given key, and prints the source
    	 * of every top hit found in every bucket of that page.
    	 *
    	 * @param client         client used to execute the search
    	 * @param searchAfterKey value sent as the "agg_on_id" after-key for this page
    	 * @return a map holding "emptyBuckets" (Boolean) and, only when the page had
    	 *         buckets, "afterKeyVal" (Integer) taken from the response's after-key
    	 * @throws IOException if the search request fails
    	 */
    	private static Map<Object, Object> getProfiles(RestHighLevelClient client, int searchAfterKey) throws IOException {
    		// Filter: effectiveDate in [0, 1575518400), wrapped in constant_score.
    		QueryBuilder dateRange = QueryBuilders.rangeQuery("effectiveDate").from(0).to(1575518400, false);
    		ConstantScoreQueryBuilder filter = new ConstantScoreQueryBuilder(QueryBuilders.boolQuery().must(dateRange));

    		// Composite aggregation with a single terms source on "id".
    		List<CompositeValuesSourceBuilder<?>> valueSources = new ArrayList<>();
    		valueSources.add(new TermsValuesSourceBuilder("agg_on_id").field("id"));

    		Map<String, Object> resumeFrom = new HashMap<>();
    		resumeFrom.put("agg_on_id", searchAfterKey);

    		CompositeAggregationBuilder byId = new CompositeAggregationBuilder("by_id", valueSources);
    		byId.size(10000);
    		byId.aggregateAfter(resumeFrom);
    		byId.subAggregation(AggregationBuilders.topHits("latest_snapshot")
    				.sort("effectiveDate", SortOrder.DESC)
    				.size(100)
    				.fetchSource(true));

    		SearchSourceBuilder body = new SearchSourceBuilder().size(0).query(filter).aggregation(byId);
    		SearchRequest request = new SearchRequest("from_dynamo").source(body);
    		System.out.println(request.source().toString());

    		SearchResponse response = client.search(request, RequestOptions.DEFAULT);
    		ParsedComposite page = response.getAggregations().get("by_id");

    		// Dump the source of every hit of every sub-aggregation in every bucket.
    		for (ParsedComposite.ParsedBucket bucket : page.getBuckets()) {
    			for (Aggregation sub : bucket.getAggregations().asList()) {
    				for (SearchHit hit : ((ParsedTopHits) sub).getHits().getHits()) {
    					System.out.println(hit.getSourceAsString());
    				}
    			}
    		}

    		Map<Object, Object> result = new HashMap<>();
    		boolean noBuckets = page.getBuckets().isEmpty();
    		result.put("emptyBuckets", noBuckets);
    		if (noBuckets) {
    			return result;
    		}
    		// The after-key is only read when the page actually contained buckets.
    		result.put("afterKeyVal", (Integer) page.afterKey().get("agg_on_id"));
    		return result;
    	}

Thanks,
Florin

Welcome !

Not sure it's a problem.

But anyway, have a look at https://www.elastic.co/guide/en/elasticsearch/reference/current/search-template.html

Thanks David for your quick answer!
Templates could be a way forward, but I seem unable to implement them in Java. I've written this code (using RestHighLevelClient, as I understand TransportClient is deprecated):

/**
 * Runs the composite-aggregation search as an inline search template through
 * the high-level REST client, passing the after-key as the template parameter
 * "agg_on_id", then prints the elapsed time.
 *
 * @param args unused
 * @throws IOException if the search-template request or closing the client fails
 */
public static void main(String[] args) throws IOException {
		Instant before = Instant.now();
		Map<String, Object> templateParams = new HashMap<>();
		templateParams.put("agg_on_id", -1);

		String template = "{\n" +
				"    \"size\": 0,\n" +
				"    \"query\": {\n" +
				"        \"constant_score\": {\n" +
				"            \"filter\": {\n" +
				"                \"bool\": {\n" +
				"                    \"must\": [\n" +
				"                        {\n" +
				"                            \"range\": {\n" +
				"                                \"effectiveDate\": {\n" +
				"                                    \"from\": 0,\n" +
				"                                    \"to\": 1575518400,\n" +
				"                                    \"include_lower\": true,\n" +
				"                                    \"include_upper\": false,\n" +
				"                                    \"boost\": 1.0\n" +
				"                                }\n" +
				"                            }\n" +
				"                        }\n" +
				"                    ],\n" +
				"                    \"adjust_pure_negative\": true,\n" +
				"                    \"boost\": 1.0\n" +
				"                }\n" +
				"            },\n" +
				"            \"boost\": 1.0\n" +
				"        }\n" +
				"    },\n" +
				"    \"aggregations\": {\n" +
				"        \"by_id\": {\n" +
				"            \"composite\": {\n" +
				"                \"size\": 10000,\n" +
				"                \"sources\": [\n" +
				"                    {\n" +
				"                        \"agg_on_id\": {\n" +
				"                            \"terms\": {\n" +
				"                                \"field\": \"id\",\n" +
				"                                \"missing_bucket\": false,\n" +
				"                                \"order\": \"asc\"\n" +
				"                            }\n" +
				"                        }\n" +
				"                    }\n" +
				"                ],\n" +
				"                \"after\": {\n" +
				"                    \"agg_on_id\": {{agg_on_id}}\n" +
				"                }\n" +
				"            },\n" +
				"            \"aggregations\": {\n" +
				"                \"latest_snapshot\": {\n" +
				"                    \"top_hits\": {\n" +
				"                        \"from\": 0,\n" +
				"                        \"size\": 1,\n" +
				"                        \"version\": false,\n" +
				"                        \"seq_no_primary_term\": false,\n" +
				"                        \"explain\": false,\n" +
				"                        \"_source\": {\n" +
				"                            \"includes\": [],\n" +
				"                            \"excludes\": []\n" +
				"                        },\n" +
				"                        \"sort\": [\n" +
				"                            {\n" +
				"                                \"effectiveDate\": {\n" +
				"                                    \"order\": \"desc\"\n" +
				"                                }\n" +
				"                            }\n" +
				"                        ]\n" +
				"                    }\n" +
				"                }\n" +
				"            }\n" +
				"        }\n" +
				"    }\n" +
				"}";

		// SearchTemplateRequestBuilder needs a transport-layer ElasticsearchClient, and
		// RestHighLevelClient is not one, so the cast fails with ClassCastException.
		// With the REST client, build a SearchTemplateRequest and call searchTemplate().
		org.elasticsearch.script.mustache.SearchTemplateRequest templateRequest =
				new org.elasticsearch.script.mustache.SearchTemplateRequest();
		templateRequest.setRequest(new SearchRequest("from_dynamo"));
		templateRequest.setScriptType(ScriptType.INLINE);
		templateRequest.setScript(template);
		templateRequest.setScriptParams(templateParams);

		// try-with-resources so the client is closed even if the request throws.
		try (RestHighLevelClient client = new RestHighLevelClient(
				RestClient.builder(new HttpHost("localhost", 9200, "http"),
								   new HttpHost("localhost", 9201, "http")))) {
			// To fetch the next page, update templateParams with the returned
			// after-key and send the same templateRequest again.
			SearchResponse sr = client
					.searchTemplate(templateRequest, org.elasticsearch.client.RequestOptions.DEFAULT)
					.getResponse();
		}

		Instant after = Instant.now();
		long delta = Duration.between(before, after).toMillis();
		System.out.println("Time taken: " + delta + " ms.");
	}

But when I try to run this code, I get this error:

Exception in thread "main" java.lang.ClassCastException: class org.elasticsearch.client.RestHighLevelClient cannot be cast to class org.elasticsearch.client.ElasticsearchClient (org.elasticsearch.client.RestHighLevelClient and org.elasticsearch.client.ElasticsearchClient are in unnamed module of loader 'app')
	at elasticClient.ElasticRequestCreator.main(ElasticRequestCreator.java:27)

Could you suggest a way out?
Florin

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.