Elasticsearch - filter index on aggregated field

Hi,

We could use your help. We have Elasticsearch with logstash. It is inserting data in elastic index from a jdbc source. In logstash we use an aggregate filter. The config looks like this:

input {
    jdbc {
        jdbc_connection_string => "jdbc:oracle:thin:@wntstdb03.izaaksuite.nl:1521:wntstf2"
        jdbc_user => "webnext_zaken"
		jdbc_password => "webnext_zaken"
        jdbc_driver_library => ""
        jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
        statement_filepath =>"/appl/sw/webnext/logstash/config_documenten/queries/documenten.sql"
	last_run_metadata_path => "/appl/sw/webnext/logstash/config_documenten/parameters/.jdbc_last_run_doc"
    }
}

# The filter part of this file is commented out to indicate that it is
# optional.

filter {
  aggregate {
       task_id => "%{zaakdoc_id}"
       code => "
        map['zaak_id'] ||= event.get('zaak_id')
	map['result_type'] ||= event.get('result_type')
	map['mutatiedatum'] ||= event.get('mutatiedatum')
	map['oge_id'] ||= event.get('oge_id')
	map['zaakidentificatie'] ||= event.get('zaakidentificatie')
	map['zaakomschrijving'] ||= event.get('zaakomschrijving')
	map['titel'] ||= event.get('titel')
	map['beschrijving'] ||= event.get('beschrijving')
	map['zaakdoc_id'] ||= event.get('zaakdoc_id')
	map['groepsrollenlijst'] ||= []
        map['groepsrollenlijst'] << {'groepsrol' => event.get('rol')}        
        event.cancel()
       "
       push_previous_map_as_event => true
       timeout => 5
     }
    
}

output {
# stdout { codec => rubydebug }
#  file { 
#  path => ["/appl/sw/webnext/logstash/config_documenten/output/documenten.txt"]
# 	}
  elasticsearch { 
 		hosts => ["localhost:9200"]
 		index => "documenten"
 		document_id => "%{zaakdoc_id}"
 	}
}

The index config looks like this:

{
	"documenten": {
		"aliases": {
			"izaaksuite": {}
		},
		"mappings": {
			"properties": {
				"@timestamp": {
					"type": "date"
				},
				"@version": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"beschrijving": {
					"type": "text"
				},
				"groepsrollenlijst": {
					"properties": {
						"groepsrol": {
							"type": "text",
							"fields": {
								"keyword": {
									"type": "keyword",
									"ignore_above": 256
								}
							}
						}
					}
				},
				"mutatiedatum": {
					"type": "date"
				},
				"oge_id": {
					"type": "text"
				},
				"result_type": {
					"type": "text"
				},
				"rol": {
					"type": "text"
				},
				"tags": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"titel": {
					"type": "text"
				},
				"zaak_id": {
					"type": "text"
				},
				"zaakdoc_id": {
					"type": "long"
				},
				"zaakidentificatie": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				},
				"zaakomschrijving": {
					"type": "text",
					"fields": {
						"keyword": {
							"type": "keyword",
							"ignore_above": 256
						}
					}
				}
			}
		},
		"settings": {
			"index": {
				"routing": {
					"allocation": {
						"include": {
							"_tier_preference": "data_content"
						}
					}
				},
				"number_of_shards": "1",
				"provided_name": "documenten",
				"creation_date": "1654158264412",
				"number_of_replicas": "1",
				"uuid": "bf4xj4TwQ-mP5K4Orc5HEA",
				"version": {
					"created": "8010399"
				}
			}
		}
	}
}

One document in the index that is eventually build, looks like this:

{
	"_index": "documenten",
	"_id": "25066386",
	"_version": 1,
	"_seq_no": 33039,
	"_primary_term": 6,
	"found": true,
	"_source": {
		"groepsrollenlijst": [
			{
				"groepsrol": "7710_AFH1"
			},
			{
				"groepsrol": "7710_AFH2"
			},
			{
				"groepsrol": "MR_GRP1"
			}
		],
		"zaak_id": 44973087,
		"oge_id": 98,
		"@version": "1",
		"@timestamp": "2022-07-11T08:24:07.717572Z",
		"zaakdoc_id": 25066386,
		"zaakomschrijving": "testOSiZaakAOS",
		"result_type": "doc",
		"titel": "Test4",
		"zaakidentificatie": "077215353",
		"mutatiedatum": "2022-06-27T09:51:52.078119Z",
		"beschrijving": "Test4"
	}
}

As you can see, the "groepsrollenlijst" is present. Now our problem: when searching we need to match one of the values in groepsrollenlijst (which is dutch for grouprole which is basically an autorisation within the application where the data is coming from) with the grouprole present on the user doing the search. This to prevent users to be able to have data in their search results they don't have acces to.

Our java code looks like this (sorry for the dutch sentences):

                    List<SearchResult> searchResults = new ArrayList<>();

                    SearchRequest searchRequest = new SearchRequest(index);
                    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

                    BoolQueryBuilder fieldsQuery = new BoolQueryBuilder();

                    /*
                    * Haal per index alle velden op waarop gezocht kan en mag worden. We kunnen
                    * niet over alle velden zoeken omdat er dan ook hits voorkomen op de
                    * groepsrollenlijst (als je op bv op rutten zoekt worden er ook hits gevonden
                    * op groepsrol "RUTTENGROEP" wat je niet wilt) Ook bij documenten en
                    * betrokkenen wil je bv niet dat er hits gevonden worden op de
                    * zaakomschrijving.
                    */
                    String indexFields = index + "Fields";
                    indexFields = indexFields.substring(0, 1).toUpperCase() + indexFields.substring(1);
                    List<String> fields = getFieldsFor(indexFields);

                    // Voeg per veld een query toe voor de ingegeven zoektekst
                    HighlightBuilder highlightBuilder = new HighlightBuilder();
                    QueryStringQueryBuilder queryStringQueryBuilder = new QueryStringQueryBuilder(autoCompleteText);
                    for (String field : fields) {
                           queryStringQueryBuilder.field(field);
                           highlightBuilder.field(field);
                    }
                    fieldsQuery.should(queryStringQueryBuilder);

                    // Manipuleer de roles tbv test
                    roles.clear();
                    roles.add("7710_AFH1");
                    roles.add("7710_AFH2");

                    BoolQueryBuilder rolesQuery = QueryBuilders.boolQuery();

                    for (String role : roles) {
                           rolesQuery.should(QueryBuilders.wildcardQuery("groepsrol", "*" + role + "*"));
                    }

                    LOG.info("Rollen medewerker: " + roles);
                    BoolQueryBuilder mainQuery = new BoolQueryBuilder();
                    mainQuery.must(new TermsQueryBuilder("oge_id", String.valueOf(ogeId)));
                    mainQuery.must(fieldsQuery);
                    mainQuery.must(rolesQuery);

                    searchSourceBuilder.query(mainQuery);
                    searchSourceBuilder.highlighter(highlightBuilder);
                    searchRequest.source(searchSourceBuilder);
                    searchRequest.validate();

                    // Execute search
                    LOG.info("Search query: {}", searchRequest.source().toString());

                    SearchResponse searchResponse = null;
                    try {
                           searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
                    } catch (IOException | ElasticsearchStatusException e) {
                           // TODO Auto-generated catch block
                           e.printStackTrace();
                           return;
                    }

                    if (searchResponse == null) {
                           return;
                    }

                    SearchHits hits = searchResponse.getHits();

For the test we hardcoded the user's grouproles into the code.
The issue is that when we search for "testOSiZaakAOS" (one of the values in the document previously shown) which should be a hit, we don't get a result. If we comment out the "mainQuery.must(rolesQuery);" part, we do get a result. But then the roles are not taking into account.

How do we go about fixing this? So user has role x, some documents in the index have key-value pairs for role x, y and z. And some do have only y and z.
Search should only show those where role x is present.
Basically at least one of the roles of the user should match one of the roles present in the document in the index.

Your help is greatly appreciated! Let me know if you need more info.

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.