How to set an analyser for all the fields and the "_all" field


(Guilherme Melo) #1

Hello, I am using the elasticsearch-analysis-kuromoji plugin, and my index is created like so:

/** Name under which the custom Kuromoji-based analyzer is registered in the index settings. */
public static final String JAPANESE_LANGUAGE_ANALYSIS = "japanese_analyzer";

/**
 * Creates the customer index with a custom analyzer named {@code JAPANESE_LANGUAGE_ANALYSIS}
 * that is backed by a {@code kuromoji_tokenizer} in "search" mode, then applies the customer
 * mapping when {@code Customer.indexProperties()} returns one.
 *
 * <p>An {@link IOException} raised while building the settings JSON is logged (with its stack
 * trace) and not rethrown.
 */
private void createCustomerIndex() {
    try {
        // NOTE(review): this refresh is issued before the index is created and its future is
        // never awaited, so it looks like it has no useful effect - confirm and consider removing.
        client.admin().indices().refresh(new RefreshRequest(Index.customer.name()));
        final CreateIndexRequestBuilder createIndexRequestBuilder = client.admin()
                .indices()
                .prepareCreate(Index.customer.name())
                .setSettings(ImmutableSettings.settingsBuilder().loadFromSource(jsonBuilder()
                        .startObject()
                            .startObject("analysis")
                                // Tokenizer definition: Kuromoji in search mode, keeping punctuation.
                                .startObject("tokenizer")
                                    .startObject("kuromoji_user_dict")
                                        .field("type", "kuromoji_tokenizer")
                                        .field("mode", "search")
                                        .field("discard_punctuation", "false")
                                    .endObject()
                                .endObject()
                                // Analyzer definition: custom analyzer built on the tokenizer above.
                                .startObject("analyzer")
                                    .startObject(JAPANESE_LANGUAGE_ANALYSIS)
                                        .field("type", "custom")
                                        .field("tokenizer", "kuromoji_user_dict")
                                    .endObject()
                                .endObject()
                            .endObject()
                        .endObject().string()));

        createIndexRequestBuilder.execute().actionGet();

        String indexProperties = Customer.indexProperties();

        // Only push a mapping when the customer type declares index properties.
        if (indexProperties != null) {
            createIndexProperties(Index.customer.name(), indexProperties);
        }

    } catch (IOException e) {
        // Pass the exception itself so the stack trace is kept, not just its message.
        logger.error("Error creating customer index " + Index.customer.name(), e);
    }

}

And I am querying it like this:

/**
 * Runs a query-string search for {@code term} against {@code field} in the given index,
 * analysing the query text with the "japanese_analyzer" analyzer and combining terms with AND.
 *
 * @param field the default field the query string is applied to
 * @param term  the query string to search for
 * @param index the index to search; its {@code name()} is used as the index name
 * @return the search response holding at most the first 10 hits
 */
public SearchResponse queryForFieldAndTerm(String field, String term, ElasticSearchService.Index index) {
    logger.info("Searching index {} for term: {}", index.name(), term);

    SearchRequestBuilder searchRequestBuilder = new SearchRequestBuilder(client);
    BoolQueryBuilder qb = QueryBuilders.boolQuery();

    QueryStringQueryBuilder queryBuilder = QueryBuilders
            .queryString(term)
            .defaultField(field)
            .defaultOperator(QueryStringQueryBuilder.Operator.AND);
    queryBuilder.analyzer("japanese_analyzer");
    // NOTE(review): the bool query has no "should" clauses, so this setting presumably has no
    // effect - confirm before relying on it.
    qb.minimumShouldMatch("1");
    qb.must(queryBuilder);

    searchRequestBuilder.setQuery(qb);

    return searchRequestBuilder
            // "from" is a zero-based offset: starting at 1 silently dropped the best-scoring hit.
            .setFrom(0)
            .setSize(10)
            .setIndices(index.name())
            .execute()
            .actionGet();
}

If I add the mapping to a field specifically, like:

// Mapping for indexType: "source" is a plain string field, while "text" is a string field
// analysed with the analyzer named by JAPANESE_LANGUAGE_ANALYSIS.
XContentBuilder mappingSource = jsonBuilder()
        .startObject()
            .startObject(indexType)
                .startObject("properties")
                    .startObject("source")
                        .field("type", "string")
                    .endObject()
                    .startObject("text")
                        .field("type", "string")
                        .field("analyzer", JAPANESE_LANGUAGE_ANALYSIS)
                    .endObject()
                .endObject()
            .endObject()
        .endObject();

// Send the mapping for that type to the index and block until the request completes.
elasticSearchClient.admin()
        .indices()
        .preparePutMapping(indexName)
        .setType(indexType)
        .setSource(mappingSource)
        .execute()
        .get();

It works, but I don't have control over all of the fields, so I am looking for the optimal way to set the analysers: whether I should have an index for each language, how to set the analyser for all the fields, or whether I can do that per type.

Thanks !!


(Jörg Prante) #2

Not sure what JAPANESE_LANGUAGE_ANALYSIS stands for, but if you set the analyzer name to default, Elasticsearch will use it for all fields, including _all.


(Guilherme Melo) #3

I have edited it; it's just the name. I am running some tests with default as the name and will let you know, thanks !!


(Guilherme Melo) #4

I have run a few tests, but to be honest I don't see the difference between having and not having the analyser.
Here is the test with the analyser:

/**
 * Creates an index whose "default" analyzer is a custom analyzer backed by a
 * {@code kuromoji_tokenizer}, indexes one customer whose first constructor text argument is
 * "開発者ジャワ", and expects a query-string search for "ジャワ" (no default field set, analyzer
 * "default") to return exactly one hit.
 *
 * <p>The index is refreshed explicitly after indexing so the document is searchable without
 * relying on a timed sleep, and the index is deleted in a {@code finally} block so a failing
 * search does not leave it behind for other tests.
 *
 * @throws InterruptedException kept for signature compatibility; no longer thrown by the body
 */
@Test
public void queryWithJapaneseAnalyserOnAll() throws InterruptedException {
    try {
        final CreateIndexRequestBuilder createIndexRequestBuilder = client.admin()
                .indices()
                .prepareCreate(indexName)
                .setSettings(ImmutableSettings.settingsBuilder()
                        .loadFromSource(jsonBuilder()
                                .startObject()
                                    .startObject("analysis")
                                        // Tokenizer: Kuromoji in search mode, keeping punctuation.
                                        .startObject("tokenizer")
                                            .startObject("kuromoji_user_dict")
                                                .field("type", "kuromoji_tokenizer")
                                                .field("mode", "search")
                                                .field("discard_punctuation", "false")
                                            .endObject()
                                        .endObject()
                                        // Analyzer registered under the name "default".
                                        .startObject("analyzer")
                                            .startObject("default")
                                                .field("type", "custom")
                                                .field("tokenizer", "kuromoji_user_dict")
                                            .endObject()
                                        .endObject()
                                    .endObject()
                                .endObject().string()));
        createIndexRequestBuilder.execute().actionGet();
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is kept, not just its message.
        logger.error("Error creating index " + indexName, e);
    }

    final SearchResponse response;
    try {
        ObjectMapper objectMapper = new ObjectMapper();
        Set<String> interestSet = null;
        Set<Address> addressSet = null;
        Set<Telephone> telephoneSet = null;
        Customer customer = new Customer("1", "開発者ジャワ", "Melo", "title", "Test Writer", "test@gmail.org", interestSet, addressSet, telephoneSet);
        try {
            String data = objectMapper.writeValueAsString(customer);
            logger.debug("Sending index indexName={} indexType={} id={}", new Object[]{indexName, indexName, customer.getId()});
            client.prepareIndex(indexName, indexName)
                    .setId(customer.getId())
                    .setSource(data)
                    .execute().actionGet();
        } catch (IOException e) {
            logger.error("Error sending Index " + indexName, e);
        }

        // Wait for an explicit refresh instead of sleeping: the document is then visible to
        // search deterministically.
        client.admin().indices().refresh(new RefreshRequest(indexName)).actionGet();

        SearchRequestBuilder searchRequestBuilder = new SearchRequestBuilder(client);
        BoolQueryBuilder qb = QueryBuilders.boolQuery();
        QueryStringQueryBuilder queryBuilder = QueryBuilders
                .queryString("ジャワ")
                .defaultOperator(QueryStringQueryBuilder.Operator.AND);
        qb.minimumShouldMatch("1");
        queryBuilder.analyzer("default");
        qb.must(queryBuilder);
        searchRequestBuilder.setQuery(qb);
        response = searchRequestBuilder
                .setIndices(indexName)
                .execute()
                .actionGet();
    } finally {
        // Always drop the test index, even when indexing or searching failed.
        try {
            DeleteIndexResponse deleteIndexResponse = client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
            logger.debug("Delete index response={}", ToStringBuilder.reflectionToString(deleteIndexResponse));
        } catch (Exception e) {
            logger.error("Error deleting index " + indexName, e);
        }
    }
    Assert.assertEquals(1, response.getHits().totalHits());
}

(Guilherme Melo) #5

And here not having it:

/**
 * Creates an index with no custom analysis settings, indexes one customer whose first
 * constructor text argument is "開発者ジャワ", and expects a query-string search for "ジャワ" on
 * the "firstName" field to return exactly one hit.
 *
 * <p>The index is refreshed explicitly after indexing so the document is searchable without
 * relying on a timed sleep, and the index is deleted in a {@code finally} block so a failing
 * search does not leave it behind for other tests.
 *
 * @throws InterruptedException kept for signature compatibility; no longer thrown by the body
 */
@Test
    public void query_WithNoAnalyserOnField() throws InterruptedException {
        try {
            final CreateIndexRequestBuilder createIndexRequestBuilder = client.admin()
                    .indices()
                    .prepareCreate(indexName);
            createIndexRequestBuilder.execute().actionGet();
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is kept, not just its message.
            logger.error("Error creating index " + indexName, e);
        }

        final SearchResponse response;
        try {
            ObjectMapper objectMapper = new ObjectMapper();
            Set<String> interestSet = null;
            Set<Address> addressSet = null;
            Set<Telephone> telephoneSet = null;
            Customer customer = new Customer("1", "開発者ジャワ", "Melo", "title", "Test Writer", "test@gmail.org", interestSet, addressSet, telephoneSet);
            try {
                String data = objectMapper.writeValueAsString(customer);
                logger.debug("Sending index indexName={} indexType={} id={}", new Object[]{indexName, indexName, customer.getId()});
                client.prepareIndex(indexName, indexName)
                        .setId(customer.getId())
                        .setSource(data)
                        .execute().actionGet();
            } catch (IOException e) {
                logger.error("Error sending Index " + indexName, e);
            }

            // Wait for an explicit refresh instead of sleeping: the document is then visible to
            // search deterministically.
            client.admin().indices().refresh(new RefreshRequest(indexName)).actionGet();

            SearchRequestBuilder searchRequestBuilder = new SearchRequestBuilder(client);
            BoolQueryBuilder qb = QueryBuilders.boolQuery();
            QueryStringQueryBuilder queryBuilder = QueryBuilders
                    .queryString("ジャワ")
                    .defaultField("firstName")
                    .defaultOperator(QueryStringQueryBuilder.Operator.AND);
            qb.minimumShouldMatch("1");
            qb.must(queryBuilder);
            searchRequestBuilder.setQuery(qb);
            response = searchRequestBuilder
                    .setIndices(indexName)
                    .execute()
                    .actionGet();
        } finally {
            // Always drop the test index, even when indexing or searching failed.
            try {
                DeleteIndexResponse deleteIndexResponse = client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
                logger.debug("Delete index response={}", ToStringBuilder.reflectionToString(deleteIndexResponse));
            } catch (Exception e) {
                logger.error("Error deleting index " + indexName, e);
            }
        }
        Assert.assertEquals(1, response.getHits().totalHits());
    }
    }

(system) #6