Guill
(Guilherme Melo)
October 1, 2015, 5:47pm
1
Hello, I am using the elasticsearch-analysis-kuromoji plugin, and my index is created like so:
/** Name given to the custom kuromoji-based analyzer in the index settings and field mappings. */
public final static String JAPANESE_LANGUAGE_ANALYSIS = "japanese_analyzer";
/**
 * Creates the customer index with a custom kuromoji-based analyzer and, when
 * {@code Customer.indexProperties()} returns a non-null value, passes those properties to
 * {@code createIndexProperties}.
 *
 * <p>The index settings register a tokenizer named {@code kuromoji_user_dict} (type
 * {@code kuromoji_tokenizer}, mode {@code search}, {@code discard_punctuation} set to
 * {@code false}) and a custom analyzer named {@code JAPANESE_LANGUAGE_ANALYSIS} that uses it.
 *
 * <p>An {@link IOException} raised inside the method is logged together with its stack trace
 * and is not rethrown.
 */
private void createCustomerIndex() {
    final String indexName = Index.customer.name();
    try {
        final String settingsJson = jsonBuilder()
                .startObject()
                    .startObject("analysis")
                        .startObject("tokenizer")
                            .startObject("kuromoji_user_dict")
                                .field("type", "kuromoji_tokenizer")
                                .field("mode", "search")
                                .field("discard_punctuation", "false")
                            .endObject()
                        .endObject()
                        .startObject("analyzer")
                            .startObject(JAPANESE_LANGUAGE_ANALYSIS)
                                .field("type", "custom")
                                .field("tokenizer", "kuromoji_user_dict")
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject()
                .string();

        client.admin()
                .indices()
                .prepareCreate(indexName)
                .setSettings(ImmutableSettings.settingsBuilder().loadFromSource(settingsJson))
                .execute()
                .actionGet();

        final String indexProperties = Customer.indexProperties();
        // Only apply properties if the index has any.
        if (indexProperties != null) {
            createIndexProperties(indexName, indexProperties);
        }
    } catch (IOException e) {
        // Hand the exception itself to the logger so the stack trace and cause are kept.
        logger.error("Error Customer company Index", e);
    }
}
And am querying it like this:
/**
 * Runs a query-string search for {@code term} against {@code field} in the given index and
 * returns the first page of up to 10 hits.
 *
 * <p>The query string is analyzed with the analyzer named {@code japanese_analyzer} and its
 * terms are combined with the AND operator.
 *
 * @param field the default field the query string is matched against
 * @param term  the query string to search for
 * @param index the index to search; its {@code name()} is used as the index name
 * @return the search response for the first page of results
 */
public SearchResponse queryForFieldAndTerm(String field, String term, ElasticSearchService.Index index) {
    logger.info("Searching index {} for term: {}", index.name(), term);

    QueryStringQueryBuilder queryBuilder = QueryBuilders
            .queryString(term)
            .defaultField(field)
            .defaultOperator(QueryStringQueryBuilder.Operator.AND);
    queryBuilder.analyzer("japanese_analyzer");

    BoolQueryBuilder qb = QueryBuilders.boolQuery();
    // NOTE(review): this bool query only has a must clause, so minimum_should_match
    // presumably has no effect here -- confirm before removing.
    qb.minimumShouldMatch("1");
    qb.must(queryBuilder);

    SearchRequestBuilder searchRequestBuilder = new SearchRequestBuilder(client);
    searchRequestBuilder.setQuery(qb);
    return searchRequestBuilder
            // "from" is a zero-based offset: 0 starts at the top hit, 1 would skip it.
            .setFrom(0)
            .setSize(10)
            .setIndices(index.name())
            .execute()
            .actionGet();
}
If I add the mapping to a field specifically, like:
// Mapping for indexType: "source" is a plain string field, while "text" is a string field
// analyzed with the analyzer named by JAPANESE_LANGUAGE_ANALYSIS.
XContentBuilder xbMapping = jsonBuilder();
xbMapping.startObject();
xbMapping.startObject(indexType);
xbMapping.startObject("properties");

// "source": no analyzer is set on this field.
xbMapping.startObject("source");
xbMapping.field("type", "string");
xbMapping.endObject();

// "text": analyzed with the Japanese analyzer.
xbMapping.startObject("text");
xbMapping.field("type", "string");
xbMapping.field("analyzer", JAPANESE_LANGUAGE_ANALYSIS);
xbMapping.endObject();

xbMapping.endObject(); // properties
xbMapping.endObject(); // indexType
xbMapping.endObject(); // root

// Send the mapping for indexType to indexName and block until the call completes.
elasticSearchClient.admin()
        .indices()
        .preparePutMapping(indexName)
        .setType(indexType)
        .setSource(xbMapping)
        .execute()
        .get();
It works, but I don't have control over all of the fields, so I am looking for the optimal way to set the analysers: should I have an index for each language, and how do I set the analyser for all the fields? Or can I do that with a type?
Thanks !!
jprante
(Jörg Prante)
October 1, 2015, 7:50pm
2
Not sure what `JAPANESE_LANGUAGE_ANALYSIS` stands for, but if you set the analyzer name to `default`, Elasticsearch will use it for all fields, including `_all`.
Guill
(Guilherme Melo)
October 1, 2015, 9:15pm
3
I have edited it; it's just the name. I am running some tests with default as the name and will let you know, thanks!!
Guill
(Guilherme Melo)
October 6, 2015, 8:09pm
4
I have run a few tests, but to be honest I don't see the difference between having and not having the analyser.
Here is the test with the analyser:
/**
 * Creates the index with a kuromoji-based analyzer registered under the name {@code default},
 * indexes one customer containing the text "開発者ジャワ", and asserts that a query-string
 * search for "ジャワ" (no default field set, analyzer {@code default}) returns exactly one hit.
 *
 * <p>The index is refreshed explicitly before searching so the document is visible without
 * relying on a timed sleep, and the index is deleted in a {@code finally} block so it is
 * cleaned up even if indexing or searching throws.
 *
 * @throws InterruptedException declared for signature compatibility; the method no longer sleeps
 */
@Test
public void queryWithJapaneseAnalyserOnAll() throws InterruptedException {
    try {
        final String settingsJson = jsonBuilder()
                .startObject()
                    .startObject("analysis")
                        .startObject("tokenizer")
                            .startObject("kuromoji_user_dict")
                                .field("type", "kuromoji_tokenizer")
                                .field("mode", "search")
                                .field("discard_punctuation", "false")
                            .endObject()
                        .endObject()
                        .startObject("analyzer")
                            // Named "default" so it is not tied to a single field mapping.
                            .startObject("default")
                                .field("type", "custom")
                                .field("tokenizer", "kuromoji_user_dict")
                            .endObject()
                        .endObject()
                    .endObject()
                .endObject()
                .string();

        client.admin()
                .indices()
                .prepareCreate(indexName)
                .setSettings(ImmutableSettings.settingsBuilder().loadFromSource(settingsJson))
                .execute()
                .actionGet();
    } catch (Exception e) {
        // Keep the cause: a failed index creation changes what the rest of the test exercises.
        logger.error("Error creating index {}", indexName, e);
    }

    final SearchResponse response;
    try {
        ObjectMapper objectMapper = new ObjectMapper();
        Set<String> interestSet = null;
        Set<Address> addressSet = null;
        Set<Telephone> telephoneSet = null;
        Customer customer = new Customer("1", "開発者ジャワ", "Melo", "title", "Test Writer", "test@gmail.org", interestSet, addressSet, telephoneSet);
        try {
            String data = objectMapper.writeValueAsString(customer);
            logger.debug("Sending index indexName={} indexType={} id={}", new Object[]{indexName, indexName, customer.getId()});
            client.prepareIndex(indexName, indexName)
                    .setId(customer.getId())
                    .setSource(data)
                    .execute()
                    .actionGet();
        } catch (IOException e) {
            logger.error("Error sending Index {}", indexName, e);
        }

        // Make the indexed document searchable deterministically instead of sleeping
        // and hoping the periodic refresh has already run.
        client.admin().indices().refresh(new RefreshRequest(indexName)).actionGet();

        QueryStringQueryBuilder queryBuilder = QueryBuilders
                .queryString("ジャワ")
                .defaultOperator(QueryStringQueryBuilder.Operator.AND);
        queryBuilder.analyzer("default");

        BoolQueryBuilder qb = QueryBuilders.boolQuery();
        qb.minimumShouldMatch("1");
        qb.must(queryBuilder);

        SearchRequestBuilder searchRequestBuilder = new SearchRequestBuilder(client);
        searchRequestBuilder.setQuery(qb);
        response = searchRequestBuilder
                .setIndices(indexName)
                .execute()
                .actionGet();
    } finally {
        // Always remove the index so later tests start from a clean state.
        try {
            DeleteIndexResponse deleteIndexResponse = client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
            logger.debug("Delete index response={}", ToStringBuilder.reflectionToString(deleteIndexResponse));
        } catch (Exception e) {
            logger.error("Error deleting index {}", indexName, e);
        }
    }

    Assert.assertEquals(1, response.getHits().totalHits());
}
Guill
(Guilherme Melo)
October 6, 2015, 8:09pm
5
And here not having it:
/**
 * Creates the index without any custom analysis settings, indexes one customer containing the
 * text "開発者ジャワ", and asserts that a query-string search for "ジャワ" with default field
 * {@code firstName} returns exactly one hit.
 *
 * <p>The index is refreshed explicitly before searching so the document is visible without
 * relying on a timed sleep, and the index is deleted in a {@code finally} block so it is
 * cleaned up even if indexing or searching throws.
 *
 * @throws InterruptedException declared for signature compatibility; the method no longer sleeps
 */
@Test
public void query_WithNoAnalyserOnField() throws InterruptedException {
    try {
        client.admin()
                .indices()
                .prepareCreate(indexName)
                .execute()
                .actionGet();
    } catch (Exception e) {
        // Keep the cause: a failed index creation changes what the rest of the test exercises.
        logger.error("Error creating index {}", indexName, e);
    }

    final SearchResponse response;
    try {
        ObjectMapper objectMapper = new ObjectMapper();
        Set<String> interestSet = null;
        Set<Address> addressSet = null;
        Set<Telephone> telephoneSet = null;
        Customer customer = new Customer("1", "開発者ジャワ", "Melo", "title", "Test Writer", "test@gmail.org", interestSet, addressSet, telephoneSet);
        try {
            String data = objectMapper.writeValueAsString(customer);
            logger.debug("Sending index indexName={} indexType={} id={}", new Object[]{indexName, indexName, customer.getId()});
            client.prepareIndex(indexName, indexName)
                    .setId(customer.getId())
                    .setSource(data)
                    .execute()
                    .actionGet();
        } catch (IOException e) {
            logger.error("Error sending Index {}", indexName, e);
        }

        // Make the indexed document searchable deterministically instead of sleeping
        // and hoping the periodic refresh has already run.
        client.admin().indices().refresh(new RefreshRequest(indexName)).actionGet();

        QueryStringQueryBuilder queryBuilder = QueryBuilders
                .queryString("ジャワ")
                .defaultField("firstName")
                .defaultOperator(QueryStringQueryBuilder.Operator.AND);

        BoolQueryBuilder qb = QueryBuilders.boolQuery();
        qb.minimumShouldMatch("1");
        qb.must(queryBuilder);

        SearchRequestBuilder searchRequestBuilder = new SearchRequestBuilder(client);
        searchRequestBuilder.setQuery(qb);
        response = searchRequestBuilder
                .setIndices(indexName)
                .execute()
                .actionGet();
    } finally {
        // Always remove the index so later tests start from a clean state.
        try {
            DeleteIndexResponse deleteIndexResponse = client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
            logger.debug("Delete index response={}", ToStringBuilder.reflectionToString(deleteIndexResponse));
        } catch (Exception e) {
            logger.error("Error deleting index {}", indexName, e);
        }
    }

    Assert.assertEquals(1, response.getHits().totalHits());
}
}