Hi!
I just removed NEST and started using Elastic.Clients.Elasticsearch, and I am now seeing some really strange behavior.
The multi_match query looks like this:
"multi_match": {
"operator": "and",
"query": "Annika Jöns",
"type": "cross_fields"
}
This query gives me a hit in the NEST-created index but not in the Elastic.Clients.Elasticsearch index.
Searching for "Jönsson" gives me a hit in both indexes, and "Annika Jons" also gives me a hit in both indexes. So it seems to have something to do with the "ö" when the multi_match query has two terms.
Any suggestions as to why this is happening?
I run the search from Kibana and get the same behavior, so it does not seem to matter whether the query is sent via NEST or Elastic.Clients from C#.
Can I compare the indices in Kibana, or is there something else I'm missing?
In NEST I create the index like this:
// NEST: create the index with one shard, one replica, the prepared analysis
// settings, and a 60-second timeout.
var createIndexResponse = Indices.Create(
    indexName,
    descriptor => descriptor
        .Settings(indexSettings => indexSettings
            .NumberOfShards(1)
            .NumberOfReplicas(1)
            .Analysis(_ => analysisSettings))
        .Timeout(TimeSpan.FromSeconds(60)));
In Elastic.Clients.Elasticsearch I do it like this:
// Elastic.Clients.Elasticsearch: start creating the index with one shard, one
// replica, the prepared analysis settings, and a 60-second timeout.
var createIndexResponse = Indices.CreateAsync(
    indexName,
    descriptor => descriptor
        .Settings(indexSettings => indexSettings
            .NumberOfShards(1)
            .NumberOfReplicas(1)
            .Analysis(analysisSettings))
        .Timeout(new Duration(TimeSpan.FromSeconds(60))));

// NOTE(review): CreateAsync presumably returns a Task, so reading .Result blocks
// the calling thread until the request completes. Prefer `await` if the
// enclosing method can be made async - confirm against the caller.
var createIndexResult = createIndexResponse.Result;
This is the code for the analysis settings:
/// <summary>
/// Default <see cref="IAnalyzerConfiguration"/> implementation. Supplies the named
/// analyzers, token filters, tokenizers and normalizers used to build the index
/// analysis settings.
/// </summary>
public class DefaultAnalyzerConfiguration : IAnalyzerConfiguration
{
    // Names that are both registered by this class and referenced from the analyzers.
    private const string EnglishStopFilterName = "english_stop";
    private const string EdgeNGramTokenizerName = "edgeNGramTokenizer";

    /// <summary>
    /// Builds the analyzers, keyed by the analyzer names declared in <c>Constants</c>.
    /// </summary>
    /// <returns>A new dictionary holding one standard analyzer and three custom analyzers.</returns>
    public Dictionary<string, IAnalyzer> GetAnalyzers()
    {
        return new Dictionary<string, IAnalyzer>
        {
            // Ref: https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-standard-analyzer.html
            { Constants.DefaultQueryAnalyzerName, new StandardAnalyzer() },

            // Standard tokenizer, lowercased, English stop words removed.
            {
                Constants.DefaultIndexAnalyzerName,
                BuildCustomAnalyzer("standard", "lowercase", EnglishStopFilterName)
            },

            // Same chain as above, with "asciifolding" appended as the last filter.
            {
                Constants.AccentAnalyzerName,
                BuildCustomAnalyzer("standard", "lowercase", EnglishStopFilterName, "asciifolding")
            },

            // Same filter chain as the accent analyzer, on top of the edge n-gram tokenizer.
            {
                Constants.AccentEdgeNGramAnalyzerName,
                BuildCustomAnalyzer(EdgeNGramTokenizerName, "lowercase", EnglishStopFilterName, "asciifolding")
            },
        };
    }

    /// <summary>
    /// Builds the custom token filters referenced by the analyzers.
    /// </summary>
    /// <returns>A new dictionary containing the "english_stop" stop-word filter.</returns>
    public Dictionary<string, ITokenFilter> GetTokenFilters()
    {
        var englishStop = new StopTokenFilter { Stopwords = new[] { "_english_" } };

        return new Dictionary<string, ITokenFilter>
        {
            { EnglishStopFilterName, englishStop },
        };
    }

    /// <summary>
    /// Builds the custom tokenizers referenced by the analyzers.
    /// </summary>
    /// <returns>A new dictionary containing the "edgeNGramTokenizer" tokenizer.</returns>
    public Dictionary<string, ITokenizer> GetTokenizers()
    {
        // Edge n-grams between 1 and 30 characters, with letters and digits as token characters.
        var edgeNGram = new EdgeNGramTokenizer
        {
            MinGram = 1,
            MaxGram = 30,
            TokenChars = new List<TokenChar> { TokenChar.Letter, TokenChar.Digit },
        };

        return new Dictionary<string, ITokenizer>
        {
            { EdgeNGramTokenizerName, edgeNGram },
        };
    }

    /// <summary>
    /// Builds the custom normalizers.
    /// </summary>
    /// <returns>A new dictionary containing the "keywordNormalizer" normalizer.</returns>
    public Dictionary<string, INormalizer> GetNormalizers()
    {
        // No char filters; the "lowercase" and "asciifolding" filters are listed in that order.
        var keywordNormalizer = new CustomNormalizer
        {
            CharFilter = new List<string>(),
            Filter = new List<string> { "lowercase", "asciifolding" },
        };

        return new Dictionary<string, INormalizer>
        {
            { "keywordNormalizer", keywordNormalizer },
        };
    }

    /// <summary>
    /// Creates a custom analyzer from a tokenizer name and an ordered list of token filter names.
    /// </summary>
    /// <param name="tokenizer">Name of the tokenizer to use.</param>
    /// <param name="filters">Token filter names, in the order they are listed on the analyzer.</param>
    /// <returns>A new <see cref="CustomAnalyzer"/> with its own filter list instance.</returns>
    private static CustomAnalyzer BuildCustomAnalyzer(string tokenizer, params string[] filters)
    {
        return new CustomAnalyzer
        {
            Tokenizer = tokenizer,
            Filter = new List<string>(filters),
        };
    }
}
Thanks!
/Kristoffer