0
I'm currently working on improving the accuracy of results returned from an Elasticsearch query by adding a "body" field to the search, along with search fuzziness. The website in question has a lot of content and is highly trafficked. I rebuilt the query using span_multi and span_near queries to improve accuracy with fuzzy search, but the rewrite seems to have overloaded my Elasticsearch servers and stopped search from working. Any pointers or observations on the existing and updated queries below?
The Elasticsearch server version is 7.5.2, which is admittedly outdated.
Existing Query:
// Existing search request body (fragment: the enclosing function and the
// definitions of `keyword`, `fieldDateFilter`, `contentType` and `language`
// are outside this snippet).
return {
query: {
bool: {
// Scoring clause: a dis_max over two title_search queries.
must: [
{
dis_max: {
queries: [
// Analyzed match on title_search, boosted 60.
{ match: { title_search: { query: keyword, boost: 60 } } },
{
// Substring-style match: the keyword is wrapped in leading and trailing `*`.
wildcard: {
title_search: { value: '*' + keyword + '*', boost: 40 },
},
},
],
tie_breaker: 0.7,
boost: 5,
},
},
],
// Non-scoring restrictions: date filter, published status, content type, language.
filter: [
fieldDateFilter,
{ term: { status: true } },
{ term: { type: contentType } },
{ term: { langcode: language } },
],
},
},
// Requests up to 1000 hits, ordered by descending relevance score.
size: 1000,
sort: [{ _score: { order: 'desc' } }],
}
Updated Query:
/**
 * Decodes and normalises a raw search keyword: URI-decodes it, lower-cases it
 * and turns every `+` into a space.
 *
 * @param {*} rawKeyword - Keyword as received from the caller.
 * @returns {string} The normalised keyword (inner spacing is preserved).
 */
function normalizeSearchKeyword(rawKeyword) {
  let decoded;
  try {
    decoded = decodeURI(rawKeyword);
  } catch (err) {
    if (!(err instanceof URIError)) {
      throw err;
    }
    // A malformed escape sequence (e.g. a literal "100%") is a legitimate
    // search term, not a reason to fail the whole search: use it verbatim.
    decoded = String(rawKeyword);
  }
  return decoded.toLowerCase().replace(/\+/g, ' ');
}

/**
 * Builds one fuzzy `span_multi` clause per word for the given field.
 *
 * @param {string} field - Field name the fuzzy query targets.
 * @param {string[]} words - Non-empty search terms.
 * @returns {object[]} Span clauses suitable for `span_near.clauses`.
 */
function buildFuzzySpanClauses(field, words) {
  return words.map((word) => ({
    span_multi: {
      match: {
        fuzzy: {
          [field]: {
            value: word,
            fuzziness: 1,
          },
        },
      },
    },
  }));
}

/**
 * Builds the `field_date` range filter used for event content types.
 *
 * @param {string|string[]} contentType - Content type(s) being searched.
 * @param {?string} dateFilter - Date window key, `'all'`, or null/undefined.
 * @returns {object|undefined} A range filter, or undefined for non-event types.
 */
function buildEventDateFilter(contentType, dateFilter) {
  if (!contentType.includes('event')) {
    return undefined;
  }
  // `!= null` also covers an omitted argument, which previously crashed on
  // `dateFilter.includes`.
  const hasDateWindow = dateFilter != null && dateFilter !== 'all';
  if (!hasDateWindow) {
    return { range: { field_date: { lt: undefined, gte: undefined } } };
  }
  return {
    range: {
      field_date: {
        lt: ElasticSearch[dateFilter],
        gte: dateFilter.includes('month') ? 'now/M' : 'now',
      },
    },
  };
}

/**
 * Builds the Elasticsearch request body for a fuzzy search over
 * `title_search` and `body`, restricted by status, content type, language
 * and (for event types) a date range.
 *
 * NOTE(review): every keyword word becomes a fuzzy `span_multi` clause on two
 * fields, next to a leading-wildcard query and `size: 1000`. These look like
 * the expensive parts of the request; consider bounding the fuzzy expansion
 * (`prefix_length`, `max_expansions`, a `top_terms_N` rewrite) — confirm
 * against the Elasticsearch documentation and cluster profiling before tuning.
 *
 * @param {string} keyword - Raw (possibly URI-encoded) search keyword.
 * @param {string} language - Value matched against `langcode`.
 * @param {string|string[]} contentType - Value matched against `type`.
 * @param {?string} dateFilter - Date window key, `'all'`, or null/undefined.
 * @returns {object} Search request body with `query`, `size` and `sort`.
 */
function getContentTypeSearchAllResult(
  keyword,
  language,
  contentType,
  dateFilter
) {
  const normalizedKeyword = normalizeSearchKeyword(keyword);

  // Split on whitespace runs and drop empties so repeated, leading or
  // trailing separators never produce a fuzzy clause with an empty term.
  const keywordWords = normalizedKeyword
    .split(/\s+/)
    .filter((word) => word !== '');

  const buildSpanNear = (field, boost) => ({
    span_near: {
      clauses: buildFuzzySpanClauses(field, keywordWords),
      slop: 0,
      in_order: false,
      boost,
    },
  });

  const wildcardQuery = {
    wildcard: {
      title_search: { value: `*${normalizedKeyword}*`, boost: 40 },
    },
  };

  // Without any word there is nothing to put in `span_near.clauses`, so only
  // the wildcard query is sent in that case.
  const hasWords = keywordWords.length > 0;
  const scoringQueries = hasWords
    ? [buildSpanNear('title_search', 60), wildcardQuery, buildSpanNear('body', 5)]
    : [wildcardQuery];

  // The date filter only exists for event types; drop it otherwise so the
  // serialised filter array never contains a null entry.
  const filterClauses = [
    buildEventDateFilter(contentType, dateFilter),
    { term: { status: true } },
    { term: { type: contentType } },
    { term: { langcode: language } },
  ].filter((clause) => clause !== undefined);

  return {
    query: {
      bool: {
        must: [
          {
            dis_max: {
              queries: scoringQueries,
              tie_breaker: 0.7,
              boost: 5,
            },
          },
        ],
        filter: filterClauses,
      },
    },
    size: 1000,
    sort: [{ _score: { order: 'desc' } }],
  };
}
Expectation: The Elasticsearch server would not stop working.