For a particular use case I'm trying to benchmark how long it takes to fetch all the entries for a given search. I used the official client's scroll approach to page through all the entries, as shown below, but it times out in some cases when the number of entries is very large, and I can also see noticeable delay even for queries that match fewer than 10K entries.
const { Client } = require('@elastic/elasticsearch')

const client = new Client({
  node: process.env.ELASTIC_NODE,
  auth: {
    username: process.env.ELASTIC_U,
    password: process.env.ELASTIC_P
  },
  tls: {
    rejectUnauthorized: false
  }
})
// Async generator that pages through all results with the scroll API,
// yielding the number of hits in each page.
async function * scrollSearch (params) {
  let response = await client.search(params)
  while (true) {
    const sourceHits = response.hits.hits
    if (sourceHits.length === 0) {
      break
    }
    yield sourceHits.length
    if (!response._scroll_id) {
      break
    }
    response = await client.scroll({
      scroll_id: response._scroll_id,
      scroll: params.scroll
    })
  }
}
// Scrolls through every matching document and returns the total hit count.
async function run (query) {
  let hits = 0
  const params = {
    index: process.env.INDEX,
    scroll: '1m',   // keep the scroll context alive for 1 minute between pages
    size: 10000,    // maximum page size per scroll request
    query: {
      bool: {
        must: query
      }
    }
  }
  for await (const noOfHits of scrollSearch(params)) {
    hits += noOfHits
  }
  return hits
}
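
For context, this is roughly how I drive and time the benchmark; the match clause here is just a placeholder for my real queries, which are bool/must clauses of varying selectivity.

async function benchmark () {
  // Placeholder query; in practice this comes from the search being benchmarked
  const query = [{ match: { status: 'active' } }]

  const start = Date.now()
  const totalHits = await run(query)
  const elapsedMs = Date.now() - start

  console.log(`fetched ${totalHits} hits in ${elapsedMs} ms`)
}

benchmark().catch(console.error)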
Is there any particular reason for this behaviour, and is there a faster way to iterate over all the hits?