Hi @Kathleen_DeRusso,
Thanks for the response. I am providing an edited mapping, query, and response. Our core data is conversational, and we store it at the transcript, turn (speaker A speaking, then speaker B speaking, etc.), and utterance (i.e. sentence) levels. Utterances are nested below turns, and we generate an embedding for each utterance. When I add the similarity parameter and tweak its value, the count of top-level documents does change: a higher similarity threshold reduces the number of results. However, I believe an inner hit is still generated for every nested utterance, even when it falls below the similarity threshold. For brevity I did not include the embeddings (the query_vector is left empty below), but they were generated from the phrase "flu vaccine". I've limited the inner hits to 5 for this example, but you can see how the _score values of the first and last inner hits differ significantly. Also, the inner hits total value (172 and 350 below) is the total number of utterances in each transcript, so no utterance is being filtered out. It seems that the similarity parameter is filtering out top-level documents but not the nested utterances. I was expecting (hoping) that it would return only the utterances scoring above the similarity threshold. I've also tried changing score_mode to max and avg, but this does not seem to have any effect.
mapping
{
"mr-sstuart-reveal_1": {
"aliases": {},
"mappings": {
"properties": {
"name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "english"
},
"transcript:en:content": {
"type": "text",
"store": true,
"fields": {
"standard": {
"type": "text",
"store": true,
"term_vector": "with_positions_offsets",
"analyzer": "standard"
}
},
"term_vector": "with_positions_offsets",
"analyzer": "english"
},
"turns": {
"type": "nested",
"properties": {
"en": {
"type": "nested",
"properties": {
"content": {
"type": "text",
"store": true,
"fields": {
"standard": {
"type": "text",
"store": true,
"term_vector": "with_positions_offsets",
"analyzer": "standard"
}
},
"term_vector": "with_positions_offsets",
"analyzer": "english"
},
"utterances": {
"type": "nested",
"properties": {
"content": {
"type": "text",
"store": true,
"fields": {
"standard": {
"type": "text",
"store": true,
"term_vector": "with_positions_offsets",
"analyzer": "standard"
}
},
"term_vector": "with_positions_offsets",
"analyzer": "english"
},
"embedding": {
"type": "dense_vector",
"dims": 384,
"index": true,
"similarity": "cosine",
"index_options": {
"type": "int8_hnsw",
"m": 16,
"ef_construction": 100
}
},
"embedding_a": {
"type": "dense_vector",
"dims": 1024,
"index": true,
"similarity": "cosine",
"index_options": {
"type": "int8_hnsw",
"m": 16,
"ef_construction": 100
}
}
}
}
}
}
}
}
}
}
}
}
query
GET /mr-sstuart-reveal_1/_search
{
"_source":
{
"includes":
[]
},
"from": 0,
"query":
{
"bool":
{
"must":
[
{
"bool":
{
"must":
[
{
"nested":
{
"ignore_unmapped": false,
"inner_hits":
{
"name": "A",
"size": 5,
"_source":
{
"includes":
["turns.en.utterances.id","turns.en.utterances.content"]
}
},
"path": "turns.en.utterances",
"query":
{
"bool":
{
"must":
[
{
"bool":
{
"must":
[
{
"knn":
{
"field": "turns.en.utterances.embedding",
"query_vector": [],
"num_candidates": 100,
"similarity": 0.75
}
}
]
}
}
]
}
},
"score_mode": "max"
}
}
]
}
}
]
}
},
"size": 20,
"sort":
[
{
"_score":
{
"order": "desc"
}
},
{
"id":
{
"order": "desc"
}
}
]
}
result
{
"took": 203,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "mr-sstuart-reveal_1",
"_id": "2546",
"_score": 1.1160005,
"_source": {
},
"sort": [
1.1160005,
2546
],
"inner_hits": {
"A": {
"hits": {
"total": {
"value": 172,
"relation": "eq"
},
"max_score": 0.97175026,
"hits": [
{
"_index": "mr-sstuart-reveal_1",
"_id": "2546",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 111,
"_nested": {
"field": "utterances",
"offset": 0
}
}
},
"_score": 0.97175026,
"_source": {
"id": 496818,
"content": "Flu vaccine?"
}
},
{
"_index": "mr-sstuart-reveal_1",
"_id": "2546",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 105,
"_nested": {
"field": "utterances",
"offset": 0
}
}
},
"_score": 0.59818417,
"_source": {
"id": 496822,
"content": "So, you need refills for any medicines?"
}
},
{
"_index": "mr-sstuart-reveal_1",
"_id": "2546",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 90,
"_nested": {
"field": "utterances",
"offset": 1
}
}
},
"_score": 0.59635586,
"_source": {
"id": 496801,
"content": "He seems to be a lot better now with, uh, with the medicines."
}
},
{
"_index": "mr-sstuart-reveal_1",
"_id": "2546",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 0,
"_nested": {
"field": "utterances",
"offset": 0
}
}
},
"_score": 0.5929342,
"_source": {
"id": 496669,
"content": "Have any medications changed since the last time you were here?"
}
},
{
"_index": "mr-sstuart-reveal_1",
"_id": "2546",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 25,
"_nested": {
"field": "utterances",
"offset": 0
}
}
},
"_score": 0.5895265,
"_source": {
"id": 496711,
"content": "Anxiety is better?"
}
}
]
}
}
}
},
{
"_index": "mr-sstuart-reveal_1",
"_id": "2545",
"_score": 1.0795139,
"_source": {
},
"sort": [
1.0795139,
2545
],
"inner_hits": {
"A": {
"hits": {
"total": {
"value": 350,
"relation": "eq"
},
"max_score": 0.90306646,
"hits": [
{
"_index": "mr-sstuart-reveal_1",
"_id": "2545",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 192,
"_nested": {
"field": "utterances",
"offset": 0
}
}
},
"_score": 0.90306646,
"_source": {
"id": 496559,
"content": "Flu shot?"
}
},
{
"_index": "mr-sstuart-reveal_1",
"_id": "2545",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 187,
"_nested": {
"field": "utterances",
"offset": 0
}
}
},
"_score": 0.8017596,
"_source": {
"id": 496560,
"content": "No, that, yeah, the flu shot and what's that other sickness?"
}
},
{
"_index": "mr-sstuart-reveal_1",
"_id": "2545",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 180,
"_nested": {
"field": "utterances",
"offset": 0
}
}
},
"_score": 0.7032441,
"_source": {
"id": 496554,
"content": "Have you ever gotten the pneumonia shot?"
}
},
{
"_index": "mr-sstuart-reveal_1",
"_id": "2545",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 109,
"_nested": {
"field": "utterances",
"offset": 0
}
}
},
"_score": 0.6227246,
"_source": {
"id": 496453,
"content": "I was, I was getting sick when I need, the last couple of winters, not this one, but the last, going back off it, I was getting off it."
}
},
{
"_index": "mr-sstuart-reveal_1",
"_id": "2545",
"_nested": {
"field": "turns",
"offset": 0,
"_nested": {
"field": "en",
"offset": 186,
"_nested": {
"field": "utterances",
"offset": 0
}
}
},
"_score": 0.62057436,
"_source": {
"id": 496561,
"content": "Shingles?"
}
}
]
}
}
}
}
]
}
}
Thanks
Shaun