Return term frequency of a single field


(Kennedy Idialu) #1

Hi,

I have been trying to use a facet to get the term frequency of a field. My
query returns just one hit, so I would like the facet to return the terms
that occur most frequently in a particular field.

My mapping:

{
"mappings":{
"document":{
"properties":{
"tags":{
"type":"object",
"properties":{
"title":{
"fields":{
"partial":{
"search_analyzer":"main",
"index_analyzer":"partial",
"type":"string",
"index" : "analyzed"
},
"title":{
"type":"string",
"analyzer":"main",
"index" : "analyzed"
}
},
"type":"multi_field"
}
}
}
}
}
},

"settings":{
    "analysis":{
        "filter":{
            "name_ngrams":{
                "side":"front",
                "max_gram":50,
                "min_gram":2,
                "type":"edgeNGram"
            }
        },

        "analyzer":{
            "main":{
                "filter": ["standard", "lowercase", "asciifolding"],
                "type": "custom",
                "tokenizer": "standard"
            },
            "partial":{

"filter":["standard","lowercase","asciifolding","name_ngrams"],
"type": "custom",
"tokenizer": "standard"
}
}
}
}

}

Test data:

curl -XPUT localhost:9200/testindex/document -d '{"tags": {"title": "people also kill people"}}'

Query:
curl -XGET 'localhost:9200/testindex/document/_search?pretty=1' -d '
{
"query":
{
"term": { "tags.title": "people" }
},
"facets": {
"popular_tags": { "terms": {"field": "tags.title"}}
}
}'

This returns the following result:

"hits" : {
"total" : 1,
"max_score" : 0.99381393,
"hits" : [ {
"_index" : "testindex",
"_type" : "document",
"_id" : "uI5k0wggR9KAvG9o7S7L2g",
"_score" : 0.99381393, "_source" : {"tags": {"title": "people also
kill people"}}
} ]
},
"facets" : {
"popular_tags" : {
"_type" : "terms",
"missing" : 0,
"total" : 3,
"other" : 0,
"terms" : [ {
"term" : "people",
"count" : 1 // I expect this to be 2
}, {
"term" : "kill",
"count" : 1
}, {
"term" : "also",
"count" : 1
} ]
}
}

The above result is not what I want. I want the frequency count for
"people" to be 2, like this:

"hits" : {
"total" : 1,
"max_score" : 0.99381393,
"hits" : [ {
"_index" : "testindex",
"_type" : "document",
"_id" : "uI5k0wggR9KAvG9o7S7L2g",
"_score" : 0.99381393, "_source" : {"tags": {"title": "people also
kill people"}}
} ]
},
"facets" : {
"popular_tags" : {
"_type" : "terms",
"missing" : 0,
"total" : 3,
"other" : 0,
"terms" : [ {
"term" : "people",
"count" : 2
}, {
"term" : "kill",
"count" : 1
}, {
"term" : "also",
"count" : 1
} ]
}
}

How do I achieve this? Is facet the wrong way to go?

--


(Kennedy Idialu) #2

Anybody?

--


(David Pilato) #3

IMHO, facets are computed on the index, not on the source documents.

AFAIK, TermFacet does not use the term frequency within each doc; it only
counts the number of docs that contain your term at least once, not how
often the term occurs.

I don’t see a way to get what you are looking for.

Hey ES Gurus ! Any idea? A script?

David.

De : elasticsearch@googlegroups.com [mailto:elasticsearch@googlegroups.com]
De la part de Kennedy Idialu
Envoyé : dimanche 4 novembre 2012 11:39
À : elasticsearch@googlegroups.com
Objet : Return term frequency of a single field

Hi,

I have being trying to use facet to get the term frequency of a field. My
query returns just one hit, so I would like to have the facet return the
terms that have the most frequency in a particular field.

My mapping:

{

"mappings":{

    "document":{

        "properties":{

            "tags":{

                "type":"object",

                "properties":{

                    "title":{

                        "fields":{

                            "partial":{

                                "search_analyzer":"main",

                                "index_analyzer":"partial",

                                "type":"string",

                                "index" : "analyzed"

                            }

                            "title":{

                                "type":"string",

                                "analyzer":"main",

                                "index" : "analyzed"

                            }

                        },

                        "type":"multi_field"

                    }

                }

            }

        }

    }

},



"settings":{

    "analysis":{

        "filter":{

            "name_ngrams":{

                "side":"front",

                "max_gram":50,

                "min_gram":2,

                "type":"edgeNGram"

            }

        },



        "analyzer":{

            "main":{

                "filter": ["standard", "lowercase", "asciifolding"],

                "type": "custom",

                "tokenizer": "standard"

            },

            "partial":{

"filter":["standard","lowercase","asciifolding","name_ngrams"],

                "type": "custom",

                "tokenizer": "standard"

            }

        }

    }

}

}

Test data:

curl -XPUT localhost:9200/testindex/document -d '{"tags": {"title": "people
also kill people"}}'

Query:

curl -XGET 'localhost:9200/testindex/document/_search?pretty=1' -d '

{

"query":

 {

    "term": { "tags.title": "people" }

 },

"facets": {

  "popular_tags": { "terms": {"field": "tags.title"}}

}

}'

This result

"hits" : {

"total" : 1,

"max_score" : 0.99381393,

"hits" : [ {

  "_index" : "testindex",

  "_type" : "document",

  "_id" : "uI5k0wggR9KAvG9o7S7L2g",

  "_score" : 0.99381393, "_source" : {"tags": {"title": "people also

kill people"}}

} ]

},

"facets" : {

"popular_tags" : {

  "_type" : "terms",

  "missing" : 0,

  "total" : 3,

  "other" : 0,

  "terms" : [ {

    "term" : "people",

    "count" : 1            // I expect this to be 2

  }, {

    "term" : "kill",

    "count" : 1

  }, {

    "term" : "also",

    "count" : 1

  } ]

}

}

The above result is not what I want. I want to have the frequency count be 2

"hits" : {

"total" : 1,

"max_score" : 0.99381393,

"hits" : [ {

  "_index" : "testindex",

  "_type" : "document",

  "_id" : "uI5k0wggR9KAvG9o7S7L2g",

  "_score" : 0.99381393, "_source" : {"tags": {"title": "people also

kill people"}}

} ]

},

"facets" : {

"popular_tags" : {

  "_type" : "terms",

  "missing" : 0,

  "total" : 3,

  "other" : 0,

  "terms" : [ {

    "term" : "people",

    "count" : 2            

  }, {

    "term" : "kill",

    "count" : 1

  }, {

    "term" : "also",

    "count" : 1

  } ]

}

}

How do I achieve this? Is facet the wrong way to go?

--

--


(Kennedy Idialu) #4

I guess I will have to use the Analyze API then. Thanks.

On Sunday, 4 November 2012 22:32:56 UTC+1, David Pilato wrote:

IMHO, facets are computed on index not with source documents.

AFAIK, TermFacet does not use the TermFrequency in each doc but only
computes the number of docs that contains at least your term once, not the
frequency.

I don’t see a way to get what you are looking for.

Hey ES Gurus ! Any idea? A script?

David.

De : elasti...@googlegroups.com [mailto:elasti...@googlegroups.com]
De la part de Kennedy Idialu
Envoyé : dimanche 4 novembre 2012 11:39
À : elasti...@googlegroups.com
Objet : Return term frequency of a single field

Hi,

I have being trying to use facet to get the term frequency of a field. My
query returns just one hit, so I would like to have the facet return the
terms that have the most frequency in a particular field.

My mapping:

{

"mappings":{

    "document":{

        "properties":{

            "tags":{

                "type":"object",

                "properties":{

                    "title":{

                        "fields":{

                            "partial":{

                                "search_analyzer":"main",

                                "index_analyzer":"partial",

                                "type":"string",

                                "index" : "analyzed"

                            }

                            "title":{

                                "type":"string",

                                "analyzer":"main",

                                "index" : "analyzed"

                            }

                        },

                        "type":"multi_field"

                    }

                }

            }

        }

    }

},



"settings":{

    "analysis":{

        "filter":{

            "name_ngrams":{

                "side":"front",

                "max_gram":50,

                "min_gram":2,

                "type":"edgeNGram"

            }

        },



        "analyzer":{

            "main":{

                "filter": ["standard", "lowercase", "asciifolding"],

                "type": "custom",

                "tokenizer": "standard"

            },

            "partial":{

"filter":["standard","lowercase","asciifolding","name_ngrams"],

                "type": "custom",

                "tokenizer": "standard"

            }

        }

    }

}

}

Test data:

curl -XPUT localhost:9200/testindex/document -d '{"tags": {"title":
"people also kill people"}}'

Query:

curl -XGET 'localhost:9200/testindex/document/_search?pretty=1' -d '

{

"query":

 {

    "term": { "tags.title": "people" }

 },

"facets": {

  "popular_tags": { "terms": {"field": "tags.title"}}

}

}'

This result

"hits" : {

"total" : 1,

"max_score" : 0.99381393,

"hits" : [ {

  "_index" : "testindex",

  "_type" : "document",

  "_id" : "uI5k0wggR9KAvG9o7S7L2g",

  "_score" : 0.99381393, "_source" : {"tags": {"title": "people also 

kill people"}}

} ]

},

"facets" : {

"popular_tags" : {

  "_type" : "terms",

  "missing" : 0,

  "total" : 3,

  "other" : 0,

  "terms" : [ {

    "term" : "people",

    "count" : 1            *// I expect this to be 2*

  }, {

    "term" : "kill",

    "count" : 1

  }, {

    "term" : "also",

    "count" : 1

  } ]

}

}

The above result is not what I want. I want to have the frequency count be
2

"hits" : {

"total" : 1,

"max_score" : 0.99381393,

"hits" : [ {

  "_index" : "testindex",

  "_type" : "document",

  "_id" : "uI5k0wggR9KAvG9o7S7L2g",

  "_score" : 0.99381393, "_source" : {"tags": {"title": "people also 

kill people"}}

} ]

},

"facets" : {

"popular_tags" : {

  "_type" : "terms",

  "missing" : 0,

  "total" : 3,

  "other" : 0,

  "terms" : [ {

    "term" : "people",

    "count" : 2            

  }, {

    "term" : "kill",

    "count" : 1

  }, {

    "term" : "also",

    "count" : 1

  } ]

}

}

How do I achieve this? Is facet the wrong way to go?

--

--


(Ivan Brusic) #5

Jörg's termlist plugin could help

--
Ivan

On Sun, Nov 4, 2012 at 1:32 PM, David Pilato david@pilato.fr wrote:

IMHO, facets are computed on index not with source documents.

AFAIK, TermFacet does not use the TermFrequency in each doc but only
computes the number of docs that contains at least your term once, not the
frequency.

I don’t see a way to get what you are looking for.

Hey ES Gurus ! Any idea? A script?

David.



De : elasticsearch@googlegroups.com [mailto:
elasticsearch@googlegroups.com] De la part de Kennedy Idialu
Envoyé : dimanche 4 novembre 2012 11:39
À : elasticsearch@googlegroups.com
Objet : Return term frequency of a single field

Hi,

I have being trying to use facet to get the term frequency of a field. My
query returns just one hit, so I would like to have the facet return the
terms that have the most frequency in a particular field.

My mapping:


{****

"mappings":{****

    "document":{****

        "properties":{****

            "tags":{****

                "type":"object",****

                "properties":{****

                    "title":{****

                        "fields":{****

                            "partial":{****

                                "search_analyzer":"main",****

                                "index_analyzer":"partial",****

                                "type":"string",****

                                "index" : "analyzed"****

                            }****

                            "title":{****

                                "type":"string",****

                                "analyzer":"main",****

                                "index" : "analyzed"****

                            }****

                        },****

                        "type":"multi_field"****

                    }****

                }****

            }****

        }****

    }****

},****

"settings":{****

    "analysis":{****

        "filter":{****

            "name_ngrams":{****

                "side":"front",****

                "max_gram":50,****

                "min_gram":2,****

                "type":"edgeNGram"****

            }****

        },****

        "analyzer":{****

            "main":{****

                "filter": ["standard", "lowercase", "asciifolding"],**

**

                "type": "custom",****

                "tokenizer": "standard"****

            },****

            "partial":{****

"filter":["standard","lowercase","asciifolding","name_ngrams"],****

                "type": "custom",****

                "tokenizer": "standard"****

            }****

        }****

    }****

}****

}****


Test data:****


curl -XPUT localhost:9200/testindex/document -d '{"tags": {"title":
"people also kill people"}}'****

Query:****

curl -XGET 'localhost:9200/testindex/document/_search?pretty=1' -d '****

{****

"query":****

 {****

    "term": { "tags.title": "people" }****

 },****

"facets": {****

  "popular_tags": { "terms": {"field": "tags.title"}}****

}****

}'****



This result****

"hits" : {****

"total" : 1,****

"max_score" : 0.99381393,****

"hits" : [ {****

  "_index" : "testindex",****

  "_type" : "document",****

  "_id" : "uI5k0wggR9KAvG9o7S7L2g",****

  "_score" : 0.99381393, "_source" : {"tags": {"title": "people also

kill people"}}****

} ]****

},****

"facets" : {****

"popular_tags" : {****

  "_type" : "terms",****

  "missing" : 0,****

  "total" : 3,****

  "other" : 0,****

  "terms" : [ {****

    "term" : "people",****

    "count" : 1            *// I expect this to be 2*****

  }, {****

    "term" : "kill",****

    "count" : 1****

  }, {****

    "term" : "also",****

    "count" : 1****

  } ]****

}****

}****



The above result is not what I want. I want to have the frequency count be
2 ****


"hits" : {****

"total" : 1,****

"max_score" : 0.99381393,****

"hits" : [ {****

  "_index" : "testindex",****

  "_type" : "document",****

  "_id" : "uI5k0wggR9KAvG9o7S7L2g",****

  "_score" : 0.99381393, "_source" : {"tags": {"title": "people also

kill people"}}****

} ]****

},****

"facets" : {****

"popular_tags" : {****

  "_type" : "terms",****

  "missing" : 0,****

  "total" : 3,****

  "other" : 0,****

  "terms" : [ {****

    "term" : "people",****

    "count" : 2            ****

  }, {****

    "term" : "kill",****

    "count" : 1****

  }, {****

    "term" : "also",****

    "count" : 1****

  } ]****

}****

}****



How do I achieve this? Is facet the wrong way to go?




--


--

--


(system) #6