Hi everyone!
I'm currently doing some text mining.
I'm looking for a way to add a second field (storyline) to the following code:
# Fields whose term vectors are requested and reported. This single list
# drives both the request and the result lookup, so analysing another field
# only requires adding its name here, e.g. ["short_summary", "storyline"].
termVectorFields = ["short_summary"]

# One multi-termvectors request for every id in docIds; offsets and positions
# are switched off, field and term statistics are switched on.
res = es.mtermvectors(
    index="movies",
    doc_type="movie",
    ids=",".join(str(x) for x in docIds),
    fields=termVectorFields,
    field_statistics=True,
    term_statistics=True,
    offsets=False,
    positions=False,
)

for doc in res["docs"]:
    docId = int(doc["_id"])
    print(docId)
    for fieldName in termVectorFields:
        # NOTE: raises KeyError if the response entry has no term vectors for
        # this field (same behaviour as the original single-field lookup).
        fieldStats = doc["term_vectors"][fieldName]["field_statistics"]
        docNbTerms = int(fieldStats["sum_doc_freq"])
        print(docNbTerms)
Here is my mapping if you need it:
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"docs": {
"_source": {
"enabled": true
},
"dynamic": "strict",
"properties": {
"title": {
"type": "text",
"index": "not_analyzed"
},
"release_year": {
"type" : "integer"
},
"rating": {
"type":"integer"
},
"short summary": {
"type":"text",
"index": "analyzed",
"analyzer": "english"
},
"directors": {
"type":"text",
"index": "not_analyzed"
},
"writers": {
"type":"text",
"index": "not_analyzed"
},
"stars": {
"type": "text",
"index": "not_analyzed"
},
"storyline": {
"type": "text",
"index": "analyzed",
"analyzer": "english"
},
"keywords": {
"type":"text",
"index": "not_analyzed"
},
"genres": {
"type":"text",
"index": "not_analyzed"
},
"duration": {
"type" : "text",
"index": "not_analyzed"
}
}
}
}
}
Thanks for your help!