How to create an index for a PDF attachment using elasticsearch-river-mongodb 1.6.9 (no hits, or missing fields)

Dear All,
I am new to Elasticsearch. For days I have tried to follow various tutorials
and posts on indexing and mapping PDF documents attached in a MongoDB database,
without success. After running the code below, I don't get any hits for words
that exist in the files attached in MongoDB.

software version:
MongoDB: mongodb-linux-x86_64-2.4.3
elasticsearch-river-mongodb: 1.6.9
elasticsearch: 0.90
elasticsearch-mapper-attachments: 1.7.0

Problem No. 1

  1. BSON structure: the PDF attachment is stored in the "FileContent" field;
    the attachment is not in GridFS.
    byte [] fileser =;
    Pagecount = getpagenum(file);
    BasicDBObject articleobject = new BasicDBObject();
    articleobject.put("Title", jsonArray.getJSONObject(i).get("Title"));
    articleobject.put("Media", jsonArray.getJSONObject(i).get("Media"));
    articleobject.put("ISSN", jsonArray.getJSONObject(i).get("ISSN"));
    articleobject.put("Pages", jsonArray.getJSONObject(i).get("Pages"));
    articleobject.put("Pagecount", Pagecount);
    articleobject.put("Abstracts", jsonArray.getJSONObject(i).get("Abstracts"));
    articleobject.put("Keywords", "");
    articleobject.put("FileContent", fileser);

  1. Create an index
    curl -XPUT "http://localhost:9200/articleindex"

  1. create a mapping
    curl -XPUT 'http://localhost:9200/articleindex/cardiopathy/_mapping' -d '
    "cardiopathy" : {
    "properties" : {
    "Authors" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
    "ik","store" : "yes"},
    "Media" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
    "ik","store" : "yes"},
    "Organization" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
    "ik","store" : "yes"},
    "Keywords" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer":
    "ik","store" : "yes"},
    "Title" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
    "ik","store" : "yes"},
    "ISSN" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
    "ik","store" : "yes"},
    "Pages" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer":
    "ik","store" : "yes"},
    "Abstracts" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer":
    "ik","store" : "yes"},
    "FileContent" : { "type" : "string" ,"indexAnalyzer":
    "ik","searchAnalyzer": "ik"}

  1. create the river
    curl -XPUT "http://localhost:9200/_river/mongodb/_meta" -d '
    "type": "mongodb",
    "mongodb": {
    "host": "",
    "port": "27107",
    "options": {"drop_collection": true },
    "db": "ftsearch1",
    "collection": "pdf"
    "index": {
    "name": "articleindex",
    "type": "cardiopathy"

  1. Retrieve the indexed document by the keyword
    curl -XGET http://localhost:9200/articleindex/cardiopathy/_search -d'
    "fields" : ["Title"],
    "query" : { "text" : { "FileContent" : "高血压病辨证分型与靶器官相关性研究的新进展" }}


Problem No. 2

Alter the mapping:

curl -XPUT 'http://localhost:9200/articleindex/cardiopathy/_mapping' -d '
"cardiopathy" : {
"file" : {
"properties" : {
"Authors" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
"ik","store" : "yes"},
"Media" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
"ik","store" : "yes"},
"Organization" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
"ik","store" : "yes"},
"Keywords" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer":
"ik","store" : "yes"},
"Title" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
"ik","store" : "yes"},
"ISSN" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer":
"ik","store" : "yes"},
"Pages" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer":
"ik","store" : "yes"},
"Abstracts" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer":
"ik","store" : "yes"},
"FileContent" : {
"type" : "attachment",
"fields" : {
"file" : { "indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes",
"index" : "analyzed" },
"date" : { "store" : "yes" },
"author" : { "store" : "yes" },
"keywords" : { "store" : "yes" },
"content_type" : { "store" : "yes" },
"title" : { "store" : "yes" }

Retrieve the indexed document by the keyword:


Problem No. 3

  1. The attachment is in GridFS; in addition, we define the other fields on the GridFS file.
    GridFSInputFile gfsFile = gfsPhoto.createFile(file);
    String filename = file.getName();
    filename = filename.substring(0, filename.lastIndexOf("."));
    gfsFile.put("Title", jsonArray.getJSONObject(i).get("Title"));
    gfsFile.put("Organization", jsonArray.getJSONObject(i).get("Organization"));
    gfsFile.put("Media", jsonArray.getJSONObject(i).get("Media"));
    gfsFile.put("ISSN", jsonArray.getJSONObject(i).get("ISSN"));
    gfsFile.put("Pages", jsonArray.getJSONObject(i).get("Pages"));
    gfsFile.put("Pagecount", Pagecount);
    gfsFile.put("Abstracts", jsonArray.getJSONObject(i).get("Abstracts"));
    gfsFile.put("Keywords", "");;

  1. Create an index
    curl -XPUT "http://localhost:9200/articleindex"

  1. create a mapping
    curl -XPUT 'http://localhost:9200/cardiopathyindex/cardiopathy/_mapping' -d
    "cardiopathy": {
    "properties" : {
    "content" : {
    "path" : "full",
    "type" : "attachment",
    "fields" : {
    "content" : {"type": "string","indexAnalyzer":
    "ik","searchAnalyzer": "ik"},
    "Authors" : {"type": "string","indexAnalyzer":
    "ik","searchAnalyzer": "ik"},
    "Media" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik"},
    "Organization" : {"type": "string","indexAnalyzer":
    "ik","searchAnalyzer": "ik"},
    "Keywords" : { "type" : "string" ,"indexAnalyzer":
    "ik","searchAnalyzer": "ik"},
    "Title" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik"},
    "ISSN" : {"type": "string","indexAnalyzer":
    "ik","searchAnalyzer": "ik"},
    "Pages" : { "type" : "string" ,"indexAnalyzer":
    "ik","searchAnalyzer": "ik"},
    "Abstracts" : { "type" : "string" ,"indexAnalyzer":
    "ik","searchAnalyzer": "ik"},
    "date" : {"format" : "dateOptionalTime","type" : "date" },
    "content_type" : { "type" : "string" }
    "chunkSize" : { "type" : "long" },
    "md5" : { "type" : "string" },
    "length" : { "type" : "long" },
    "filename" : { "type" : "string" },
    "contentType" : { "type" : "string" },
    "uploadDate" : {
    "format" : "dateOptionalTime",
    "type" : "date"
    "metadata" : { "type" : "object" }

  1. create the river
    curl -XPUT "http://localhost:9200/_river/mongodb/_meta" -d '
    "type": "mongodb",
    "mongodb": {
    "host": "",
    "port": "27107",
    "options": {"drop_collection": true },
    "db": "ftsearch",
    "collection": "fs",
    "gridfs": true
    "index": {
    "name": "cardiopathyindex",
    "type": "cardiopathy",
    "content_type": "application/pdf"

  1. Retrieve the indexed document by keyword. There is a hit, but the query result
    is missing the "Title" and "Authors" fields.
    curl -XGET http://localhost:9200/cardiopathyindex/cardiopathy/_search -d'
    "fields" : ["Title","Authors"],
    "query" : { "text" : { "content" : "高血压病辨证分型与靶器官相关性研究的新进展" }}

You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to
For more options, visit

I think you should create the mapping first (don't alter an existing mapping, as a change basically only works for new fields and not for existing ones):

curl -XPUT 'http://localhost:9200/articleindex/cardiopathy/_mapping' -d '
"cardiopathy" : {
"file" : {
"properties" : {
"Authors" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Media" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Organization" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Keywords" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Title" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"ISSN" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Pages" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Abstracts" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"FileContent" : {
"type" : "attachment",
"fields" : {
"file" : { "indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes", "index" : "analyzed" },
"date" : { "store" : "yes" },
"author" : { "store" : "yes" },
"keywords" : { "store" : "yes" },
"content_type" : { "store" : "yes" },
"title" : { "store" : "yes" }


David Pilato | Technical Advocate |
@dadoonet | @elasticsearchfr | @scrutmydocs

Le 8 juil. 2013 à 12:38, Jordon a écrit :

Dear All,
I am new to Elasticsearch. For days I have tried to follow various tutorials and posts on indexing and mapping PDF documents attached in a MongoDB database, without success. After running the code below, I don't get any hits for words that exist in the files attached in MongoDB.

software version:
MongoDB: mongodb-linux-x86_64-2.4.3
elasticsearch-river-mongodb: 1.6.9
elasticsearch: 0.90
elasticsearch-mapper-attachments: 1.7.0

Problem No. 1

  1. BSON structure: the PDF attachment is stored in the "FileContent" field; the attachment is not in GridFS.
    byte fileser =;
    Pagecount = getpagenum(file);
    BasicDBObject articleobject = new BasicDBObject();
    articleobject.put("Title", jsonArray.getJSONObject(i).get("Title"));
    articleobject.put("Organization", jsonArray.getJSONObject(i).get("Organization"));
    articleobject.put("Media", jsonArray.getJSONObject(i).get("Media"));
    articleobject.put("ISSN", jsonArray.getJSONObject(i).get("ISSN"));
    articleobject.put("Pages", jsonArray.getJSONObject(i).get("Pages"));
    articleobject.put("Pagecount", Pagecount);
    articleobject.put("Abstracts", jsonArray.getJSONObject(i).get("Abstracts"));
    articleobject.put("Keywords", "");
    articleobject.put("FileContent", fileser);

  1.    Create an index

curl -XPUT "http://localhost:9200/articleindex"

  1.    create a mapping

curl -XPUT 'http://localhost:9200/articleindex/cardiopathy/_mapping' -d '
"cardiopathy" : {
"properties" : {
"Authors" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Media" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Organization" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Keywords" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Title" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"ISSN" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Pages" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Abstracts" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"FileContent" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik"}

  1. create the river

curl -XPUT "http://localhost:9200/_river/mongodb/_meta" -d '
"type": "mongodb",
"mongodb": {
"host": "",
"port": "27107",
"options": {"drop_collection": true },
"db": "ftsearch1",
"collection": "pdf"
"index": {
"name": "articleindex",
"type": "cardiopathy"

  1. Retrieve the indexed document by the keyword
    curl -XGET http://localhost:9200/articleindex/cardiopathy/_search -d'
    "fields" : ["Title"],
    "query" : { "text" : { "FileContent" : "高血压病辨证分型与靶器官相关性研究的新进展" }}


Problem No. 2

alter mapping:

curl -XPUT 'http://localhost:9200/articleindex/cardiopathy/_mapping' -d '
"cardiopathy" : {
"file" : {
"properties" : {
"Authors" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Media" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Organization" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Keywords" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Title" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"ISSN" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Pages" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"Abstracts" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes"},
"FileContent" : {
"type" : "attachment",
"fields" : {
"file" : { "indexAnalyzer": "ik","searchAnalyzer": "ik","store" : "yes", "index" : "analyzed" },
"date" : { "store" : "yes" },
"author" : { "store" : "yes" },
"keywords" : { "store" : "yes" },
"content_type" : { "store" : "yes" },
"title" : { "store" : "yes" }

Retrieve the indexed document by the keyword:


Problem No. 3

  1.    The attachment is in GridFS; in addition, we define the other fields on the GridFS file.

GridFSInputFile gfsFile = gfsPhoto.createFile(file);
String filename = file.getName();
filename = filename.substring(0, filename.lastIndexOf("."));
gfsFile.put("Title", jsonArray.getJSONObject(i).get("Title"));
gfsFile.put("Organization", jsonArray.getJSONObject(i).get("Organization"));
gfsFile.put("Media", jsonArray.getJSONObject(i).get("Media"));
gfsFile.put("ISSN", jsonArray.getJSONObject(i).get("ISSN"));
gfsFile.put("Pages", jsonArray.getJSONObject(i).get("Pages"));
gfsFile.put("Pagecount", Pagecount);
gfsFile.put("Abstracts", jsonArray.getJSONObject(i).get("Abstracts"));
gfsFile.put("Keywords", "");;

  1.    Create an index

curl -XPUT "http://localhost:9200/articleindex"

  1.    create a mapping

curl -XPUT 'http://localhost:9200/cardiopathyindex/cardiopathy/_mapping' -d '{
"cardiopathy": {
"properties" : {
"content" : {
"path" : "full",
"type" : "attachment",
"fields" : {
"content" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik"},
"Authors" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik"},
"Media" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik"},
"Organization" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik"},
"Keywords" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik"},
"Title" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik"},
"ISSN" : {"type": "string","indexAnalyzer": "ik","searchAnalyzer": "ik"},
"Pages" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik"},
"Abstracts" : { "type" : "string" ,"indexAnalyzer": "ik","searchAnalyzer": "ik"},
"date" : {"format" : "dateOptionalTime","type" : "date" },
"content_type" : { "type" : "string" }
"chunkSize" : { "type" : "long" },
"md5" : { "type" : "string" },
"length" : { "type" : "long" },
"filename" : { "type" : "string" },
"contentType" : { "type" : "string" },
"uploadDate" : {
"format" : "dateOptionalTime",
"type" : "date"
"metadata" : { "type" : "object" }

  1. create the river

curl -XPUT "http://localhost:9200/_river/mongodb/_meta" -d '
"type": "mongodb",
"mongodb": {
"host": "",
"port": "27107",
"options": {"drop_collection": true },
"db": "ftsearch",
"collection": "fs",
"gridfs": true
"index": {
"name": "cardiopathyindex",
"type": "cardiopathy",
"content_type": "application/pdf"

  1. Retrieve the indexed document by keyword. There is a hit, but the query result is missing the "Title" and "Authors" fields.
    curl -XGET http://localhost:9200/cardiopathyindex/cardiopathy/_search -d'
    "fields" : ["Title","Authors"],
    "query" : { "text" : { "content" : "高血压病辨证分型与靶器官相关性研究的新进展" }}

You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to
For more options, visit