Hi,
The setup
The setup I'm using is ES 0.90.10 with the FileSystem River (fsriver) to index
our corporate file server. The setup is pretty straightforward. Only the mapping
has been changed (the differences between the standard fsriver mapping and mine
are bolded and affect the fields file.content_type and meta.author):
{
"doc": {
"properties": {
"content": {
"type": "string",
"store": true
},
"file": {
"properties": {
"content_type": {
"type": "multi_field",
"fields": {
"content_type": {
"type": "string",
"store": true,
"analyzer": "simple"
},
"untouched": { "type": "string",
"index": "not_analyzed",
"store": true, "norms": {
"enabled": false },
"index_options": "docs",
"include_in_all": false }
}
},
"filename": {
"type": "string",
"store": true,
"analyzer": "simple"
},
"filesize": {
"type": "long",
"store": true
},
"indexed_chars": {
"type": "long",
"store": true
},
"indexing_date": {
"type": "date",
"store": true,
"format": "dateOptionalTime"
},
"last_modified": {
"type": "date",
"store": true,
"format": "dateOptionalTime"
},
"url": {
"type": "string",
"index": "no",
"store": true
}
}
},
"meta": {
"properties": {
"author": {
"type": "multi_field",
"fields": {
"author": {
"type": "string",
"store": true
},
"untouched": {
"type": "string",
"index": "not_analyzed",
"store": true,
"norms": {
"enabled": false
},
"index_options": "docs",
"include_in_all": false
}
}
},
"date": {
"type": "date",
"store": true,
"format": "dateOptionalTime"
},
"keywords": {
"type": "string",
"store": true
},
"title": {
"type": "string",
"store": true
}
}
},
"path": {
"properties": {
"encoded": {
"type": "string",
"index": "not_analyzed",
"store": true,
"norms": {
"enabled": false
},
"index_options": "docs"
},
"real": {
"type": "string",
"index": "not_analyzed",
"store": true,
"norms": {
"enabled": false
},
"index_options": "docs"
},
"root": {
"type": "string",
"index": "not_analyzed",
"store": true,
"norms": {
"enabled": false
},
"index_options": "docs"
},
"virtual": {
"type": "string",
"index": "not_analyzed",
"store": true,
"norms": {
"enabled": false
},
"index_options": "docs"
}
}
}
}
}
}
The source
The original file structure looks like this:
- sales
- accounts
- account 1
- subfolders
- account 2
- subfolders
- account 3
- ....
- account n
- other folder
- other folder
- etc.
The index
An indexed file looks like this:
{
"_index": "seneca_filesystem_general_documents",
"_type": "doc",
"_id": "6c6fcc2fee4baa44983ae3902918a72",
"_score": 1,
"_source": {
"file": {
"filename": "17172by3.pdf",
"last_modified": 1193145400000,
"indexing_date": "2014-02-10T23:14:18.189Z",
"content_type": "application/pdf",
"url": "file://\\\\file.seneca.intern\\General\\Accounts\\account
X...............subfolders.............\17172by3.pdf",
"filesize": 180365
},
"path": {
"encoded": "5b8c64f77d70b8c6b19c6e25cdddcc2c",
"root": "8a8d78ed7fc4be3b26921c78752553",
"virtual": "/Accounts/account X/..............rest
of path........................",
"real": "\\file.seneca.intern\General\Accounts\account
X\ ............path................ \17172by3.pdf"
},
"meta": {
"author": ".............",
"title": ".................",
"date": "2007-10-23T12:05:51Z",
"keywords": []
},
"content": ""
}
}
The query
{
"from": 0,
"size": 10,
"query": {
"filtered": {
"query": {
"query_string": {
"query": "smartsite",
"default_operator": "AND"
}
},
"filter": {
"and": [
{
"script": {
"script": "int start =
(doc['path.virtual'].value.indexOf('/',1)==-1) ? 1 :
doc['path.virtual'].value.indexOf('/',1) + 1 ;int end =
(doc['path.virtual'].value.indexOf('/',start)==-1) ?
doc['path.virtual'].value.length() :
doc['path.virtual'].value.indexOf('/',start) ;
doc['path.virtual'].value.substring(start,end) == folder ",
"params": {
"folder": "Seneca"
}
}
},
{}
]
}
}
},
"facets": {
"moddate": {
"date_histogram": {
"field": "file.last_modified",
"interval": "year",
"size": 20
}
},
"filetype": {
"terms": {
"field": "file.content_type.untouched",
"size": 20,
"order": "term"
}
},
"author": {
"terms": {
"field": "meta.author.untouched",
"size": 20,
"order": "count"
}
},
"folder": {
"terms": {
"field": "path.virtual",
"script": "int start = (term.indexOf('/',1)==-1) ? 1 :
term.indexOf('/',1) + 1 ;int end = (term.indexOf('/',start)==-1) ?
term.length() : term.indexOf('/',start) ; term.substring(start,end)",
"order": "term",
"size": 100
}
}
}
}
The error
{
"error": "SearchPhaseExecutionException[Failed to execute phase
[query], all shards failed; shardFailures
{[SjK7fZAaTviRHoNqqxJTGg][seneca_filesystem_general_documents][2]:
QueryPhaseExecutionException[[seneca_filesystem_general_documents][2]:
query[filtered(filtered(_all:smartsite)->+ScriptFilter(int start =
(doc['path.virtual'].value.indexOf('/',1)==-1) ? 1 :
doc['path.virtual'].value.indexOf('/',1) + 1 ;int end =
(doc['path.virtual'].value.indexOf('/',start)==-1) ?
doc['path.virtual'].value.length() :
doc['path.virtual'].value.indexOf('/',start) ;
doc['path.virtual'].value.substring(start,end) == folder
))->cache(_type:doc)],from[0],size[10],sort[<custom:"file.last_modified":
org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource@1a91c76>!,]:
Query Failed [Failed to execute main query]]; nested: NullPointerException;
}{[SjK7fZAaTviRHoNqqxJTGg][seneca_filesystem_general_documents][1]:
QueryPhaseExecutionException[[seneca_filesystem_general_documents][1]:
query[filtered(filtered(_all:smartsite)->+ScriptFilter(int start =
(doc['path.virtual'].value.indexOf('/',1)==-1) ? 1 :
doc['path.virtual'].value.indexOf('/',1) + 1 ;int end =
(doc['path.virtual'].value.indexOf('/',start)==-1) ?
doc['path.virtual'].value.length() :
doc['path.virtual'].value.indexOf('/',start) ;
doc['path.virtual'].value.substring(start,end) == folder
))->cache(_type:doc)],from[0],size[10],sort[<custom:"file.last_modified":
org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource@135d894>!,]:
Query Failed [Failed to execute main query]]; nested: NullPointerException;
}{[SjK7fZAaTviRHoNqqxJTGg][seneca_filesystem_general_documents][4]:
QueryPhaseExecutionException[[seneca_filesystem_general_documents][4]:
query[filtered(filtered(_all:smartsite)->+ScriptFilter(int start =
(doc['path.virtual'].value.indexOf('/',1)==-1) ? 1 :
doc['path.virtual'].value.indexOf('/',1) + 1 ;int end =
(doc['path.virtual'].value.indexOf('/',start)==-1) ?
doc['path.virtual'].value.length() :
doc['path.virtual'].value.indexOf('/',start) ;
doc['path.virtual'].value.substring(start,end) == folder
))->cache(_type:doc)],from[0],size[10],sort[<custom:"file.last_modified":
org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource@56adef>!,]:
Query Failed [Failed to execute main query]]; nested: NullPointerException;
}{[SjK7fZAaTviRHoNqqxJTGg][seneca_filesystem_general_documents][3]:
QueryPhaseExecutionException[[seneca_filesystem_general_documents][3]:
query[filtered(filtered(_all:smartsite)->+ScriptFilter(int start =
(doc['path.virtual'].value.indexOf('/',1)==-1) ? 1 :
doc['path.virtual'].value.indexOf('/',1) + 1 ;int end =
(doc['path.virtual'].value.indexOf('/',start)==-1) ?
doc['path.virtual'].value.length() :
doc['path.virtual'].value.indexOf('/',start) ;
doc['path.virtual'].value.substring(start,end) == folder
))->cache(_type:doc)],from[0],size[10],sort[<custom:"file.last_modified":
org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource@38ebef>!,]:
Query Failed [Failed to execute main query]]; nested: NullPointerException;
}{[SjK7fZAaTviRHoNqqxJTGg][seneca_filesystem_general_documents][0]:
QueryPhaseExecutionException[[seneca_filesystem_general_documents][0]:
query[filtered(filtered(_all:smartsite)->+ScriptFilter(int start =
(doc['path.virtual'].value.indexOf('/',1)==-1) ? 1 :
doc['path.virtual'].value.indexOf('/',1) + 1 ;int end =
(doc['path.virtual'].value.indexOf('/',start)==-1) ?
doc['path.virtual'].value.length() :
doc['path.virtual'].value.indexOf('/',start) ;
doc['path.virtual'].value.substring(start,end) == folder
))->cache(_type:doc)],from[0],size[10],sort[<custom:"file.last_modified":
org.elasticsearch.index.fielddata.fieldcomparator.LongValuesComparatorSource@9f932b>!,]:
Query Failed [Failed to execute main query]]; nested: NullPointerException;
}]",
"status": 500
}
The remarks
- The empty filter after the script filter is there because the query is
created dynamically; that way I don't have to worry about the separating
commas. I have verified that this empty filter is not the origin of the
error.
- If I add a term filter (e.g.
{ "term" : { "file.content_type.untouched" : "application/pdf" } }
) before or after the script filter, everything works fine.
- A similar issue was resolved in ES 0.90.3:
https://github.com/elasticsearch/elasticsearch/issues/3595
Erwin
--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/c7814845-bf0f-441f-a970-844749948bfc%40googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.