Longs being shown as strings


(Elad Amit) #1

I have started sending JSON entries to Logstash with a few numeric fields
(e.g. applicationid).
The fields are correctly mapped as long in ES, but the result of faceting
on such a field is quite strange.
For example, here is a sample entry:

{
"_index": "logstash-2014.07.13",
"_type": "logbook_qa",
"_id": "QwiJPCjrRQmutQCWTwEjWg",
"_score": null,
"_source": {
"classname": "...",
"eventLogTime": "2014-07-13T18:54:00.5476081+00:00",
"exceptionmessage": "The remote server returned an error: (530) Not logged in.",
"exceptionname": "WebException",
"exceptionseverity": 0,
"exceptionstacktrace": "...",
"hostname": "...",
"logdatetime": "2014-07-13T18:54:00.5476081Z",
"applicationid": 6363313,
"methodparameters": "...",
"methodname": "GetItemsFromFeed",
"threadid": 24,
"username": "...",
"url": "",
"entrytype": "ERROR",
"@timestamp": "2014-07-13T18:54:00.547Z",
"type": "logbook_qa",
"message": "..."
},
"sort": [
1405277640547
]
}

The mapping:
{
"logbook_qa": {
"dynamic_templates": [
{
"string_template": {
"mapping": {
"index": "not_analyzed",
"type": "string"
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"_all": {
"enabled": false
},
"_source": {
"compress": true
},
"properties": {
"@timestamp": {
"type": "date",
"format": "dateOptionalTime"
},
"@version": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"appdomain": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"applicationid": {
"type": "long"
},
"assemblyname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"classname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"entrytype": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"eventLogTime": {
"type": "date",
"format": "dateOptionalTime"
},
"exceptionmessage": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"exceptionname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"exceptionseverity": {
"type": "long"
},
"exceptionstacktrace": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"host": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"hostname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"logdatetime": {
"type": "date",
"format": "dateOptionalTime"
},
"message": {
"type": "string"
},
"methodname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"methodparameters": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"tags": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"threadid": {
"type": "long"
},
"threadname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"tracemessage": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"type": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"url": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"username": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
}
}
}
}

The facet query:

{
"facets": {
"terms": {
"terms": {
"field": "applicationid",
"size": 10,
"order": "count",
"exclude": []
},
"facet_filter": {
"fquery": {
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"query_string": {
"query": "*"
}
}
]
}
},
"filter": {
"bool": {
"must": [
{
"fquery": {
"query": {
"field": {
"entrytype": {
"query": "\"ERROR\""
}
}
},
"_cache": true
}
},
{
"range": {
"@timestamp": {
"from": 1405256548987,
"to": "now"
}
}
}
],
"should": [
{
"fquery": {
"query": {
"field": {
"type": {
"query": "\"logbook_qa\""
}
}
},
"_cache": true
}
}
]
}
}
}
}
}
}
}
},
"size": 0
}

The result:
{
"took": 259,
"timed_out": false,
"_shards": {
"total": 41,
"successful": 41,
"failed": 0
},
"hits": {
"total": 212813454,
"max_score": 1,
"hits": []
},
"facets": {
"terms": {
"_type": "terms",
"missing": 0,
"total": 521392,
"other": 195522,
"terms": [
{
"term": "\\b",
"count": 32587
},
{
"term": "X\u0001\u0000",
"count": 32587
},
{
"term": "T\u0010\u0000",
"count": 32587
},
{
"term": "P\u0002\u0000\u0000",
"count": 32587
},
{
"term": "L \u0000\u0000",
"count": 32587
},
{
"term": "H\u0004\u0000\u0000\u0000",
"count": 32587
},
{
"term": "D@\u0000\u0000\u0000",
"count": 32587
},
{
"term": "@\b\u0000\u0000\u0000\u0000",
"count": 32587
},
{
"term": "<\u0001\u0000\u0000\u0000\u0000\u0000",
"count": 32587
},
{
"term": "8\u0010\u0000\u0000\u0000\u0000\u0000",
"count": 32587
}
]
}
}
}

I'd rather not pre-define that these fields are of type string to get
around this, just feels wrong :slight_smile:

--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/cc9aeb4e-f235-497e-88f3-624bb1d524cd%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.


(Itamar Syn-Hershko) #2

What you are seeing are the Lucene numeric field terms. Try mapping the
applicationid field as a not_analyzed string field for the purpose of
faceting, instead of as a numeric field (which is useful for
range queries or sorts).

--

Itamar Syn-Hershko
http://code972.com | @synhershko https://twitter.com/synhershko
Freelance Developer & Consultant
Author of RavenDB in Action http://manning.com/synhershko/

On Sun, Jul 13, 2014 at 10:06 PM, Elad Amit amitelad7@gmail.com wrote:

I have started sending json entries to logstash with a few number fields
(e.g. applicationid)
The information in ES is correctly typed to long but the result of
faceting on the field is quite strange
i.e.
an example entry:

{
"_index": "logstash-2014.07.13",
"_type": "logbook_qa",
"_id": "QwiJPCjrRQmutQCWTwEjWg",
"_score": null,
"_source": {
"classname": "...",
"eventLogTime": "2014-07-13T18:54:00.5476081+00:00",
"exceptionmessage": "The remote server returned an error: (530) Not logged in.",
"exceptionname": "WebException",
"exceptionseverity": 0,
"exceptionstacktrace": "...",
"hostname": "...",
"logdatetime": "2014-07-13T18:54:00.5476081Z",
"applicationid": 6363313,
"methodparameters": "...",
"methodname": "GetItemsFromFeed",
"threadid": 24,
"username": "...",
"url": "",
"entrytype": "ERROR",
"@timestamp": "2014-07-13T18:54:00.547Z",
"type": "logbook_qa",
"message": "..."
},
"sort": [
1405277640547
]
}

The mapping:
{
"logbook_qa": {
"dynamic_templates": [
{
"string_template": {
"mapping": {
"index": "not_analyzed",
"type": "string"
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"_all": {
"enabled": false
},
"_source": {
"compress": true
},
"properties": {
"@timestamp": {
"type": "date",
"format": "dateOptionalTime"
},
"@version": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"appdomain": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"applicationid": {
"type": "long"
},
"assemblyname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"classname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"entrytype": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"eventLogTime": {
"type": "date",
"format": "dateOptionalTime"
},
"exceptionmessage": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"exceptionname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"exceptionseverity": {
"type": "long"
},
"exceptionstacktrace": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"host": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"hostname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"logdatetime": {
"type": "date",
"format": "dateOptionalTime"
},
"message": {
"type": "string"
},
"methodname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"methodparameters": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"tags": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"threadid": {
"type": "long"
},
"threadname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"tracemessage": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"type": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"url": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"username": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
}
}
}
}

The facet query:

{
"facets": {
"terms": {
"terms": {
"field": "applicationid",
"size": 10,
"order": "count",
"exclude": []
},
"facet_filter": {
"fquery": {
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"query_string": {
"query": "*"
}
}
]
}
},
"filter": {
"bool": {
"must": [
{
"fquery": {
"query": {
"field": {
"entrytype": {
"query": "\"ERROR\""
}
}
},
"_cache": true
}
},
{
"range": {
"@timestamp": {
"from": 1405256548987,
"to": "now"
}
}
}
],
"should": [
{
"fquery": {
"query": {
"field": {
"type": {
"query": "\"logbook_qa\""
}
}
},
"_cache": true
}
}
]
}
}
}
}
}
}
}
},
"size": 0
}

The result:
{
"took": 259,
"timed_out": false,
"_shards": {
"total": 41,
"successful": 41,
"failed": 0
},
"hits": {
"total": 212813454,
"max_score": 1,
"hits": []
},
"facets": {
"terms": {
"_type": "terms",
"missing": 0,
"total": 521392,
"other": 195522,
"terms": [
{
"term": "\\b",
"count": 32587
},
{
"term": "X\u0001\u0000",
"count": 32587
},
{
"term": "T\u0010\u0000",
"count": 32587
},
{
"term": "P\u0002\u0000\u0000",
"count": 32587
},
{
"term": "L \u0000\u0000",
"count": 32587
},
{
"term": "H\u0004\u0000\u0000\u0000",
"count": 32587
},
{
"term": "D@\u0000\u0000\u0000",
"count": 32587
},
{
"term": "@\b\u0000\u0000\u0000\u0000",
"count": 32587
},
{
"term": "<\u0001\u0000\u0000\u0000\u0000\u0000",
"count": 32587
},
{
"term": "8\u0010\u0000\u0000\u0000\u0000\u0000",
"count": 32587
}
]
}
}
}

I'd rather not pre-define that these fields are of type string to get
around this, just feels wrong :slight_smile:

--
You received this message because you are subscribed to the Google Groups
"elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an
email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit
https://groups.google.com/d/msgid/elasticsearch/cc9aeb4e-f235-497e-88f3-624bb1d524cd%40googlegroups.com
https://groups.google.com/d/msgid/elasticsearch/cc9aeb4e-f235-497e-88f3-624bb1d524cd%40googlegroups.com?utm_medium=email&utm_source=footer
.
For more options, visit https://groups.google.com/d/optout.

--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/CAHTr4Zt1e2HyOZJ9LXHTKs9X_s8iEvWZ06iDu5WvZnqbSvshXw%40mail.gmail.com.
For more options, visit https://groups.google.com/d/optout.


(Elad Amit) #3

Treating it as a string is what I am trying to avoid :slight_smile:
I have other fields typed as long which are not exhibiting this behavior,
which is why I am finding this a bit odd.

i.e.
mapping:
{
"iis-log": {
"dynamic_templates": [
{
"string_template": {
"mapping": {
"index": "not_analyzed",
"type": "string"
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"_all": {
"enabled": false
},
"_source": {
"compress": true
},
"properties": {
"@timestamp": {
"type": "date",
"format": "dateOptionalTime"
},
"@version": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"EventTime": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"bytesreceived": {
"type": "long"
},
"bytessent": {
"type": "long"
},
"clientip": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"host": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"hostname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"logstash_host": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"message": {
"type": "string"
},
"method": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"queryparam": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"status": {
"type": "long"
},
"substatus": {
"type": "long"
},
"tags": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"timetaken": {
"type": "long"
},
"type": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"useragent": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
}
}
}
}

The facet query:

{
"facets": {
"terms": {
"terms": {
"field": "applicationid",
"size": 10,
"order": "count",
"exclude": []
},
"facet_filter": {
"fquery": {
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"query_string": {
"query": "*"
}
}
]
}
},
"filter": {
"bool": {
"must": [
{
"fquery": {
"query": {
"field": {
"entrytype": {
"query": "\"ERROR\""
}
}
},
"_cache": true
}
},
{
"range": {
"@timestamp": {
"from": 1405256548987,
"to": "now"
}
}
}
],
"should": [
{
"fquery": {
"query": {
"field": {
"type": {
"query": "\"logbook_qa\""
}
}
},
"_cache": true
}
}
]
}
}
}
}
}
}
}
},
"size": 0
}

The result:
{
"took": 259,
"timed_out": false,
"_shards": {
"total": 41,
"successful": 41,
"failed": 0
},
"hits": {
"total": 212813454,
"max_score": 1,
"hits": []
},
"facets": {
"terms": {
"_type": "terms",
"missing": 0,
"total": 521392,
"other": 195522,
"terms": [
{
"term": "\\b",
"count": 32587
},
{
"term": "X\u0001\u0000",
"count": 32587
},
{
"term": "T\u0010\u0000",
"count": 32587
},
{
"term": "P\u0002\u0000\u0000",
"count": 32587
},
{
"term": "L \u0000\u0000",
"count": 32587
},
{
"term": "H\u0004\u0000\u0000\u0000",
"count": 32587
},
{
"term": "D@\u0000\u0000\u0000",
"count": 32587
},
{
"term": "@\b\u0000\u0000\u0000\u0000",
"count": 32587
},
{
"term": "<\u0001\u0000\u0000\u0000\u0000\u0000",
"count": 32587
},
{
"term": "8\u0010\u0000\u0000\u0000\u0000\u0000",
"count": 32587
}
]
}
}
}

I'd rather not pre-define that these fields are of type string to get
around this, just feels wrong :slight_smile:

--
You received this message because you are subscribed to the Google Groups
"elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an
email to elasticsearc...@googlegroups.com.
To view this discussion on the web visit
https://groups.google.com/d/msgid/elasticsearch/cc9aeb4e-f235-497e-88f3-624bb1d524cd%40googlegroups.com
https://groups.google.com/d/msgid/elasticsearch/cc9aeb4e-f235-497e-88f3-624bb1d524cd%40googlegroups.com?utm_medium=email&utm_source=footer
.
For more options, visit https://groups.google.com/d/optout.

--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/07952273-1590-4ab3-a9cb-af5cc011a948%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.


(Elad Amit) #4

OK, intriguing: it looks like the problem was that I had multiple mapping
types in the same index that shared a field name but mapped it to different
data types, which made ES go a bit nuts.
I migrated all of them and everything works fine now :slight_smile:

On Monday, July 14, 2014 8:20:57 AM UTC+3, Elad Amit wrote:

treating it as a string is what I am trying to avoid :slight_smile:
I have other fields typed as long which are not exhibiting this behavior
which is why I am finding this a bit odd

i.e.
mapping:
{
"iis-log": {
"dynamic_templates": [
{
"string_template": {
"mapping": {
"index": "not_analyzed",
"type": "string"
},
"match": "*",
"match_mapping_type": "string"
}
}
],
"_all": {
"enabled": false
},
"_source": {
"compress": true
},
"properties": {
"@timestamp": {
"type": "date",
"format": "dateOptionalTime"
},
"@version": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"EventTime": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"bytesreceived": {
"type": "long"
},
"bytessent": {
"type": "long"
},
"clientip": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"host": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"hostname": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"logstash_host": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"message": {
"type": "string"
},
"method": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"queryparam": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"status": {
"type": "long"
},
"substatus": {
"type": "long"
},
"tags": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"timetaken": {
"type": "long"
},
"type": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
},
"useragent": {
"type": "string",
"index": "not_analyzed",
"omit_norms": true,
"index_options": "docs"
}
}
}
}

The facet query:

{
"facets": {
"terms": {
"terms": {
"field": "applicationid",
"size": 10,
"order": "count",
"exclude": []
},
"facet_filter": {
"fquery": {
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"query_string": {
"query": "*"
}
}
]
}
},
"filter": {
"bool": {
"must": [
{
"fquery": {
"query": {
"field": {
"entrytype": {
"query": "\"ERROR\""
}
}
},
"_cache": true
}
},
{
"range": {
"@timestamp": {
"from": 1405256548987,
"to": "now"
}
}
}
],
"should": [
{
"fquery": {
"query": {
"field": {
"type": {
"query": "\"logbook_qa\""
}
}
},
"_cache": true
}
}
]
}
}
}
}
}
}
}
},
"size": 0
}

The result:
{
"took": 259,
"timed_out": false,
"_shards": {
"total": 41,
"successful": 41,
"failed": 0
},
"hits": {
"total": 212813454,
"max_score": 1,
"hits": []
},
"facets": {
"terms": {
"_type": "terms",
"missing": 0,
"total": 521392,
"other": 195522,
"terms": [
{
"term": "\\b",
"count": 32587
},
{
"term": "X\u0001\u0000",
"count": 32587
},
{
"term": "T\u0010\u0000",
"count": 32587
},
{
"term": "P\u0002\u0000\u0000",
"count": 32587
},
{
"term": "L \u0000\u0000",
"count": 32587
},
{
"term": "H\u0004\u0000\u0000\u0000",
"count": 32587
},
{
"term": "D@\u0000\u0000\u0000",
"count": 32587
},
{
"term": "@\b\u0000\u0000\u0000\u0000",
"count": 32587
},
{
"term": "<\u0001\u0000\u0000\u0000\u0000\u0000",
"count": 32587
},
{
"term": "8\u0010\u0000\u0000\u0000\u0000\u0000",
"count": 32587
}
]
}
}
}

I'd rather not pre-define that these fields are of type string to get
around this, just feels wrong :slight_smile:

--
You received this message because you are subscribed to the Google
Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send
an email to elasticsearc...@googlegroups.com.
To view this discussion on the web visit
https://groups.google.com/d/msgid/elasticsearch/cc9aeb4e-f235-497e-88f3-624bb1d524cd%40googlegroups.com
https://groups.google.com/d/msgid/elasticsearch/cc9aeb4e-f235-497e-88f3-624bb1d524cd%40googlegroups.com?utm_medium=email&utm_source=footer
.
For more options, visit https://groups.google.com/d/optout.

--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/elasticsearch/06f0f099-1547-4158-b1bf-8519219a1a7e%40googlegroups.com.
For more options, visit https://groups.google.com/d/optout.


(system) #5