Accessing all properties of an object field


(Daniel E) #1

Given documents with a structure like this:

{"histograms":{"CYCLE_COLLECTOR":{"range":[1,10000],"bucket_count":50,"histogram_type":0,"values":{"3":0,"4":4,"5":2,"7":1,"8":4,"10":2,"14":1,"17":0},"sum":102},"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1},"EARLY_GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"27":0,"79":1,"232":0},"sum":154},"GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":2},"GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"0":1,"1":0},"sum":0},"MEMORY_JS_GC_HEAP":{"range":[1024,524288],"bucket_count":10,"histogram_type":0,"values":{"4870":0,"10622":5,"23167":0},"sum":66560},"MEMORY_LAYOUT_ALL":{"range":[1024,65536],"bucket_count":10,"histogram_type":0,"values":{"0":0,"1024":4,"1722":1,"2896":0},"sum":8744},"MEMORY_RESIDENT":{"range":[32768,1048576],"bucket_count":10,"histogram_type":0,"values":{"50535":0,"77936":5,"120194":0},"sum":589172},"ZIPARCHIVE_CRC":{"range":[1,2],"bucket_count":3,"histogram_type":2,"values":{"0":0,"1":87,"2":0},"sum":87}}}

How could I use a script (such as in a facet query) to get at all the
attributes under histograms.EARLY_GLUESTARTUP_READ_OPS.values ?


(Daniel E) #2

As a follow-up, I've been trying to use ElasticSearch Head's transformation
function to aggregate the results, but the problem there is that I have to
get the complete result set which exhausts memory in ES.
So, I've been trying to come up with a script_fields statement that would
transform the values object into a format that would allow me to facet on
terms and then have a single object returned, but I can't find a way to do
even that. Here is the closest I've come, which fails with an error that
java.util.HashMap is not iterable:

"script_fields": {
"early_gluestartup_read_ops": {
"script": "var arr = []; for (key :
source.histograms.EARLY_GLUESTARTUP_READ_OPS.values) { arr.push(key); };
arr.reduce(function(comb,key){return
comb.push(key+'
'+_source.histograms.EARLY_GLUESTARTUP_READ_OPS.values[key]);},[]);
return comb;"
}
}

On Tue, Jul 5, 2011 at 5:02 PM, Daniel Einspanjer deinspanjer@gmail.com wrote:

Given documents with a structure like this:

{"histograms":{"CYCLE_COLLECTOR":{"range":[1,10000],"bucket_count":50,"histogram_type":0,"values":{"3":0,"4":4,"5":2,"7":1,"8":4,"10":2,"14":1,"17":0},"sum":102},"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1},"EARLY_GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"27":0,"79":1,"232":0},"sum":154},"GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":2},"GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"0":1,"1":0},"sum":0},"MEMORY_JS_GC_HEAP":{"range":[1024,524288],"bucket_count":10,"histogram_type":0,"values":{"4870":0,"10622":5,"23167":0},"sum":66560},"MEMORY_LAYOUT_ALL":{"range":[1024,65536],"bucket_count":10,"histogram_type":0,"values":{"0":0,"1024":4,"1722":1,"2896":0},"sum":8744},"MEMORY_RESIDENT":{"range":[32768,1048576],"bucket_count":10,"histogram_type":0,"values":{"50535":0,"77936":5,"120194":0},"sum":589172},"ZIPARCHIVE_CRC":{"range":[1,2],"bucket_count":3,"histogram_type":2,"values":{"0":0,"1":87,"2":0},"sum":87}}}

How could I use a script (such as in a facet query) to get at all the
attributes under histograms.EARLY_GLUESTARTUP_READ_OPS.values ?


(Shay Banon) #3

Maybe you can explain what you are trying to do exactly? I did not understand...

On Wednesday, July 6, 2011 at 2:52 AM, Daniel Einspanjer wrote:

As a follow-up, I've been trying to use ElasticSearch Head's transformation function to aggregate the results, but the problem there is that I have to get the complete result set which exhausts memory in ES.
So, I've been trying to come up with a script_fields statement that would transform the values object into a format that would allow me to facet on terms and then have a single object returned, but I can't find a way to do even that. here is the closest I've come which fails with an error that java.util.HashMap is not iterable:

"script_fields": {
"early_gluestartup_read_ops": {
"script": "var arr = []; for (key : source.histograms.EARLY_GLUESTARTUP_READ_OPS.values) { arr.push(key); }; arr.reduce(function(comb,key){return comb.push(key+''+_source.histograms.EARLY_GLUESTARTUP_READ_OPS.values[key]);},[]); return comb;"
}
}

On Tue, Jul 5, 2011 at 5:02 PM, Daniel Einspanjer <deinspanjer@gmail.com (mailto:deinspanjer@gmail.com)> wrote:

Given documents with a structure like this:

{"histograms":{"CYCLE_COLLECTOR":{"range":[1,10000],"bucket_count":50,"histogram_type":0,"values":{"3":0,"4":4,"5":2,"7":1,"8":4,"10":2,"14":1,"17":0},"sum":102},"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1},"EARLY_GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"27":0,"79":1,"232":0},"sum":154},"GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":2},"GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"0":1,"1":0},"sum":0},"MEMORY_JS_GC_HEAP":{"range":[1024,524288],"bucket_count":10,"histogram_type":0,"values":{"4870":0,"10622":5,"23167":0},"sum":66560},"MEMORY_LAYOUT_ALL":{"range":[1024,65536],"bucket_count":10,"histogram_type":0,"values":{"0":0,"1024":4,"1722":1,"2896":0},"sum":8744},"MEMORY_RESIDENT":{"range":[32768,1048576],"bucket_count":10,"histogram_type":0,"values":{"50535":0,"77936":5,"120194":0},"sum":589172},"ZIPARCHIVE_CRC":{"range":[1,2],"bucket_count":3,"histogram_type":2,"values":{"0":0,"1":87,"2":0},"sum":87}}}

How could I use a script (such as in a facet query) to get at all the attributes under histograms.EARLY_GLUESTARTUP_READ_OPS.values ?


(Daniel E) #4

Ultimately, I was hoping to get a facet histogram of the histogram data
stored in these documents. For instance, in each document there is an
EARLY_GLUESTARTUP_READ_OPS.values object whose properties hold the
histogram data: each key is a histogram bucket and each value is the count
of entries for that bucket.

It seems to be impossible to facet this data in any useful way, even with
the use of script_fields because I can't enumerate all the property keys
under the .values object since I don't know them ahead of time due to their
being a histogram.

As an example, if I had the following three documents:

{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1}}}
{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"3":2,"10":9,"11":0},"sum":1}}}
{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"3":1,"5":11,"11":0},"sum":1}}}

I would like to have some efficient way to generate the following aggregate
result:

{"sum_histogram":{"0":0,"1":1,"3":3,"5":11,"10":9,"11":0}}

But depending on the corpus of documents, I don't know what values will
actually be enumerated.

-Daniel

On Tue, Jul 5, 2011 at 9:27 PM, Shay Banon shay.banon@elasticsearch.com wrote:

Maybe you can explain what you are trying to do exactly? I did not
understand...

On Wednesday, July 6, 2011 at 2:52 AM, Daniel Einspanjer wrote:

As a follow-up, I've been trying to use ElasticSearch Head's transformation
function to aggregate the results, but the problem there is that I have to
get the complete result set which exhausts memory in ES.
So, I've been trying to come up with a script_fields statement that would
transform the values object into a format that would allow me to facet on
terms and then have a single object returned, but I can't find a way to do
even that. here is the closest I've come which fails with an error that
java.util.HashMap is not iterable:

"script_fields": {
"early_gluestartup_read_ops": {
"script": "var arr = []; for (key :
source.histograms.EARLY_GLUESTARTUP_READ_OPS.values) { arr.push(key); };
arr.reduce(function(comb,key){return
comb.push(key+'
'+_source.histograms.EARLY_GLUESTARTUP_READ_OPS.values[key]);},[]);
return comb;"
}
}

On Tue, Jul 5, 2011 at 5:02 PM, Daniel Einspanjer deinspanjer@gmail.com wrote:

Given documents with a structure like this:

{"histograms":{"CYCLE_COLLECTOR":{"range":[1,10000],"bucket_count":50,"histogram_type":0,"values":{"3":0,"4":4,"5":2,"7":1,"8":4,"10":2,"14":1,"17":0},"sum":102},"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1},"EARLY_GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"27":0,"79":1,"232":0},"sum":154},"GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":2},"GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"0":1,"1":0},"sum":0},"MEMORY_JS_GC_HEAP":{"range":[1024,524288],"bucket_count":10,"histogram_type":0,"values":{"4870":0,"10622":5,"23167":0},"sum":66560},"MEMORY_LAYOUT_ALL":{"range":[1024,65536],"bucket_count":10,"histogram_type":0,"values":{"0":0,"1024":4,"1722":1,"2896":0},"sum":8744},"MEMORY_RESIDENT":{"range":[32768,1048576],"bucket_count":10,"histogram_type":0,"values":{"50535":0,"77936":5,"120194":0},"sum":589172},"ZIPARCHIVE_CRC":{"range":[1,2],"bucket_count":3,"histogram_type":2,"values":{"0":0,"1":87,"2":0},"sum":87}}}

How could I use a script (such as in a facet query) to get at all the
attributes under histograms.EARLY_GLUESTARTUP_READ_OPS.values ?


(Daniel E) #5

I managed to make some progress. I was able to create a script_field that
expands out all the data in each document's histogram:

{
"script_fields": {
"early_gluestartup_read_ops": {
"script": "var arr = []; var start =
_source.histograms.EARLY_GLUESTARTUP_READ_OPS.range[0]; var end =
_source.histograms.EARLY_GLUESTARTUP_READ_OPS.range[1]; var vals =
_source.histograms.EARLY_GLUESTARTUP_READ_OPS.values; for (i = start; i <=
end; i++) { if (vals.containsKey(String.valueOf(i))) { for (j = 0; j <
vals[String.valueOf(i)]; j++) { arr.add(i); } } }; return arr;"
}
},
"query": {
"term": {
"OS": "winnt"
}
},
"facets": {
"early_gluestartup_read_ops": {
"terms": {
"field": "early_gluestartup_read_ops"
}
}
}
}

But I can't facet on the script field. I expect this is the same problem
that was just posted in the mailing list where the person wanted to use a
script_field in a term_stats. Think I've hit a dead end here. Gonna have
to do these aggregations in some other system instead.

-Daniel

On Wed, Jul 6, 2011 at 11:06 AM, Daniel Einspanjer deinspanjer@gmail.com wrote:

Ultimately, I was hoping to get a facet histogram of the histogram data
stored in these documents. for instance, in each document, there is an
EARLY_GLUESTARTUP_READ_OPS.values object which has properties consisting of
the histogram data, key is the histogram bucket and value is the count of
entries for that key.

It seems to be impossible to facet this data in any useful way, even with
the use of script_fields because I can't enumerate all the property keys
under the .values object since I don't know them ahead of time due to their
being a histogram.

As an example, if I had the following three documents:

{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1}}}

{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"3":2,"10":9,"11":0},"sum":1}}}

{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"3":1,"5":11,"11":0},"sum":1}}}

I would like to have some efficient way to generate the following aggregate
result:

{"sum_histogram":{"0":0,"1":1,"3":3,"5":11,"10":9,"11":0}}

But depending on the corpus of documents, I don't know what values will
actually be enumerated.

-Daniel

On Tue, Jul 5, 2011 at 9:27 PM, Shay Banon shay.banon@elasticsearch.com wrote:

Maybe you can explain what you are trying to do exactly? I did not
understand...

On Wednesday, July 6, 2011 at 2:52 AM, Daniel Einspanjer wrote:

As a follow-up, I've been trying to use ElasticSearch Head's
transformation function to aggregate the results, but the problem there is
that I have to get the complete result set which exhausts memory in ES.
So, I've been trying to come up with a script_fields statement that would
transform the values object into a format that would allow me to facet on
terms and then have a single object returned, but I can't find a way to do
even that. here is the closest I've come which fails with an error that
java.util.HashMap is not iterable:

"script_fields": {
"early_gluestartup_read_ops": {
"script": "var arr = []; for (key :
source.histograms.EARLY_GLUESTARTUP_READ_OPS.values) { arr.push(key); };
arr.reduce(function(comb,key){return
comb.push(key+'
'+_source.histograms.EARLY_GLUESTARTUP_READ_OPS.values[key]);},[]);
return comb;"
}
}

On Tue, Jul 5, 2011 at 5:02 PM, Daniel Einspanjer deinspanjer@gmail.com wrote:

Given documents with a structure like this:

{"histograms":{"CYCLE_COLLECTOR":{"range":[1,10000],"bucket_count":50,"histogram_type":0,"values":{"3":0,"4":4,"5":2,"7":1,"8":4,"10":2,"14":1,"17":0},"sum":102},"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1},"EARLY_GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"27":0,"79":1,"232":0},"sum":154},"GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":2},"GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"0":1,"1":0},"sum":0},"MEMORY_JS_GC_HEAP":{"range":[1024,524288],"bucket_count":10,"histogram_type":0,"values":{"4870":0,"10622":5,"23167":0},"sum":66560},"MEMORY_LAYOUT_ALL":{"range":[1024,65536],"bucket_count":10,"histogram_type":0,"values":{"0":0,"1024":4,"1722":1,"2896":0},"sum":8744},"MEMORY_RESIDENT":{"range":[32768,1048576],"bucket_count":10,"histogram_type":0,"values":{"50535":0,"77936":5,"120194":0},"sum":589172},"ZIPARCHIVE_CRC":{"range":[1,2],"bucket_count":3,"histogram_type":2,"values":{"0":0,"1":87,"2":0},"sum":87}}}

How could I use a script (such as in a facet query) to get at all the
attributes under histograms.EARLY_GLUESTARTUP_READ_OPS.values ?


(Daniel E) #6

Since script_fields can't be used in facets, I left that approach and went
on to trying to refer to each value explicitly. You can see some of the
queries we have successfully made here:
http://etherpad.mozilla.org:9000/ep/pad/view/ro.zIxyOfHFden/rev.259
It is really unfortunate that faceting can't get at the data the way we
want. The only way I've thought of to really get it to work would be to
have a flat array instead of a histogram. But that would be very expensive
in terms of document size, since a bucket such as 5:5000 would turn into
5000 elements with the value 5 in the array.

-Daniel

On Wed, Jul 6, 2011 at 12:56 PM, Daniel Einspanjer deinspanjer@gmail.com wrote:

I managed to make some progress.. I was able to create a script_field that
expands out all the data in each document's histogram:

{
"script_fields": {
"early_gluestartup_read_ops": {
"script": "var arr = []; var start =
_source.histograms.EARLY_GLUESTARTUP_READ_OPS.range[0]; var end =
_source.histograms.EARLY_GLUESTARTUP_READ_OPS.range[1]; var vals =
_source.histograms.EARLY_GLUESTARTUP_READ_OPS.values; for (i = start; i <=
end; i++) { if (vals.containsKey(String.valueOf(i))) { for (j = 0; j <
vals[String.valueOf(i)]; j++) { arr.add(i); } } }; return arr;"
}
},
"query": {
"term": {
"OS": "winnt"
}
},
"facets": {
"early_gluestartup_read_ops": {
"terms": {
"field": "early_gluestartup_read_ops"
}
}
}
}

But I can't facet on the script field. I expect this is the same problem
that was just posted in the mailing list where the person wanted to use a
script_field in a term_stats. Think I've hit a dead end here. Gonna have
to do these aggregations in some other system instead.

-Daniel

On Wed, Jul 6, 2011 at 11:06 AM, Daniel Einspanjer deinspanjer@gmail.com wrote:

Ultimately, I was hoping to get a facet histogram of the histogram data
stored in these documents. for instance, in each document, there is an
EARLY_GLUESTARTUP_READ_OPS.values object which has properties consisting of
the histogram data, key is the histogram bucket and value is the count of
entries for that key.

It seems to be impossible to facet this data in any useful way, even with
the use of script_fields because I can't enumerate all the property keys
under the .values object since I don't know them ahead of time due to their
being a histogram.

As an example, if I had the following three documents:

{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1}}}

{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"3":2,"10":9,"11":0},"sum":1}}}

{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"3":1,"5":11,"11":0},"sum":1}}}

I would like to have some efficient way to generate the following
aggregate result:

{"sum_histogram":{"0":0,"1":1,"3":3,"5":11,"10":9,"11":0}}

But depending on the corpus of documents, I don't know what values will
actually be enumerated.

-Daniel

On Tue, Jul 5, 2011 at 9:27 PM, Shay Banon shay.banon@elasticsearch.com wrote:

Maybe you can explain what you are trying to do exactly? I did not
understand...

On Wednesday, July 6, 2011 at 2:52 AM, Daniel Einspanjer wrote:

As a follow-up, I've been trying to use ElasticSearch Head's
transformation function to aggregate the results, but the problem there is
that I have to get the complete result set which exhausts memory in ES.
So, I've been trying to come up with a script_fields statement that would
transform the values object into a format that would allow me to facet on
terms and then have a single object returned, but I can't find a way to do
even that. here is the closest I've come which fails with an error that
java.util.HashMap is not iterable:

"script_fields": {
"early_gluestartup_read_ops": {
"script": "var arr = []; for (key :
source.histograms.EARLY_GLUESTARTUP_READ_OPS.values) { arr.push(key); };
arr.reduce(function(comb,key){return
comb.push(key+'
'+_source.histograms.EARLY_GLUESTARTUP_READ_OPS.values[key]);},[]);
return comb;"
}
}

On Tue, Jul 5, 2011 at 5:02 PM, Daniel Einspanjer <deinspanjer@gmail.com> wrote:

Given documents with a structure like this:

{"histograms":{"CYCLE_COLLECTOR":{"range":[1,10000],"bucket_count":50,"histogram_type":0,"values":{"3":0,"4":4,"5":2,"7":1,"8":4,"10":2,"14":1,"17":0},"sum":102},"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1},"EARLY_GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"27":0,"79":1,"232":0},"sum":154},"GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":2},"GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"0":1,"1":0},"sum":0},"MEMORY_JS_GC_HEAP":{"range":[1024,524288],"bucket_count":10,"histogram_type":0,"values":{"4870":0,"10622":5,"23167":0},"sum":66560},"MEMORY_LAYOUT_ALL":{"range":[1024,65536],"bucket_count":10,"histogram_type":0,"values":{"0":0,"1024":4,"1722":1,"2896":0},"sum":8744},"MEMORY_RESIDENT":{"range":[32768,1048576],"bucket_count":10,"histogram_type":0,"values":{"50535":0,"77936":5,"120194":0},"sum":589172},"ZIPARCHIVE_CRC":{"range":[1,2],"bucket_count":3,"histogram_type":2,"values":{"0":0,"1":87,"2":0},"sum":87}}}

How could I use a script (such as in a facet query) to get at all the
attributes under histograms.EARLY_GLUESTARTUP_READ_OPS.values ?


(Shay Banon) #7

I am still not sure I fully understand it..., maybe jump on IRC and we can have more realtime discussion?

On Thursday, July 7, 2011 at 9:01 PM, Daniel Einspanjer wrote:

Since script_fields can't be used in facets, I left that approach and went on to trying to refer to each value explicitly. You can see some of the queries we have successfully made here: http://etherpad.mozilla.org:9000/ep/pad/view/ro.zIxyOfHFden/rev.259
It is really unfortunate that faceting can't get at the data the way we want. The only way I've thought of to really get it to work would be to have a flat array instead of a histogram. But that would be very expensive in terms of document size since: 5:5000 would turn into 5000 5 elements in the array.

-Daniel

On Wed, Jul 6, 2011 at 12:56 PM, Daniel Einspanjer <deinspanjer@gmail.com (mailto:deinspanjer@gmail.com)> wrote:

I managed to make some progress.. I was able to create a script_field that expands out all the data in each document's histogram:

{
"script_fields": {
"early_gluestartup_read_ops": {
"script": "var arr = []; var start = _source.histograms.EARLY_GLUESTARTUP_READ_OPS.range[0]; var end = _source.histograms.EARLY_GLUESTARTUP_READ_OPS.range[1]; var vals = _source.histograms.EARLY_GLUESTARTUP_READ_OPS.values; for (i = start; i <= end; i++) { if (vals.containsKey(String.valueOf(i))) { for (j = 0; j < vals[String.valueOf(i)]; j++) { arr.add(i); } } }; return arr;"
}
},
"query": {
"term": {
"OS": "winnt"
}
},
"facets": {
"early_gluestartup_read_ops": {
"terms": {
"field": "early_gluestartup_read_ops"
}
}
}
}

But I can't facet on the script field. I expect this is the same problem that was just posted in the mailing list where the person wanted to use a script_field in a term_stats. Think I've hit a dead end here. Gonna have to do these aggregations in some other system instead.

-Daniel

On Wed, Jul 6, 2011 at 11:06 AM, Daniel Einspanjer <deinspanjer@gmail.com (mailto:deinspanjer@gmail.com)> wrote:

Ultimately, I was hoping to get a facet histogram of the histogram data stored in these documents. for instance, in each document, there is an EARLY_GLUESTARTUP_READ_OPS.values object which has properties consisting of the histogram data, key is the histogram bucket and value is the count of entries for that key.

It seems to be impossible to facet this data in any useful way, even with the use of script_fields because I can't enumerate all the property keys under the .values object since I don't know them ahead of time due to their being a histogram.

As an example, if I had the following three documents:

{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1}}}
{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"3":2,"10":9,"11":0},"sum":1}}}
{"histograms":{"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"3":1,"5":11,"11":0},"sum":1}}}

I would like to have some efficient way to generate the following aggregate result:

{"sum_histogram":{"0":0,"1":1,"3":3,"5":11,"10":9,"11":0}}

But depending on the corpus of documents, I don't know what values will actually be enumerated.

-Daniel

On Tue, Jul 5, 2011 at 9:27 PM, Shay Banon <shay.banon@elasticsearch.com (mailto:shay.banon@elasticsearch.com)> wrote:

Maybe you can explain what you are trying to do exactly? I did not understand...

On Wednesday, July 6, 2011 at 2:52 AM, Daniel Einspanjer wrote:

As a follow-up, I've been trying to use ElasticSearch Head's transformation function to aggregate the results, but the problem there is that I have to get the complete result set which exhausts memory in ES.
So, I've been trying to come up with a script_fields statement that would transform the values object into a format that would allow me to facet on terms and then have a single object returned, but I can't find a way to do even that. here is the closest I've come which fails with an error that java.util.HashMap is not iterable:

"script_fields": {
"early_gluestartup_read_ops": {
"script": "var arr = []; for (key : source.histograms.EARLY_GLUESTARTUP_READ_OPS.values) { arr.push(key); }; arr.reduce(function(comb,key){return comb.push(key+''+_source.histograms.EARLY_GLUESTARTUP_READ_OPS.values[key]);},[]); return comb;"
}
}

On Tue, Jul 5, 2011 at 5:02 PM, Daniel Einspanjer <deinspanjer@gmail.com (mailto:deinspanjer@gmail.com)> wrote:

Given documents with a structure like this:

{"histograms":{"CYCLE_COLLECTOR":{"range":[1,10000],"bucket_count":50,"histogram_type":0,"values":{"3":0,"4":4,"5":2,"7":1,"8":4,"10":2,"14":1,"17":0},"sum":102},"EARLY_GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":1},"EARLY_GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"27":0,"79":1,"232":0},"sum":154},"GLUESTARTUP_READ_OPS":{"range":[1,100],"bucket_count":12,"histogram_type":1,"values":{"0":0,"1":1,"11":0},"sum":2},"GLUESTARTUP_READ_TRANSFER":{"range":[1,51200],"bucket_count":12,"histogram_type":0,"values":{"0":1,"1":0},"sum":0},"MEMORY_JS_GC_HEAP":{"range":[1024,524288],"bucket_count":10,"histogram_type":0,"values":{"4870":0,"10622":5,"23167":0},"sum":66560},"MEMORY_LAYOUT_ALL":{"range":[1024,65536],"bucket_count":10,"histogram_type":0,"values":{"0":0,"1024":4,"1722":1,"2896":0},"sum":8744},"MEMORY_RESIDENT":{"range":[32768,1048576],"bucket_count":10,"histogram_type":0,"values":{"50535":0,"77936":5,"120194":0},"sum":589172},"ZIPARCHIVE_CRC":{"range":[1,2],"bucket_count":3,"histogram_type":2,"values":{"0":0,"1":87,"2":0},"sum":87}}}

How could I use a script (such as in a facet query) to get at all the attributes under histograms.EARLY_GLUESTARTUP_READ_OPS.values ?


(system) #8