Mysql to elasticsearch Query conversion


(Rabeet waqar) #1

SELECT user_country, game_name, proxy_country, game_server_country, proxy_id, COUNT(user_country) AS total_sessions,
SUM(if(t1.avg_ping < t1.avg_line_ping, 1, 0)) as positive_sessions, SUM(if(t1.avg_ping > t1.avg_line_ping, 1, 0)) as negative_sessions
FROM my_table t1
WHERE 1=1 AND session_time > 1441652400 AND session_time < 1442257200 GROUP BY user_country, game_name, proxy_country, game_server_country ORDER BY total_sessions DESC

@polyfractal
Please any one help me out with this.
Thanks


(Zachary Tong) #2

My SQL is rusty, but this is a fairly close approximation:

{
    "query": {
        "filtered": {
            "filter": {
                "range": {
                    "session_time": {
                        "gte": 1441652400,
                        "lte": 1442257200
                    }
                }
            }
        }   
    },
    "aggs": {
        "user_country_agg": {
            "terms": {
                "field": "user_country"
            },
            "aggs": {
                "game_name_agg": {
                    "terms": {
                        "field": "game_name"
                    },
                    "aggs": {
                        "proxy_country": {
                            "terms": {
                                "field": "proxy_country"
                            },
                            "aggs": {
                                "game_server_country_agg": {
                                    "terms": {
                                        "field": "game_server_country"
                                    },
                                    "aggs": {
                                        "total_sessions": {
                                            "value_count": {
                                                "field": "user_country"
                                            }
                                        },
                                        "positive_sessions": {
                                            "sum": {
                                                "script" : "doc['avg_ping'].value < avg_line_ping ? 1 : 0"
                                            }
                                        },
                                        "negative_sessions": {
                                            "sum": {
                                                "script" : "doc['avg_ping'].value > avg_line_ping ? 1 : 0"
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        
    }
}

Note that it doesn't perform sorting, so if that is critical you may need a different solution. Currently, ES doesn't let you sort a terms agg based on the value of a multi-valued child agg (e.g. an embedded terms).

Also note that deeply embedded terms aggs can quickly cause problems due to high branching factor. E.g. if each terms agg has 50 terms, and you have four levels, that is 50^4 = 6,250,000 buckets. You may need to use a different collect_mode to prevent memory problems

If sorting is critical, you could try something like this instead:

{
    "query": {
        "filtered": {
            "filter": {
                "range": {
                    "session_time": {
                        "gte": 1441652400,
                        "lte": 1442257200
                    }
                }
            }
        }   
    },
    "aggs": {
        "user_country_agg": {
            "terms": {
                "script" : "doc['user_country'].value + '_' + doc['game_name_agg'].value + '_' + doc['proxy_country'].value + '_' + doc['game_server_country_agg'].value",
                "order": {
                    "total_sessions": "desc"
                }
            },
            "aggs": {
                "total_sessions": {
                    "value_count": {
                        "field": "user_country"
                    }
                },
                "positive_sessions": {
                    "sum": {
                        "script" : "doc['avg_ping'].value < avg_line_ping ? 1 : 0"
                    }
                },
                "negative_sessions": {
                    "sum": {
                        "script" : "doc['avg_ping'].value > avg_line_ping ? 1 : 0"
                    }
                }
            }
        }      
    }
}

E.g instead of nesting aggregations, you build your grouping key with a script by concatenating all the values together. Then you can sort on total_sessions since it is a single-valued agg

As general advice, directly translating SQL to Elasticsearch is not always the best idea. They operate on different principles, so you may have a hard time or bad performance trying to treat ES as a relational store.


(Rabeet waqar) #3

Thanks alot :smile:
I almost made up the same query
Thumbs up (y)


(Rabeet waqar) #4

I have a question,
How will i use the second sort query with the first one?

Thanks


(Rabeet waqar) #5

One more question :

I want to SELECT proxy_id in elasticsearch query also. Don't want to group_by with proxy_id.

How will i do this?

Thanks


(Rabeet waqar) #6

Unfortunately date range is not working , my final query is this.

{
"size": 0,
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"session_time": {
"gte": 1441652400,
"lte": 1442257200
}
}
}
],
"must_not": []
}
}
}
},
"aggs": {
"date_range": {
"date_histogram": {
"field": "session_time",
"interval": "15m",
"min_doc_count": 1,
"extended_bounds": {
"min": 1441652400,
"max": 1442257200
}
},
"aggs": {
"game_name_agg": {
"terms": {
"field": "game_name"
},
"aggs": {
"proxy_country_agg": {
"terms": {
"field": "proxy_country"
},
"aggs": {
"game_server_country_agg": {
"terms": {
"field": "game_server_country"
},
"aggs": {
"user_country_agg": {
"terms": {
"script": "doc['user_country'].value + '' + doc['game_name'].value + '' + doc['proxy_country'].value + '_' + doc['game_server_country'].value",
"order": {
"total_sessions": "desc"
}
},
"aggs": {
"total_sessions": {
"value_count": {
"field": "user_country"
}
},
"positive_sessions": {
"sum": {
"script": "doc['avg_ping'].value < doc['avg_line_ping'].value ? 1 : 0"
}
},
"negative_sessions": {
"sum": {
"script": "doc['avg_ping'].value > doc['avg_line_ping'].value ? 1 : 0"
}
}
}
}
}
}
}
}
}
}
}
}
}
}

Thanks


(Croos Nilukshan) #7

Help me to convert this MySQL query to elasticsearch (json) query and how to set it on kibana.

SELECT
user, call_date, sum(call_length_in_sec) as 'work_time',
(max(end_epoch)-min(start_epoch))-sum(length_in_sec) as 'idle_time'
FROM vicidial_log
WHERE date(call_date)='2016-01-12'
group by user


(Christian Dahlqvist) #8

Please start a new thread for your question.


(Croos Nilukshan) #9

ok. Thanks for your response. I have posted the same as a new discussion. please refer it on the following link.


(system) #10