I am creating a "bill" feature in my Node.js application that will save the username in Elasticsearch every time any user accesses any REST service.
I have been struggling with this for the last two days with no success. It is my first time working with ELK.
Here is my latest attempt.
step 1)
I start Logstash with ./logstash-5.2.2/bin/logstash -f "logstash.conf"
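(Note to myself: I believe Logstash 5.x can also validate the file first, without running the pipeline:
./logstash-5.2.2/bin/logstash -f "logstash.conf" --config.test_and_exit
)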
logstash.conf contains:
input {
  tcp {
    port => 5000
    type => "document_type"
  }
}
filter {
  grok {
    match => { "message" => "data=%{GREEDYDATA:request}" }
  }
  json {
    source => "request"
    target => "parsedJson"
    remove_field => ["message"]
  }
  mutate {
    add_field => {
      "firstname" => "%{[parsedJson][firstname]}}"
      "surname" => "%{[parsedJson][surname]}}"
    }
  }
  kv {
    source => "message"
    remove_field => ["message"]
  }
}
output {
elasticsearch {
codec => "json"
hosts => ["127.0.0.1:9200"]
index => "my_index_previously_mapped"
}
}
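For reference: since the Node client sends a plain JSON string (there is no "data=" prefix for grok to strip, and no key=value pairs for kv to parse), I suspect a much simpler config would do the same job. An untested sketch, keeping my port and index:
input {
  tcp {
    port => 5000
    type => "document_type"
  }
}
filter {
  json {
    source => "message"
  }
}
output {
  elasticsearch {
    hosts => ["127.0.0.1:9200"]
    index => "my_index_previously_mapped"
  }
}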
step 2)
Node.js sends the JSON object to Logstash:
var Logstash = require('logstash-client');

var logstash = new Logstash({
  type: 'tcp',
  host: 'localhost',
  port: 5000
});

var user = {
  firstname: req.body.username,
  surname: req.body.surname
};

logstash.send(user);
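For context, this is roughly where that call will live. A hedged sketch of the "bill" hook as an Express middleware (express, body-parser, and the port are assumptions for illustration, not my real app):

var express = require('express');
var bodyParser = require('body-parser');
var Logstash = require('logstash-client');

var app = express();
app.use(bodyParser.json());

var logstash = new Logstash({ type: 'tcp', host: 'localhost', port: 5000 });

// Hypothetical "bill" middleware: runs before every route handler,
// so each REST access ships one event to Logstash.
app.use(function (req, res, next) {
  if (req.body && req.body.username) {
    logstash.send({
      firstname: req.body.username,
      surname: req.body.surname
    });
  }
  next();
});

app.listen(3000);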
step 3)
I was expecting to get "two columns", firstname and surname, plus the datetime from the server when the document was saved, and not to get the message "column", which seems redundant (pointless in my scenario). The next step would be to aggregate and filter, something like how many accesses a certain user made from this datetime to that datetime. But I am facing several issues, naturally, because I am new to ELK.
PS. Obviously, I am going to add a few more fields when finished, but in order to keep things simple, let's say logging just firstname and surname is enough.
First issue: why is the message "column" still there?
Second issue: why does the firstname "column" just contain the static string "%{[parsedJson][firstname]}}" (and surname likewise)?
Third issue: why is @timestamp always 3 hours ahead of my system datetime?
Fourth issue: I understand that I have to create a mapping if I want to aggregate. Why is the command below trying to re-create the index?
curl -XPUT 'http://localhost:9200/my_index_previously_mapped/' -d '
{
  "mappings" : {
    "my_document_type" : {
      "properties" : {
        "firstname" : { "type" : "text" }
      }
    }
  }
}'
{"error":{"root_cause":[{"type":"index_already_exists_exception","reason":"index [my_index_previously_mapped/_GiiT8JGSruBt9ytm8L6zQ] already exists","index_uuid":"_GiiT8JGSruBt9ytm8L6zQ","index":"my_index_previously_mapped"}],"type":"index_already_exists_exception","reason":"index [my_index_previously_mapped/_GiiT8JGSruBt9ytm8L6zQ] already exists","index_uuid":"_GiiT8JGSruBt9ytm8L6zQ","index":"my_index_previously_mapped"},"status":400}
*** Added on March 22, 2017 at 11 AM UTC-3
I started Logstash with:
input {
  tcp {
    port => 5000
    type => "document_type"
  }
}
output {
  stdout { codec => rubydebug }
}
then I got:
{
    "@timestamp" => 2017-03-22T13:43:01.443Z,
          "port" => 58794,
      "@version" => "1",
          "host" => "127.0.0.1",
       "message" => "{\"firstname\":\"a\",\"surname\":\"a\"}",
          "type" => "document_type"
}
As a previous SQL and NoSQL (MongoDB) user, my intention is to get three "columns" in Elasticsearch: one for the timestamp, another for firstname, and a third for surname. Then I can search with aggregations, e.g. how many accesses a certain user made in a certain period, or, just as a didactic example, how many users with firstname john are in Elasticsearch, ignoring the surname. If I can learn how to achieve these two tasks, it will probably be a large step forward.
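To make those two tasks concrete, here is an untested sketch of the kind of query I am aiming for, assuming the default dynamic mapping (which, if I understand correctly, gives every text field a .keyword sub-field that can be aggregated):
curl -XPOST 'http://localhost:9200/my_index_previously_mapped/_search' -d '
{
  "size": 0,
  "query": {
    "range": { "@timestamp": { "gte": "2017-03-22T00:00:00Z", "lte": "2017-03-22T23:59:59Z" } }
  },
  "aggs": {
    "accesses_per_user": {
      "terms": { "field": "firstname.keyword" }
    }
  }
}'
Each terms bucket count would then be the number of accesses for that user within the period.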
*** Added March 22 at 12 PM UTC-3
{
    "@timestamp" => 2017-03-22T14:56:53.064Z,
          "port" => 33666,
      "@version" => "1",
          "host" => "127.0.0.1",
       "message" => "{\"firstname\":\"a\",\"surname\":\"a\"}",
          "type" => "document_type",
          "tags" => [
        [0] "_grokparsefailure"
    ]
}
is the result of:
filter {
  grok {
    match => { "message" => "%{TIMESTAMP_ISO8601:time} \ %{GREEDYDATA:msg}" }
  }
}
(I suspect the grok fails because the message is just the raw JSON string, which contains no ISO8601 timestamp for the pattern to match.)
*** Added on Mar 22 at 12:10 PM UTC-3
My filter is now:
filter {
  json {
    source => "message"
  }
}
and the output is:
{
     "firstname" => "a",
    "@timestamp" => 2017-03-22T15:04:29.108Z,
          "port" => 34102,
       "surname" => "a",
      "@version" => "1",
          "host" => "127.0.0.1",
       "message" => "{\"firstname\":\"a\",\"surname\":\"a\"}",
          "type" => "document_type"
}
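That is much closer to what I want. For my first issue (the redundant message "column"), I assume the common remove_field option also works on the json filter; an untested sketch:
filter {
  json {
    source => "message"
    remove_field => ["message"]
  }
}
(As far as I can tell, remove_field only fires when the parse succeeds, so the raw line would be kept whenever the JSON is malformed.)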
I can successfully search using:
{"query":{"bool":{"must":[{"range":{"@timestamp":{"gte":"2017-03-22","lte":"2017-03-22"}}}],"must_not":,"should":}},"from":0,"size":10,"sort":,"aggs":{}}
Nevertheless, I get this error while trying to aggregate:
... Fielddata is disabled on text fields by default. Set fielddata=true on [firstname] in order to load fielddata in memory by uninverting the inverted index...
Then, I tried:
curl -XPUT 'http://localhost:9200/greencard_indice/_mapping/cpfTipo
{
"properties": {
"firstname": {
"type": "text",
"fielddata": true
}
}
}'
curl: (3) [globbing] nested brace in column 75
and I am still getting the same error while aggregating.
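For what it is worth, I now suspect two separate problems. First, that curl call never reaches Elasticsearch: the URL quote is never closed and there is no -d flag, so the shell treats the JSON braces as part of a single argument, which is exactly the globbing error. An untested sketch of the corrected request:
curl -XPUT 'http://localhost:9200/greencard_indice/_mapping/cpfTipo' -d '
{
  "properties": {
    "firstname": {
      "type": "text",
      "fielddata": true
    }
  }
}'
Second, if the default .keyword sub-field exists, aggregating on firstname.keyword (as in the sketch further up) should work without enabling fielddata at all.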