Parsing nested JSON with Logstash

I'm trying to import a collection from MongoDB with the following input:

input {
    mongodb {
        uri => "uripath"
        placeholder_db_dir => "../opt/logstash-mongodb/"
        placeholder_db_name => "logstash_sqlite.db"
        collection => '^users$'
        batch_size => 5000
        add_field => {"application" => "mongo-users"}
    }
}
filter {
    if [application] == "mongo-users" { 
        mutate { 
            gsub => [ "log_entry", "=>", ": " ]
            rename => { "_id" => "mongo_id" }
            remove_field => ["_id"]
        }
        mutate { 
            gsub => [ "log_entry", "BSON::ObjectId\('([0-9a-z]+)'\)", '"\1"' ]
            rename => { "_id" => "mongo_id" }
        }
        mutate {
            gsub => [ "log_entry", "(\d{4}-\d{2}-\d{5}:\d{2}:\d{2}:.{4} \w+,)", '"\1"' ]
            remove_field => ["_id"]
        }
        json { 
            source => "log_entry" 
            remove_field => ["log_entry", "_id"]
        }
    }
}
output {
    if [application] == "mongo-users" {
        stdout {
            codec => rubydebug
        }
        # elasticsearch {
        #     hosts => ["localhost:9200"]
        #     action => "index"
        #     index => "mongo_log_users"
        # }
    }
}

My log file states

[\"614bf2264d02d7ca9c383fb3\"]}", :exception=>#<LogStash::Json::ParserError: Unexpected character ('-' (code 45)): was expecting comma to separate Object entries
 at [Source: (byte[])"{"_id": "614b3d5a76b3033e728de132", ..}

The output is:

{
               "log_entry" => "{\"_id\": \"61490821d916252e0be39a82\", \"idealProfile\": [], \"firstTimeInit\": false, \"roles\": [\"60db49309fbbf53f5dd96619\", \"60db48939fbbf53f5dd96550\"], \"wishlist\": [], \"displayName\": \"Camilo Andres Lengua\", \"email\": \"2666camilo.lengua@imagemaker.com\", \"active\": true, \"position\": \"5ec2f4d6699d7b00369b6722\", \"profile\": \"610874af025d3800124a5e4f\", \"skills\": [], \"created_at\": 2021-04-20 20:19:11 UTC, \"updated_at\": 2021-09-06 20:26:13 UTC, \"__v\": 0, \"photoURL\": \"https://lh3.googleusercontent.com/a-/AOh14Ggr4JljAVCJPjiDBLiawNj5eCYFZVJCVAVkdyQL=s96-c\", \"lastName\": \"Lengua\", \"lastNameSecond\": \"Duque\", \"country\": \"AF\", \"name\": \"Camilo Andrés\", \"savedDomains\": [\"5ff75f1eff600800171ec7af\", \"5ff7627fff600800171ec7b1\", \"5ff76321ff600800171ec7b2\", \"5ff764b1ff600800171ec7b3\", 
\"5ff76700ff600800171ec7b4\", \"5ff7673eff600800171ec7b5\", \"5ff76884ff600800171ec7b6\", \"5ff768ebff600800171ec7b7\", \"5ff7698cff600800171ec7b8\", \"5ff7621eff600800171ec7b0\"], \"type\": \"internal\", \"area\": \"60ff52f32ee4d50012c62628\", \"client\": \"60ff52f32ee4d50012c6262b\", \"company\": \"60ff52f32ee4d50012c6262b\", \"statusSelfAssessment\": \"updated\", \"lastInventory\": 2021-09-03 16:14:54 UTC, \"assignedProfile\": \"6100623c8ed89a6cb04e9cc5\", \"documentId\": \"1212122121212\", \"phone\": {\"code\": \"+93\", \"number\": \"121212\"}, \"seniority\": \"5f3acb8f4f14e900173dc7a2\", \"test\": \"test2\", \"idEmpresas\": [{\"id\": \"614b5a7dc4f9418ee34ec6b3\", \"name\": \"imagemaker\"}], \"companies\": [\"614bf2264d02d7ca9c383fb3\"]}",
                "@version" => "1",
           "lastInventory" => "2021-09-03T16:14:54Z",
                 "logdate" => "2021-09-20T22:16:01+00:00",
                 "country" => "AF",
                     "__v" => 0,
                "mongo_id" => "61490821d916252e0be39a82",
          "lastNameSecond" => "Duque",
    "statusSelfAssessment" => "updated",
                    "test" => "test2",
                "lastName" => "Lengua",
             "displayName" => "Camilo Andres Lengua",
                   "email" => "2666camilo.lengua@imagemaker.com",
              "documentId" => 1212122121212,
                    "name" => "Camilo Andrés",
                    "tags" => [
        [0] "_jsonparsefailure"
    ],
                "photoURL" => "https://lh3.googleusercontent.com/a-/AOh14Ggr4JljAVCJPjiDBLiawNj5eCYFZVJCVAVkdyQL=s96-c",
                    "host" => "DESKTOP-KPJ5TLR",
              "created_at" => "2021-04-20T20:19:11Z",
              "updated_at" => "2021-09-06T20:26:13Z",
              "@timestamp" => 2021-10-08T23:51:04.181Z,
              "phone_code" => 93,
            "phone_number" => 121212,
             "application" => "mongo-users",
                    "type" => "internal"
}

Any suggestions on what I can try? Not everything is passing through.

The gsub that is supposed to wrap the timestamps in quotes does not match them (its \d{5} requires five consecutive digits right after the month, and it expects a colon where the timestamp has a space), so "updated_at": 2021-09-06 20:26:13 UTC, etc. are left unmodified and the JSON parser chokes on the - after 2021.
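
Something along these lines should match them; this is just a sketch, assuming every unquoted date in log_entry looks like 2021-09-06 20:26:13 UTC:

mutate {
    # Wrap bare BSON dates such as 2021-09-06 20:26:13 UTC in double quotes
    gsub => [ "log_entry", "(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+)", '"\1"' ]
}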

Great find. I updated my config to:

input {
    mongodb {
        uri => "mongodb+srv://dev:dev123@cluster0.3wa6h.mongodb.net/Hugga?authSource=admin&replicaSet=atlas-4hv2i1-shard-0&readPreference=primary&appname=MongoDB%20Compass&ssl=true"
        placeholder_db_dir => "../opt/logstash-mongodb/"
        placeholder_db_name => "logstash_sqlite.db"
        collection => '^users$'
        batch_size => 5000
        add_field => {"application" => "mongo-users"}
    }
}
filter {
    if [application] == "mongo-users" { 
        mutate { 
            gsub => [ "log_entry", "=>", ": " ]
            rename => { "_id" => "mongo_id" }
            remove_field => ["_id"]
        }
        mutate { 
            gsub => [ "log_entry", "BSON::ObjectId\('([0-9a-z]+)'\)", '"\1"' ]
            rename => { "_id" => "mongo_id" }
        }
        mutate {
            gsub => [ "log_entry", "(\d{4}-\d{2}-\d{4}\w:\d{2}:\d{2}:.{4} \w+)", '"\1"' ]
            remove_field => ["_id"]
        }
        mutate {
            gsub => [ "log_entry", "(\d{4}-\d{2}-\d{4}\w:\d{2}:\d{2}.{4} \w+)", '"\1"' ]
            remove_field => ["_id"]
        }
        mutate {
            gsub => [ "log_entry", "(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} \w+)", '"\1"' ]
            remove_field => ["_id"]
        }
        mutate {
            gsub => [ "log_entry", "\('([0-9a-z]+)'\)", '"\1"' ]
            remove_field => ["_id"]
        }
        if "_jsonparsefailure" in [tags] {
            drop {}
        } 
        else {
            json {
                source => "log_entry"
                remove_field => ["log_entry"]
            }
        }
    }
}
output {
    if [application] == "mongo-users" {
        stdout {
            codec => rubydebug
        }
        elasticsearch {
            hosts => ["localhost:9200"]
            action => "index"
            index => "mongo_log_users"
        }
    }
}

And got this error:

{"_index"=>"mongo_log_users", "_type"=>"_doc", "_id"=>"REe1YnwBH5lqlisltgys", "status"=>400, "error"=>{"type"=>"mapper_parsing_exception", "reason"=>"failed to parse field [created_at] of type [date] in document with id 'REe1YnwBH5lqlisltgys'. Preview of field's value: '2021-04-20 20:19:11 UTC'", "caused_by"=>{"type"=>"illegal_argument_exception", "reason"=>"failed to parse date field [2021-04-20 20:19:11 UTC] with format [strict_date_optional_time||epoch_millis]", "caused_by"=>{"type"=>"date_time_parse_exception", "reason"=>"Failed to parse with all enclosed parsers"}}}}}}

I solved this date error with:

        date {
            match => [ "date" , "dd.MM.yyyy HH:mm:ss.SSS" ]
            target => "lastInventory"
        }
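
For reference, since the quoted values end up as strings like 2021-04-20 20:19:11 UTC, a date filter matched to that exact format (treating the UTC suffix as literal text) should also work. A rough sketch, with the field name taken from the error above:

date {
    # created_at arrives as e.g. "2021-04-20 20:19:11 UTC"; 'UTC' is parsed as literal text
    match => [ "created_at", "yyyy-MM-dd HH:mm:ss 'UTC'" ]
    timezone => "UTC"
    target => "created_at"
}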
