Private ip geoip from dictionary


(Phill Pafford) #1

This is related to my other question, "Private ip geoip JSON object / structure and some questions", but this thread covers option 2: looking the geoip data up from a dictionary.

some other useful resources

my logstash config

# Read events from the Redis list stored under the key "filebeat" on host.local.
input {
  redis {
    # Where to connect, then what to read there.
    host      => "host.local"
    data_type => "list"
    key       => "filebeat"
  }
}

filter {
  # Beat / LSF compatibility: an event that has [beat][hostname] and [source]
  # but no [file] gets a "file" field holding the value of "source".
  if [beat][hostname] and [source] and ![file] {
    mutate {
      add_field => { "file" => "%{source}" }
    }
  }

  ## Apache
  ## Reference: https://www.elastic.co/guide/en/logstash/current/logstash-config-for-filebeat-modules.html
  ## Handles events whose [fileset][module] is "apache2"; the "access" and
  ## "error" filesets each get their own grok/date treatment.
  if [fileset][module] == "apache2" {
    if [fileset][name] == "access" {
      # Split the access-log line into [apache2][access][*] fields. The two
      # patterns are alternatives; "message" is removed only when grok succeeds.
      grok {
        match => { "message" => ["%{IPORHOST:[apache2][access][remote_ip]} - %{DATA:[apache2][access][user_name]} \[%{HTTPDATE:[apache2][access][time]}\] \"%{WORD:[apache2][access][method]} %{DATA:[apache2][access][url]} HTTP/%{NUMBER:[apache2][access][http_version]}\" %{NUMBER:[apache2][access][response_code]} %{NUMBER:[apache2][access][body_sent][bytes]}( \"%{DATA:[apache2][access][referrer]}\")?( \"%{DATA:[apache2][access][agent]}\")?",
          "%{IPORHOST:[apache2][access][remote_ip]} - %{DATA:[apache2][access][user_name]} \\[%{HTTPDATE:[apache2][access][time]}\\] \"-\" %{NUMBER:[apache2][access][response_code]} -" ] }
        remove_field => "message"
      }
      # Keep a copy of the current @timestamp in "read_timestamp" before the
      # date filter below parses the log's own time.
      mutate {
        add_field => { "read_timestamp" => "%{@timestamp}" }
      }
      # Parse the request time captured by grok, then drop the raw string.
      date {
        match => [ "[apache2][access][time]", "dd/MMM/YYYY:H:m:s Z" ]
        remove_field => "[apache2][access][time]"
      }
      # Expand the User-Agent string into [apache2][access][user_agent] and
      # drop the raw agent string.
      useragent {
        source => "[apache2][access][agent]"
        target => "[apache2][access][user_agent]"
        remove_field => "[apache2][access][agent]"
      }
      # GeoIP lookup on the client address, stored at [apache2][access][geoip].
      # NOTE(review): private addresses presumably do not resolve here (the
      # thread later removes a "_geoip_lookup_failure" tag) - confirm.
      geoip {
        source => "[apache2][access][remote_ip]"
        target => "[apache2][access][geoip]"
      }
      # Tag every access event that went through this branch.
      mutate {
        add_tag => [ "unique-tag-name" ]
      }
    }
    else if [fileset][name] == "error" {
      # Two error-log layouts are accepted. APACHE_TIME is a custom pattern
      # defined inline below. The second layout captures its text into
      # "message1" so it does not clash with the first pattern's "message".
      grok {
        match => { "message" => ["\[%{APACHE_TIME:[apache2][error][timestamp]}\] \[%{LOGLEVEL:[apache2][error][level]}\]( \[client %{IPORHOST:[apache2][error][client]}\])? %{GREEDYDATA:[apache2][error][message]}",
          "\[%{APACHE_TIME:[apache2][error][timestamp]}\] \[%{DATA:[apache2][error][module]}:%{LOGLEVEL:[apache2][error][level]}\] \[pid %{NUMBER:[apache2][error][pid]}(:tid %{NUMBER:[apache2][error][tid]})?\]( \[client %{IPORHOST:[apache2][error][client]}\])? %{GREEDYDATA:[apache2][error][message1]}" ] }
        pattern_definitions => {
          "APACHE_TIME" => "%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}"
        }
        remove_field => "message"
      }
      # Normalise the second layout's "message1" back to "message".
      mutate {
        rename => { "[apache2][error][message1]" => "[apache2][error][message]" }
      }
      # Parse the error timestamp (with or without fractional seconds), then
      # drop the raw string.
      date {
        match => [ "[apache2][error][timestamp]", "EEE MMM dd H:m:s YYYY", "EEE MMM dd H:m:s.SSSSSS YYYY" ]
        remove_field => "[apache2][error][timestamp]"
      }
    }
  }

  ## add geoip data for private ip address
  ## The dictionary value (or the fallback) is a JSON string of the form
  ## {"geoip":{...}}. It is staged under [@metadata], which is not sent to the
  ## outputs, so no stray top-level "geoip" string field reaches Elasticsearch.
  if [apache2][access][remote_ip] {
    translate {
        exact => true
        regex => true
        override => true
        field => "[apache2][access][remote_ip]"
        destination => "[@metadata][private_geoip_json]"
        dictionary_path => "/path/to/mutate/static-ip-geoip-mapping.yml"
        # Single-quoted so the JSON can use plain double quotes: with escape
        # processing off (the default, see config.support_escapes) backslash-
        # escaped quotes would stay in the value and break JSON parsing.
        # "location" uses the geo_point array order [lon, lat].
        fallback => '{"geoip":{"timezone":"America/Anchorage","continent_code":"NA","country_name":"United States","region_code":"AK","country_code2":"US","country_code3":"US","region_name":"Alaska","city_name":"Anchorage","latitude":61.19,"longitude":-149.8938,"location":[-149.8938,61.19]}}'
    }
    json {
        source => "[@metadata][private_geoip_json]"
        target => "[@metadata][private_geoip]"
    }
    # Move the inner "geoip" object into place. rename replaces whatever is
    # already stored at [apache2][access][geoip], and it avoids ending up
    # with a nested [apache2][access][geoip][geoip].
    if [@metadata][private_geoip][geoip] {
      mutate {
        rename => { "[@metadata][private_geoip][geoip]" => "[apache2][access][geoip]" }
      }
    }
  }
  ## add tags for private ip address
  ## The dictionary value is a JSON string of the form {"add_tags":[...]}.
  ## It is staged under [@metadata] rather than written to "tags", so the
  ## event's existing tag array is never replaced by a raw JSON string.
  if [apache2][access][remote_ip] {
    translate {
        exact => true
        regex => true
        override => true
        field => "[apache2][access][remote_ip]"
        destination => "[@metadata][private_ip_tags_json]"
        dictionary_path => "/path/to/mutate/static-ip-tag-mapping.yml"
        # The fallback has the same JSON shape as the dictionary values so the
        # json filter below can parse it instead of failing on plain text.
        fallback => '{"add_tags":["Unmapped IP Range in the logs, ask networking team for mapping"]}'
    }
    json {
        source => "[@metadata][private_ip_tags_json]"
        target => "[@metadata][private_ip_tags]"
    }
    # Append the looked-up tags to the event's "tags" array.
    if [@metadata][private_ip_tags][add_tags] {
      mutate {
        merge => { "tags" => "[@metadata][private_ip_tags][add_tags]" }
      }
    }
  }

}

# Send every event to Elasticsearch on host.local:9200 with SSL enabled.
# The index name is built from the event timestamp as logstash-YYYY.MM.
output {
  elasticsearch {
    index => "logstash-%{+YYYY.MM}"
    hosts => ["host.local:9200"]
    ssl   => true
  }
}

static-ip-geoip-mapping.yml

# Keys are regular expressions (the translate filter uses regex => true).
# They are anchored and the dots are escaped so that each key matches only
# addresses in its own /24 range, not any string that merely contains the digits.
# "location" uses the geo_point array order [lon, lat].
'^123\.123\.123\.\d{1,3}$': '{"geoip":{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":42.5597,"longitude":-83.1138,"location":[-83.1138,42.5597]}}'
'^321\.321\.321\.\d{1,3}$': '{"geoip":{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":42.5597,"longitude":-83.1138,"location":[-83.1138,42.5597]}}'
'^111\.111\.111\.\d{1,3}$': '{"geoip":{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":28.046,"longitude":-82.6848,"location":[-82.6848,28.046]}}'
'^888\.888\.888\.\d{1,3}$': '{"geoip":{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":28.046,"longitude":-82.6848,"location":[-82.6848,28.046]}}'

static-ip-tag-mapping.yml

# Keys are regular expressions (the translate filter uses regex => true).
# They are anchored and the dots are escaped so that each key matches only
# addresses in its own /24 range, not any string that merely contains the digits.
'^123\.123\.123\.\d{1,3}$': '{"add_tags":["User Group A","Test1"]}'
'^321\.321\.321\.\d{1,3}$': '{"add_tags":["User Group B","Test1"]}'
'^111\.111\.111\.\d{1,3}$': '{"add_tags":["User Group A","Test2"]}'
'^888\.888\.888\.\d{1,3}$': '{"add_tags":["User Group B","Test3"]}'

The idea is to add the geoip data from a dictionary lookup, then do a second dictionary lookup to add a couple of tags as well. I'd like to do this in one dictionary lookup, but I did not see how to pull and set the data. If no match is found for the IP, it should default to the fallback option (I think this is how fallback is used).

also the field

[apache2][access][geoip]

might exist so I'd like to overwrite it as well


(Phill Pafford) #2

another link for reference


(Phill Pafford) #3

ok after much debugging, why can't I update/overwrite/ merge/replace/copy

[apache2][access][geoip]

if I change this in the JSON to

[location][access][geoip]

this works

# Clear any existing [apache2][access][geoip] value and the
# "_geoip_lookup_failure" tag before doing the dictionary lookup.
mutate {
    remove_field => [ "[apache2][access][geoip]" ]
    remove_tag => [ "_geoip_lookup_failure" ]
  }

  # Look the remote IP up in the dictionary; the matched value (or the
  # fallback JSON string) is written to the temporary field "geo_point".
  # NOTE(review): regex => true is not set here, so dictionary keys are
  # presumably compared literally and wildcard keys never match - confirm.
  translate {
    field => "[apache2][access][remote_ip]"
    destination => "geo_point"
    dictionary_path => "/path/to/mutate/static-ip-geoip-mapping.yml"
    fallback => '{"timezone":"America/Anchorage","continent_code":"NA","country_name":"United States","region_code":"AK","country_code2":"US","country_code3":"US","region_name":"Alaska","city_name":"Anchorage","latitude":61.19,"longitude":-149.8938,"location":[61.19,-149.8938]}'
  }
  # Parse the JSON string into an object stored at [location][access][geoip].
  json {
    source => "geo_point"
    target => "[location][access][geoip]"
  }

  # Drop the temporary JSON string field.
  mutate {
    remove_field => [ "geo_point" ]
  }

but this does not (only changed the word apache2 to location in the JSON target)

# Clear any existing [apache2][access][geoip] value and the
# "_geoip_lookup_failure" tag before doing the dictionary lookup.
mutate {
    remove_field => [ "[apache2][access][geoip]" ]
    remove_tag => [ "_geoip_lookup_failure" ]
  }

  # Look the remote IP up in the dictionary; the matched value (or the
  # fallback JSON string) is written to the temporary field "geo_point".
  # NOTE(review): regex => true is not set here, so dictionary keys are
  # presumably compared literally and wildcard keys never match - confirm.
  translate {
    field => "[apache2][access][remote_ip]"
    destination => "geo_point"
    dictionary_path => "/path/to/mutate/static-ip-geoip-mapping.yml"
    fallback => '{"timezone":"America/Anchorage","continent_code":"NA","country_name":"United States","region_code":"AK","country_code2":"US","country_code3":"US","region_name":"Alaska","city_name":"Anchorage","latitude":61.19,"longitude":-149.8938,"location":[61.19,-149.8938]}'
  }
  # Parse the JSON string into an object stored at [apache2][access][geoip]
  # (the only difference from the [location][access][geoip] variant).
  json {
    source => "geo_point"
    target => "[apache2][access][geoip]"
  }

  # Drop the temporary JSON string field.
  mutate {
    remove_field => [ "geo_point" ]
  }

(Phill Pafford) #4

ugh...

I changed the target to [apache2][access][phill] instead of [apache2][access][geoip] and it works, so why can't I save it to [apache2][access][geoip]? Very frustrating.

"apache2": {
      "access": {
        "user_name": "-",
        "url": "/version/",
        "http_version": "1.1",
        "remote_ip": "1.2.34.56",
        "response_code": "200",
        "referrer": "-",
        "phill": {
          "longitude": -149.8938,
          "region_code": "AK",
          "latitude": 61.19,
          "continent_code": "NA",
          "timezone": "America/Anchorage",
          "location": [
            61.19,
            -149.8938
          ],
          "region_name": "Alaska",
          "country_name": "United States",
          "country_code2": "US",
          "country_code3": "US",
          "city_name": "Anchorage"
        },
        "body_sent": {
          "bytes": "443"
        },
        "user_agent": {
          "device": "Other",
          "name": "Other",
          "os": "Other",
          "os_name": "Other",
          "build": ""
        },
        "method": "GET"
      }
    }

(Phill Pafford) #5

ok adding [lat] and [lon] to the location object and then casting them to float seems to have created the geo_point object in logstash

'123.123.123.*': '{"geoip":{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":42.5597,"longitude":-83.1138,"location":["lat":42.5597,"lon":-83.1138]}}'
'321.321.321.*': '{"geoip":{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":42.5597,"longitude":-83.1138,"location":["lat":42.5597,"lon":-83.1138]}}'
'111.111.111.*': '{"geoip":{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":28.046,"longitude":-82.6848,"location":["lat":28.046,"lon":-82.6848]}}'
'888.888.888.*': '{"geoip":{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":28.046,"longitude":-82.6848,"location":["lat": 28.046,"lon":-82.6848]}}'

adding this mutate

convert => [ "[apache2][access][geoip][location][lat]", "float"]
convert => [ "[apache2][access][geoip][location][lon]", "float"]
convert => [ "[apache2][access][geoip][latitude]", "float"]
convert => [ "[apache2][access][geoip][longitude]", "float"]

but the geo_point object is not at [apache2][access][geoip]

and it's escaped? does this need to be escaped?

"geo_point": "{\"timezone\":\"America/New_York\",\"continent_code\":\"NA\",\"country_name\":\"United States\",\"region_code\":\"NY\",\"country_code2\":\"US\",\"country_code3\":\"US\",\"region_name\":\"New York\",\"city_name\":\"New York\",\"latitude\":42.5597,\"longitude\":-83.1138,\"location\":[\"lat\": 42.5597,\"lon\": -83.1138],\"dma_code\":\"235\",\"postal_code\":\"20002\"}"


(Phill Pafford) #6

OK, so I had some bad JSON. It is fixed below, following example 1 at https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-point.html

# Keys are regular expressions (the translate filter uses regex => true).
# They are anchored and the dots are escaped so that each key matches only
# addresses in its own /24 range.
# Values are the bare geoip object (no outer "geoip" wrapper), the same shape
# as the fallback, so parsing them into [apache2][access][geoip] does not
# produce a nested [apache2][access][geoip][geoip].
'^123\.123\.123\.\d{1,3}$': '{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":42.5597,"longitude":-83.1138,"location":{"lat":42.5597,"lon":-83.1138}}'
'^321\.321\.321\.\d{1,3}$': '{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":42.5597,"longitude":-83.1138,"location":{"lat":42.5597,"lon":-83.1138}}'
'^111\.111\.111\.\d{1,3}$': '{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":28.046,"longitude":-82.6848,"location":{"lat":28.046,"lon":-82.6848}}'
'^888\.888\.888\.\d{1,3}$': '{"timezone":"America/New_York","continent_code":"NA","country_name":"United States","region_code":"NY","country_code2":"US","country_code3":"US","region_name":"New York","city_name":"New York","latitude":28.046,"longitude":-82.6848,"location":{"lat":28.046,"lon":-82.6848}}'

(Phill Pafford) #7

ok looks like I figured it out. after fixing the bad JSON I can now add the geoip data to apache access

here is my apache code block

# Handles events whose [fileset][module] is "apache2"; the "access" and
# "error" filesets each get their own grok/date treatment.
if [fileset][module] == "apache2" {
    if [fileset][name] == "access" {
      ## Debug aid: keep the raw line in "original_message" (grok removes
      ## "message" on success). Remove after debugging.
      mutate {
        copy => { "message" => "original_message" }
      }
      # Split the access-log line into [apache2][access][*] fields. The two
      # patterns are alternatives; "message" is removed only when grok succeeds.
      grok {
        match => { "message" => ["%{IPORHOST:[apache2][access][remote_ip]} - %{DATA:[apache2][access][user_name]} \[%{HTTPDATE:[apache2][access][time]}\] \"%{WORD:[apache2][access][method]} %{DATA:[apache2][access][url]} HTTP/%{NUMBER:[apache2][access][http_version]}\" %{NUMBER:[apache2][access][response_code]} %{NUMBER:[apache2][access][body_sent][bytes]}( \"%{DATA:[apache2][access][referrer]}\")?( \"%{DATA:[apache2][access][agent]}\")?",
          "%{IPORHOST:[apache2][access][remote_ip]} - %{DATA:[apache2][access][user_name]} \\[%{HTTPDATE:[apache2][access][time]}\\] \"-\" %{NUMBER:[apache2][access][response_code]} -" ] }
        remove_field => "message"
      }
      # Keep a copy of the current @timestamp in "read_timestamp" before the
      # date filter below parses the log's own time.
      mutate {
        add_field => { "read_timestamp" => "%{@timestamp}" }
      }
      # Parse the request time captured by grok, then drop the raw string.
      date {
        match => [ "[apache2][access][time]", "dd/MMM/YYYY:H:m:s Z" ]
        remove_field => "[apache2][access][time]"
      }
      # Expand the User-Agent string into [apache2][access][user_agent] and
      # drop the raw agent string.
      useragent {
        source => "[apache2][access][agent]"
        target => "[apache2][access][user_agent]"
        remove_field => "[apache2][access][agent]"
      }
      # Copy the client address to a top-level "remote_ip" field.
      mutate {
        copy => { "[apache2][access][remote_ip]" => "remote_ip" }
      }
    }
    else if [fileset][name] == "error" {
      # Two error-log layouts are accepted. APACHE_TIME is a custom pattern
      # defined inline below. The second layout captures its text into
      # "message1" so it does not clash with the first pattern's "message".
      grok {
        match => { "message" => ["\[%{APACHE_TIME:[apache2][error][timestamp]}\] \[%{LOGLEVEL:[apache2][error][level]}\]( \[client %{IPORHOST:[apache2][error][client]}\])? %{GREEDYDATA:[apache2][error][message]}",
          "\[%{APACHE_TIME:[apache2][error][timestamp]}\] \[%{DATA:[apache2][error][module]}:%{LOGLEVEL:[apache2][error][level]}\] \[pid %{NUMBER:[apache2][error][pid]}(:tid %{NUMBER:[apache2][error][tid]})?\]( \[client %{IPORHOST:[apache2][error][client]}\])? %{GREEDYDATA:[apache2][error][message1]}" ] }
        pattern_definitions => {
          "APACHE_TIME" => "%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}"
        }
        remove_field => "message"
      }
      # Normalise the second layout's "message1" back to "message".
      mutate {
        rename => { "[apache2][error][message1]" => "[apache2][error][message]" }
      }
      # Parse the error timestamp (with or without fractional seconds), then
      # drop the raw string.
      date {
        match => [ "[apache2][error][timestamp]", "EEE MMM dd H:m:s YYYY", "EEE MMM dd H:m:s.SSSSSS YYYY" ]
        remove_field => "[apache2][error][timestamp]"
      }
    }
  }

here is my dictionary code block

# Dictionary-based geoip data for events that carry a top-level "remote_ip".
# The matched dictionary value (or the fallback) is a JSON string; it is
# parsed into [apache2][access][geoip] and the temporary field is removed.
if [remote_ip] {
    translate {
      exact => true
      regex => true
      override => true
      field => "[remote_ip]"
      destination => "geo_point"
      dictionary_path => "/usr/share/logstash/mutate/static-ip-geoip-mapping.yml"
      # "location" is a {"lat","lon"} object, the same shape the dictionary
      # entries use. A two-element geo_point array is read as [lon, lat], so
      # [61.19,-149.8938] would put -149.8938 in the latitude slot.
      fallback => '{"timezone":"America/Anchorage","continent_code":"NA","country_name":"United States","region_code":"AK","country_code2":"US","country_code3":"US","region_name":"Alaska","city_name":"Anchorage","latitude":61.19,"longitude":-149.8938,"location":{"lat":61.19,"lon":-149.8938},"dma_code":"743","postal_code":"99503"}'
      refresh_behaviour => "replace"
    }
    # On a successful parse: record the looked-up IP, tag the event, and drop
    # the temporary JSON string.
    json {
      source => "geo_point"
      target => "[apache2][access][geoip]"
      add_field => { "[apache2][access][geoip][ip]" => "%{remote_ip}" }
      add_tag => [ "private-ip" ]
      remove_field => [ "geo_point" ]
    }
    # Clear the "_geoip_lookup_failure" tag if it is present on the event.
    mutate {
      remove_tag => [ "_geoip_lookup_failure" ]
    }
}

now off to improve this so I can have private and public for all geoip lookups


Private ip geoip JSON object / structure and some questions
(Mark Walkom) #8

Thanks for sharing your solution!