How to merge selected http headers into the doc fields

Hello, I'm trying to extract some headers from the HTTP request input and put them into the document, but I don't know how to do that. This is what I tried:

my environment:

version: '3.8'

services:
  # Single-node Elasticsearch for local development.
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.6.2
    container_name: elasticsearch
    environment:
      # Skip cluster formation — one node only.
      - discovery.type=single-node
      # Cap the JVM heap at 512 MB so the stack fits on a dev machine.
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
      # Disable auth/TLS; dev only — never expose this setup publicly.
      - xpack.security.enabled=false
    ports:
      - "9200:9200"

  logstash:
    image: docker.elastic.co/logstash/logstash:8.6.2
    container_name: logstash
    volumes:
      # Mount the pipeline definition into the default pipeline directory.
      - ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf
    ports:
      - "5044:5044"
      - "9600:9600"
      - "8080:8080" # HTTP input port for JSON API
    # NOTE(review): depends_on only orders container *start*; Logstash may
    # still come up before Elasticsearch is ready to accept connections.
    depends_on:
      - elasticsearch

  kibana:
    image: docker.elastic.co/kibana/kibana:8.6.2
    container_name: kibana
    environment:
      # Service name resolves on the default Compose network.
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    ports:
      - "5601:5601"
    depends_on:
      - elasticsearch

this is the config:

input {
  http {
    port => 8080
    codec => "json"  # Decode the request body as JSON
    # With ecs_compatibility v8 (the default on Logstash 8.x) the request
    # headers are exposed under [@metadata][input][http][request][headers],
    # which is what the filter below reads.
  }
}

filter {
  # Copy @timestamp instead of interpolating it with add_field:
  # "%{@timestamp}" would store a plain string, losing the date type,
  # so Elasticsearch could not map "timestamp" as a date.
  mutate {
    copy => { "@timestamp" => "timestamp" }
  }

  # Build the geolocation structure from the Cloudflare request headers.
  # NOTE(review): if a header is absent, add_field stores the literal
  # "%{...}" placeholder text in the field — confirm all CF headers are
  # always present, or guard with an `if [@metadata][...]` conditional.
  mutate {
    add_field => {
      "[geolocation][ip]" => "%{[@metadata][input][http][request][headers][cf-connecting-ip]}"
      "[geolocation][country]" => "%{[@metadata][input][http][request][headers][cf-ipcountry]}"
      "[geolocation][region]" => "%{[@metadata][input][http][request][headers][cf-region]}"
      "[geolocation][city]" => "%{[@metadata][input][http][request][headers][cf-ipcity]}"
      "[geolocation][continent]" => "%{[@metadata][input][http][request][headers][cf-ipcontinent]}"
      "[geolocation][location][lat]" => "%{[@metadata][input][http][request][headers][cf-iplatitude]}"
      "[geolocation][location][lon]" => "%{[@metadata][input][http][request][headers][cf-iplongitude]}"
    }
  }

  # Convert coordinates from string to float so [geolocation][location]
  # can be mapped as a geo_point in the index template.
  mutate {
    convert => {
      "[geolocation][location][lat]" => "float"
      "[geolocation][location][lon]" => "float"
    }
  }

  # This is important to make sure Logstash doesn't add anything extra
  # mutate {
  #   remove_field => ["host", "path", "headers"]  # Remove internal fields
  # }
}

output {
  elasticsearch {
    hosts => ["http://elasticsearch:9200"]
    index => "events"
    # index => "events-%{+YYYY.MM.dd}"
    # document_id => "%{id}"  # If an 'id' field is present in the JSON, use it
    # action => "index"        # Index the document
    # Use the 'document' field directly (skip wrapping)
    # source => "message" 
  }
  # Echo each event to the console for debugging.
  stdout { codec => rubydebug }
}

This is an example of a document that I will receive:

{
  "video_id": "v12345abcde",
  "session_id": "sess_87654321",
  "user_id": "user_98765",
  "event_type": "play",
  "churn_time": 35,
  "video_duration": 3600,
  "client_info": {
    "browser": "Chrome",
    "os": "Windows",
    "device": "Desktop"
  }
}

In this way, the timestamp field will be a string. You should use:
mutate {copy => { "@timestamp" => "timestamp" } }

If you have IP, you can use geoip plugin

geoip {
  source => "[source][ip]"
  ecs_compatibility => "v8"
}

If you have the user agent as source, the useragent plugin can help.

1 Like