How to load emails into an Elasticsearch instance

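The store_xml and xpath options of the xml filter use different XML parsers (xpath uses Nokogiri, store_xml uses XmlSimple). The xpath option does not like the xmlns attribute on the html element: with a default namespace declared, un-prefixed XPath expressions such as /html/head/... match nothing (and I'll never get back the hour and a half it took me to figure that out :frowning: )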

If you use

    # Strip the default namespace declaration from the <html> element so that
    # the un-prefixed XPath expressions below can match.
    mutate { gsub => [ "message", '<html xmlns="[^"]*"', '<html' ] }
    # Parse the XML held in "message" and copy selected attribute values into
    # event fields; the parsed document itself is not stored.
    xml {
        source => "message"
        store_xml => false
        remove_field => [ "message" ]
        # Each entry maps an XPath expression to the destination field name.
        xpath => {
            '/html/head/meta[@name="resourceName"]/@content' => "bar"
            '/html/head/meta[@name="dcterms:created"]/@content' => "foo"
        }
    }

then you will get

       "foo" => [
    [0] "2024-07-31T13:18:36Z"
],
       "bar" => [
    [0] "S12317 - Ciberseguridad - LeadsSpain (LeadsSpain@yourshortlist.com) - 2024-07-31 1518.eml"
]

Note that xpath always returns arrays, and setting force_array => false does not stop it doing so (force_array only affects the structure produced by store_xml, not xpath results).

I am happy to report that everything now works 100%. Thank you very much for all your help.
I am posting the full configuration in case someone else has the same problem.

input {
  # Read every XML file in the export directory from its first line.
  file {
    path => "/home/jaime/PRUEBA_CORREOS/*.xml"
    # No read position is persisted, so files are re-read on every start.
    sincedb_path => "/dev/null"
    start_position => "beginning"

    # Glue lines together into one event per XML document: any line that
    # does NOT start with an XML declaration is appended to the previous one.
    codec => multiline {
      pattern => "^<\?xml"
      what => "previous"
      negate => true
      # Emit the pending event after 5 seconds without new lines so the last
      # document in a file is not held back indefinitely.
      auto_flush_interval => 5
    }
  }
}

filter {
  # Capture everything between <body> and </body> into "theHTML".
  # The event is a multiline message, and "." (GREEDYDATA is ".*") does not
  # match newlines by default, so (?m) is required for a body that spans
  # several lines.
  grok { match => { "message" => "(?m)<body>%{GREEDYDATA:theHTML}</body>" } }

  # Drop the default namespace declaration on <html>; with it present the
  # un-prefixed XPath expressions below match nothing.
  mutate { gsub => [ "message", '<html xmlns="[^"]*"', '<html' ] }

  # Extract selected <meta> values from the document head. The parsed document
  # itself is not stored (store_xml => false), and the raw message is removed
  # once parsing succeeds. Note that every xpath result is an array.
  xml {
    source => "message"
    store_xml => false
    remove_field => [ "message" ]
    xpath => {
      '/html/head/meta[@name="dcterms:created"]/@content' => "Create_Date"
      '/html/head/meta[@name="Message:From-Email"]/@content' => "Message_from"
      '/html/head/meta[@name="Message-To"]/@content' => "Message_To"
      '/html/head/meta[@name="dc:title"]/@content' => "Titulo_email"
      '/html/head/meta[@name="resourceName"]/@content' => "eml_origen"
    }
  }

  # Fan out a list of {name, content} meta entries found at
  # [parsed][head][meta] into individual fields:
  #   Message:Raw-Header:<X>  ->  [headers][<X>]
  #   <anything>:<X>          ->  [otherStuff][<X>]
  # The xml filter above does not store the parsed document, so this field is
  # normally absent. The code therefore does nothing when it is missing or
  # malformed, instead of raising and tagging every event with _rubyexception.
  ruby {
    code => '
      meta = event.remove("[parsed][head][meta]")
      meta = [meta] if meta.is_a?(Hash)
      if meta.is_a?(Array)
        meta.each { |x|
          next unless x.is_a?(Hash)
          key = x["name"]
          next unless key.is_a?(String)
          if key =~ /^Message:Raw-Header:/
            newKey = key.sub(/^Message:Raw-Header:/, "")
            newKey = "[headers][#{newKey}]"
          else
            newKey = key.sub(/^.*:/, "")
            newKey = "[otherStuff][#{newKey}]"
          end
          event.set(newKey, x["content"])
        }
      end
    '
  }
}

output {
  # Print each event to the console in rubydebug format.
  stdout {
    codec => rubydebug
  }

  # Index each event into the "emails" index of the local Elasticsearch node.
  elasticsearch {
    index => "emails"
    hosts => ["http://localhost:9200"]
  }
}