Logstash can't convert boolean into boolean, it crash


(Sebastian Treu) #1

Hi, I'm moving data from 1 elasticsearch to another and I'm using logstash for that.

The thing is that logstash is randomly crashing in this procedure. I've monitored the heap, the queues and cpu, and so far the were OK at the moment the crash happens.

What I saw in the logs is this: https://pastebin.com/jt55syKB (using pastebin because body characters in this forum are restricted to 7000)

I could detect some issues with the convert option of the mutate filter in the logs. The only part where convert is used in the logstash that receives the input, is this one:

  ###################################################
  # every new document should have sa_manual = false
  ###################################################
  if ! [sa_manual] {
    # by default each document should be analyzed automatically
    mutate {
      add_field => { sa_manual => false }
    }
    # Logstash will add false as string "false" :(
    # Check this issue: https://github.com/elastic/logstash/issues/2987
    mutate {
      convert => { "sa_manual" => "boolean" }
    }
  }

I'm not sure how to follow this issue so far. This is only happening when migration occurs that would insert 1000 documents per scroll. This is the migration.conf from the logstash instance that do the migration:

input {
    elasticsearch {
        hosts => ["elasticsearch:9200"]
        index => "*,-.*"
        query => '{ "query": { "query_string": { "query": "NOT _exists_:is_migration" } } }'
        scroll => "5m"
        docinfo => true
    }
}

filter {
    if ![_agent] {
        mutate {
            add_field => {
                "_agent" => "logstash_migration"
            }
        }
    }

    mutate { add_field => { "is_migration" => true } }
}

output {
    # this endpoint is a load balanced proxy where logstash resides behinds it
    http {
        url => "url"
        http_method => "post"
        content_type => "application/json"
    }
}

The reason that output is HTTP is beacause logstash-migration is not exposed to internet. Neither the elasticsearch nor logstash where the data will go through. So, logstash-migration POST HTTP request into a load balancer that proxies the input into logstash, and then that logstash will output documents into the desired elasticsearch.

Any idea on this issue?


(Sebastian Treu) #2

Ok, I think I found the problem. Looks a bug for me. It seems logstash can't convert boolean to boleean, simple test:

input {
  http { port => 9443 }
}

filter {
  mutate { convert => { "foo" => "boolean" } }
}

output {
  stdout {}
}

then:

curl localhost:9443 -H 'Content-Type: application/json' -d '{ "foo": false }'

will produce:

[2019-01-13T05:09:40,768][DEBUG][logstash.pipeline        ] filter received {"event"=>{"@version"=>"1", "@timestamp"=>2019-01-13T05:09:40.632Z, "foo"=>false, "headers"=>{"http_version"=>"HTTP/1.1", "http_host"=>"localhost:9443", "http_user_agent"=>"curl/7.61.1", "http_accept"=>"*/*", "request_path"=>"/", "content_type"=>"application/json", "content_length"=>"16", "request_method"=>"POST"}, "host"=>"0:0:0:0:0:0:0:1"}}
[2019-01-13T05:09:40,901][ERROR][logstash.pipeline        ] Exception in pipelineworker, the pipeline stopped processing new events, please check your filter configuration and restart Logstash. {:pipeline_id=>"main", "exception"=>"undefined method `empty?' for false:FalseClass", "backtrace"=>["/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.3/lib/logstash/filters/mutate.rb:331:in `convert_boolean'", "org/jruby/RubyMethod.java:127:in `call'", "/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.3/lib/logstash/filters/mutate.rb:313:in `block in convert'", "org/jruby/RubyHash.java:1343:in `each'", "/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.3/lib/logstash/filters/mutate.rb:299:in `convert'", "/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.3/lib/logstash/filters/mutate.rb:252:in `filter'", "/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:143:in `do_filter'", "/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:162:in `block in multi_filter'", "org/jruby/RubyArray.java:1734:in `each'", "/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:159:in `multi_filter'", "/usr/share/logstash/logstash-core/lib/logstash/filter_delegator.rb:44:in `multi_filter'", "(eval):42:in `block in filter_func'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:341:in `filter_batch'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:320:in `worker_loop'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:286:in `block in start_workers'"], :thread=>"#<Thread:0x5fa79239 sleep>"}
[2019-01-13T05:09:40,990][FATAL][logstash.runner          ] An unexpected error occurred! {:error=>#<NoMethodError: undefined method `empty?' for false:FalseClass>, :backtrace=>["/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.3/lib/logstash/filters/mutate.rb:331:in `convert_boolean'", "org/jruby/RubyMethod.java:127:in `call'", "/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.3/lib/logstash/filters/mutate.rb:313:in `block in convert'", "org/jruby/RubyHash.java:1343:in `each'", "/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.3/lib/logstash/filters/mutate.rb:299:in `convert'", "/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/logstash-filter-mutate-3.3.3/lib/logstash/filters/mutate.rb:252:in `filter'", "/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:143:in `do_filter'", "/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:162:in `block in multi_filter'", "org/jruby/RubyArray.java:1734:in `each'", "/usr/share/logstash/logstash-core/lib/logstash/filters/base.rb:159:in `multi_filter'", "/usr/share/logstash/logstash-core/lib/logstash/filter_delegator.rb:44:in `multi_filter'", "(eval):42:in `block in filter_func'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:341:in `filter_batch'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:320:in `worker_loop'", "/usr/share/logstash/logstash-core/lib/logstash/pipeline.rb:286:in `block in start_workers'"]}
[2019-01-13T05:09:41,045][DEBUG][logstash.agent           ] Error in select: closed stream (IOError)
[2019-01-13T05:09:41,048][DEBUG][logstash.agent           ] ["org/jruby/RubyIO.java:3405:in `select'", "/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/puma-2.16.0-java/lib/puma/reactor.rb:29:in `run_internal'", "/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/puma-2.16.0-java/lib/puma/reactor.rb:138:in `block in run_in_thread'"]
[2019-01-13T05:09:41,058][DEBUG][logstash.agent           ] 2019-01-13 05:09:41 UTC: Listen loop error: java.nio.channels.ClosedSelectorException
[2019-01-13T05:09:41,059][DEBUG][logstash.agent           ] sun.nio.ch.SelectorImpl.keys(SelectorImpl.java:68)
org.jruby.util.io.SelectorPool.put(SelectorPool.java:88)
org.jruby.util.io.SelectExecutor.selectEnd(SelectExecutor.java:59)
org.jruby.util.io.SelectExecutor.go(SelectExecutor.java:44)
org.jruby.RubyIO.select(RubyIO.java:3405)
org.jruby.RubyIO$INVOKER$s$0$3$select.call(RubyIO$INVOKER$s$0$3$select.gen)
org.jruby.internal.runtime.methods.JavaMethod$JavaMethodN.call(JavaMethod.java:721)
org.jruby.internal.runtime.methods.DynamicMethod.call(DynamicMethod.java:200)
org.jruby.internal.runtime.methods.JavaMethod$JavaMethodN.call(JavaMethod.java:739)
org.jruby.ir.targets.InvokeSite.invoke(InvokeSite.java:145)
usr.share.logstash.vendor.bundle.jruby.$2_dot_3_dot_0.gems.puma_minus_2_dot_16_dot_0_minus_java.lib.puma.server.RUBY$method$handle_servers$0(/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/puma-2.16.0-java/lib/puma/server.rb:322)
usr.share.logstash.vendor.bundle.jruby.$2_dot_3_dot_0.gems.puma_minus_2_dot_16_dot_0_minus_java.lib.puma.server.RUBY$method$handle_servers$0$__VARARGS__(/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/puma-2.16.0-java/lib/puma/server.rb)
org.jruby.internal.runtime.methods.CompiledIRMethod.call(CompiledIRMethod.java:77)
org.jruby.internal.runtime.methods.MixedModeIRMethod.call(MixedModeIRMethod.java:93)
org.jruby.ir.targets.InvokeSite.invoke(InvokeSite.java:145)
usr.share.logstash.vendor.bundle.jruby.$2_dot_3_dot_0.gems.puma_minus_2_dot_16_dot_0_minus_java.lib.puma.server.RUBY$block$run$2(/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/puma-2.16.0-java/lib/puma/server.rb:296)
org.jruby.runtime.CompiledIRBlockBody.callDirect(CompiledIRBlockBody.java:145)
org.jruby.runtime.IRBlockBody.call(IRBlockBody.java:71)
org.jruby.runtime.Block.call(Block.java:124)
org.jruby.RubyProc.call(RubyProc.java:289)
org.jruby.RubyProc.call(RubyProc.java:246)
org.jruby.internal.runtime.RubyRunnable.run(RubyRunnable.java:104)
java.lang.Thread.run(Thread.java:748)
[2019-01-13T05:09:41,061][DEBUG][logstash.agent           ] 2019-01-13 05:09:41 UTC: Listen loop error: #<IOError: closed stream>
[2019-01-13T05:09:41,061][DEBUG][logstash.agent           ] org/jruby/RubyIO.java:3405:in `select'
/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/puma-2.16.0-java/lib/puma/server.rb:322:in `handle_servers'
/usr/share/logstash/vendor/bundle/jruby/2.3.0/gems/puma-2.16.0-java/lib/puma/server.rb:296:in `block in run'

(Sebastian Treu) #3

Finally, this is my workaround, ugly hack as mentioned here:

(btw, i'm gentunian answering that post, a little bit of my own medicine)

input {
  http { port => 9443 }
}

filter {
  mutate { 
    convert => {  "sa_manual" => "string" }
  }

  # Now we are sure sa_manual does not exist if this condition is true
  if ![sa_manual] {
    mutate {
      add_field => { "sa_manual" => false }
    }
  }
  # at this stage, we are sure sa_manual exists in the pipeline
  mutate {
    convert => { "sa_manual" => "boolean" }
  }
}

output { stdout {} }


#4

You may find it easier to build examples using a generator input.

input { generator { count => 1 message => '{ "foo" : false }' } }

It looks to me like the latest code has this fixed. Until November convert_boolean did

return false if value.empty? || value =~ FALSE_REGEX

which would produce exactly that exception. But now it does

return false if value.to_s.empty? || value.to_s =~ FALSE_REGEX

The commit is "Adding ability to directly convert from integer and float to boolean".

I don't understand the steps between committing a change and it getting into a release, so I don't know when this hits a release.


(Sebastian Treu) #5

Thanks for the info! I'm using docker images so I think this will have to wait.