Convert a CSV file column to an array of JSON objects

I have a CSV file in which one column contains an array of JSON objects:

column1, column2, array_json_col, column4, ...
item1, item2, "[{'type': 'StillImage', 'format': 'image/jpeg', 'url',: 'https://sample.url'}, {'type': 'StillImage', 'format': 'image/jpeg', 'url',: 'https://sample.url'}]", item4, ...
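What I'd like is for that column to end up in Elasticsearch as real objects rather than a string, something like this (the media field name matches the one I set in my ruby filter below):

    "media": [
      { "type": "StillImage", "format": "image/jpeg", "url": "https://sample.url" },
      { "type": "StillImage", "format": "image/jpeg", "url": "https://sample.url" }
    ]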

When I try this configuration:

input {
  file {
    path => "/usr/share/logstash/ingest_data/base_dataset_v2.csv"
    start_position => "beginning"
    sincedb_path => "/usr/share/logstash/data/plugins/inputs/file/.sincedb"
  }
}

filter {
    csv {
        separator => ","
        columns => ["column1", "column2", "array_json_col", "column4"]
    }
}

filter {
    ruby {
        code => "
          require 'json'
          # parse the bracketed string in [array_json_col] into an array of objects
          media = event.get('array_json_col')
          media = JSON.parse(media)
          event.set('media', media)
        "
    }
}

output {
  elasticsearch {
    index => "idx_001"
    hosts => ["http://elasticsearch:9200"]
  }
}

I'm unable to convert array_json_col to an array of JSON objects; the column is instead indexed as one plain string: "[{'type': 'StillImage', 'format': 'image/jpeg', 'url',: 'https://sample.url'}, {'type': 'StillImage', 'format': 'image/jpeg', 'url',: 'https://sample.url'}]".
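I suspect JSON.parse fails because the value is not valid JSON in the first place (single quotes instead of double quotes, plus the stray ,: after 'url'); when the parse raises, the ruby filter tags the event with _rubyexception and the field stays a string. A quick check in plain Ruby, outside Logstash, reproduces the error:

    require 'json'

    s = "[{'type': 'StillImage', 'format': 'image/jpeg'}]"
    JSON.parse(s)  # raises JSON::ParserError: single-quoted keys/values are not valid JSON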

Can someone help me with the right logstash.conf to solve this issue?

You could try:

    mutate { gsub => [ "message", ', "', ',"' ] }
    csv {
        separator => ","
        columns => ["column1", "column2", "array_json_col", "column4"]
    }
    mutate { gsub => [ "array_json_col", "'", '"', "array_json_col", '",:', '":' ] }
    json { source => "array_json_col" target => "media" }
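The first gsub deletes the space between the separator and the opening quote; the csv parser only honors a quote that immediately follows the separator, so with the leading space the quoting is not recognized and the row fails to parse as intended. The second gsub then rewrites the pseudo-JSON into valid JSON (single quotes become double quotes, and the malformed ',: sequence becomes ':) so the json filter can parse it into the media field. With your sample row, the field value should change roughly like this at each stage:

    after csv:   [{'type': 'StillImage', 'format': 'image/jpeg', 'url',: 'https://sample.url'}, ...]
    after gsub:  [{"type": "StillImage", "format": "image/jpeg", "url": "https://sample.url"}, ...]
    after json:  [media] holds an array of two objects with keys type, format, and url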
