MySQL > JDBC > Logstash > Elastic


#1

I got some Fields in MySQL that holds JSON Encoded Strings.

I want them in Elastic also to get stored as Object, but they are Stored as Stings.

When i post them normally to the API everything looks good. But i have to insert Millions of Lines, so i have to use that JDBC Thing.

How to do it?

Scripting? Reindexing? Moving the Field to _source, ???

Thanks in advance.


(Magnus B├Ąck) #2

Use a jdbc input to pull rows from the database into Logstash, then use a json filter to deserialize the JSON in one of the columns.


#3

Thank you. In the meantime i solved it with a MySQL Driver for JDBC and fetch the Data via Logstash:

input {
        jdbc {
                jdbc_driver_library => "/home/logstash/mysql-connector-java-5.1.42/mysql-connector-java-5.1.42-bin.jar"
                jdbc_driver_class => "com.mysql.jdbc.Driver"
                jdbc_connection_string => "jdbc:mysql://***.***.***.***/c1_db_etltest"
                jdbc_user => "*****"
                jdbc_password => "*****"
                statement => "SELECT CONCAT(field_121,'-',field_99) as id, field_98 as data_orig from _datasource_24"

        }
}

filter {
        mutate {
                gsub => ["data_orig","0000-00-00","1970-01-01"]
        }

        json {
                source => "data_orig"
                target => "data"
        }
}

output {
        stdout {codec => json_lines}
        elasticsearch {
                hosts => ["localhost:9200"]
                index => "etl"
                document_type => "account"
                document_id => "%{id}"
        }
}

One of the original Datasets (just a Demo) it fetches is:

{"SEQUENCE":"","EXPORT":{"EPO1OK":"X","EPO1MESSAGE":[{"TYPE":"","ID":"","NUMBER":0,"MESSAGE":"","LOG_NO":"","LOG_MSG_NO":0,"MESSAGE_V1":"","MESSAGE_V2":"","MESSAGE_V3":"","MESSAGE_V4":"","PARAMETER":"","ROW":0,"FIELD":"","SYSTEM":""}],"EPO1EXP":{"KOKRS":"1000","BELNR":"0200169583","GJAHR":2017,"VERSN":"000","VRGNG":"C","TIMESTMP":8628216750000,"PERAB":"005","PERBI":5,"BLDAT":"2017-05-05","BUDAT":"2017-05-05","CPUDT":"2017-05-05","USNAM":"ZEININGERC","REFBT":"R","REFBN":"5000000104","REFBK":"1000","REFGJ":2017,"BLART":"WL","WSDAT":"2017-05-05","KURST":"M","KWAER":"EUR","CTYP1":"30","CTYP2":"20","CTYP3":"10","CTYP4":"","AWTYP":"MKPF","AWORG":"2017","LOGSYSTEM":"","CPUTM":"10:41:15","ITEM":[{"BUZEI":1,"PERIO":5,"WTGBTR":102.91,"WOGBTR":102.91,"WKGBTR":102.91,"WKFBTR":0,"PAGBTR":0,"PAFBTR":0,"MEGBTR":1,"MEFBTR":0,"MBGBTR":1,"LEDNR":"00","OBJNR":"KS10000000004140","KOSTL":"0000004140","KOSTL_KTEXT":"Hausmeisterrei","GJAHR":2017,"WRTTP":4,"VERSN":"000","KSTAR":"0000400010","KSTAR_KTEXT":"Verbr. Rohstoffe 2","HRKFT":"","VRGNG":"C","TWAER":"EUR","OWAER":"EUR","MEINB":"ST","SGTXT":"","GKONT":"0000310000","GKOAR":"M","WERKS":"1200","MATNR":"M-18","RBEST":0,"EBELN":"","EBELP":0,"BUKRS":"1000","GSBER":"9900","FKBER":""}]}},"CALLSTATUS":{"CODE":0,"TYPE":"","SUBJECT":"","DESCRIPTION":"","TRANSACTIONID":"","MANDT":"","SYSID":""}}

My final Question is, if datas are stored optimally. As Objects. Kibana gives this, JSON formatted:

{
  "_index": "etl",
  "_type": "coposting",
  "_id": "1000-0200169583",
  "_version": 1,
  "_score": null,
  "_source": {
    "data_orig": "{\"SEQUENCE\":\"\",\"EXPORT\":{\"EPO1OK\":\"X\",\"EPO1MESSAGE\":[{\"TYPE\":\"\",\"ID\":\"\",\"NUMBER\":0,\"MESSAGE\":\"\",\"LOG_NO\":\"\",\"LOG_MSG_NO\":0,\"MESSAGE_V1\":\"\",\"MESSAGE_V2\":\"\",\"MESSAGE_V3\":\"\",\"MESSAGE_V4\":\"\",\"PARAMETER\":\"\",\"ROW\":0,\"FIELD\":\"\",\"SYSTEM\":\"\"}],\"EPO1EXP\":{\"KOKRS\":\"1000\",\"BELNR\":\"0200169583\",\"GJAHR\":2017,\"VERSN\":\"000\",\"VRGNG\":\"C\",\"TIMESTMP\":8628216750000,\"PERAB\":\"005\",\"PERBI\":5,\"BLDAT\":\"2017-05-05\",\"BUDAT\":\"2017-05-05\",\"CPUDT\":\"2017-05-05\",\"USNAM\":\"ZEININGERC\",\"REFBT\":\"R\",\"REFBN\":\"5000000104\",\"REFBK\":\"1000\",\"REFGJ\":2017,\"BLART\":\"WL\",\"WSDAT\":\"2017-05-05\",\"KURST\":\"M\",\"KWAER\":\"EUR\",\"CTYP1\":\"30\",\"CTYP2\":\"20\",\"CTYP3\":\"10\",\"CTYP4\":\"\",\"AWTYP\":\"MKPF\",\"AWORG\":\"2017\",\"LOGSYSTEM\":\"\",\"CPUTM\":\"10:41:15\",\"ITEM\":[{\"BUZEI\":1,\"PERIO\":5,\"WTGBTR\":102.91,\"WOGBTR\":102.91,\"WKGBTR\":102.91,\"WKFBTR\":0,\"PAGBTR\":0,\"PAFBTR\":0,\"MEGBTR\":1,\"MEFBTR\":0,\"MBGBTR\":1,\"LEDNR\":\"00\",\"OBJNR\":\"KS10000000004140\",\"KOSTL\":\"0000004140\",\"KOSTL_KTEXT\":\"Hausmeisterrei\",\"GJAHR\":2017,\"WRTTP\":4,\"VERSN\":\"000\",\"KSTAR\":\"0000400010\",\"KSTAR_KTEXT\":\"Verbr. Rohstoffe 2\",\"HRKFT\":\"\",\"VRGNG\":\"C\",\"TWAER\":\"EUR\",\"OWAER\":\"EUR\",\"MEINB\":\"ST\",\"SGTXT\":\"\",\"GKONT\":\"0000310000\",\"GKOAR\":\"M\",\"WERKS\":\"1200\",\"MATNR\":\"M-18\",\"RBEST\":0,\"EBELN\":\"\",\"EBELP\":0,\"BUKRS\":\"1000\",\"GSBER\":\"9900\",\"FKBER\":\"\"}]}},\"CALLSTATUS\":{\"CODE\":0,\"TYPE\":\"\",\"SUBJECT\":\"\",\"DESCRIPTION\":\"\",\"TRANSACTIONID\":\"\",\"MANDT\":\"\",\"SYSID\":\"\"}}",
    "@timestamp": "2017-05-15T15:04:29.623Z",
    "data": {
      "SEQUENCE": "",
      "EXPORT": {
        "EPO1EXP": {
          "KURST": "M",
          "VERSN": "000",
          ...
          "BELNR": "0200169583",
          "ITEM": [
            {
              "WTGBTR": 102.91,
              ...
            }
          ],
          "AWORG": "2017",
          "TIMESTMP": 8628216750000,
          "REFGJ": 2017,
          "CTYP4": "",
          "BLART": "WL",
          "CTYP3": "10",
          "CTYP2": "20",
          "CTYP1": "30"
        },
        "EPO1OK": "X",
        "EPO1MESSAGE": [
          {
            "SYSTEM": "",
            "NUMBER": 0,
            ...
          }
        ]
      },
      "CALLSTATUS": {
        "MANDT": "",
        "CODE": 0,
        "SYSID": "",
        "DESCRIPTION": "",
        "SUBJECT": "",
        "TRANSACTIONID": "",
        "TYPE": ""
      }
    },
    "@version": "1",
    "id": "1000-0200169583"
  },
  "fields": {
    "data.EXPORT.EPO1EXP.CPUDT": [
      1493942400000
    ],
    "data.EXPORT.EPO1EXP.BLDAT": [
      1493942400000
    ],
    "data.EXPORT.EPO1EXP.WSDAT": [
      1493942400000
    ],
    "@timestamp": [
      1494860669623
    ],
    "data.EXPORT.EPO1EXP.BUDAT": [
      1493942400000
    ]
  },
  "sort": [
    1494860669623
  ]
}

The _source.data_orig is not needed, i don't know how i would strip this Field out with my Filter.

Maybe it could be stored directly in _source. and not in _source.data.**

The Table View in Kibana shows:

data.CALLSTATUS.TRANSACTIONID	   	
t data.CALLSTATUS.TYPE	   	
t data.EXPORT.EPO1EXP.AWORG	   	2017
t data.EXPORT.EPO1EXP.AWTYP	   	MKPF
t data.EXPORT.EPO1EXP.BELNR	   	0200169583

-> Type = STRING. When i posted that Data via the API. Shouldn't the Type be Object/Nested...?


(system) #4

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.