Hello,
I have a problem importing the contents of a text file into Elasticsearch with the bulk API.
I am using the Elasticsearch-PHP library, but it duplicates the content.
Is there any way to bulk import data into ES without creating duplicate documents? (I have put a sketch of one idea below my code.)
Thanks so much.
My code:
<?php
require "app/init.php";

$datenow   = date(DateTime::ISO8601);
$bulk      = 1000;                                   // flush the bulk buffer every 1000 documents
$pathFile  = "./file/data2019.txt";
$basename  = strtolower(basename($pathFile, ".txt"));
$indexname = $basename;

// If the index does not exist yet, create it by indexing a placeholder document.
$indexParams['index'] = $indexname;
$check = $client->indices()->exists($indexParams);
if ($check === false) {
    $paramsx = [
        'index' => $indexname,
        'type'  => "ibank",
        'body'  => [
            'timestamp' => $datenow,
            'ip'        => "0.0.0.0",
            'username'  => "Null",
            'hash'      => "Null",
            'evidence'  => "Null",
        ]
    ];
    $response = $client->index($paramsx);
}

$max    = count(file($pathFile)); // total number of lines in the file
$handle = fopen($pathFile, "r");
$params = [];

if ($handle) {
    while (($lines = fgets($handle)) !== false) {
        // Note: this inner loop runs $max times for every single line read by fgets above.
        for ($i = 0; $i < $max; $i++) {
            $line  = explode(",", $lines);
            $time  = date(DateTime::ISO8601, strtotime($line[0]));
            $ip    = trim($line[1]);
            $uname = trim($line[2]);
            $hash  = md5($time . $ip . $uname);

            if ($ip != '') {
                // Bulk action metadata.
                $params['body'][] = [
                    'index' => [
                        '_index' => $indexname,
                        '_type'  => "ibank",
                    ]
                ];
                // Document source.
                $params['body'][] = [
                    'timestamp' => $time,
                    'ip'        => $ip,
                    'username'  => $uname,
                    'hash'      => $hash,
                    'evidence'  => $basename,
                ];

                // Flush every $bulk documents.
                if (($i + 1) % $bulk === 0) {
                    $responses = $client->bulk($params);
                    $params    = [];   // erase the old bulk request
                    unset($responses); // unset to save memory
                }
            }
        }

        // Send whatever is left in the buffer.
        if ($params) {
            $responses = $client->bulk($params);
            $params    = [];
            unset($responses);
        }
    }

    fclose($handle);
}
?>
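One idea I had, but have not tested: give each document a fixed `_id` built from the md5 hash, so that re-indexing the same line would overwrite the existing document instead of creating a new one. This is just a sketch of how the action metadata inside my loop would change (same `$indexname`, `$hash`, etc. as in the code above). Would this be the right approach?

<?php
// Sketch only: set a deterministic _id in the bulk action metadata
// so the same input line always maps to the same document.
$params['body'][] = [
    'index' => [
        '_index' => $indexname,
        '_type'  => "ibank",
        '_id'    => $hash, // md5 of time + ip + username from the line
    ]
];
// Document source stays the same as before.
$params['body'][] = [
    'timestamp' => $time,
    'ip'        => $ip,
    'username'  => $uname,
    'hash'      => $hash,
    'evidence'  => $basename,
];
?>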