Import bulk from TXT file to ES via Elasticsearch-PHP not duplicate

Hello,

I have problem when import content in text file to ES via bulk function.
I used Elasticsearch-PHP lib but it duplicate content.

Any way to import data ES via bulk and not duplicate document?

Thanks so much.

My code:

<?php

require "app/init.php";
$datenow = date(DateTime::ISO8601);
$bulk = 1000;
$pathFile = "./file/data2019.txt"; 

$basename = strtolower(basename($pathFile, ".txt"));
$indexname = $basename;
$indexParams['index'] = $indexname;
$check = $client->indices()->exists($indexParams);

if ($check == FALSE) {
    $paramsx = [
        'index' => $indexname,
        'type' => "ibank",
        'body' => [
            'timestamp' => $datenow,
            'ip' => "0.0.0.0",
            'username' => "Null",
            'hash' => "Null",
            'evidence' => "Null",
        ]
    ];
    $response = $client->index($paramsx);
}
$max = count(file($pathFile));
$handle = fopen($pathFile, "r");
if ($handle) {
    while (($lines = fgets($handle)) !== false) {
        for ($i = 0; $i < $max; $i++) {
            $line = explode(",", $lines);
            $time = date(DateTime::ISO8601, strtotime($line['0']));
            $ip = trim($line['1']);
            $uname = trim($line['2']);
            $hash = md5($time . $ip . $uname);

            if ($ip != '') {
                $params['body'][] = array(
                    'index' => array(
                        '_index' => $indexname,
                        '_type' => "ibank",
                    )
                );
                $params['body'][] = array(
                    'timestamp' => $time,
                    'ip' => $ip,
                    'username' => $uname,
                    'hash' => $hash,
                    'evidence' => $basename,
                );
                if (($i + 1) % $bulk === 0) {
                    $responses = $client->bulk($params);
                    $params = array();  // erase the old bulk request
                    unset($responses); // unset  to save memory
                }
            }
        }
        if ($params) {
            $responses = $client->bulk($params);
            $params = array();
            unset($responses);
        }
    }
    fclose($handle);
}
?>

This topic was automatically closed 28 days after the last reply. New replies are no longer allowed.