Hi,
I have Elasticsearch and FSCrawler running on the same server. The crawler used to index remote files without problems, but today it started showing errors while creating the index. I am not able to index the files, and when I run it with --trace it shows openSSHConnection warnings. I did not change anything in the installation; the only recent change is that I updated the remote server password. I have entered the correct details in the settings file, and I am able to SSH from the server where the crawler is running to the remote server, but I can't seem to find the issue.
Below are the logs. Could you please give me some insight into what they show?
Logs:
[root@li393-89 fscrawler-es7-2.7-SNAPSHOT]# bin/fscrawler --config_dir test_remote_index remote_index --loop 1 --trace
03:21:38,612 INFO [f.p.e.c.f.c.BootstrapChecks] Memory [Free/Total=Percent]: HEAP [145.3mb/1.7gb=8.23%], RAM [220.2mb/7.7gb=2.78%], Swap [509.4mb/511.9mb=99.51%].
03:21:38,618 DEBUG [f.p.e.c.f.f.FsCrawlerUtil] Mapping [6/_settings.json] already exists
03:21:38,618 DEBUG [f.p.e.c.f.f.FsCrawlerUtil] Mapping [6/_settings_folder.json] already exists
03:21:38,618 DEBUG [f.p.e.c.f.f.FsCrawlerUtil] Mapping [7/_settings.json] already exists
03:21:38,619 DEBUG [f.p.e.c.f.f.FsCrawlerUtil] Mapping [7/_settings_folder.json] already exists
03:21:38,620 DEBUG [f.p.e.c.f.c.FsCrawlerCli] Starting job [remote_index]...
03:21:38,997 TRACE [f.p.e.c.f.c.FsCrawlerCli] settings used for this crawler: [---
name: "remote_index"
fs:
url: "/tmp/es"
update_rate: "15m"
excludes:
- "*/~*"
json_support: false
filename_as_id: false
add_filesize: true
remove_deleted: true
add_as_inner_object: false
store_source: false
index_content: true
attributes_support: true
raw_metadata: false
xml_support: false
index_folders: true
lang_detect: false
continue_on_error: false
ocr:
language: "eng"
enabled: true
pdf_strategy: "ocr_and_text"
follow_symlinks: false
server:
hostname: "45.xx.xx.xx"
port: 19
username: "xxusr"
protocol: "ssh"
elasticsearch:
nodes:
- url: "http://127.0.0.1:9200"
bulk_size: 5
flush_interval: "5s"
byte_size: "10mb"
]
03:21:39,003 DEBUG [f.p.e.c.f.c.ElasticsearchClientUtil] Trying to find a client version 7
03:21:39,003 TRACE [f.p.e.c.f.c.ElasticsearchClientUtil] Trying to find a class named [fr.pilato.elasticsearch.crawler.fs.client.v7.ElasticsearchClientV7]
03:21:39,014 TRACE [f.p.e.c.f.c.ElasticsearchClientUtil] Found [fr.pilato.elasticsearch.crawler.fs.client.v7.ElasticsearchClientV7] class as the elasticsearch client implementation.
03:21:39,680 INFO [f.p.e.c.f.c.v.ElasticsearchClientV7] Elasticsearch Client for version 7.x connected to a node running version 7.5.1
03:21:39,752 INFO [f.p.e.c.f.FsCrawlerImpl] Starting FS crawler
03:21:39,755 DEBUG [f.p.e.c.f.c.v.ElasticsearchClientV7] FS crawler connected to an elasticsearch [7.5.1] node.
03:21:39,756 DEBUG [f.p.e.c.f.c.v.ElasticsearchClientV7] create index [remote_index]
03:21:39,756 TRACE [f.p.e.c.f.c.v.ElasticsearchClientV7] index settings: [{
"settings": {
"number_of_shards": 1,
"index.mapping.total_fields.limit": 2000,
"analysis": {
"analyzer": {
"fscrawler_path": {
"tokenizer": "fscrawler_path"
}
},
"tokenizer": {
"fscrawler_path": {
"type": "path_hierarchy"
}
}
}
},
"mappings": {
"dynamic_templates": [
{
"raw_as_text": {
"path_match": "meta.raw.*",
"mapping": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
],
"properties": {
"attachment": {
"type": "binary",
"doc_values": false
},
"attributes": {
"properties": {
"group": {
"type": "keyword"
},
"owner": {
"type": "keyword"
}
}
},
"content": {
"type": "text"
},
"file": {
"properties": {
"content_type": {
"type": "keyword"
},
"filename": {
"type": "keyword",
"store": true
},
"extension": {
"type": "keyword"
},
"filesize": {
"type": "long"
},
"indexed_chars": {
"type": "long"
},
"indexing_date": {
"type": "date",
"format": "dateOptionalTime"
},
"created": {
"type": "date",
"format": "dateOptionalTime"
},
"last_modified": {
"type": "date",
"format": "dateOptionalTime"
},
"last_accessed": {
"type": "date",
"format": "dateOptionalTime"
},
"checksum": {
"type": "keyword"
},
"url": {
"type": "keyword",
"index": false
}
}
},
"meta": {
"properties": {
"author": {
"type": "text"
},
"date": {
"type": "date",
"format": "dateOptionalTime"
},
"keywords": {
"type": "text"
},
"title": {
"type": "text"
},
"language": {
"type": "keyword"
},
"format": {
"type": "text"
},
"identifier": {
"type": "text"
},
"contributor": {
"type": "text"
},
"coverage": {
"type": "text"
},
"modifier": {
"type": "text"
},
"creator_tool": {
"type": "keyword"
},
"publisher": {
"type": "text"
},
"relation": {
"type": "text"
},
"rights": {
"type": "text"
},
"source": {
"type": "text"
},
"type": {
"type": "text"
},
"description": {
"type": "text"
},
"created": {
"type": "date",
"format": "dateOptionalTime"
},
"print_date": {
"type": "date",
"format": "dateOptionalTime"
},
"metadata_date": {
"type": "date",
"format": "dateOptionalTime"
},
"latitude": {
"type": "text"
},
"longitude": {
"type": "text"
},
"altitude": {
"type": "text"
},
"rating": {
"type": "byte"
},
"comments": {
"type": "text"
}
}
},
"path": {
"properties": {
"real": {
"type": "keyword",
"fields": {
"tree": {
"type": "text",
"analyzer": "fscrawler_path",
"fielddata": true
},
"fulltext": {
"type": "text"
}
}
},
"root": {
"type": "keyword"
},
"virtual": {
"type": "keyword",
"fields": {
"tree": {
"type": "text",
"analyzer": "fscrawler_path",
"fielddata": true
},
"fulltext": {
"type": "text"
}
}
}
}
}
}
}
}
]
03:21:39,962 DEBUG [f.p.e.c.f.c.v.ElasticsearchClientV7] wait for yellow health on index [remote_index]
03:21:39,978 TRACE [f.p.e.c.f.c.v.ElasticsearchClientV7] health response: {"cluster_name":"elasticsearch","status":"yellow","timed_out":false,"number_of_nodes":1,"number_of_data_nodes":1,"active_primary_shards":1,"active_shards":1,"relocating_shards":0,"initializing_shards":0,"unassigned_shards":1,"delayed_unassigned_shards":0,"number_of_pending_tasks":0,"number_of_in_flight_fetch":0,"task_max_waiting_in_queue_millis":0,"active_shards_percent_as_number":53.84615384615385}
03:21:39,979 DEBUG [f.p.e.c.f.c.v.ElasticsearchClientV7] create index [remote_index_folder]
03:21:39,979 TRACE [f.p.e.c.f.c.v.ElasticsearchClientV7] index settings: [{
"settings": {
"analysis": {
"analyzer": {
"fscrawler_path": {
"tokenizer": "fscrawler_path"
}
},
"tokenizer": {
"fscrawler_path": {
"type": "path_hierarchy"
}
}
}
},
"mappings": {
"properties" : {
"real" : {
"type" : "keyword",
"store" : true
},
"root" : {
"type" : "keyword",
"store" : true
},
"virtual" : {
"type" : "keyword",
"store" : true
}
}
}
}
]
03:21:39,984 DEBUG [f.p.e.c.f.c.v.ElasticsearchClientV7] wait for yellow health on index [remote_index_folder]
03:21:39,987 TRACE [f.p.e.c.f.c.v.ElasticsearchClientV7] health response: {"cluster_name":"elasticsearch","status":"yellow","timed_out":false,"number_of_nodes":1,"number_of_data_nodes":1,"active_primary_shards":1,"active_shards":1,"relocating_shards":0,"initializing_shards":0,"unassigned_shards":1,"delayed_unassigned_shards":0,"number_of_pending_tasks":0,"number_of_in_flight_fetch":0,"task_max_waiting_in_queue_millis":0,"active_shards_percent_as_number":53.84615384615385}
03:21:39,992 DEBUG [f.p.e.c.f.FsParserAbstract] creating fs crawler thread [remote_index] for [/tmp/es] every [15m]
03:21:39,993 INFO [f.p.e.c.f.FsParserAbstract] FS crawler started for [remote_index] for [/tmp/es] every [15m]
03:21:39,994 DEBUG [f.p.e.c.f.FsParserAbstract] Fs crawler thread [remote_index] is now running. Run #1...
03:21:39,995 DEBUG [f.p.e.c.f.c.s.FileAbstractorSSH] Opening SSH connection to xxusr@45.xx.xx.xx
03:21:40,259 WARN [f.p.e.c.f.FsParserAbstract] Error while crawling /tmp/es: Algorithm negotiation fail
03:21:40,259 WARN [f.p.e.c.f.FsParserAbstract] Full stacktrace
com.jcraft.jsch.JSchException: Algorithm negotiation fail
at com.jcraft.jsch.Session.receive_kexinit(Session.java:590) ~[jsch-0.1.55.jar:?]
at com.jcraft.jsch.Session.connect(Session.java:320) ~[jsch-0.1.55.jar:?]
at com.jcraft.jsch.Session.connect(Session.java:183) ~[jsch-0.1.55.jar:?]
at fr.pilato.elasticsearch.crawler.fs.crawler.ssh.FileAbstractorSSH.openSSHConnection(FileAbstractorSSH.java:133) ~[fscrawler-crawler-ssh-2.7-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.crawler.ssh.FileAbstractorSSH.open(FileAbstractorSSH.java:110) ~[fscrawler-crawler-ssh-2.7-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.FsParserAbstract.run(FsParserAbstract.java:127) [fscrawler-core-2.7-SNAPSHOT.jar:?]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_232]
03:21:40,270 WARN [f.p.e.c.f.FsParserAbstract] Error while closing the connection: java.lang.NullPointerException
03:21:40,270 INFO [f.p.e.c.f.FsParserAbstract] FS crawler is stopping after 1 run
03:21:40,293 DEBUG [f.p.e.c.f.FsCrawlerImpl] Closing FS crawler [remote_index]
03:21:40,293 DEBUG [f.p.e.c.f.FsCrawlerImpl] FS crawler thread is now stopped
03:21:40,294 DEBUG [f.p.e.c.f.c.v.ElasticsearchClientV7] Closing Elasticsearch client manager
03:21:40,297 DEBUG [f.p.e.c.f.FsCrawlerImpl] ES Client Manager stopped
03:21:40,297 INFO [f.p.e.c.f.FsCrawlerImpl] FS crawler [remote_index] stopped
03:21:40,298 DEBUG [f.p.e.c.f.FsCrawlerImpl] Closing FS crawler [remote_index]
03:21:40,299 DEBUG [f.p.e.c.f.FsCrawlerImpl] FS crawler thread is now stopped
03:21:40,299 DEBUG [f.p.e.c.f.c.v.ElasticsearchClientV7] Closing Elasticsearch client manager
03:21:40,299 DEBUG [f.p.e.c.f.FsCrawlerImpl] ES Client Manager stopped
03:21:40,299 INFO [f.p.e.c.f.FsCrawlerImpl] FS crawler [remote_index] stopped
[root@li393-89 fscrawler-es7-2.7-SNAPSHOT]#
Below is the settings file:
---
name: "remote_index"
fs:
url: "/tmp/es"
update_rate: "15m"
excludes:
- "*/~*"
json_support: false
filename_as_id: false
add_filesize: true
remove_deleted: true
add_as_inner_object: false
store_source: false
index_content: true
attributes_support: true
raw_metadata: false
xml_support: false
index_folders: true
lang_detect: false
continue_on_error: false
ocr:
language: "eng"
enabled: true
pdf_strategy: "ocr_and_text"
follow_symlinks: false
elasticsearch:
nodes:
- url: "http://127.0.0.1:9200"
bulk_size: 5
flush_interval: "5s"
byte_size: "10mb"
server:
hostname: "45.xx.xx.xx"
username: "xxusr"
password: "879iehkjwjkt"
port: 19
protocol: "ssh"
~
Thank you,
Lisa