Please find the _settings.json file below:
{
"name" : "fs-test-2024-001",
"fs" : {
"url" : "E:/Data/crawler_data/Test",
"update_rate" : "12h",
"includes" : [
"*/*.jpg",
"*/*.jpeg",
"*/*.png",
"*/*.doc",
"*/*.docx",
"*/*.pdf",
"*/*.txt",
"*/*.sql"
],
"excludes" : [
"*/*.zip",
"*/*.rar",
"*/*.exe",
"*/*.mp4",
"*/*.mp3"
],
"json_support" : false,
"xml_support" : false,
"add_as_inner_object" : false,
"filename_as_id" : false,
"add_filesize" : true,
"remove_deleted" : true,
"store_source" : false,
"index_content" : true,
"attributes_support" : false,
"raw_metadata" : false,
"index_folders" : false,
"lang_detect" : false,
"continue_on_error" : false,
"indexed_chars" : "-1",
"ignore_above": "50mb",
"checksum": "MD5",
"ocr" : {
"language" : "eng",
"enabled" : true,
"pdf_strategy": "ocr_and_text"
}
},
"elasticsearch" : {
"nodes" : [ {
"url": "https://localhost:9200"
} ],
"bulk_size": 1,
"flush_interval": "5s",
"username": "elastic",
"password": "",
"index": "fs-test-2024-001"
},
"rest" : {
"url": "http://127.0.0.1:8080/fscrawler"
}
}
If I change bulk_size to 4, I get the error below.
02:29:13,612 DEBUG [f.p.e.c.f.FsCrawlerImpl] Closing FS crawler [fs-dark-2024-001]
02:29:13,612 DEBUG [f.p.e.c.f.FsCrawlerImpl] FS crawler thread is now stopped
02:29:13,614 DEBUG [f.p.e.c.f.c.ElasticsearchClient] Closing Elasticsearch client manager
02:29:13,616 DEBUG [f.p.e.c.f.f.b.FsCrawlerBulkProcessor] Closing BulkProcessor
02:29:13,617 DEBUG [f.p.e.c.f.f.b.FsCrawlerBulkProcessor] BulkProcessor is now closed
02:29:13,620 DEBUG [f.p.e.c.f.s.FsCrawlerManagementServiceElasticsearchImpl] Elasticsearch Management Service stopped
02:29:13,620 DEBUG [f.p.e.c.f.c.ElasticsearchClient] Closing Elasticsearch client manager
02:29:13,620 DEBUG [f.p.e.c.f.f.b.FsCrawlerBulkProcessor] Closing BulkProcessor
02:29:13,621 DEBUG [f.p.e.c.f.f.b.FsCrawlerBulkProcessor] BulkProcessor is now closed
02:29:13,621 DEBUG [f.p.e.c.f.f.b.FsCrawlerBulkProcessor] Executing [4] remaining actions
02:29:13,622 DEBUG [f.p.e.c.f.f.b.FsCrawlerSimpleBulkProcessorListener] Going to execute new bulk composed of 4 actions
02:29:16,568 DEBUG [f.p.e.c.f.c.ElasticsearchEngine] Sending a bulk request of [4] documents to the Elasticsearch service
02:29:16,735 DEBUG [f.p.e.c.f.c.ElasticsearchClient] bulk a ndjson of 230246338 characters
02:29:17,815 DEBUG [f.p.e.c.f.c.ElasticsearchClient] Error while running POST https://localhost:9200/_bulk:
02:29:17,815 WARN [f.p.e.c.f.f.b.FsCrawlerSimpleBulkProcessorListener] Error executing bulk
jakarta.ws.rs.ClientErrorException: HTTP 413 Request Entity Too Large
at org.glassfish.jersey.client.JerseyInvocation.createExceptionForFamily(JerseyInvocation.java:985) ~[jersey-client-3.1.5.jar:?]
at org.glassfish.jersey.client.JerseyInvocation.convertToException(JerseyInvocation.java:967) ~[jersey-client-3.1.5.jar:?]
at org.glassfish.jersey.client.JerseyInvocation.translate(JerseyInvocation.java:755) ~[jersey-client-3.1.5.jar:?]
at org.glassfish.jersey.client.JerseyInvocation.lambda$invoke$1(JerseyInvocation.java:675) ~[jersey-client-3.1.5.jar:?]
at org.glassfish.jersey.client.JerseyInvocation.call(JerseyInvocation.java:697) ~[jersey-client-3.1.5.jar:?]
at org.glassfish.jersey.client.JerseyInvocation.lambda$runInScope$3(JerseyInvocation.java:691) ~[jersey-client-3.1.5.jar:?]
at org.glassfish.jersey.internal.Errors.process(Errors.java:292) ~[jersey-common-3.1.5.jar:?]
at org.glassfish.jersey.internal.Errors.process(Errors.java:274) ~[jersey-common-3.1.5.jar:?]
at org.glassfish.jersey.internal.Errors.process(Errors.java:205) ~[jersey-common-3.1.5.jar:?]
at org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:390) ~[jersey-common-3.1.5.jar:?]
at org.glassfish.jersey.client.JerseyInvocation.runInScope(JerseyInvocation.java:691) ~[jersey-client-3.1.5.jar:?]
at org.glassfish.jersey.client.JerseyInvocation.invoke(JerseyInvocation.java:674) ~[jersey-client-3.1.5.jar:?]
at org.glassfish.jersey.client.JerseyInvocation$Builder.method(JerseyInvocation.java:450) ~[jersey-client-3.1.5.jar:?]
at fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClient.httpCall(ElasticsearchClient.java:871) ~[fscrawler-elasticsearch-client-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClient.httpPost(ElasticsearchClient.java:847) ~[fscrawler-elasticsearch-client-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClient.bulk(ElasticsearchClient.java:808) ~[fscrawler-elasticsearch-client-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchEngine.bulk(ElasticsearchEngine.java:82) ~[fscrawler-elasticsearch-client-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchEngine.bulk(ElasticsearchEngine.java:31) ~[fscrawler-elasticsearch-client-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.framework.bulk.FsCrawlerBulkProcessor.execute(FsCrawlerBulkProcessor.java:146) [fscrawler-framework-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.framework.bulk.FsCrawlerBulkProcessor.internalClose(FsCrawlerBulkProcessor.java:101) [fscrawler-framework-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.framework.bulk.FsCrawlerBulkProcessor.close(FsCrawlerBulkProcessor.java:77) [fscrawler-framework-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClient.close(ElasticsearchClient.java:452) [fscrawler-elasticsearch-client-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.service.FsCrawlerDocumentServiceElasticsearchImpl.close(FsCrawlerDocumentServiceElasticsearchImpl.java:60) [fscrawler-core-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.FsCrawlerImpl.close(FsCrawlerImpl.java:170) [fscrawler-core-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.cli.FsCrawlerCli.runner(FsCrawlerCli.java:399) [fscrawler-cli-2.10-SNAPSHOT.jar:?]
at fr.pilato.elasticsearch.crawler.fs.cli.FsCrawlerCli.main(FsCrawlerCli.java:119) [fscrawler-cli-2.10-SNAPSHOT.jar:?]
02:29:17,832 DEBUG [f.p.e.c.f.s.FsCrawlerDocumentServiceElasticsearchImpl] Elasticsearch Document Service stopped