I am using the Elasticsearch NEST client, and I cannot find any good examples of someone using the foreach processor together with an attachment processor to process an array of attachments.
Now, I have this code, but it does not work:
/// <summary>
/// Submits the "attachments" ingest pipeline definition and then indexes
/// <paramref name="application"/> through that pipeline.
/// </summary>
/// <param name="applicationId">Passed to the index request as the document id.</param>
/// <param name="projectId">Formatted into ElasticsearchIndexName to build the target index name.</param>
/// <param name="application">The document to index; its Files collection is the field the foreach processor is pointed at.</param>
public async Task IndexDocument(Guid applicationId, Guid projectId, ApplicationDto application)
{
var indexName = string.Format(ElasticsearchIndexName, projectId);
// The pipeline definition is sent on every call, before the document is indexed.
await _client.PutPipelineAsync("attachments", p => p
.Description("Document attachments pipeline")
.Processors(pr => pr
.Foreach<ApplicationDto>(fch => fch
.Field(f => f.Files)
// NOTE(review): a foreach processor applies ONE inner processor per element; verify
// whether the Remove chained after Attachment below is actually sent — it may need
// its own Foreach.
.Processor(pcsr => pcsr
.Attachment<Attachment>(a => a
// NOTE(review): inside foreach the current element is addressed as `_ingest._value`;
// this expression presumably resolves to a plain `content` path instead of
// `_ingest._value.content`, and no target field is set — confirm.
.Field(f => f.Content)
)
.Remove<Attachment>(r => r
// NOTE(review): same addressing concern as the Attachment field above.
.Field(f => f.Content)
)
)
)
)
);
//var t = await _client.GetPipelineAsync(ts => ts.Id("*"));
// Index the document under the explicit index/type/id, routed through the pipeline defined above.
var request = new IndexRequest<ApplicationDto>(indexName, "applicationdto", applicationId)
{
Pipeline = "attachments",
Document = application
};
// NOTE(review): the index response is assigned but never inspected, so failures are not surfaced here.
var result = await _client.IndexAsync(request);
}
What changes need to be made to the code above so that it processes every attachment in the array?
/// <summary>
/// The application document that IndexDocument sends to Elasticsearch through the
/// "attachments" pipeline.
/// </summary>
public class ApplicationDto
{
public Guid Id { get; set; }
public int Version { get; set; }
public string FirstName { get; set; }
public string LastName { get; set; }
public string StreetAddress { get; set; }
public string PostalCode { get; set; }
public string City { get; set; }
public string Country { get; set; }
public string HigherEducationYears { get; set; }
public string CurrentPosition { get; set; }
public string CurrentEmployer { get; set; }
public Address Address { get; set; }
public PhoneNumber PhoneNumberMobile { get; set; }
public Birthdate BirthDate { get; set; }
public string WorkExperience { get; set; }
public int Age { get; set; }
public string Education { get; set; }
public string CoverLetter { get; set; }
/// <summary>
/// The attachments belonging to the application; this is the collection the pipeline's
/// foreach processor is configured to iterate over.
/// </summary>
// NOTE(review): [Attachment] presumably maps this property as an `attachment` field type;
// confirm that mapping is intended when the content is extracted by an ingest pipeline instead.
[Attachment]
public List<Attachment> Files { get; set; }
/// <summary>Application status; the converter writes the enum as its string name in JSON.</summary>
[JsonConverter(typeof(StringEnumConverter))]
public StatusType Status { get; set; }
}
Your code is missing the ForeachProcessor; the NEST implementation for this is pretty much a direct translation of the Elasticsearch JSON example that you've posted in your question. It's also a little easier if you use the Attachment type available in NEST, because the attachment object that the data is extracted into will deserialize straight into it.
/// <summary>
/// End-to-end example: recreates the default index, registers an "attachments" ingest
/// pipeline that runs the attachment processor over every element of
/// <c>Document.Attachments</c>, indexes a document with two base64-encoded attachments
/// through that pipeline, and reads the stored document back.
/// </summary>
void Main()
{
    var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));
    var defaultIndex = "default-index";
    var connectionSettings = new ConnectionSettings(pool)
        .DefaultIndex(defaultIndex);
    var client = new ElasticClient(connectionSettings);

    // Drop any index left over from a previous run so the example starts clean.
    if (client.IndexExists(defaultIndex).Exists)
    {
        client.DeleteIndex(defaultIndex);
    }

    client.PutPipeline("attachments", p => p
        // The description is a property of the pipeline itself, so it is set on the
        // pipeline descriptor rather than inside the processor list.
        .Description("Document attachment pipeline")
        .Processors(pp => pp
            .Foreach<Document>(fe => fe
                .Field(f => f.Attachments)
                .Processor(fep => fep
                    .Attachment<Attachment>(a => a
                        // Inside foreach, _ingest._value refers to the array element
                        // currently being processed, so both paths are relative to it.
                        .Field("_ingest._value.data")
                        .TargetField("_ingest._value.attachment")
                    )
                )
            )
        )
    );

    // Index one document carrying two base64-encoded attachments, routed through the pipeline.
    var indexResponse = client.Index(new Document
        {
            Attachments = new List<DocumentAttachment>
            {
                new DocumentAttachment { Data = "dGhpcyBpcwpqdXN0IHNvbWUgdGV4dAo=" },
                new DocumentAttachment { Data = "VGhpcyBpcyBhIHRlc3QK" }
            }
        },
        i => i.Pipeline("attachments")
    );

    // Fetch the document back by the id returned from the index call so the
    // extracted attachment data can be inspected.
    var getResponse = client.Get<Document>(indexResponse.Id);
}
/// <summary>
/// Example document indexed by Main; it only carries the list of attachments that the
/// pipeline's foreach processor iterates over.
/// </summary>
public class Document
{
/// <summary>The attachments to run through the attachment processor, one element at a time.</summary>
public List<DocumentAttachment> Attachments { get; set; }
}
/// <summary>
/// One element of <c>Document.Attachments</c>: the raw input for the attachment processor
/// together with the slot its output is written to.
/// </summary>
public class DocumentAttachment
{
/// <summary>
/// Base64-encoded content supplied at index time; the pipeline reads it via
/// <c>_ingest._value.data</c> (presumably the serialized name of this property — confirm).
/// </summary>
public string Data { get; set; }
/// <summary>
/// Receives the extraction result that the pipeline writes to
/// <c>_ingest._value.attachment</c> (presumably the serialized name of this property — confirm).
/// </summary>
public Attachment Attachment { get; set; }
}
You can also use a RemoveProcessor to remove the data field from _source: specify a second ForeachProcessor, after the current one, that contains a RemoveProcessor targeting the data field. Take a look at this answer for what the JSON would look like.
Apache, Apache Lucene, Apache Hadoop, Hadoop, HDFS and the yellow elephant
logo are trademarks of the
Apache Software Foundation
in the United States and/or other countries.