I answered your original question on Stack Overflow; I'll post the answer here too for posterity.
Your code is missing the `ForeachProcessor`; the NEST implementation for this is pretty much a direct translation of the Elasticsearch JSON example that you've posted in your question.
It's also a little easier if you use the `Attachment` type available in NEST, because the `attachment` object that the data is extracted into will deserialize into it.
/// <summary>
/// Creates an ingest pipeline that runs the attachment processor over every
/// element of <c>Document.Attachments</c>, indexes a sample document through
/// that pipeline, and fetches the indexed document back.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when the pipeline cannot be created or the document cannot be indexed.
/// </exception>
void Main()
{
    var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));
    var defaultIndex = "default-index";
    var connectionSettings = new ConnectionSettings(pool)
        .DefaultIndex(defaultIndex);
    var client = new ElasticClient(connectionSettings);

    // Drop any index left over from a previous run so the example is repeatable.
    if (client.IndexExists(defaultIndex).Exists)
    {
        client.DeleteIndex(defaultIndex);
    }

    // The description is a property of the pipeline itself, so it is set on the
    // pipeline descriptor rather than inside the processors list.
    var putPipelineResponse = client.PutPipeline("attachments", p => p
        .Description("Document attachment pipeline")
        .Processors(pp => pp
            .Foreach<Document>(fe => fe
                .Field(f => f.Attachments)
                .Processor(fep => fep
                    // "_ingest._value" refers to the array element currently
                    // being visited by the foreach processor.
                    .Attachment<Attachment>(a => a
                        .Field("_ingest._value.data")
                        .TargetField("_ingest._value.attachment")
                    )
                )
            )
        )
    );

    // The responses are checked explicitly so that a failed call surfaces here
    // instead of being silently ignored.
    if (!putPipelineResponse.IsValid)
    {
        throw new InvalidOperationException(
            "Failed to create the 'attachments' pipeline: " + putPipelineResponse.DebugInformation);
    }

    var indexResponse = client.Index(new Document
    {
        Attachments = new List<DocumentAttachment>
        {
            new DocumentAttachment { Data = "dGhpcyBpcwpqdXN0IHNvbWUgdGV4dAo=" },
            new DocumentAttachment { Data = "VGhpcyBpcyBhIHRlc3QK" }
        }
    },
        i => i.Pipeline("attachments")
    );

    if (!indexResponse.IsValid)
    {
        throw new InvalidOperationException(
            "Failed to index the document: " + indexResponse.DebugInformation);
    }

    var getResponse = client.Get<Document>(indexResponse.Id);
}
/// <summary>
/// The document indexed through the "attachments" ingest pipeline.
/// </summary>
public class Document
{
/// <summary>
/// The attachments carried by the document; this is the field the pipeline's
/// foreach processor iterates over.
/// </summary>
public List<DocumentAttachment> Attachments { get; set; }
}
/// <summary>
/// A single element of <see cref="Document.Attachments"/>.
/// </summary>
public class DocumentAttachment
{
/// <summary>
/// The encoded attachment content that the attachment processor reads
/// (addressed as "_ingest._value.data" in the pipeline).
/// </summary>
public string Data { get; set; }
/// <summary>
/// The extracted attachment information that the pipeline writes to
/// "_ingest._value.attachment"; it is set by the pipeline, not by the caller.
/// </summary>
public Attachment Attachment { get; set; }
}
Getting the indexed document back returns:
{
"_index" : "default-index",
"_type" : "document",
"_id" : "AVrOVuC1vjcwkxZzCHYS",
"_version" : 1,
"found" : true,
"_source" : {
"attachments" : [
{
"data" : "dGhpcyBpcwpqdXN0IHNvbWUgdGV4dAo=",
"attachment" : {
"content_type" : "text/plain; charset=ISO-8859-1",
"language" : "en",
"content" : "this is\njust some text",
"content_length" : 24
}
},
{
"data" : "VGhpcyBpcyBhIHRlc3QK",
"attachment" : {
"content_type" : "text/plain; charset=ISO-8859-1",
"language" : "en",
"content" : "This is a test",
"content_length" : 16
}
}
]
}
}
You can also chain a `RemoveProcessor` on to remove the `data` field from `_source`: just specify another `ForeachProcessor` after the current `ForeachProcessor`, containing a `RemoveProcessor` that targets the `data` field.
Take a look at this answer for what the JSON would look like.