I've been able to write a program that isolates this. I haven't yet
stripped out the fields that may turn out to be unnecessary...
David
yml:
# Custom analyzers for the index. The mapping below references
# verity_tokenizer and lowercase_keyword by name; sortable_tokenizer is
# defined here but not referenced by the mapping shown in this message.
index:
  analysis:
    analyzer:
      # Pattern analyzer: splits on runs of non-word characters, lowercases.
      verity_tokenizer:
        type: pattern
        lowercase: true
        pattern: '(?:(?!\w).)+'
        # NOTE(review): presumably intended to disable stop words; confirm
        # that the literal value "none" has that meaning in this version.
        stopwords: none
      sortable_tokenizer:
        type: keyword
      # Whole value kept as a single token, lowercased.
      lowercase_keyword:
        type: custom
        filter: [lowercase]
        tokenizer: keyword
index creation:
# Create the index "rebuild_index21" with one shard and one replica.
# The request body is YAML; the two settings are nested under "index".
curl -XPUT 'http://localhost:9200/rebuild_index21/' -d '
index :
    number_of_shards : 1
    number_of_replicas : 1
'
# Install the mapping for type "index21" on index "rebuild_index21".
# The opening quote of the JSON body must follow -d on the same line,
# otherwise the shell ends the command before the body is passed.
curl -XPUT 'http://localhost:9200/rebuild_index21/index21/_mapping' -d '
{"index21":{"dynamic":false,"date_formats":
["date_optional_time"],"_source":{"compress":true},"properties":
{"author":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"bridgesymbols":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"fidelitycategory":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"fidelitymarketcategory":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"headline":
{"type":"multi_field","fields":{"headline":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"sortable":
{"type":"string","analyzer":"lowercase_keyword","omit_norms":true}}},"industries":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"industrygicscodes":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"priceclosing":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"pricecurrent":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"productcategory":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"productcode":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"publicationdatetime":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"sectorgicscodes":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"sectorindustries":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"sectorindustrycodes":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"sectors":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"source":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"storytext":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"teaser":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"dmsourceid":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"__documentdate":
{"type":"date","omit_norms":true},"documentdate":
{"type":"string","index":"no","omit_norms":true},"documenttag":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"documenttype":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"feedid":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"versiontag":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"wsodcompany":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"wsodissue":
{"type":"string","analyzer":"verity_tokenizer","omit_norms":true},"indexid":
{"type":"string","analyzer":"lowercase_keyword","omit_norms":true,"omit_term_freq_and_positions":true},"documentkey":
{"type":"string","analyzer":"lowercase_keyword","omit_norms":true,"omit_term_freq_and_positions":true},"subtransid":
{"type":"long","include_in_all":false},"__content":
{"type":"string","index":"no","omit_norms":true}}}}
'
Code
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.node.Node;
import org.elasticsearch.node.NodeBuilder;
class Hack {
public static void main(String args[]) throws Exception {
NodeBuilder nb =
nodeBuilder().client(true).clusterName("devcluster");
Node node = nb.node();
Client client = node.client();
XContentBuilder content =
XContentFactory.smileBuilder().startObject();
content.field("indexid", "index21");
content.field("documentkey", "532-
UMSACT0420100927111149CMCSA-20100927111149");
content.field("subtransid", "3072773936");
content.field("dmsourceid", "index21");
content.field("documentdate", "index21");
content.field("documenttag", "WSMarketCommentary_15884843");
content.field("documenttype", "MarketCommentary");
content.field("feedid", "532");
content.field("versiontag", "20100927111516");
content.field("bridgesymbols", "");
content.field("headline", "S&P Market Commentary");
content.field("productcategory", "MarketCommentary");
content.field("publicationdatetime", "2010-09-27
11:15:16.000");
content.field("source", "SNPMarketScope");
content.field("teaser", "");
content.field("wsodcompany", "");
content.field("wsodissue", "");
client.prepareIndex("rebuild_index21", "index21", "532-
UMSACT0420100927111149CMCSA-20100927111149")
.setSource(content).setOperationThreaded(false).execute()
.actionGet();
}
}