Yes I have only 1 index and yes, it shows more documents than I've (as far
as I'm aware of) created. Thanks for showing me this! I provide the code
which I use to create the index, index documents etc. and the plain "Documents"
class. Note that I've only been working on a demo containing lots of other
elements too, so I've begun with only 1 index and 2 documents just to not
mess up everything completely.
import java.net.URL;
import java.util.LinkedHashSet;
import io.searchbox.client.JestClient;
import io.searchbox.client.JestClientFactory;
import io.searchbox.client.config.ClientConfig;
import io.searchbox.client.config.ClientConstants;
import io.searchbox.core.Delete;
import io.searchbox.core.Index;
import io.searchbox.indices.CreateIndex;
/**
-
This class will index documents into searchable JSON-objects.
*/
public class Indexing {
private ClientConfig clientConfig;
private LinkedHashSet servers;
private JestClientFactory factory;
JestClient client;
private String indexName = null, indexType = null;
public Indexing(){
indexName = "data";
indexType = "apa";
createJest();
//debug
System.out.println("You have Jest motherfucker \n");
try {
deleteIndex(indexName, indexType, "0");
deleteIndex(indexName, indexType, "1");
deleteIndex(indexName, indexType, "2");
} catch (Exception e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
createIndex(indexName);
System.out.println("Created an index");
try {
indexDocuments();
} catch (Exception e) {
// TODO Auto-generated catch block
System.out.println("I've got a bad feeling about this...");
e.printStackTrace();
}
}
/**
- Method which creates a Jest Client to be able to index documents via
Jest.
*/
private void createJest(){
// Configuration
clientConfig = new ClientConfig();
servers = new LinkedHashSet();
servers.add("http://localhost:9200");
clientConfig.getProperties().put(ClientConstants.SERVER_LIST,
servers);
clientConfig.getProperties().put(ClientConstants.IS_MULTI_THREADED,
true);
// Construct a new Jest client according to configuration via
factory
factory = new JestClientFactory();
factory.setClientConfig(clientConfig);
client = factory.getObject();
//debug
System.out.println(client.toString());
}
public JestClient getClient(){
return client;
}
/**
* Creating an index
*/
private void createIndex(String name){
try {
client.execute(new CreateIndex(name));
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private void deleteIndex(String index, String type, String id) throws
Exception{
client.execute(new
Delete.Builder(id).index(index).type(type).build());
}
private void indexDocuments() throws Exception{
String u = null;
String title1 = "Monkey Business";
String title2 = "Cake is a Lie";
String content1 = "Monkey is a rare business about bananas. You
don't want to feel the wrath of the monkeys.";
String content2 = "Everybody wants cake. If you see cake you want
to eat it. But what if it's a lie?";
u = "http://animal.discovery.com/mammals/monkey-info.htm";
URL url1 = new URL(u);
u = "http://www.thecakeisalie.com/";
URL url2 = new URL(u);
Documents source1 = new Documents(title1, content1, url1);
Documents source2 = new Documents(title2, content2, url2);
Index index = new
Index.Builder(source1).index(indexName).type(indexType).id("0").build();
Index index1 = new
Index.Builder(source2).index(indexName).type(indexType).id("1").build();
client.execute(index);
client.execute(index1);
DocumentManager dm = new DocumentManager(client);
System.out.println(dm.getDocuments("0", indexName, indexType));
System.out.println(dm.getDocuments("1", indexName, indexType));
System.out.println(dm.getDocuments("2", indexName, indexType));
}
}
import java.net.URL;
/**
 * Plain data holder for one indexed document: a title, its text content and
 * the URL it refers to.
 */
public class Documents {

    private String title;
    private String content;
    private URL url;

    /**
     * Creates a document from its three parts; the values are stored as given.
     *
     * @param title   title of the document
     * @param content text content of the document
     * @param url     URL associated with the document
     */
    public Documents(String title, String content, URL url) {
        this.title = title;
        this.content = content;
        this.url = url;
    }

    /** @return the title passed to the constructor */
    public String getTitle() {
        return this.title;
    }

    /** @return the content passed to the constructor */
    public String getContent() {
        return this.content;
    }

    /** @return the URL passed to the constructor */
    public URL getURL() {
        return this.url;
    }
}
The class which connects to Elasticsearch:
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.node.Node;
import org.elasticsearch.node.NodeBuilder;
/**
- Class to connect the node to the cluster with possibility to start and
close it along
- with other functions.
*/
public class NodeConnection {
private Node node;
private Client client, tpclient;
public NodeConnection(){
startNode();
System.out.println("You have now successfully created a
NodeConnection \n");
//startTransportClient();
// System.out.println("You have now successfully created a
TransportClient \n");
}
/**
* Initialize the node and connect it to a cluster.
*/
private void startNode(){
//Starting the node and setting a cluster name which we can refer to
node = NodeBuilder.nodeBuilder().local(true).clusterName("Emmas
Cluster").node();
//some debugging
String string = node.toString();
System.out.print(string + "\n");
client = node.client();
}
public Client getClient(){
return client;
}
/**
*Transportclient which doesn't connect to the cluster,
*adding these temporarily since it might be needed for future
functionalities
*/
private void startTransportClient(){
//change the default name to the chosen one "Emmas cluster" to make
the tpclient work correctly
Settings settings = ImmutableSettings.settingsBuilder()
.put("cluster.name", "Emmas Cluster").build();
tpclient = new TransportClient(settings).addTransportAddress(new
InetSocketTransportAddress("host1", 9300))
.addTransportAddress(new InetSocketTransportAddress("host2", 9300));
//debugging
String string = tpclient.toString();
System.out.println(string + "\n");
}
public void closeNode(){
try{
//tpclient.close();
node.close();
}catch (Exception e){
e.printStackTrace();
}
System.out.println("The node is now closed with connected clients");
}
}
Den måndagen den 3:e juni 2013 kl. 21:16:11 UTC+2 skrev Ivan Brusic:
I think most of us are in agreement that your issue might be during
indexing.
The indices status API does not return any identifying information. You
can provide the results or scan it and report the important stats.
Going to http://localhost:9200/_status , find the number of documents
like so:
{
"ok": true,
"_shards": {
"total": 10,
"successful": 10,
"failed": 0
},
"indices": {
"foo": {
...
"docs": {
"num_docs": 2,
"max_doc": 2,
"deleted_docs": 0
},
How many docs (across all indices) do you have? Do you have only one
index? If your browser does not have a JSON viewer, you can probably
download an extension or paste the results into
http://jsonviewer.stack.hu/
Elasticsearch Platform — Find real-time answers at scale | Elastic
--
Ivan
On Mon, Jun 3, 2013 at 12:04 PM, David Pilato <da...@pilato.fr<javascript:>
wrote:
I don't need to see the payload of each doc. Only how you create each doc
(you can remove the doc itself).
--
David Pilato | Technical Advocate | Elasticsearch.com
@dadoonet https://twitter.com/dadoonet | @elasticsearchfrhttps://twitter.com/elasticsearchfr
| @scrutmydocs https://twitter.com/scrutmydocs
Le 3 juin 2013 à 21:03, Kya W <kyawolf...@gmail.com <javascript:>> a
écrit :
I actually don't think I'm allowed to do that. I'm doing this thesis work
at Volvo and they don't want people to see everything.... I'll try to
figure some way to show you exactly what happens. But is it possible that
Elasticsearch create multiple posts in an index with literally exactly the
same content (as in fieldnames, indexname, id, type etc)?
I create an index by using Jest's API, the command looks like this:
client.execute(new CreateIndex("articles"));
I honestly don't think the problem is in the creation of the indices and
atm I have only created a single index called "data". Haven't changed the
name or anything when I've indexed documents etc
Den måndagen den 3:e juni 2013 kl. 20:44:33 UTC+2 skrev David Pilato:
I would really love to help you but it's not possible until you give
more information about what you are doing:
How do you index and what?
How do you search and for what? (I don't speak about GET here).
Please tell us more. You can GIST a full java example if you want. Maybe
we will be able to understand JEST code.
That said, I'm pretty sure that JEST allows logging REST API calls. It
will be even easier if you provide those traces.
--
David Pilato | Technical Advocate | Elasticsearch.comhttp://elasticsearch.com/
*
@dadoonet https://twitter.com/dadoonet | @elasticsearchfrhttps://twitter.com/elasticsearchfr
|* @scrutmydocs https://twitter.com/scrutmydocs
Le 3 juin 2013 à 20:12, Kya W kyawolf...@gmail.com a écrit :
I'm using "GET" to see what I have in the index. I've used it both in
the java-code by the Elasticsearch API and via cURL. I've used the name of
the index, type and id since I haven't seen any "show all documents"-get
anywhere. And I've typed these names for what I've been indexing, which are
only two documents. So I've indexed two documents, I've also deleted the
whole thing to be sure and then indexed them again. In my GET I can only
see these two documents and yet the search-query returns the same documents
multiple times. I mean that it returns the SAME index, type, id, title,
content and url several times. Unknown reason.
Den måndagen den 3:e juni 2013 kl. 18:30:33 UTC+2 skrev Ivan Brusic:
How exactly are you determining that there are only two documents in
your index? The issue is very likely not in your querying code, but in your
indexing. Then again, I have never used Jest so I haven't explored that
code.
On Mon, Jun 3, 2013 at 9:26 AM, Kya W kyawolf...@gmail.com wrote:
Yes, I have tested and used cUrl a bit. But it's the same result as in
my java-application. I only see two documents in my index. Is it possible
that Elasticsearch might index identical documents multiple times? Have no
idea how I could see that in my index since it only returns one post per
index/type/id. I'm really stuck. I provided my java-code for the query in
the first post.
Den måndagen den 3:e juni 2013 kl. 17:39:39 UTC+2 skrev Roy Russo:
If you aren't comfortable with cURL, then just do a GET request from
your browser:
http://www.elasticsearch.org/guide/reference/api/search/uri-request/
You can also use a chrome extension, like Advanced REST client for
testing:
https://chrome.google.com/webstore/detail/advanced-rest-client/hgmloofddffdnphfgcellkdfbfbjeloo?hl=en-US
On Sunday, June 2, 2013 3:45:22 PM UTC-4, Kya W wrote:
Hi there!
I'm currently trying to use Elasticsearch in my thesis work, but
encountered a problem. If I use a term, for example "Monkey", which has 1
hit in my index... It returns this document 7 times. I use Jest together
with Elasticsearch since it was a bit easier to understand. I want every
document in my index to be returned maximum 1 time per search... not 7. I
would be very grateful for your help. I provide the code I have in my
"QueryManager" so far:
public class QueryManager {
private SearchSourceBuilder searchBuilder;
private JestClient client;
public QueryManager(JestClient client){
this.client = client;
searchBuilder = new SearchSourceBuilder();
}
public List<Documents> matches(String search){
Search search1 = new Search(searchBuilder.query(**Que**
ryBuilders.multiMatchQuery(
search, //our search-term
"title", "content", "url") //searching in the fields title,
content and url
).toString());
search1.addIndex("data");
search1.addType("apa");
JestResult result;
try {
result = client.execute(search1);
List<Documents> documents = result.getSourceAsObjectList(
Documents.class);
//debug, return size of list
System.out.println(documents.size());
return documents;
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
System.out.println("Nooo \n");
return null;
}
}
}
--
You received this message because you are subscribed to the Google
Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send
an email to elasticsearc...@googlegroups.**com.
For more options, visit https://groups.google.com/groups/opt_out.
--
You received this message because you are subscribed to the Google
Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send
an email to elasticsearc...@**googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.
--
You received this message because you are subscribed to the Google Groups
"elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an
email to elasticsearc...@googlegroups.com <javascript:>.
For more options, visit https://groups.google.com/groups/opt_out.
--
You received this message because you are subscribed to the Google Groups "elasticsearch" group.
To unsubscribe from this group and stop receiving emails from it, send an email to elasticsearch+unsubscribe@googlegroups.com.
For more options, visit https://groups.google.com/groups/opt_out.