Hi,
I am using Elasticsearch in Hadoop version 6.2.0 to connect Hive and elasticsearch.
My queries is as below.
Hive Tables
CREATE TABLE IF NOT EXISTS User_Details(
Id bigint,
username string,
name string,
address1 string,
address2 string,
city string,
state string
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
CREATE TABLE IF NOT EXISTS temp_User_Orders(
Id bigint,
ItemId bigint,
orderdate String
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
CREATE TABLE IF NOT EXISTS User_Orders(
Id bigint,
ItemId bigint,
orderdate timestamp
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
insert into table User_Orders select Id,ItemId,from_unixtime(unix_timestamp(orderdate, 'yyyy-MM-dd')) from temp_User_Orders;
Elasticsearch index mapping overridden as below
curl -XPUT 'localhost:9200/user?pretty' -H 'Content-Type: application/json' -d'
{
"mappings" : {
"details" : {
"properties" : {
"id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"user1" : {
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"orders" : {
"type":"nested",
"properties" : {
"itemid" : {
"type" : "long"
},
"orderdate" : {
"type" : "date",
"format": "date_time_no_millis"
}
}
},
"shippingaddress" : {
"properties" : {
"address1" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"address2" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"city" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"state" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"username" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
}
'
Elasticsearch external table query
CREATE EXTERNAL TABLE IF NOT EXISTS User_Nested_Table_poc(
Id STRING,
user1 STRUCT<username: STRING, name:STRING,shippingaddress:struct<address1:STRING,address2:STRING,city:STRING,state:STRING>, orders:array<STRUCT<ItemId:bigint,orderdate:timestamp>>>
) STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES('es.nodes' = 'localhost',
'es.resource' = 'user/details',
'es.mapping.id' = 'Id');
insert overwrite table User_Nested_Table_poc
select ud.Id,
named_struct('username',ud.username,
'name',ud.name,
'shippingaddress',named_struct('address1',ud.address1,
'address2',ud.address2,
'city',ud.city,
'state',ud.state),
'orders',array(named_struct('ItemId',uo.ItemId,'orderdate',cast(uo.orderdate as timestamp)))
)
from User_Details ud JOIN User_Orders uo on ud.Id=uo.Id;
After insertion, I am unable to execute the select query for external table from Hive and is throwing the below error.
select * from user_nested_table_poc;
Failed with exception java.io.IOException:org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to org.apache.hadoop.hive.serde2.io.TimestampWritable
Time taken: 0.042 seconds
I am stuck in this for the past 2 days, please help me resolve this issue.
Regards,
Adarsh K S