Member since
11-20-2018
19
Posts
0
Kudos Received
5
Solutions
My Accepted Solutions
Title | Views | Posted |
---|---|---|
24492 | 01-20-2019 02:59 AM | |
2081 | 01-16-2019 09:07 PM | |
17131 | 01-16-2019 04:49 PM | |
6009 | 01-11-2019 04:06 AM | |
2775 | 11-20-2018 05:11 PM |
01-20-2019
02:59 AM
After some trial and error, I was able to resolve this issue by removing the JSON SerDe from the ORC table definition.
... View more
01-18-2019
09:25 PM
I am getting the error below when loading JSON data from a Hive table of type TEXTFILE into a Hive table of type ORC. Please share your thoughts on how to overcome this issue. Thanks. Input sample file TestFile1.json:
{"jsondata":[{ "id":1, "m":"Edward the Elder", "cty":"Uited Kigdom", "hse":"House of Wessex", "yrs":"899925" },{ "id":2, "m":"Edward the Elder", "cty":"Uited Kigdom", "hse":"House of Wessex", "yrs":"899925" }]} Cluster version: HDP 2.6.5.0-292 TEXFILE Hive Table schema: CREATE EXTERNAL TABLE IF NOT EXISTS TestJson1 (jsondata array<struct<id:int,nm:varchar(30),cty:varchar(30),hse:varchar(30),yrs:varchar(20)>>) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' Location '/data/3rdPartyData/Hive/TestJson1'; Load json into above table: load data local inpath '/data/Hive/TestFile1.json' overwrite into table TestJson1; ORC Hive table schema: CREATE EXTERNAL TABLE IF NOT EXISTS TestJson1ORC(jsondata array<struct<id:int,nm:varchar(30),cty:varchar(30),hse:varchar(30),yrs:varchar(20)>>) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' STORED as ORC Location '/data/3rdPartyData/Hive/TestJson1ORC'; Insert statement: INSERT OVERWRITE TABLE TestJson1ORC SELECT * FROM TestJson1; Getting below error while executing above statement. Error log: <small> java.sql.SQLException: Error while processing statement: FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez.TezTask. Vertex failed, vertexName=Map 1, vertexId=vertex_1547835302497_0003_1_00, diagnostics=[Task failed, taskId=task_1547835302497_0003_1_00_000000, diagnostics=[TaskAttempt 0 failed, info=[Error: Failure while running task:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:347)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:194)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:185)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:185)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:181)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:96)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:73)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:325)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:150)
... 14 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:565)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:88)
... 17 more
Caused by: java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to org.apache.hadoop.hive.ql.io.orc.OrcSerde$OrcSerdeRow
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.write(OrcOutputFormat.java:81)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:763)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:841)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:841)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:133)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:170)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:555)
... 18 more
], TaskAttempt 1 failed, info=[Error: Failure while running task:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:347)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:194)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:185)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:185)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:181)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:96)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:73)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:325)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:150)
... 14 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:565)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:88)
... 17 more
Caused by: java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to org.apache.hadoop.hive.ql.io.orc.OrcSerde$OrcSerdeRow
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.write(OrcOutputFormat.java:81)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:763)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:841)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:841)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:133)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:170)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:555)
... 18 more
], TaskAttempt 2 failed, info=[Error: Failure while running task:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:347)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:194)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:185)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:185)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:181)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:96)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:73)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:325)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:150)
... 14 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:565)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:88)
... 17 more
Caused by: java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to org.apache.hadoop.hive.ql.io.orc.OrcSerde$OrcSerdeRow
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.write(OrcOutputFormat.java:81)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:763)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:841)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:841)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:133)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:170)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:555)
... 18 more
], TaskAttempt 3 failed, info=[Error: Failure while running task:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:347)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:194)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:185)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:185)
at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:181)
at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:96)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:73)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:325)
at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:150)
... 14 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"jsondata":[{"id":1,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"},{"id":2,"nm":null,"cty":"Uited Kigdom","hse":"House of Wessex","yrs":"899925"}]}
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:565)
at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:88)
... 17 more
Caused by: java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to org.apache.hadoop.hive.ql.io.orc.OrcSerde$OrcSerdeRow
at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.write(OrcOutputFormat.java:81)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:763)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:841)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:841)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:133)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:170)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:555)
... 18 more
]], Vertex did not succeed due to OWN_TASK_FAILURE, failedTasks:1 killedTasks:0, Vertex vertex_1547835302497_0003_1_00 [Map 1] killed/failed due to:OWN_TASK_FAILURE]DAG did not succeed due to VERTEX_FAILURE. failedVertices:1 killedVertices:0
<a href="http://ambaripoc:8080/views/HIVE/2.0.0/AUTO_HIVE20_INSTANCE/#">(less...)</a></small>
... View more
Labels:
- Labels:
-
Apache Hive
01-18-2019
08:53 PM
As per my analysis, it appears that org.apache.hive.hcatalog.data.JsonSerDe does not support a JSON document that starts with a square bracket "[" (that is, a top-level array), for example: [{ "id":1, "nm":"Edward the Elder", "cty":"United Kingdom", "hse":"House of Wessex", "yrs":"899925" }, { "id":2, "nm":"Edward the Elder", "cty":"United Kingdom", "hse":"House of Wessex", "yrs":"899925" }]
... View more
01-16-2019
09:07 PM
Since this was odd behavior, I rechecked all the memory usage and configuration and noticed that it was caused by the Tez memory being set to the YARN maximum memory. After reducing the Tez memory, the issue was fixed.
... View more
01-16-2019
05:42 PM
I have noticed that this is not limited to ORC tables; the same happens with a normal table. It occurs when loading data from another table, whereas loading data from a source file into a table works fine.
... View more
01-16-2019
04:57 PM
Issue: Loading data into a Hive ORC table never finishes; I have to kill the load process manually. I am trying to load data into an ORC Hive table from another
Hive TEXTFILE table. Since the source files are TXT/JSON, I am loading the data first
into TEXT table and then trying to load into ORC table. Cluster: HDP 2.6.5-292 Hive version: 1.2.1000.2.6.5.0-292 Here is the Hive TEXTFILE table schema: Create external table if not exists TEXTTable(ID
bigint, DOCUMENT_ID bigint, NUM varchar(20), SUBMITTER_ID bigint, FILING
string, CODE varchar(10), RECEIPTNUM varchar(20)) row format delimited fields terminated by '|' Location '/data/3rdPartyData/Hive/ TEXTTable ' TBLPROPERTIES ('skip.header.line.count'='1'); Load Data into TEXTFILE table: load data local inpath '/data/TextFile.txt' overwrite into
table TEXTTable; Here is the Hive ORC table schema: Create external table if not exists ORCTable(ID
bigint, DOCUMENT_ID bigint, NUM varchar(20), SUBMITTER_ID bigint, FILING
TIMESTAMP, CODE varchar(10), RECEIPTNUM varchar(20)) row format delimited fields terminated by '|' STORED as ORC Location '/data/3rdPartyData/Hive/ ORCTable ' TBLPROPERTIES ('orc.compress'='SNAPPY'); Load data into ORC table: Insert overwrite table ORCTable select _ID, DOCUMENT_ID,
NUM, SUBMITTER_ID,from_unixtime(unix_timestamp(FILING,
"yyyy-MM-dd'T'HH:mm:ss")) as FILING, CODE, RECEIPTNUM from TEXTTable;
... View more
Labels:
- Labels:
-
Apache Hive
01-16-2019
04:52 PM
@atrivedi Thank you. Do you mean starting with a square bracket "[" instead of a curly bracket "{"?
... View more
01-16-2019
04:49 PM
Thank you @jbarnett. I have worked with a similar JSON structure before using Spark, but I am now checking the possibility of ingesting the data using only shell scripts and Hive scripts.
... View more
01-15-2019
12:02 AM
Thank you @jbarnett. Yes, the simple JSON format worked for me as well after correcting the schema, but the other JSON format, which starts with a square bracket, requires a tweak to make it work. I am receiving hundreds of files, with thousands of array elements in each file, in the format below: [{ "id":1, "nm":"Edward the
Elder", "cty":"United Kingdom",
"hse":"House of Wessex", "yrs":"899925"
}, { "id":2, "nm":"Edward the Elder",
"cty":"United Kingdom", "hse":"House of
Wessex", "yrs":"899925" }] However, the JSON SerDe only supports single-line JSON, meaning each JSON
document has to be on its own line; otherwise, the array of JSON objects has to be wrapped in the updated format below: { "jsondata":[{ "id":1,
"nm":"Edward the Elder", "cty":"United
Kingdom", "hse":"House of Wessex",
"yrs":"899925" }] } And, the schema: CREATE EXTERNAL TABLE IF NOT EXISTS TestJson1 (jsondata
array<struct<id:int,nm:varchar(30),cty:varchar(30),hse:varchar(30),yrs:varchar(20)>>) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' Location '/data/3rdPartyData/Hive'; Any thoughts here to manage SerDe/Schema without updating the source files?
... View more
01-12-2019
04:08 AM
@jbarnett Thank you! Please find below requested details Hive Schema: CREATE TABLE IF NOT EXISTS TestJson (id int, nm varchar(30), cty varchar(30), hse varchar(30), yrs varchar(20)) ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe' STORED AS TEXTFILE; Sample Json: [ { "id": 1, "nm": "Edward the Elder", "cty": "United Kingdom", "hse": "House of Wessex", "yrs": "899-925" }, { "id": 2, "nm": "Athelstan", "cty": "United Kingdom", "hse": "House of Wessex", "yrs": "925-940" }, ] Also tried with below JSON format as well: { "id": 1, "nm": "Edward the Elder", "cty": "United Kingdom", "hse": "House of Wessex", "yrs": "899-925" } { "id": 2, "nm": "Athelstan", "cty": "United Kingdom", "hse": "House of Wessex", "yrs": "925-940" } Error stack trace: org.apache.hive.service.cli.HiveSQLException: java.io.IOException: org.apache.hadoop.hive.serde2.SerDeException: java.io.IOException: Start token not found where expected
org.apache.hive.service.cli.HiveSQLException: java.io.IOException: org.apache.hadoop.hive.serde2.SerDeException: java.io.IOException: Start token not found where expected
at org.apache.hive.jdbc.Utils.verifySuccess(Utils.java:264)
at org.apache.hive.jdbc.Utils.verifySuccessWithInfo(Utils.java:250)
at org.apache.hive.jdbc.HiveQueryResultSet.next(HiveQueryResultSet.java:373)
at org.apache.ambari.view.hive20.actor.ResultSetIterator.getNext(ResultSetIterator.java:119)
at org.apache.ambari.view.hive20.actor.ResultSetIterator.handleMessage(ResultSetIterator.java:78)
at org.apache.ambari.view.hive20.actor.HiveActor.onReceive(HiveActor.java:38)
at akka.actor.UntypedActor$anonfun$receive$1.applyOrElse(UntypedActor.scala:167)
at akka.actor.Actor$class.aroundReceive(Actor.scala:467)
at akka.actor.UntypedActor.aroundReceive(UntypedActor.scala:97)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:516)
at akka.actor.ActorCell.invoke(ActorCell.scala:487)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:238)
at akka.dispatch.Mailbox.run(Mailbox.scala:220)
at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
Caused by: org.apache.hive.service.cli.HiveSQLException: java.io.IOException: org.apache.hadoop.hive.serde2.SerDeException: java.io.IOException: Start token not found where expected
at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:416)
at org.apache.hive.service.cli.operation.OperationManager.getOperationNextRowSet(OperationManager.java:243)
at org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:793)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
at com.sun.proxy.$Proxy29.fetchResults(Unknown Source)
at org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:523)
at org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:709)
at org.apache.hive.service.cli.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1617)
at org.apache.hive.service.cli.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1602)
at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.io.IOException: org.apache.hadoop.hive.serde2.SerDeException: java.io.IOException: Start token not found where expected
at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:520)
at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:427)
at org.apache.hadoop.hive.ql.exec.FetchTask.fetch(FetchTask.java:146)
at org.apache.hadoop.hive.ql.Driver.getResults(Driver.java:1782)
at org.apache.hive.service.cli.operation.SQLOperation.getNextRowSet(SQLOperation.java:411)
... 25 more
Caused by: org.apache.hadoop.hive.serde2.SerDeException: java.io.IOException: Start token not found where expected
at org.apache.hive.hcatalog.data.JsonSerDe.deserialize(JsonSerDe.java:186)
at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:501)
... 29 more
Caused by: java.io.IOException: Start token not found where expected
at org.apache.hive.hcatalog.data.JsonSerDe.deserialize(JsonSerDe.java:172) ... 30 more
... View more