Created 12-14-2023 08:33 AM
Hello,
with NiFi I'm trying to move a large amount of data from one Hive database into another Hive database, and I'm getting an error while writing.
Flow
Generate Create Table
Replace text
Properties
CREATE TABLE IF NOT EXISTS ${db_dest}.${table_dest} (
Columns ...
)
PARTITIONED BY (creationdatemonth STRING)
CLUSTERED BY (ProcessValueDate) INTO 16 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true')
PutHiveQL
SelectHiveQL HiveQL Select Query
Select
Columns..
FROM
${db_source}.product_latest pv
WHERE pv.CreationDate BETWEEN DATE_FORMAT (DATE_SUB (CURRENT_DATE, 1), 'yyyy-MM-dd 00:00:00.000') AND DATE_FORMAT (DATE_SUB (CURRENT_DATE, 0), 'yyyy-MM-dd 00:00:00.000')
PutHiveStreaming
And im getting this error
When I limit the select statement to 100 rows, PutHiveStreaming works without a problem; when I set the limit to something like 1000, it fails. I've tried using SplitRecord to split it into 100-row flow files — no luck. I also tried changing the records-per-transaction setting, but I still get the same error. Manually inserting the data worked.
Can anyone help me?
Thanks
Example data in flowfile
[ {
"hostname" : "dfgdfg",
"originid" : 1,
"productionlineid" : 12,
"productionlineshorttext" : "dfgdfg",
"productionlinelongtext" : "dfgdfgd",
"workstationid" : 123,
"workstationposition" : 12,
"workstationshorttext" : "dfgdf",
"workstationlongtext" : "dfgdfg",
"workpiececarrierid" : null,
"workpiececarrierlabelno" : null,
"progressid" : 12,
"productid" : 1234234345,
"orderid" : 34534534,
"materialno" : "dfgdfg",
"materialtext" : null,
"processvaluereference" : "dfgdfg",
"processvalueid" : 100,
"processvalueid2" : 1,
"processvalueshorttext" : "#?#",
"processvaluelongtext" : "###?###",
"processvaluevaluetype" : "I",
"processvalueinteger" : 123,
"processvaluefloat" : null,
"processvaluechar" : null,
"processvaluebinary" : null,
"processvaluerecordtype" : "P",
"processvalueenabled" : true,
"processvaluesource" : "PLC ",
"processvaluedate" : "2023-12-13 01:30:08.384",
"comqueueid" : 123,
"comqueuedate" : "2023-12-13 01:30:08.369",
"creationdatemonth" : "2023-12",
"importcreationdate" : "2023-12-13 01:30:10.779"
}, {
Created 12-14-2023 10:02 AM
@Richard0213 Welcome to the Cloudera Community!
To help you get the best possible solution, I have tagged our NiFi experts @MattWho @SAMSAL who may be able to assist you further.
Please keep us updated on your post, and we hope you find a satisfactory solution to your query.
Regards,
Diana Torres,Created on 12-14-2023 11:19 PM - edited 12-15-2023 01:25 AM
Hello,
thanks, I'm using the exact same processors with the same configuration; with smaller tables it worked without any problem.
I also tried splitting the output in SelectHiveQL into 100-row flow files: 2 flow files passed and the rest gave the error.
Logs
2023-12-15 10:24:11,193 ERROR [Timer-Driven Process Thread-5] o.a.n.processors.hive.PutHiveStreaming PutHiveStreaming[id=fb82303f-d19c-10b2-ba32-ec442533a51b] Failed to process session due to org.apache.nifi.processor.exception.ProcessException: Error writing [org.apache.nifi.processors.hive.PutHiveStreaming$HiveStreamingRecord@9e5c12c] to Hive Streaming transaction due to java.lang.IllegalStateException: TransactionBatch TxnIds=[10434809...10434908] on endPoint = {metaStoreUri='thrift://de00srvapp673r1.de.olan.dir:9083', database='spdm_datamart_dev', table='product_processvalueinfo', partitionVals=[2023-12] } has been closed(): org.apache.nifi.processor.exception.ProcessException: Error writing [org.apache.nifi.processors.hive.PutHiveStreaming$HiveStreamingRecord@9e5c12c] to Hive Streaming transaction due to java.lang.IllegalStateException: TransactionBatch TxnIds=[10434809...10434908] on endPoint = {metaStoreUri='thrift://de00srvapp673r1.de.olan.dir:9083', database='spdm_datamart_dev', table='product_processvalueinfo', partitionVals=[2023-12] } has been closed()
org.apache.nifi.processor.exception.ProcessException: Error writing [org.apache.nifi.processors.hive.PutHiveStreaming$HiveStreamingRecord@9e5c12c] to Hive Streaming transaction due to java.lang.IllegalStateException: TransactionBatch TxnIds=[10434809...10434908] on endPoint = {metaStoreUri='thrift://de00srvapp673r1.de.olan.dir:9083', database='spdm_datamart_dev', table='product_processvalueinfo', partitionVals=[2023-12] } has been closed()
at org.apache.nifi.processors.hive.PutHiveStreaming.lambda$onHiveRecordsError$1(PutHiveStreaming.java:643)
at org.apache.nifi.processor.util.pattern.ExceptionHandler$OnError.lambda$andThen$0(ExceptionHandler.java:54)
at org.apache.nifi.processors.hive.PutHiveStreaming.lambda$onHiveRecordError$2(PutHiveStreaming.java:650)
at org.apache.nifi.processor.util.pattern.ExceptionHandler.execute(ExceptionHandler.java:148)
at org.apache.nifi.processors.hive.PutHiveStreaming$1.process(PutHiveStreaming.java:841)
at org.apache.nifi.controller.repository.StandardProcessSession.read(StandardProcessSession.java:2212)
at org.apache.nifi.controller.repository.StandardProcessSession.read(StandardProcessSession.java:2180)
at org.apache.nifi.processors.hive.PutHiveStreaming.onTrigger(PutHiveStreaming.java:794)
at org.apache.nifi.processors.hive.PutHiveStreaming.lambda$onTrigger$4(PutHiveStreaming.java:660)
at org.apache.nifi.processor.util.pattern.PartialFunctions.onTrigger(PartialFunctions.java:114)
at org.apache.nifi.processor.util.pattern.RollbackOnFailure.onTrigger(RollbackOnFailure.java:184)
at org.apache.nifi.processors.hive.PutHiveStreaming.onTrigger(PutHiveStreaming.java:660)
at org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1162)
at org.apache.nifi.controller.tasks.ConnectableTask.invoke(ConnectableTask.java:209)
at org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:117)
at org.apache.nifi.engine.FlowEngine$2.run(FlowEngine.java:110)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalStateException: TransactionBatch TxnIds=[10434809...10434908] on endPoint = {metaStoreUri='thrift://de00srvapp673r1.de.olan.dir:9083', database='spdm_datamart_dev', table='product_processvalueinfo', partitionVals=[2023-12] } has been closed()
at org.apache.hive.hcatalog.streaming.HiveEndPoint$TransactionBatchImpl.checkIsClosed(HiveEndPoint.java:738)
at org.apache.hive.hcatalog.streaming.HiveEndPoint$TransactionBatchImpl.write(HiveEndPoint.java:777)
at org.apache.hive.hcatalog.streaming.HiveEndPoint$TransactionBatchImpl.write(HiveEndPoint.java:734)
at org.apache.nifi.util.hive.HiveWriter$1.call(HiveWriter.java:123)
at org.apache.nifi.util.hive.HiveWriter$1.call(HiveWriter.java:120)
at org.apache.nifi.util.hive.HiveWriter.lambda$null$3(HiveWriter.java:368)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1869)
at org.apache.nifi.util.hive.HiveWriter.lambda$callWithTimeout$4(HiveWriter.java:368)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
... 3 common frames omitted