07-26-2017 09:49 AM
Hi! I am trying to build an Oozie workflow to push data from HDFS to S3. We are using Hadoop 2.6.0 (CDH 5.5.5).

This is the table I created:

CREATE EXTERNAL TABLE IF NOT EXISTS demo_table1(
  id string,
  name string
)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
LOCATION 's3a://path/'

workflow.xml:

<workflow-app xmlns="uri:oozie:workflow:0.4" name="copy-json-data-to-s3">
    <start to="create-table"/>

    <action name="create-table">
        <hive xmlns="uri:oozie:hive-action:0.5">
            <job-xml>${concat(nameNode, "/etc/hive/conf/hive-site.xml")}</job-xml>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${hiveQueueName}</value>
                </property>
            </configuration>
            <script>createTable.hql</script>
        </hive>
        <ok to="copy-to-s3"/>
        <error to="kill"/>
    </action>

    <action name="copy-to-s3">
        <hive xmlns="uri:oozie:hive-action:0.5">
            <job-xml>${concat(nameNode, "/etc/hive/conf/hive-site.xml")}</job-xml>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${hiveQueueName}</value>
                </property>
            </configuration>
            <script>copyToS3.hql</script>
        </hive>
        <ok to="end"/>
        <error to="end"/>
    </action>

    <kill name="kill">
        <message>failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <!-- End control node - indicates a workflow job has completed successfully -->
    <end name="end"/>
</workflow-app>

Then, when I executed the workflow, I got the error message below:

50698 [uber-SubtaskRunner] ERROR hive.ql.exec.DDLTask - org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:com.cloudera.com.amazonaws.AmazonClientException: Unable to load AWS credentials from any provider in the chain)
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:755)
at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:3988)
at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:303)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:160)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1690)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1449)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1235)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1053)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1043)
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:209)
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:161)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:372)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:307)
at org.apache.hadoop.hive.cli.CliDriver.processReader(CliDriver.java:405)
at org.apache.hadoop.hive.cli.CliDriver.processFile(CliDriver.java:421)
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:710)
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:677)
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:617)
at org.apache.oozie.action.hadoop.HiveMain.runHive(HiveMain.java:325)
at org.apache.oozie.action.hadoop.HiveMain.run(HiveMain.java:302)
at org.apache.oozie.action.hadoop.LauncherMain.run(LauncherMain.java:49)
at org.apache.oozie.action.hadoop.HiveMain.main(HiveMain.java:69)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.oozie.action.hadoop.LauncherMapper.map(LauncherMapper.java:236)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.LocalContainerLauncher$EventHandler.runSubtask(LocalContainerLauncher.java:388)
at org.apache.hadoop.mapred.LocalContainerLauncher$EventHandler.runTask(LocalContainerLauncher.java:302)
at org.apache.hadoop.mapred.LocalContainerLauncher$EventHandler.access$200(LocalContainerLauncher.java:187)
at org.apache.hadoop.mapred.LocalContainerLauncher$EventHandler$1.run(LocalContainerLauncher.java:230)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: MetaException(message:com.cloudera.com.amazonaws.AmazonClientException: Unable to load AWS credentials from any provider in the chain)
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_table_with_environment_context_result$create_table_with_environment_context_resultStandardScheme.read(ThriftHiveMetastore.java:29992)
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_table_with_environment_context_result$create_table_with_environment_context_resultStandardScheme.read(ThriftHiveMetastore.java:29960)
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$create_table_with_environment_context_result.read(ThriftHiveMetastore.java:29886)
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:78)
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_create_table_with_environment_context(ThriftHiveMetastore.java:1075)
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.create_table_with_environment_context(ThriftHiveMetastore.java:1061)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.create_table_with_environment_context(HiveMetaStoreClient.java:2066)
at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.create_table_with_environment_context(SessionHiveMetaStoreClient.java:97)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:671)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createTable(HiveMetaStoreClient.java:659)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:91)
at com.sun.proxy.$Proxy85.createTable(Unknown Source)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:1998)
at com.sun.proxy.$Proxy85.createTable(Unknown Source)
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:749)
... 39 more

For security and other reasons, I do not have sudo/admin access to Cloudera Manager, so I am unable to modify configuration files such as Hadoop's core-site.xml, hive-site.xml, and oozie-site.xml. If I had sudo access, I could put the AWS credentials directly into those configs and the problem would be gone.

So that is the problem. I went through some online resources and tried the following approaches on my own, but none of them worked (rough sketches of what I tried are at the end of this post):

1. Add fs.s3a.access.key and fs.s3a.secret.key properties inside workflow.xml.
2. Create a .jceks keystore and reference it in workflow.xml.
3. Create a job.properties file and execute the workflow from the command line.
4. Create a local copy of hive-site.xml with the keys added, and update the path in <job-xml>.

Is there a way to provide those AWS credentials during Oozie workflow execution? Appreciate any help!
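To be concrete, here is roughly what approaches 1-3 looked like; the key values, user name, keystore path, and Oozie URL below are placeholders, not my real settings.

Approach 1 - s3a credential properties added to each hive action's <configuration> block in workflow.xml:

<!-- sketch only; values are placeholders -->
<property>
    <name>fs.s3a.access.key</name>
    <value>MY_ACCESS_KEY</value>
</property>
<property>
    <name>fs.s3a.secret.key</name>
    <value>MY_SECRET_KEY</value>
</property>

Approach 2 - a .jceks keystore created on HDFS (the command prompts for each secret value), then referenced from the action configuration:

# sketch only; /user/myuser/aws.jceks is a placeholder path
hadoop credential create fs.s3a.access.key -provider jceks://hdfs/user/myuser/aws.jceks
hadoop credential create fs.s3a.secret.key -provider jceks://hdfs/user/myuser/aws.jceks

<property>
    <name>hadoop.security.credential.provider.path</name>
    <value>jceks://hdfs/user/myuser/aws.jceks</value>
</property>

Approach 3 - the same properties placed in job.properties and the workflow submitted from the command line:

# sketch only; placeholder values
fs.s3a.access.key=MY_ACCESS_KEY
fs.s3a.secret.key=MY_SECRET_KEY

oozie job -oozie http://oozie-host:11000/oozie -config job.properties -run

In every case the workflow still fails with the same "Unable to load AWS credentials from any provider in the chain" error.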
Labels:
- Apache Hive
- Apache Oozie