Created 12-31-2015 08:13 AM
#Data I'm using
ID,fname,lname,age,Career 4000001,Kristina,Chung,55,Pilot 4000002,Paige,Chen,74,Teacher 4000003,Sherri,Melton,34,Firefighter 4000004,Gretchen,Hill,66,Computer hardware engineer 4000005,Karen,Puckett,74,Lawyer 4000006,Patrick,Song,42,Veterinarian 4000007,Elsie,Hamilton,43,Pilot
# Table Creation:
hive> create table foo(id string,fname string,lname string,age string,career string) > ROW FORMAT DELIMITED > FIELDS TERMINATED BY ' ';
# Copying Data:
hive> load data inpath '/API/demo1/customers.txt' overwrite into table foo; Loading data to table default.foo Deleted hdfs://172.16.56.136:10001/user/hive/warehouse/foo Table default.foo stats: [num_partitions: 0, num_files: 1, num_rows: 0, total_size: 554, raw_data_size: 0] OK Time taken: 0.282 seconds
# Conversion
create table Jac(id string,fname string,lname string,age string,career string) > STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' > WITH SERDEPROPERTIES ('hbase.columns.mapping' = ':key,cf1:fname,cf1:lname,cf1:age,cf1:career') > TBLPROPERTIES('hbase.table.name' = 'j');
#error:
hive> INSERT OVERWRITE TABLE Jac SELECT * FROM foo; Total MapReduce jobs = 1 Launching Job 1 out of 1 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_201512091118_0019, Tracking URL = http://172.16.56.136:50030/jobdetails.jsp?jobid=j... Kill Command = /usr/local/hadoop/libexec/../bin/hadoop job -kill job_201512091118_0019 Hadoop job information for Stage-0: number of mappers: 1; number of reducers: 0 2015-12-24 16:11:27,258 Stage-0 map = 0%, reduce = 0% 2015-12-24 16:11:44,312 Stage-0 map = 100%, reduce = 100% Ended Job = job_201512091118_0019 with errors Error during job, obtaining debugging information... Job Tracking URL: http://172.16.56.136:50030/jobdetails.jsp?jobid=j... Examining task ID: task_201512091118_0019_m_000002 (and more) from job job_201512091118_0019 Exception in thread "Thread-90" java.lang.RuntimeException: Error while reading from task log url at org.apache.hadoop.hive.ql.exec.errors.TaskLogProcessor.getStackTraces(TaskLogProcessor.java:247) at org.apache.hadoop.hive.ql.exec.JobDebugger.showJobFailDebugInfo(JobDebugger.java:285) at org.apache.hadoop.hive.ql.exec.JobDebugger.run(JobDebugger.java:118) at java.lang.Thread.run(Thread.java:745) Caused by: java.io.FileNotFoundException: http://hadoop:50060/tasklog?attemptid=attempt_201... at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1626) at java.net.URL.openStream(URL.java:1041) at org.apache.hadoop.hive.ql.exec.errors.TaskLogProcessor.getStackTraces(TaskLogProcessor.java:198) ... 3 more FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask MapReduce Jobs Launched: Job 0: Map: 1 HDFS Read: 0 HDFS Write: 0 FAIL Total MapReduce CPU Time Spent: 0 msec
Created 12-31-2015 09:54 AM
In your Hive table definition, it should be ... FIELDS TERMINATED BY ',' ... — by comma, not by space, since your input data is comma-separated. Otherwise it's fine and it works; I had a few free moments and just tried it on an HDP-2.3.2 sandbox. Also, you may wish to remove the table header line when working with real data, so it isn't loaded as a row.
Created 12-31-2015 09:54 AM
In your Hive table definition, it should be ... FIELDS TERMINATED BY ',' ... — by comma, not by space, since your input data is comma-separated. Otherwise it's fine and it works; I had a few free moments and just tried it on an HDP-2.3.2 sandbox. Also, you may wish to remove the table header line when working with real data, so it isn't loaded as a row.
Created 01-04-2016 04:39 PM
Thank you for your reply. Sorry for the late response — I was out due to the holiday. Let me give it a shot.
Created 01-04-2016 04:40 PM
Can I update the table definition in place, or do I have to drop and recreate the table?
Created 01-04-2016 04:58 PM
Tried the change
hive> create table yep(id string,fname string,lname string,age string,career string) > ROW FORMAT DELIMITED > FIELDS TERMINATED BY ','; OK Time taken: 0.064 seconds
# After Insertion:
hive> INSERT OVERWRITE TABLE atari SELECT * FROM yep; Total MapReduce jobs = 1 Launching Job 1 out of 1 Number of reduce tasks is set to 0 since there's no reduce operator Starting Job = job_201512091118_0020, Tracking URL = http://172.16.56.136:50030/jobdetails.jsp?jobid=job_201512091118_0020 Kill Command = /usr/local/hadoop/libexec/../bin/hadoop job -kill job_201512091118_0020 Hadoop job information for Stage-0: number of mappers: 1; number of reducers: 0 2015-12-29 05:50:38,483 Stage-0 map = 0%, reduce = 0% 2015-12-29 05:50:56,549 Stage-0 map = 100%, reduce = 100% Ended Job = job_201512091118_0020 with errors Error during job, obtaining debugging information... Job Tracking URL: http://172.16.56.136:50030/jobdetails.jsp?jobid=job_201512091118_0020 Examining task ID: task_201512091118_0020_m_000002 (and more) from job job_201512091118_0020 Task with the most failures(4): ----- Task ID: task_201512091118_0020_m_000000 URL: http://172.16.56.136:50030/taskdetails.jsp?jobid=job_201512091118_0020&tipid=task_201512091118_0020_... 
----- Diagnostic Messages for this Task: java.lang.RuntimeException: Error in configuring object at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93) at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:426) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366) at org.apache.hadoop.mapred.Child$4.run(Child.java:255) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190) at org.apache.hadoop.mapred.Child.main(Child.java:249) Caused by: java.lang.reflect.InvocationTargetException at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) ... 9 more Caused by: java.lang.RuntimeException: Error in configuring object at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:93) at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:64) at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:117) at org.apache.hadoop.mapred.MapRunner.configure(MapRunner.java:34) ... 14 more Caused by: java.lang.reflect.InvocationTargetException at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:88) ... 
17 more Caused by: java.lang.RuntimeException: Map operator initialization failed at org.apache.hadoop.hive.ql.exec.ExecMapper.configure(ExecMapper.java:121) ... 22 more Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.FileSinkOperator.initializeOp(FileSinkOperator.java:385) at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:375) at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:451) at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:407) at org.apache.hadoop.hive.ql.exec.SelectOperator.initializeOp(SelectOperator.java:62) at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:375) at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:451) at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:407) at org.apache.hadoop.hive.ql.exec.TableScanOperator.initializeOp(TableScanOperator.java:186) at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:375) at org.apache.hadoop.hive.ql.exec.MapOperator.initializeOp(MapOperator.java:543) at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:375) at org.apache.hadoop.hive.ql.exec.ExecMapper.configure(ExecMapper.java:100) ... 22 more Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.FileSinkOperator.initializeOp(FileSinkOperator.java:322) ... 34 more FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask MapReduce Jobs Launched: Job 0: Map: 1 HDFS Read: 0 HDFS Write: 0 FAIL Total MapReduce CPU Time Spent: 0 msec
Created 01-07-2016 04:20 AM
@Paul Wilson The delimiter in "FIELDS TERMINATED BY" has to match the delimiter used in the input file. After running "LOAD DATA ...", run, for example, "SELECT id, career FROM yep" to make sure the Hive table was loaded successfully. If it was, but the INSERT job still fails, then it could be something related to permissions. Can you run other Hive queries that use MapReduce? And which versions of HDP/Hive/HBase are you using?