Created on 11-01-2016 07:17 PM - edited 09-16-2022 03:46 AM
all the Hadoop services in my cloudera manager green and still my sqoop import fails badly.
(DFSOutputStream.java:789)
16/11/01 19:13:34 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:34 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:36 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:36 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:36 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeInternal(DFSOutputStream.java:830)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:826)
16/11/01 19:13:40 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:40 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:40 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:40 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:40 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:40 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:40 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:40 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:40 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeInternal(DFSOutputStream.java:830)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:826)
16/11/01 19:13:41 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:41 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:42 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/01 19:13:43 INFO db.DBInputFormat: Using read commited transaction isolation
16/11/01 19:13:43 INFO mapreduce.JobSubmitter: number of splits:1
16/11/01 19:13:43 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1478051991187_0002
16/11/01 19:13:45 INFO impl.YarnClientImpl: Submitted application application_1478051991187_0002
16/11/01 19:13:45 INFO mapreduce.Job: The url to track the job: http://quickstart.cloudera:8088/proxy/application_1478051991187_0002/
16/11/01 19:13:45 INFO mapreduce.Job: Running job: job_1478051991187_0002
16/11/01 19:14:09 INFO mapreduce.Job: Job job_1478051991187_0002 running in uber mode : false
16/11/01 19:14:09 INFO mapreduce.Job: map 0% reduce 0%
16/11/01 19:14:30 INFO mapreduce.Job: Task Id : attempt_1478051991187_0002_m_000000_0, Status : FAILED
Error: org.kitesdk.data.DatasetOperationException: Failed to append {"customer_id": 1, "customer_fname": "Richard", "customer_lname": "Hernandez", "customer_email": "XXXXXXXXX", "customer_password": "XXXXXXXXX", "customer_street": "6303 Heather Plaza", "customer_city": "Brownsville", "customer_state": "TX", "customer_zipcode": "78521"} to ParquetAppender{path=hdfs://quickstart.cloudera:8020/tmp/default/.temp/job_1478051991187_0002/mr/attempt_1478051991187_0002_m_000000_0/.50037358-6ff7-4964-8742-d6736bbbacb7.parquet.tmp, schema={"type":"record","name":"customers","fields":[{"name":"id","type":["null","int"],"doc":"Converted from 'int'","default":null},{"name":"name","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"email_preferences","type":["null",{"type":"record","name":"email_preferences","fields":[{"name":"email_format","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"frequency","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"categories","type":["null",{"type":"record","name":"categories","fields":[{"name":"promos","type":["null","boolean"],"doc":"Converted from 'boolean'","default":null},{"name":"surveys","type":["null","boolean"],"doc":"Converted from 'boolean'","default":null}]}],"default":null}]}],"default":null},{"name":"addresses","type":["null",{"type":"map","values":["null",{"type":"record","name":"addresses","fields":[{"name":"street_1","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"street_2","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"city","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"state","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"zip_code","type":["null","string"],"doc":"Converted from 'string'","default":null}]}]}],"doc":"Converted from 'map<string,struct<street_1:string,street_2:string,city:string,state:string,zip_code:string>>'","default":null},{"name":"orders","type":["null",{"type":"array","items":["null",{"type":"record","name":"orders","fields":[{"name":"order_id","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"order_date","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"items","type":["null",{"type":"array","items":["null",{"type":"record","name":"items","fields":[{"name":"product_id","type":["null","int"],"doc":"Converted from 'int'","default":null},{"name":"sku","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"name","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"price","type":["null","double"],"doc":"Converted from 'double'","default":null},{"name":"qty","type":["null","int"],"doc":"Converted from 'int'","default":null}]}]}],"doc":"Converted from 'array<struct<product_id:int,sku:string,name:string,price:double,qty:int>>'","default":null}]}]}],"doc":"Converted from 'array<struct<order_id:string,order_date:string,items:array<struct<product_id:int,sku:string,name:string,price:double,qty:int>>>>'","default":null}]}, fileSystem=DFS[DFSClient[clientName=DFSClient_attempt_1478051991187_0002_m_000000_0_-1040737999_1, ugi=cloudera (auth:SIMPLE)]], avroParquetWriter=parquet.avro.AvroParquetWriter@4c380929}
at org.kitesdk.data.spi.filesystem.FileSystemWriter.write(FileSystemWriter.java:184)
at org.kitesdk.data.mapreduce.DatasetKeyOutputFormat$DatasetRecordWriter.write(DatasetKeyOutputFormat.java:325)
at org.kitesdk.data.mapreduce.DatasetKeyOutputFormat$DatasetRecordWriter.write(DatasetKeyOutputFormat.java:304)
at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.write(MapTask.java:658)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112)
at org.apache.sqoop.mapreduce.ParquetImportMapper.map(ParquetImportMapper.java:70)
at org.apache.sqoop.mapreduce.ParquetImportMapper.map(ParquetImportMapper.java:39)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: java.lang.ClassCastException: java.lang.String cannot be cast to org.apache.avro.generic.IndexedRecord
at org.apache.avro.generic.GenericData.getField(GenericData.java:658)
at parquet.avro.AvroWriteSupport.writeRecordFields(AvroWriteSupport.java:164)
at parquet.avro.AvroWriteSupport.writeRecord(AvroWriteSupport.java:149)
at parquet.avro.AvroWriteSupport.writeValue(AvroWriteSupport.java:262)
at parquet.avro.AvroWriteSupport.writeRecordFields(AvroWriteSupport.java:167)
at parquet.avro.AvroWriteSupport.write(AvroWriteSupport.java:142)
at parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:116)
at parquet.hadoop.ParquetWriter.write(ParquetWriter.java:324)
at org.kitesdk.data.spi.filesystem.ParquetAppender.append(ParquetAppender.java:75)
at org.kitesdk.data.spi.filesystem.ParquetAppender.append(ParquetAppender.java:36)
at org.kitesdk.data.spi.filesystem.FileSystemWriter.write(FileSystemWriter.java:178)
... 16 more
16/11/01 19:14:47 INFO mapreduce.Job: Task Id : attempt_1478051991187_0002_m_000000_1, Status : FAILED
Error: org.kitesdk.data.DatasetOperationException: Failed to append {"customer_id": 1, "customer_fname": "Richard", "customer_lname": "Hernandez", "customer_email": "XXXXXXXXX", "customer_password": "XXXXXXXXX", "customer_street": "6303 Heather Plaza", "customer_city": "Brownsville", "customer_state": "TX", "customer_zipcode": "78521"} to ParquetAppender{path=hdfs://quickstart.cloudera:8020/tmp/default/.temp/job_1478051991187_0002/mr/attempt_1478051991187_0002_m_000000_1/.28314686-e026-4280-b624-260a2a95e50b.parquet.tmp, schema={"type":"record","name":"customers","fields":[{"name":"id","type":["null","int"],"doc":"Converted from 'int'","default":null},{"name":"name","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"email_preferences","type":["null",{"type":"record","name":"email_preferences","fields":[{"name":"email_format","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"frequency","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"categories","type":["null",{"type":"record","name":"categories","fields":[{"name":"promos","type":["null","boolean"],"doc":"Converted from 'boolean'","default":null},{"name":"surveys","type":["null","boolean"],"doc":"Converted from 'boolean'","default":null}]}],"default":null}]}],"default":null},{"name":"addresses","type":["null",{"type":"map","values":["null",{"type":"record","name":"addresses","fields":[{"name":"street_1","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"street_2","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"city","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"state","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"zip_code","type":["null","string"],"doc":"Converted from 'string'","default":null}]}]}],"doc":"Converted from 'map<string,struct<street_1:string,street_2:string,city:string,state:string,zip_code:string>>'","default":null},{"name":"orders","type":["null",{"type":"array","items":["null",{"type":"record","name":"orders","fields":[{"name":"order_id","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"order_date","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"items","type":["null",{"type":"array","items":["null",{"type":"record","name":"items","fields":[{"name":"product_id","type":["null","int"],"doc":"Converted from 'int'","default":null},{"name":"sku","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"name","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"price","type":["null","double"],"doc":"Converted from 'double'","default":null},{"name":"qty","type":["null","int"],"doc":"Converted from 'int'","default":null}]}]}],"doc":"Converted from 'array<struct<product_id:int,sku:string,name:string,price:double,qty:int>>'","default":null}]}]}],"doc":"Converted from 'array<struct<order_id:string,order_date:string,items:array<struct<product_id:int,sku:string,name:string,price:double,qty:int>>>>'","default":null}]}, fileSystem=DFS[DFSClient[clientName=DFSClient_attempt_1478051991187_0002_m_000000_1_-1610591894_1, ugi=cloudera (auth:SIMPLE)]], avroParquetWriter=parquet.avro.AvroParquetWriter@43569009}
at org.kitesdk.data.spi.filesystem.FileSystemWriter.write(FileSystemWriter.java:184)
at org.kitesdk.data.mapreduce.DatasetKeyOutputFormat$DatasetRecordWriter.write(DatasetKeyOutputFormat.java:325)
at org.kitesdk.data.mapreduce.DatasetKeyOutputFormat$DatasetRecordWriter.write(DatasetKeyOutputFormat.java:304)
at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.write(MapTask.java:658)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112)
at org.apache.sqoop.mapreduce.ParquetImportMapper.map(ParquetImportMapper.java:70)
at org.apache.sqoop.mapreduce.ParquetImportMapper.map(ParquetImportMapper.java:39)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: java.lang.ClassCastException: java.lang.String cannot be cast to org.apache.avro.generic.IndexedRecord
at org.apache.avro.generic.GenericData.getField(GenericData.java:658)
at parquet.avro.AvroWriteSupport.writeRecordFields(AvroWriteSupport.java:164)
at parquet.avro.AvroWriteSupport.writeRecord(AvroWriteSupport.java:149)
at parquet.avro.AvroWriteSupport.writeValue(AvroWriteSupport.java:262)
at parquet.avro.AvroWriteSupport.writeRecordFields(AvroWriteSupport.java:167)
at parquet.avro.AvroWriteSupport.write(AvroWriteSupport.java:142)
at parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:116)
at parquet.hadoop.ParquetWriter.write(ParquetWriter.java:324)
at org.kitesdk.data.spi.filesystem.ParquetAppender.append(ParquetAppender.java:75)
at org.kitesdk.data.spi.filesystem.ParquetAppender.append(ParquetAppender.java:36)
at org.kitesdk.data.spi.filesystem.FileSystemWriter.write(FileSystemWriter.java:178)
... 16 more
16/11/01 19:15:07 INFO mapreduce.Job: Task Id : attempt_1478051991187_0002_m_000000_2, Status : FAILED
Error: org.kitesdk.data.DatasetOperationException: Failed to append {"customer_id": 1, "customer_fname": "Richard", "customer_lname": "Hernandez", "customer_email": "XXXXXXXXX", "customer_password": "XXXXXXXXX", "customer_street": "6303 Heather Plaza", "customer_city": "Brownsville", "customer_state": "TX", "customer_zipcode": "78521"} to ParquetAppender{path=hdfs://quickstart.cloudera:8020/tmp/default/.temp/job_1478051991187_0002/mr/attempt_1478051991187_0002_m_000000_2/.3a99f42c-4eb1-4485-a3c3-6ba06bd52148.parquet.tmp, schema={"type":"record","name":"customers","fields":[{"name":"id","type":["null","int"],"doc":"Converted from 'int'","default":null},{"name":"name","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"email_preferences","type":["null",{"type":"record","name":"email_preferences","fields":[{"name":"email_format","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"frequency","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"categories","type":["null",{"type":"record","name":"categories","fields":[{"name":"promos","type":["null","boolean"],"doc":"Converted from 'boolean'","default":null},{"name":"surveys","type":["null","boolean"],"doc":"Converted from 'boolean'","default":null}]}],"default":null}]}],"default":null},{"name":"addresses","type":["null",{"type":"map","values":["null",{"type":"record","name":"addresses","fields":[{"name":"street_1","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"street_2","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"city","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"state","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"zip_code","type":["null","string"],"doc":"Converted from 'string'","default":null}]}]}],"doc":"Converted from 'map<string,struct<street_1:string,street_2:string,city:string,state:string,zip_code:string>>'","default":null},{"name":"orders","type":["null",{"type":"array","items":["null",{"type":"record","name":"orders","fields":[{"name":"order_id","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"order_date","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"items","type":["null",{"type":"array","items":["null",{"type":"record","name":"items","fields":[{"name":"product_id","type":["null","int"],"doc":"Converted from 'int'","default":null},{"name":"sku","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"name","type":["null","string"],"doc":"Converted from 'string'","default":null},{"name":"price","type":["null","double"],"doc":"Converted from 'double'","default":null},{"name":"qty","type":["null","int"],"doc":"Converted from 'int'","default":null}]}]}],"doc":"Converted from 'array<struct<product_id:int,sku:string,name:string,price:double,qty:int>>'","default":null}]}]}],"doc":"Converted from 'array<struct<order_id:string,order_date:string,items:array<struct<product_id:int,sku:string,name:string,price:double,qty:int>>>>'","default":null}]}, fileSystem=DFS[DFSClient[clientName=DFSClient_attempt_1478051991187_0002_m_000000_2_-1940758529_1, ugi=cloudera (auth:SIMPLE)]], avroParquetWriter=parquet.avro.AvroParquetWriter@249cfd31}
at org.kitesdk.data.spi.filesystem.FileSystemWriter.write(FileSystemWriter.java:184)
at org.kitesdk.data.mapreduce.DatasetKeyOutputFormat$DatasetRecordWriter.write(DatasetKeyOutputFormat.java:325)
at org.kitesdk.data.mapreduce.DatasetKeyOutputFormat$DatasetRecordWriter.write(DatasetKeyOutputFormat.java:304)
at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.write(MapTask.java:658)
at org.apache.hadoop.mapreduce.task.TaskInputOutputContextImpl.write(TaskInputOutputContextImpl.java:89)
at org.apache.hadoop.mapreduce.lib.map.WrappedMapper$Context.write(WrappedMapper.java:112)
at org.apache.sqoop.mapreduce.ParquetImportMapper.map(ParquetImportMapper.java:70)
at org.apache.sqoop.mapreduce.ParquetImportMapper.map(ParquetImportMapper.java:39)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:145)
at org.apache.sqoop.mapreduce.AutoProgressMapper.run(AutoProgressMapper.java:64)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:787)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1693)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: java.lang.ClassCastException: java.lang.String cannot be cast to org.apache.avro.generic.IndexedRecord
at org.apache.avro.generic.GenericData.getField(GenericData.java:658)
at parquet.avro.AvroWriteSupport.writeRecordFields(AvroWriteSupport.java:164)
at parquet.avro.AvroWriteSupport.writeRecord(AvroWriteSupport.java:149)
at parquet.avro.AvroWriteSupport.writeValue(AvroWriteSupport.java:262)
at parquet.avro.AvroWriteSupport.writeRecordFields(AvroWriteSupport.java:167)
at parquet.avro.AvroWriteSupport.write(AvroWriteSupport.java:142)
at parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:116)
at parquet.hadoop.ParquetWriter.write(ParquetWriter.java:324)
at org.kitesdk.data.spi.filesystem.ParquetAppender.append(ParquetAppender.java:75)
at org.kitesdk.data.spi.filesystem.ParquetAppender.append(ParquetAppender.java:36)
at org.kitesdk.data.spi.filesystem.FileSystemWriter.write(FileSystemWriter.java:178)
... 16 more
16/11/01 19:15:25 INFO mapreduce.Job: map 100% reduce 0%
16/11/01 19:15:27 INFO mapreduce.Job: Job job_1478051991187_0002 failed with state FAILED due to: Task failed task_1478051991187_0002_m_000000
Job failed as tasks failed. failedMaps:1 failedReduces:0
16/11/01 19:15:27 INFO mapreduce.Job: Counters: 8
Job Counters
Failed map tasks=4
Launched map tasks=4
Other local map tasks=4
Total time spent by all maps in occupied slots (ms)=33132032
Total time spent by all reduces in occupied slots (ms)=0
Total time spent by all map tasks (ms)=64711
Total vcore-seconds taken by all map tasks=64711
Total megabyte-seconds taken by all map tasks=33132032
16/11/01 19:15:27 WARN mapreduce.Counters: Group FileSystemCounters is deprecated. Use org.apache.hadoop.mapreduce.FileSystemCounter instead
16/11/01 19:15:27 INFO mapreduce.ImportJobBase: Transferred 0 bytes in 122.9588 seconds (0 bytes/sec)
16/11/01 19:15:27 WARN mapreduce.Counters: Group org.apache.hadoop.mapred.Task$Counter is deprecated. Use org.apache.hadoop.mapreduce.TaskCounter instead
16/11/01 19:15:27 INFO mapreduce.ImportJobBase: Retrieved 0 records.
16/11/01 19:15:27 ERROR tool.ImportAllTablesTool: Error during import: Import job failed!
Created on 11-02-2016 04:31 AM - edited 11-02-2016 04:33 AM
1 . Type show tables in Hive and note down the tables .
2 . check under user/hive/warehuse/ using Hue -> File Browser or command line
that Customer folder or categories folders are already being populated .
if so Remove it using HUE->File browser-Delete or
Drop table command from Hive.
Then re run the script and please let me know .
Or
Simply change the last line of the script
sqoop import-all-tables \ -m 1 \ --connect jdbc:mysql://quickstart:3306/retail_db \ --username=retail_dba \ --password=cloudera \ --compression-codec=snappy \ --as-sequencefile \ --warehouse-dir=/user/hive/warehouse \ --hive-overwrite
Created 11-01-2016 07:43 PM
Its throwing a class cast exception , meaning you are trying to cast java.lang.String to org.apache.avro.generic.IndexedRecord which is not comptabile . Could you provide the table schema and your sqoop import command.
Created 11-01-2016 07:53 PM
Here is my sqoop command from exercise 1 and I am not sure how to find out the table schema. will research on that shortly.
sqoop import-all-tables \ -m 1 \ --connect jdbc:mysql://quickstart:3306/retail_db \ --username=retail_dba \ --password=cloudera \ --compression-codec=snappy \ --as-parquetfile \ --warehouse-dir=/user/hive/warehouse \ --hive-import
Created 11-01-2016 09:35 PM
Could you replace
--as-parquetfile
with
--as-sequencefile
and let me know if you are able pass through the error.
Created 11-02-2016 04:12 AM
Doesn't work
[cloudera@quickstart ~]$ sqoop import-all-tables \
> -m 1 \
> --connect jdbc:mysql://quickstart:3306/retail_db \
> --username=retail_dba \
> --password=cloudera \
> --compression-codec=snappy \
> --as-sequencefile \
> --warehouse-dir=/user/hive/warehouse \
> --hive-import
Warning: /usr/lib/sqoop/../accumulo does not exist! Accumulo imports will fail.
Please set $ACCUMULO_HOME to the root of your Accumulo installation.
16/11/02 04:11:22 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6-cdh5.8.0
16/11/02 04:11:22 WARN tool.BaseSqoopTool: Setting your password on the command-line is insecure. Consider using -P instead.
16/11/02 04:11:22 INFO tool.BaseSqoopTool: Using Hive-specific delimiters for output. You can override
16/11/02 04:11:22 INFO tool.BaseSqoopTool: delimiters with --fields-terminated-by, etc.
Hive import is not compatible with importing into SequenceFile format.
[cloudera@quickstart ~]$ ^C
Created on 11-02-2016 04:31 AM - edited 11-02-2016 04:33 AM
1 . Type show tables in Hive and note down the tables .
2 . check under user/hive/warehuse/ using Hue -> File Browser or command line
that Customer folder or categories folders are already being populated .
if so Remove it using HUE->File browser-Delete or
Drop table command from Hive.
Then re run the script and please let me know .
Or
Simply change the last line of the script
sqoop import-all-tables \ -m 1 \ --connect jdbc:mysql://quickstart:3306/retail_db \ --username=retail_dba \ --password=cloudera \ --compression-codec=snappy \ --as-sequencefile \ --warehouse-dir=/user/hive/warehouse \ --hive-overwrite
Created 11-02-2016 07:13 AM
Created 11-02-2016 07:15 AM
Things I did to fix the issue.
1) Create a new virtual box sadnbox environment.
2) Lanched cloudera manager express environment.
3) Go to Hue > Hive editor > deleted all the tables with in it.
4) ran the Exercise 1
sqoop import-all-tables \ -m {{cluster_data.worker_node_hostname.length}} \ --connect jdbc:mysql://{{cluster_data.manager_node_hostname}}:3306/retail_db \ --username=retail_dba \ --password=cloudera \ --compression-codec=snappy \ --as-parquetfile \ --warehouse-dir=/user/hive/warehouse \ --hive-import
5) Check all the tables with in Hive and you are good to go.
But there is a minor glitch i.e exception which I am curious about.
/11/02 07:08:32 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/02 07:08:33 INFO db.DBInputFormat: Using read commited transaction isolation
16/11/02 07:08:33 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
16/11/02 07:08:33 WARN hdfs.DFSClient: Caught exception
java.lang.InterruptedException
at java.lang.Object.wait(Native Method)
at java.lang.Thread.join(Thread.java:1281)
at java.lang.Thread.join(Thread.java:1355)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.closeResponder(DFSOutputStream.java:862)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.endBlock(DFSOutputStream.java:600)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:789)
Created 12-19-2016 05:33 PM
@harish172were you able to figure out the cause of the exception? Are you able to see all tables including customers,departments,order_items,products under warehouse in the hive query browser. I only see the table categories after the exception was seen. My command used is typed below in the thread.
Created 12-10-2016 10:21 AM
hadoop fs -ls /user/hive/warehouse//categories shows only .metadata
canot remove that file using hadoop fs -ls rm /user/hive/warehouse/categories/.metadata
alsoe Hue > Query editor
select * from categories keeps running for a while with no result. also delete from categories in hive query editor says failed attempt to delete using transaction manager does not support these operations.
lastly hive in cmd shows nothing
hive> show_tables
>
How do i get rid of categories tables to run the comman as overwrite as you mentioned.