<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Debug Spark program in Eclipse Data in AWS in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/48482#M48706</link>
    <description>&lt;P&gt;It's also possible to establish an SSH tunnel in order to connect to a remote debug session. Take a look at the -L option for ssh: it lets you open a local port and map it to a remote host and port within the ssh command. This works for private IPs as long as you can connect to a server with a public IP that has access to the private network. Note, though, that latency can be extreme, and debugging can still be difficult in setups like this.&lt;/P&gt;</description>
    <pubDate>Tue, 13 Dec 2016 19:11:25 GMT</pubDate>
    <dc:creator>hubbarja</dc:creator>
    <dc:date>2016-12-13T19:11:25Z</dc:date>
    <item>
      <title>Debug Spark program in Eclipse Data in AWS</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/48472#M48704</link>
      <description>&lt;P&gt;Hi all,&lt;/P&gt;&lt;P&gt;I am trying to run and debug a Spark program in Eclipse against a Cloudera cluster on AWS EC2. I tried:&lt;/P&gt;&lt;P&gt;val conf = new SparkConf().setAppName("WordCount").setMaster("yarn-client")&lt;/P&gt;&lt;P&gt;val conf = new SparkConf().setAppName("WordCount").setMaster("local[3]")&lt;/P&gt;&lt;P&gt;I found that I am facing an issue: the namenode in the AWS EC2 cluster returns the private AWS IPs, like 172.31.26.79, 172.31.26.80, etc., which my local Windows machine is not able to resolve.&lt;/P&gt;&lt;P&gt;Any idea how to handle this?&lt;/P&gt;
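&lt;P&gt;For reference, the minimal driver I am running looks roughly like this (the HDFS input path is a placeholder):&lt;/P&gt;&lt;PRE&gt;import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    // "yarn-client" submits to the remote YARN cluster;
    // "local[3]" would instead run everything inside Eclipse on 3 threads.
    val conf = new SparkConf().setAppName("WordCount").setMaster("yarn-client")
    val sc = new SparkContext(conf)
    val counts = sc.textFile("hdfs:///user/test/input.txt") // placeholder path
      .flatMap(_.split("\\s+"))
      .map((_, 1))
      .reduceByKey(_ + _)
    counts.take(10).foreach(println)
    sc.stop()
  }
}&lt;/PRE&gt;</description>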
      <pubDate>Fri, 16 Sep 2022 10:51:05 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/48472#M48704</guid>
      <dc:creator>ranan</dc:creator>
      <dc:date>2022-09-16T10:51:05Z</dc:date>
    </item>
    <item>
      <title>Re: Debug Spark program in Eclipse Data in AWS</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/48477#M48705</link>
      <description>&lt;P&gt;I know moving the program inside the cluster network would help, but sometimes you can't move the program into the cluster.&lt;/P&gt;</description>
      <pubDate>Tue, 13 Dec 2016 18:34:15 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/48477#M48705</guid>
      <dc:creator>ranan</dc:creator>
      <dc:date>2016-12-13T18:34:15Z</dc:date>
    </item>
    <item>
      <title>Re: Debug Spark program in Eclipse Data in AWS</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/48482#M48706</link>
      <description>&lt;P&gt;It's also possible to establish an SSH tunnel in order to connect to a remote debug session. Take a look at the -L option for ssh: it lets you open a local port and map it to a remote host and port within the ssh command. This works for private IPs as long as you can connect to a server with a public IP that has access to the private network. Note, though, that latency can be extreme, and debugging can still be difficult in setups like this.&lt;/P&gt;
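&lt;P&gt;As a rough sketch (the gateway host, user name, and debug port below are all placeholders):&lt;/P&gt;&lt;PRE&gt;import org.apache.spark.SparkConf

// 1) From the local machine, open the tunnel through a host that has a
//    public IP and can reach the private network:
//      ssh -L 5005:172.31.26.79:5005 ec2-user@public-gateway.example.com
// 2) Launch the driver on the cluster side with the JDWP agent listening:
val conf = new SparkConf()
  .setAppName("WordCount")
  .setMaster("yarn-client")
  .set("spark.driver.extraJavaOptions",
       "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005")
// 3) In Eclipse, attach a "Remote Java Application" debug configuration to
//    localhost:5005; the tunnel forwards it to the JVM on the private IP.&lt;/PRE&gt;</description>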
      <pubDate>Tue, 13 Dec 2016 19:11:25 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/48482#M48706</guid>
      <dc:creator>hubbarja</dc:creator>
      <dc:date>2016-12-13T19:11:25Z</dc:date>
    </item>
    <item>
      <title>Re: Debug Spark program in Eclipse Data in AWS</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/48496#M48707</link>
      <description>&lt;P&gt;Thank you for your reply.&lt;/P&gt;&lt;P&gt;I solved the issue by creating another node in AWS EC2 as a workspace and connecting to the rest of the EC2 cluster through that node.&lt;/P&gt;</description>
      <pubDate>Tue, 13 Dec 2016 22:56:54 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/48496#M48707</guid>
      <dc:creator>ranan</dc:creator>
      <dc:date>2016-12-13T22:56:54Z</dc:date>
    </item>
    <item>
      <title>Re: Debug Spark program in Eclipse Data in AWS</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/49574#M48708</link>
      <description>&lt;P&gt;&lt;A href="http://aws.amazon.com/elasticmapreduce/" target="_blank"&gt;Amazon Elastic MapReduce&lt;/A&gt;&lt;SPAN&gt; (EMR) builds proprietary versions of Apache Hadoop, Hive, and Pig optimized for running on Amazon Web Services. Amazon EMR provides a hosted Hadoop framework running on the web-scale infrastructure of Amazon Elastic Compute Cloud (EC2) and Amazon Simple Storage Service (S3).&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 18 Jan 2017 01:39:50 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/49574#M48708</guid>
      <dc:creator>ZachRoes</dc:creator>
      <dc:date>2017-01-18T01:39:50Z</dc:date>
    </item>
    <item>
      <title>Re: Debug Spark program in Eclipse Data in AWS</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/292132#M48709</link>
      <description>&lt;P&gt;Can you help me out with this error?&lt;/P&gt;&lt;P&gt;The same "Connection timed out" stack trace repeats for each DataNode the client tries; it is shown in full once below and abbreviated afterwards.&lt;/P&gt;&lt;P&gt;[Stage 0:&amp;gt; (0 + 1) / 1]20/03/19 21:40:24 WARN BlockReaderFactory: I/O error constructing remote block reader.&lt;BR /&gt;java.net.ConnectException: Connection timed out: no further information&lt;BR /&gt;at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)&lt;BR /&gt;at sun.nio.ch.SocketChannelImpl.finishConnect(Unknown Source)&lt;BR /&gt;at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)&lt;BR /&gt;at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)&lt;BR /&gt;at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3436)&lt;BR /&gt;at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:777)&lt;BR /&gt;at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:694)&lt;BR /&gt;at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:355)&lt;BR /&gt;at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:673)&lt;BR /&gt;at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:882)&lt;BR /&gt;at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:934)&lt;BR /&gt;at java.io.DataInputStream.read(Unknown Source)&lt;BR /&gt;at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:62)&lt;BR /&gt;at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)&lt;BR /&gt;at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)&lt;BR /&gt;at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:94)&lt;BR /&gt;at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:144)&lt;BR /&gt;at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.nextKeyValue(LineRecordReader.java:184)&lt;BR /&gt;at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)&lt;BR /&gt;at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.hasNext(HadoopFileLinesReader.scala:69)&lt;BR /&gt;at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)&lt;BR /&gt;at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)&lt;BR /&gt;at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)&lt;BR /&gt;at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:181)&lt;BR /&gt;at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)&lt;BR /&gt;at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)&lt;BR /&gt;at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)&lt;BR /&gt;at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)&lt;BR /&gt;at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)&lt;BR /&gt;at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)&lt;BR /&gt;at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)&lt;BR /&gt;at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)&lt;BR /&gt;at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)&lt;BR /&gt;at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)&lt;BR /&gt;at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)&lt;BR /&gt;at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)&lt;BR /&gt;at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)&lt;BR /&gt;at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)&lt;BR /&gt;at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)&lt;BR /&gt;at org.apache.spark.scheduler.Task.run(Task.scala:123)&lt;BR /&gt;at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)&lt;BR /&gt;at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)&lt;BR /&gt;at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)&lt;BR /&gt;at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)&lt;BR /&gt;at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)&lt;BR /&gt;at java.lang.Thread.run(Unknown Source)&lt;BR /&gt;20/03/19 21:40:24 WARN DFSClient: Failed to connect to /172.31.34.22:50010 for block, add to deadNodes and continue. java.net.ConnectException: Connection timed out: no further information&lt;BR /&gt;[same stack trace as above]&lt;BR /&gt;20/03/19 21:40:45 WARN BlockReaderFactory: I/O error constructing remote block reader.&lt;BR /&gt;[same stack trace as above]&lt;BR /&gt;20/03/19 21:40:45 WARN DFSClient: Failed to connect to /172.31.37.160:50010 for block, add to deadNodes and continue.&lt;BR /&gt;[same stack trace as above]&lt;BR /&gt;[Stage 0:&amp;gt; (0 + 1) / 1]20/03/19 21:41:06 WARN BlockReaderFactory: I/O error constructing remote block reader.&lt;BR /&gt;[same stack trace as above]&lt;BR /&gt;20/03/19 21:41:06 WARN DFSClient: Failed to connect to /172.31.37.41:50010 for block, add to deadNodes and continue.&lt;BR /&gt;[same stack trace as above]&lt;BR /&gt;20/03/19 21:41:06 WARN DFSClient: DFS chooseDataNode: got # 1 IOException, will wait for 2719.6350079908057 msec.&lt;BR /&gt;20/03/19 21:41:30 WARN BlockReaderFactory: I/O error constructing remote block reader.&lt;BR /&gt;[same stack trace as above]&lt;BR /&gt;20/03/19 21:41:30 WARN DFSClient: Failed to connect to /172.31.37.160:50010 for block, add to deadNodes and continue.&lt;BR /&gt;[same stack trace as above]&lt;/P&gt;</description>
      <pubDate>Thu, 19 Mar 2020 16:38:55 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Debug-Spark-program-in-Eclipse-Data-in-AWS/m-p/292132#M48709</guid>
      <dc:creator>ParthiCyberPunk</dc:creator>
      <dc:date>2020-03-19T16:38:55Z</dc:date>
    </item>
  </channel>
</rss>

