Spark and S3 dependencies

Trying to process a file from S3, but I can't load the right dependencies. I'd appreciate your advice.

I am passing the AWS SDK jars to spark-shell, but I get an error during shell initialization.

[root@sandbox bin]# ./spark-shell  --master yarn-client --jars /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar:/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar:/root/aws-java-sdk-1.10.65.jar --driver-memory 512m --executor-memory 512m
Warning: Local jar /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar:/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar:/root/aws-java-sdk-1.10.65.jar does not exist, skipping.

[root@sandbox bin]# ll /root/aws-java-sdk-1.10.65.jar
-rw-r--r-- 1 root root 32380018 2016-03-31 21:56 /root/aws-java-sdk-1.10.65.jar

Without the aws-sdk jar, I instead get a NoClassDefFoundError: "Caused by: java.lang.NoClassDefFoundError: com/amazonaws/event/ProgressListener".
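(That class ships inside the SDK jar; a quick way to confirm, assuming unzip is available on the sandbox:)

unzip -l /root/aws-java-sdk-1.10.65.jar | grep ProgressListener

Here is the run without the SDK jar: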

[root@sandbox bin]# ./spark-shell  --master yarn-client --jars /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar:/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar --driver-memory 512m --executor-memory 512m

java.util.ServiceConfigurationError: org.apache.hadoop.fs.FileSystem: Provider org.apache.hadoop.fs.s3a.S3AFileSystem could not be instantiated
	at java.util.ServiceLoader.fail(ServiceLoader.java:224)
	at java.util.ServiceLoader.access$100(ServiceLoader.java:181)
	at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:377)
	at java.util.ServiceLoader$1.next(ServiceLoader.java:445)
	at org.apache.hadoop.fs.FileSystem.loadFileSystems(FileSystem.java:2623)
	at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2634)
	at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2651)
	at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:92)
	at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2687)
	at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2669)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:371)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:170)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:355)
	at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
	at org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl.serviceStart(TimelineClientImpl.java:373)
	at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
	at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.serviceStart(YarnClientImpl.java:194)
	at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
	at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:126)
	at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:56)
	at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:144)
	at org.apache.spark.SparkContext.<init>(SparkContext.scala:530)
	at org.apache.spark.repl.SparkILoop.createSparkContext(SparkILoop.scala:1017)
	at $iwC$$iwC.<init>(<console>:15)
	at $iwC.<init>(<console>:24)
	at <init>(<console>:26)
	at .<init>(<console>:30)
	at .<clinit>(<console>)
	at .<init>(<console>:7)
	at .<clinit>(<console>)
	at $print(<console>)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
	at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
	at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
	at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
	at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
	at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
	at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:125)
	at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
	at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
	at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
	at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)
	at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
	at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
	at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
	at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
	at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
	at org.apache.spark.repl.Main$.main(Main.scala:31)
	at org.apache.spark.repl.Main.main(Main.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
	at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
	at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NoClassDefFoundError: com/amazonaws/event/ProgressListener
	at java.lang.Class.getDeclaredConstructors0(Native Method)
	at java.lang.Class.privateGetDeclaredConstructors(Class.java:2595)
	at java.lang.Class.getConstructor0(Class.java:2895)
	at java.lang.Class.newInstance(Class.java:354)
	at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:373)
	... 67 more
Caused by: java.lang.ClassNotFoundException: com.amazonaws.event.ProgressListener
	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
	at java.security.AccessController.doPrivileged(Native Method)
	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
	... 72 more




java.lang.NullPointerException
	at org.apache.spark.sql.SQLContext$.createListenerAndUI(SQLContext.scala:1367)
	at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:101)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
	at org.apache.spark.repl.SparkILoop.createSQLContext(SparkILoop.scala:1028)
	at $iwC$$iwC.<init>(<console>:15)
	at $iwC.<init>(<console>:24)
	at <init>(<console>:26)
	at .<init>(<console>:30)
	at .<clinit>(<console>)
	at .<init>(<console>:7)
	at .<clinit>(<console>)
	at $print(<console>)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
	at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
	at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
	at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
	at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
	at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
	at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:132)
	at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
	at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
	at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
	at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)
	at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
	at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
	at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
	at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
	at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
	at org.apache.spark.repl.Main$.main(Main.scala:31)
	at org.apache.spark.repl.Main.main(Main.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
	at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
	at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

<console>:16: error: not found: value sqlContext
         import sqlContext.implicits._
                ^
<console>:16: error: not found: value sqlContext
         import sqlContext.sql
1 ACCEPTED SOLUTION

Guru

The --jars option is comma-delimited, not colon-delimited. Hence the warning: Spark is looking for a single jar whose name is your entire colon-joined list. Change the ':' separators to ',' and you should get further.
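For example, the command from the question with comma separators (same jars, same paths):

./spark-shell --master yarn-client --jars /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar,/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar,/root/aws-java-sdk-1.10.65.jar --driver-memory 512m --executor-memory 512m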


5 REPLIES

[root@sandbox bin]# ll /usr/hdp/2.4.0.0-169/hadoop/
total 242692
-rw-r--r-- 1 root root  32380018 2016-03-31 22:02 aws-java-sdk-1.10.65.jar
drwxr-xr-x 2 root root      4096 2016-02-29 18:05 bin
drwxr-xr-x 2 root root     12288 2016-02-29 17:49 client
lrwxrwxrwx 1 root root        25 2016-03-31 21:08 conf -> /etc/hadoop/2.4.0.0-169/0
drwxr-xr-x 2 root root      4096 2016-02-29 17:46 etc
-rw-r--r-- 1 root root     17366 2016-02-10 06:44 hadoop-annotations-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        40 2016-02-29 17:46 hadoop-annotations.jar -> hadoop-annotations-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root     71534 2016-02-10 06:44 hadoop-auth-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        33 2016-02-29 17:46 hadoop-auth.jar -> hadoop-auth-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root    103049 2016-02-10 06:44 hadoop-aws-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        32 2016-02-29 17:46 hadoop-aws.jar -> hadoop-aws-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root    138488 2016-02-10 06:44 hadoop-azure-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        34 2016-02-29 17:46 hadoop-azure.jar -> hadoop-azure-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root   3469432 2016-02-10 06:44 hadoop-common-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root   1903274 2016-02-10 06:44 hadoop-common-2.7.1.2.4.0.0-169-tests.jar
lrwxrwxrwx 1 root root        35 2016-02-29 17:46 hadoop-common.jar -> hadoop-common-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        41 2016-02-29 17:46 hadoop-common-tests.jar -> hadoop-common-2.7.1.2.4.0.0-169-tests.jar
-rw-r--r-- 1 root root    159484 2016-02-10 06:44 hadoop-nfs-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        32 2016-02-29 17:46 hadoop-nfs.jar -> hadoop-nfs-2.7.1.2.4.0.0-169.jar
drwxr-xr-x 5 root root      4096 2016-03-31 20:27 lib
drwxr-xr-x 2 root root      4096 2016-02-29 17:46 libexec
drwxr-xr-x 3 root root      4096 2016-02-29 17:46 man
-rw-r--r-- 1 root root 210216729 2016-02-10 06:44 mapreduce.tar.gz
drwxr-xr-x 2 root root      4096 2016-02-29 17:46 sbin



Dope... Thanks @Simon Elliston Ball, that worked!

./spark-shell --master yarn-client --jars /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar,/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar,/usr/hdp/2.4.0.0-169/hadoop/aws-java-sdk-1.10.65.jar --driver-memory 512m --executor-memory 512m
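With the jars on the classpath, a minimal read from the shell looks something like this (the bucket and path are placeholders; fs.s3a.access.key and fs.s3a.secret.key are the standard Hadoop 2.7 S3A credential properties):

// S3A credentials (property names from hadoop-aws 2.7); bucket/path below are hypothetical.
sc.hadoopConfiguration.set("fs.s3a.access.key", "YOUR_ACCESS_KEY")
sc.hadoopConfiguration.set("fs.s3a.secret.key", "YOUR_SECRET_KEY")
// Read a text file via the s3a:// scheme and count its lines.
val lines = sc.textFile("s3a://your-bucket/path/to/file.txt")
lines.count()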

Contributor

I am still having this issue.

Env:

HDInsight: Spark 1.6 on Linux (HDI 3.5.1000.0)

HDP Version: 2.5.1.0-56

Spark: spark-assembly-1.6.2.2.5.1.0-56-hadoop2.7.3.2.5.1.0-56.jar

Issue:

{code}

11/01/2016 04:06:31 [INFO] [ExecHelper] [] [] [] [] [20] [] [] [] Executing Command :[/opt/lib/spark-1.6.2-bin-hadoop2.7/bin/spark-submit, --name, AJ-21-for-Romo-MsSQL-HDFS, --class, com.bigdlabs.romo.tool.RomoMain, --master, yarn-client, --num-executors, 1, --driver-memory, 1g, --executor-memory, 1g, --executor-cores, 1, --driver-java-options="-XX:MaxPermSize=256m", --jars, /opt/conflux/dependencylibs/spark1.6/jersey-server-1.9.jar,/opt/conflux/dependencylibs/spark1.6/datanucleus-api-jdo-3.2.6.jar,/opt/conflux/dependencylibs/spark1.6/sqljdbc4.jar,/opt/conflux/dependencylibs/spark1.6/datanucleus-rdbms-3.2.9.jar,/opt/conflux/dependencylibs/spark1.6/microsoft-log4j-etwappender-1.0.jar,/opt/conflux/dependencylibs/spark1.6/datanucleus-core-3.2.10.jar, /opt/conflux/lib/romolib/romo-0.0.1-SNAPSHOT.jar, --STATS_REST_ENDPOINT=http://apervi-azr-conflux-test2.apervi.com:8080/workflowmanager, sourcerdbms=sourcerdbms, sourcerdbms.name=RDBMS-1, sourcerdbms.url=jdbc:sqlserver://....., sourcerdbms.table=sanity_test, sourcerdbms.driver=com.microsoft.sqlserver.jdbc.SQLServerDriver, sourcerdbms.query=RDBMS-Src-21-RDBMS-1.sql, sourcerdbms.infields=id,name,logindate, sourcerdbms.infieldstypes=java.lang.Integer,java.lang.String,java.util.Date, sourcerdbms.outfields=id,name,logindate, sourcerdbms.outfieldstypes=java.lang.Integer,java.lang.String,java.util.Date, sourcerdbms.parallelism=2, sourcerdbms.retain.fields=id,name,logindate, sourcerdbms.wfitemstatusid=40, filesink=filesink, filesink.name=Delimited_File-1, filesink.source=RDBMS-1, filesink.filetype=text, filesink.fsurl=hdfs://.....:8020, filesink.path=/user/conflux/output/mssql-hdfs.out, filesink.delimiter=,, filesink.quote=", filesink.quotemode=MINIMAL, filesink.compression, filesink.infields=id,name,logindate, filesink.infieldstypes=java.lang.Integer,java.lang.String,java.util.Date, filesink.writefields=id,name,logindate, filesink.writefieldstypes=java.lang.Integer,java.lang.String,java.util.Date, filesink.replace=true, filesink.writeheader=true, filesink.singlefile=true, filesink.retain.fields=id,name,logindate, filesink.wfitemstatusid=39]

{code}

Logs

{code}

11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT>2016-11-01 04:06:34,771 - WARN [main:FileSystem@2731] - Cannot load filesystem
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT>java.util.ServiceConfigurationError: org.apache.hadoop.fs.FileSystem: Provider org.apache.hadoop.fs.s3a.S3AFileSystem could not be instantiated
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at java.util.ServiceLoader.fail(ServiceLoader.java:232)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.fs.FileSystem.loadFileSystems(FileSystem.java:2723)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2742)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2759)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:99)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2795)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:2783)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:433)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:441)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:423)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.yarn.client.api.impl.FileSystemTimelineWriter.<init>(FileSystemTimelineWriter.java:122)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl.createTimelineWriter(TimelineClientImpl.java:317)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl.serviceStart(TimelineClientImpl.java:309)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.serviceStart(YarnClientImpl.java:199)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:127)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:56)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:144)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.SparkContext.<init>(SparkContext.scala:530)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:59)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at com.bigdlabs.romo.tool.RomoMain.plan(RomoMain.java:318)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at com.bigdlabs.romo.tool.RomoMain.execute(RomoMain.java:257)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at com.bigdlabs.romo.tool.RomoMain.main(RomoMain.java:471)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at java.lang.reflect.Method.invoke(Method.java:498)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
11/01/2016 04:06:34 [INFO] [StreamGobbler] [] [] [] [] [20] [] [] [] OUTPUT>Caused by: java.lang.NoClassDefFoundError: com/amazonaws/AmazonClientException

{code}


You shouldn't be seeing this on HDP 2.5; everything needed to talk to S3A is already on the classpath for Spark (we have done a lot of work on S3A performance for this release).

Is the job actually failing, or is it just warning you that it couldn't create the S3A filesystem and then carrying on?
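A quick way to tell whether the AWS SDK classes are actually visible (a sketch, run from spark-shell, mirroring what java.util.ServiceLoader does when it instantiates the provider):

// Instantiate the S3A provider reflectively, the same way ServiceLoader does.
// If the AWS SDK is missing from the classpath, this throws the same
// NoClassDefFoundError seen in the logs (e.g. com/amazonaws/AmazonClientException).
val s3a = Class.forName("org.apache.hadoop.fs.s3a.S3AFileSystem").newInstance()
println(s3a.getClass.getName)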