Created 03-31-2016 10:16 PM
Trying to process a file from S3, but I can't load the right dependencies. I'd appreciate your advice.
I am passing the aws-sdk jars to spark-shell, but I am getting an error when the shell initializes.
{code}
[root@sandbox bin]# ./spark-shell --master yarn-client --jars /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar:/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar:/root/aws-java-sdk-1.10.65.jar --driver-memory 512m --executor-memory 512m
Warning: Local jar /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar:/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar:/root/aws-java-sdk-1.10.65.jar does not exist, skipping.
{code}
{code}
[root@sandbox bin]# ll /root/aws-java-sdk-1.10.65.jar
-rw-r--r-- 1 root root 32380018 2016-03-31 21:56 /root/aws-java-sdk-1.10.65.jar
{code}
Without the aws-sdk jar I get a NoClassDefFoundError: "Caused by: java.lang.NoClassDefFoundError: com/amazonaws/event/ProgressListener".
{code}
[root@sandbox bin]# ./spark-shell --master yarn-client --jars /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar:/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar --driver-memory 512m --executor-memory 512m

java.util.ServiceConfigurationError: org.apache.hadoop.fs.FileSystem: Provider org.apache.hadoop.fs.s3a.S3AFileSystem could not be instantiated
	at java.util.ServiceLoader.fail(ServiceLoader.java:224)
	at java.util.ServiceLoader.access$100(ServiceLoader.java:181)
	at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:377)
	at java.util.ServiceLoader$1.next(ServiceLoader.java:445)
	at org.apache.hadoop.fs.FileSystem.loadFileSystems(FileSystem.java:2623)
	at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2634)
	at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2651)
	at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:92)
	at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2687)
	at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2669)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:371)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:170)
	at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:355)
	at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
	at org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl.serviceStart(TimelineClientImpl.java:373)
	at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
	at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.serviceStart(YarnClientImpl.java:194)
	at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
	at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:126)
	at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:56)
	at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:144)
	at org.apache.spark.SparkContext.<init>(SparkContext.scala:530)
	at org.apache.spark.repl.SparkILoop.createSparkContext(SparkILoop.scala:1017)
	at $iwC$$iwC.<init>(<console>:15)
	at $iwC.<init>(<console>:24)
	at <init>(<console>:26)
	at .<init>(<console>:30)
	at .<clinit>(<console>)
	at .<init>(<console>:7)
	at .<clinit>(<console>)
	at $print(<console>)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
	at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
	at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
	at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
	at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
	at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
	at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:125)
	at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
	at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
	at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
	at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)
	at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
	at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
	at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
	at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
	at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
	at org.apache.spark.repl.Main$.main(Main.scala:31)
	at org.apache.spark.repl.Main.main(Main.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
	at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
	at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NoClassDefFoundError: com/amazonaws/event/ProgressListener
	at java.lang.Class.getDeclaredConstructors0(Native Method)
	at java.lang.Class.privateGetDeclaredConstructors(Class.java:2595)
	at java.lang.Class.getConstructor0(Class.java:2895)
	at java.lang.Class.newInstance(Class.java:354)
	at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:373)
	... 67 more
Caused by: java.lang.ClassNotFoundException: com.amazonaws.event.ProgressListener
	at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
	at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
	at java.security.AccessController.doPrivileged(Native Method)
	at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
	... 72 more

java.lang.NullPointerException
	at org.apache.spark.sql.SQLContext$.createListenerAndUI(SQLContext.scala:1367)
	at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:101)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
	at org.apache.spark.repl.SparkILoop.createSQLContext(SparkILoop.scala:1028)
	at $iwC$$iwC.<init>(<console>:15)
	at $iwC.<init>(<console>:24)
	at <init>(<console>:26)
	at .<init>(<console>:30)
	at .<clinit>(<console>)
	at .<init>(<console>:7)
	at .<clinit>(<console>)
	at $print(<console>)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
	at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
	at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
	at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
	at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
	at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
	at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:132)
	at org.apache.spark.repl.SparkILoopInit$$anonfun$initializeSpark$1.apply(SparkILoopInit.scala:124)
	at org.apache.spark.repl.SparkIMain.beQuietDuring(SparkIMain.scala:324)
	at org.apache.spark.repl.SparkILoopInit$class.initializeSpark(SparkILoopInit.scala:124)
	at org.apache.spark.repl.SparkILoop.initializeSpark(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1$$anonfun$apply$mcZ$sp$5.apply$mcV$sp(SparkILoop.scala:974)
	at org.apache.spark.repl.SparkILoopInit$class.runThunks(SparkILoopInit.scala:159)
	at org.apache.spark.repl.SparkILoop.runThunks(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoopInit$class.postInitialization(SparkILoopInit.scala:108)
	at org.apache.spark.repl.SparkILoop.postInitialization(SparkILoop.scala:64)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:991)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
	at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
	at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
	at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
	at org.apache.spark.repl.Main$.main(Main.scala:31)
	at org.apache.spark.repl.Main.main(Main.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:606)
	at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
	at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
	at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

<console>:16: error: not found: value sqlContext
         import sqlContext.implicits._
                ^
<console>:16: error: not found: value sqlContext
         import sqlContext.sql
{code}
Created 03-31-2016 11:59 PM
The --jars option is comma-delimited, not colon-delimited. Hence the error: Spark reports that it can't find a single local jar with one very long name. Change your ':' to ',' and you should get further.
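For reference, a minimal sketch of an alternative (not from this thread, and assuming the jar paths from the sandbox listing above): the same jars can instead be placed on the driver and executor classpath via spark-defaults.conf, where the value is a normal ':'-separated classpath rather than the ','-separated list that --jars expects.
{code}
# Sketch only: spark-defaults.conf entries. Paths assume the HDP 2.4 sandbox layout shown in this
# thread; the jars must exist at these paths on every node that runs an executor.
spark.driver.extraClassPath   /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar:/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar:/root/aws-java-sdk-1.10.65.jar
spark.executor.extraClassPath /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar:/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar:/root/aws-java-sdk-1.10.65.jar
{code}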
Created 03-31-2016 10:17 PM
{code}
[root@sandbox bin]# ll /usr/hdp/2.4.0.0-169/hadoop/
total 242692
-rw-r--r-- 1 root root  32380018 2016-03-31 22:02 aws-java-sdk-1.10.65.jar
drwxr-xr-x 2 root root      4096 2016-02-29 18:05 bin
drwxr-xr-x 2 root root     12288 2016-02-29 17:49 client
lrwxrwxrwx 1 root root        25 2016-03-31 21:08 conf -> /etc/hadoop/2.4.0.0-169/0
drwxr-xr-x 2 root root      4096 2016-02-29 17:46 etc
-rw-r--r-- 1 root root     17366 2016-02-10 06:44 hadoop-annotations-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        40 2016-02-29 17:46 hadoop-annotations.jar -> hadoop-annotations-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root     71534 2016-02-10 06:44 hadoop-auth-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        33 2016-02-29 17:46 hadoop-auth.jar -> hadoop-auth-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root    103049 2016-02-10 06:44 hadoop-aws-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        32 2016-02-29 17:46 hadoop-aws.jar -> hadoop-aws-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root    138488 2016-02-10 06:44 hadoop-azure-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        34 2016-02-29 17:46 hadoop-azure.jar -> hadoop-azure-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root   3469432 2016-02-10 06:44 hadoop-common-2.7.1.2.4.0.0-169.jar
-rw-r--r-- 1 root root   1903274 2016-02-10 06:44 hadoop-common-2.7.1.2.4.0.0-169-tests.jar
lrwxrwxrwx 1 root root        35 2016-02-29 17:46 hadoop-common.jar -> hadoop-common-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        41 2016-02-29 17:46 hadoop-common-tests.jar -> hadoop-common-2.7.1.2.4.0.0-169-tests.jar
-rw-r--r-- 1 root root    159484 2016-02-10 06:44 hadoop-nfs-2.7.1.2.4.0.0-169.jar
lrwxrwxrwx 1 root root        32 2016-02-29 17:46 hadoop-nfs.jar -> hadoop-nfs-2.7.1.2.4.0.0-169.jar
drwxr-xr-x 5 root root      4096 2016-03-31 20:27 lib
drwxr-xr-x 2 root root      4096 2016-02-29 17:46 libexec
drwxr-xr-x 3 root root      4096 2016-02-29 17:46 man
-rw-r--r-- 1 root root 210216729 2016-02-10 06:44 mapreduce.tar.gz
drwxr-xr-x 2 root root      4096 2016-02-29 17:46 sbin
{code}
Created 04-01-2016 01:30 AM
Dope... Thanks @Simon Elliston Ball, that worked!
./spark-shell --master yarn-client --jars /usr/hdp/2.4.0.0-169/hadoop/hadoop-aws-2.7.1.2.4.0.0-169.jar,/usr/hdp/2.4.0.0-169/hadoop/hadoop-auth.jar,/usr/hdp/2.4.0.0-169/hadoop/aws-java-sdk-1.10.65.jar --driver-memory 512m --executor-memory 512m
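Once the shell comes up with those jars, a minimal sketch of reading a file over S3A (the bucket, object path, and credentials below are placeholders, not from this thread):
{code}
// Sketch only: set S3A credentials on the Hadoop configuration used by this SparkContext,
// then read an object as an RDD and count its lines.
sc.hadoopConfiguration.set("fs.s3a.access.key", "YOUR_ACCESS_KEY")
sc.hadoopConfiguration.set("fs.s3a.secret.key", "YOUR_SECRET_KEY")

val lines = sc.textFile("s3a://your-bucket/path/to/file.txt")
println(lines.count())
{code}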
Created 11-01-2016 04:18 AM
I am still having this issue.
Env:
HDInsight: Spark 1.6 on Linux (HDI 3.5.1000.0)
HDP Version: 2.5.1.0-56
Spark: spark-assembly-1.6.2.2.5.1.0-56-hadoop2.7.3.2.5.1.0-56.jar
Issue:
{code}
11/01/2016 04:06:31 [INFO] [ExecHelper] [] [] [] [] [20] [] [] [] Executing Command :[/opt/lib/spark-1.6.2-bin-hadoop2.7/bin/spark-submit, --name, AJ-21-for-Romo-MsSQL-HDFS, --class, com.bigdlabs.romo.tool.RomoMain, --master, yarn-client, --num-executors, 1, --driver-memory, 1g, --executor-memory, 1g, --executor-cores, 1, --driver-java-options="-XX:MaxPermSize=256m", --jars, /opt/conflux/dependencylibs/spark1.6/jersey-server-1.9.jar,/opt/conflux/dependencylibs/spark1.6/datanucleus-api-jdo-3.2.6.jar,/opt/conflux/dependencylibs/spark1.6/sqljdbc4.jar,/opt/conflux/dependencylibs/spark1.6/datanucleus-rdbms-3.2.9.jar,/opt/conflux/dependencylibs/spark1.6/microsoft-log4j-etwappender-1.0.jar,/opt/conflux/dependencylibs/spark1.6/datanucleus-core-3.2.10.jar, /opt/conflux/lib/romolib/romo-0.0.1-SNAPSHOT.jar, --STATS_REST_ENDPOINT=http://apervi-azr-conflux-test2.apervi.com:8080/workflowmanager, sourcerdbms=sourcerdbms, sourcerdbms.name=RDBMS-1, sourcerdbms.url=jdbc:sqlserver://....., sourcerdbms.table=sanity_test, sourcerdbms.driver=com.microsoft.sqlserver.jdbc.SQLServerDriver, sourcerdbms.query=RDBMS-Src-21-RDBMS-1.sql, sourcerdbms.infields=id,name,logindate, sourcerdbms.infieldstypes=java.lang.Integer,java.lang.String,java.util.Date, sourcerdbms.outfields=id,name,logindate, sourcerdbms.outfieldstypes=java.lang.Integer,java.lang.String,java.util.Date, sourcerdbms.parallelism=2, sourcerdbms.retain.fields=id,name,logindate, sourcerdbms.wfitemstatusid=40, filesink=filesink, filesink.name=Delimited_File-1, filesink.source=RDBMS-1, filesink.filetype=text, filesink.fsurl=hdfs://.....:8020, filesink.path=/user/conflux/output/mssql-hdfs.out, filesink.delimiter=,, filesink.quote=", filesink.quotemode=MINIMAL, filesink.compression, filesink.infields=id,name,logindate, filesink.infieldstypes=java.lang.Integer,java.lang.String,java.util.Date, filesink.writefields=id,name,logindate, filesink.writefieldstypes=java.lang.Integer,java.lang.String,java.util.Date, filesink.replace=true, filesink.writeheader=true, filesink.singlefile=true, filesink.retain.fields=id,name,logindate, filesink.wfitemstatusid=39]
{code}
Logs:
{code}
2016-11-01 04:06:34,771 - WARN [main:FileSystem@2731] - Cannot load filesystem
java.util.ServiceConfigurationError: org.apache.hadoop.fs.FileSystem: Provider org.apache.hadoop.fs.s3a.S3AFileSystem could not be instantiated
	at java.util.ServiceLoader.fail(ServiceLoader.java:232)
	at java.util.ServiceLoader.access$100(ServiceLoader.java:185)
	at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384)
	at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404)
	at java.util.ServiceLoader$1.next(ServiceLoader.java:480)
	at org.apache.hadoop.fs.FileSystem.loadFileSystems(FileSystem.java:2723)
	at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2742)
	at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2759)
	at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:99)
	at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2795)
	at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:2783)
	at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:433)
	at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:441)
	at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:423)
	at org.apache.hadoop.yarn.client.api.impl.FileSystemTimelineWriter.<init>(FileSystemTimelineWriter.java:122)
	at org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl.createTimelineWriter(TimelineClientImpl.java:317)
	at org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl.serviceStart(TimelineClientImpl.java:309)
	at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
	at org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.serviceStart(YarnClientImpl.java:199)
	at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193)
	at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:127)
	at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:56)
	at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:144)
	at org.apache.spark.SparkContext.<init>(SparkContext.scala:530)
	at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:59)
	at com.bigdlabs.romo.tool.RomoMain.plan(RomoMain.java:318)
	at com.bigdlabs.romo.tool.RomoMain.execute(RomoMain.java:257)
	at com.bigdlabs.romo.tool.RomoMain.main(RomoMain.java:471)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
	at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
	at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NoClassDefFoundError: com/amazonaws/AmazonClientException
{code}
Created 11-07-2016 03:28 PM
You shouldn't be seeing this on HDP 2.5; everything needed to talk to S3A is already on the classpath for Spark (we have done a lot of work on S3A performance for this release).
Is the job actually failing, or is it just warning you that it couldn't create the S3A filesystem and then carrying on?
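If it is failing outright, a quick sanity check from spark-shell on HDP 2.5 might look like the sketch below, with no extra --jars (the bucket, object path, and credentials are placeholders, not from this thread):
{code}
// Sketch only: confirm the S3A filesystem and the AWS SDK class from the warning both resolve,
// then try a small read. Class.forName throws ClassNotFoundException if a class really is missing.
Class.forName("org.apache.hadoop.fs.s3a.S3AFileSystem")   // the filesystem implementation
Class.forName("com.amazonaws.AmazonClientException")      // the AWS SDK class the warning reports as missing

sc.hadoopConfiguration.set("fs.s3a.access.key", "YOUR_ACCESS_KEY")
sc.hadoopConfiguration.set("fs.s3a.secret.key", "YOUR_SECRET_KEY")
sc.textFile("s3a://your-bucket/some/object.txt").take(5).foreach(println)
{code}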