
CDH 5.10.1 Oozie cannot start Spark job

We have an Oozie job that worked for us on CDH 5.9.x and still works when submitted directly with spark-submit, but it fails under Oozie in CDH 5.10.1 with the following error:

java.io.FileNotFoundException: File file:/data03/yarn/nm/usercache/gap/appcache/application_1491921197580_0957/container_e05_1491921197580_0957_01_000001/spark.yarn.maxAppAttempts=1 does not exist

This looks like a parsing problem: one of the --conf settings from <spark-opts> (spark.yarn.maxAppAttempts=1) is apparently being treated as a local file name instead of as a configuration option.
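
For illustration, here is a guess at the mechanism (a sketch only, not taken from the Oozie source): if the launcher tokenizes the multi-line <spark-opts> value on whitespace and then mis-pairs a --conf flag with its value, the bare key=value token would be passed along as a plain file argument:

# Hypothetical sketch: naive whitespace tokenization of part of our <spark-opts>
OPTS='--queue root.production.high --conf spark.yarn.maxAppAttempts=1 --conf spark.task.maxFailures=10'
for tok in $OPTS; do echo "token: $tok"; done
# prints: --queue, root.production.high, --conf, spark.yarn.maxAppAttempts=1, ...
# If "spark.yarn.maxAppAttempts=1" ever loses its preceding "--conf", Spark
# treats it as an application file, which matches the FileNotFoundException above.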

Here is our workflow.xml:

<workflow-app xmlns="uri:oozie:workflow:0.4" name="run-aggregate-events-job-wf">

    <start to="run-aggregate-events-job"/>

    <action name="run-aggregate-events-job" retry-max="0">
        <spark xmlns="uri:oozie:spark-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
                <property>
                    <name>oozie.launcher.mapred.job.queue.name</name>
                    <value>root.production.oozie-launcher</value>
                </property>
                <property>
                    <!--See https://community.hortonworks.com/questions/14402/yarn-preemption-and-oozie.html-->
                    <name>oozie.launcher.mapreduce.job.ubertask.enable</name>
                    <value>true</value>
                </property>

            </configuration>
            <master>${sparkMaster}</master>
            <name>${sparkJobName}</name>
            <class>com.glispa.gdmp.aggregate.events.AggregateEventsJob</class>
            <jar>${jarPath}</jar>
            <spark-opts>--queue ${queueName} --files=${sparkFiles} --conf spark.yarn.maxAppAttempts=1 --conf spark.task.maxFailures=10
                --executor-memory=${sparkExecutorMemory} --conf spark.driver.extraJavaOptions=${sparkDriverJavaOptions} --conf
                spark.executor.extraJavaOptions=${sparkExecutorJavaOptions} --conf spark.executor.extraClassPath=${sparkExecutorExtraClassPath} --conf
                spark.driver.extraClassPath=${sparkDriverExtraClassPath}
            </spark-opts>
            <arg>-baseInputPath</arg>
            <arg>${inputDir}</arg>
            <arg>-baseOutputPath</arg>
            <arg>${outputDir}</arg>
        </spark>
        <ok to="end"/>
        <error to="path-deletion"/>
    </action>

    <action name="path-deletion">
        <fs>
            <delete path="${outputDir}"/>
        </fs>
        <ok to="fail"/>
        <error to="fail"/>
    </action>

    <kill name="fail">
        <message>Glispa AggregateEvents job workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>

    <end name="end"/>
</workflow-app>
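
For comparison, the direct submission that works is roughly the following (reconstructed here from the workflow options and the properties file below, not pasted from our scripts; inputDir and outputDir are supplied by the coordinator):

spark-submit \
    --master yarn-cluster \
    --queue root.production.high \
    --class com.glispa.gdmp.aggregate.events.AggregateEventsJob \
    --files hdfs://nameservice1/user/gap/aggregate-events/job/config#config \
    --executor-memory 4g \
    --conf spark.yarn.maxAppAttempts=1 \
    --conf spark.task.maxFailures=10 \
    --conf spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j.xml \
    --conf 'spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j.xml -XX:+UseG1GC' \
    --conf spark.executor.extraClassPath=config \
    --conf spark.driver.extraClassPath=config \
    job-aggregate-events-0.28.0-SNAPSHOT-shaded.jar \
    -baseInputPath <inputDir> \
    -baseOutputPath <outputDir>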

Here is the oozie.properties file:

## paths
nameNode=hdfs://nameservice1
jobTracker=stage-gap-namenode-1-n15.srv.glispa.com:8032
wfPath=/user/gap/aggregate-events/job
jarPath=${nameNode}${wfPath}/lib/job-aggregate-events-0.28.0-SNAPSHOT-shaded.jar

# normally this should be overridden with -DstartTime=`date -u "+%Y-%m-%dT%H:20Z"` upon scheduling the coordinator job from CLI
startTime=2016-10-12T00:00Z
endTime=2100-01-01T00:00Z
timezone=Europe/Berlin

## oozie
# HDFS path of the coordinator app
#oozie.wf.application.path=${nameNode}${wfPath}/
oozie.coord.application.path=${nameNode}${wfPath}/coordinator
user.name=gap
# adds a default system library path to the workflow's classpath
# used to allow workflows to access the Oozie Share Lib, which includes the Hive action
oozie.use.system.libpath=true

## spark job
sparkMaster=yarn-cluster
sparkJobName=aggregate-events-job
sparkExecutorMemory=4g
# on YARN, list of files to be uploaded with the application
sparkFiles=${nameNode}${wfPath}/config#config
sparkExecutorJavaOptions="-Dlog4j.configuration=log4j.xml -XX:+UseG1GC"
sparkDriverJavaOptions=-Dlog4j.configuration=log4j.xml
sparkExecutorExtraClassPath=config
sparkDriverExtraClassPath=config

queueName=root.production.high
scriptPath=${nameNode}${wfPath}/scripts
baseOutputPath=/user/gap/aggregate-events/events/daily/
baseInputPath=/user/gap/camus/topics/events2/hourly/
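
For completeness, we schedule the coordinator from the CLI along these lines (the server URL is a placeholder for our Oozie host):

oozie job -oozie http://<oozie-server>:11000/oozie \
    -config oozie.properties \
    -DstartTime=`date -u "+%Y-%m-%dT%H:20Z"` \
    -run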

Any ideas on what could be going wrong?
