
Oozie Workflow Error

I am trying to integrate components like Shell, Hive, and Pig actions in a single workflow. Following is my workflow:

<workflow-app name="workflow" xmlns="uri:oozie:workflow:0.1">
    <start to="hdfsinput"/>
    <action name="hdfsinput">
        <shell xmlns="uri:oozie:shell-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <exec>${myscript}</exec>
            <file>${myscriptPath}</file>
        </shell>
        <ok to="hivejob"/>
        <error to="hdfsfail"/>
    </action>
    <kill name="hdfsfail">
        <message>Shell action failed</message>
    </kill>
    <action name="hivejob">
        <hive2 xmlns="uri:oozie:hive2-action:0.1">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="$(nameNode)/user/$(user.name)/oozie"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <jdbc-url>${jdbcURL}</jdbc-url>
            <script>$(hiveScript)</script>
        </hive2>
        <ok to="pigjob"/>
        <error to="hivefail"/>
    </action>
    <action name="pigjob">
        <pig>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <delete path="${outputDir}"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.compress.map.output</name>
                    <value>true</value>
                </property>
                <property>
                    <name>oozie.action.external.stats.write</name>
                    <value>true</value>
                </property>
            </configuration>
            <script>$(pigScript)</script>
            <param>INPUT=${pigInputDir}</param>
            <param>OUTPUT=${pigOutputDir}</param>
        </pig>
        <ok to="pigend"/>
        <error to="pigfail"/>
    </action>
    <kill name="pigfail">
        <message>Pig failed</message>
    </kill>
    <kill name="hdfsfail">
        <message>HDFS failed</message>
    </kill>
    <kill name="hivefail">
        <message>Hive failed</message>
    </kill>
    <end name="pigend"/>
</workflow-app>

Following are my Shell, Hive, and Pig scripts.

hdfs.sh (shell script):

hdfs dfs -put /home/cloudera/numbers.csv /user/cloudera/input/

hive.hql (Hive script):

use streaming;
CREATE TABLE number(
num INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
LOAD DATA INPATH '/user/cloudera/input/number.csv' OVERWRITE INTO TABLE number;

Pig script:

A = LOAD '$(pigInputDir)' using PigStorage(',') as (num : INT);
B = DISTINCT A;
C = FILTER B BY (num!=NULL);
STORE C INTO '$(pigOutputDir)';

Following is my job.properties file:

nameNode=hdfs://quickstart.cloudera:8020
jobTracker=quickstart.cloudera:8032
inputDir=/home/cloudera/
outputDir=$(inputDir)/numbers/
queueName=default
jdbcURL=jdbc:hive2://quickstart.cloudera:10000/default
myscript=hdfs.sh
myscriptPath=${oozie.wf.application.path}/$(myscript)
hiveScript=${oozie.wf.application.path}/hive.hql
pigInputDir=/user/hive/warehouse/streaming.db/number
pigOutputDir=/home/cloudera/numbers/

oozie.use.system.libpath=true

oozie.wf.application.path=/user/cloudera/oozie

I am facing an issue when running this workflow. The error is:

Error: E0705 : E0705: Nnode already defined, node [hdfsfail]

How should I get this workflow running?
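Looking at the workflow definition again, I notice that the kill node hdfsfail is declared twice, once right after the shell action and once more near the end, and I am guessing that is what E0705 is pointing at. Would collapsing the duplicates into a single set of kill nodes, along the lines of the fragment below, be the right fix?

    <kill name="hdfsfail">
        <message>Shell action failed</message>
    </kill>
    <kill name="hivefail">
        <message>Hive failed</message>
    </kill>
    <kill name="pigfail">
        <message>Pig failed</message>
    </kill>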
