<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Execute a pig job on all nodes in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Execute-a-pig-job-on-all-nodes/m-p/37083#M10027</link>
    <description>&lt;P&gt;I found out why this was happening.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Since I was on a DEV cluster I stopped and started the services every day.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Also, the data from the table to which I was writing was moving&amp;nbsp;to a single machine from time to time (due to service failing, start and stop etc.)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;After I balanced the HBase Table the script was distributed.&amp;nbsp;&lt;/STRONG&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 05 Feb 2016 12:49:22 GMT</pubDate>
    <dc:creator>AlinaGHERMAN</dc:creator>
    <dc:date>2016-02-05T12:49:22Z</dc:date>
    <item>
      <title>Execute a pig job on all nodes</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Execute-a-pig-job-on-all-nodes/m-p/33814#M10024</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have a pig job that I schedule with &lt;STRONG&gt;oozie&lt;/STRONG&gt;. &amp;nbsp;This pig job is reading data from a &lt;STRONG&gt;Hive table&lt;/STRONG&gt; and is writing into &lt;STRONG&gt;3 HBase tables (UDF)&lt;/STRONG&gt;.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The problem is that only one node is working.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;I notice that this job has only mappers and no reducers. Is this the problem?&amp;nbsp;&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm asking this because of the thread:&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.cloudera.com/t5/Batch-Processing-and-Workflow/Execute-Shell-script-through-oozie-job-in-all-node/m-p/33136#M1765" target="_blank"&gt;https://community.cloudera.com/t5/Batch-Processing-and-Workflow/Execute-Shell-script-through-oozie-job-in-all-node/m-p/33136#M1765&amp;nbsp;&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;where &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/12109"&gt;@Sue&lt;/a&gt; said "&lt;SPAN&gt;The Oozie shell action is&amp;nbsp;run as a Hadoop job with one map task and zero reduce tasks - the job runs on one&amp;nbsp;arbitrary node in the cluster.&lt;/SPAN&gt;"&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Is there a way to force the cluster to use all the nodes?&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Sep 2022 09:47:59 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Execute-a-pig-job-on-all-nodes/m-p/33814#M10024</guid>
      <dc:creator>AlinaGHERMAN</dc:creator>
      <dc:date>2022-09-16T09:47:59Z</dc:date>
    </item>
    <item>
      <title>Re: Execute a pig job on all nodes</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Execute-a-pig-job-on-all-nodes/m-p/33833#M10025</link>
      <description>&lt;P&gt;If you are using the Pig action, as opposed to a shell action, then your Pig job should run in a distributed fashion.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here is a great blog post (recently updated for CDH 5) that shows a step by step example for running a Pig script as an&amp;nbsp;Oozie Pig action:&lt;/P&gt;&lt;P&gt;&lt;A href="https://blog.cloudera.com/blog/2013/03/how-to-use-oozie-shell-and-java-actions/" target="_self"&gt;https://blog.cloudera.com/blog/2013/03/how-to-use-oozie-shell-and-java-actions/&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Nov 2015 16:15:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Execute-a-pig-job-on-all-nodes/m-p/33833#M10025</guid>
      <dc:creator>Sue</dc:creator>
      <dc:date>2015-11-05T16:15:52Z</dc:date>
    </item>
    <item>
      <title>Re: Execute a pig job on all nodes</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Execute-a-pig-job-on-all-nodes/m-p/33840#M10026</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you for your answer.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The problem is that classic Pig scripts (no access to Hive tables, nor to HBase) are running in a distributed way (they have mappers and reducers).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;However, this one is running only on one node&lt;/P&gt;&lt;P&gt;(in Cloudera Manager -&amp;gt;Hosts all namenodes have a Load Average of 0.* and one node has 9.* as load charge)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Since you say that normally, even if only mappers are created the script should run in a distributed way, I will post an anonymised version of my script.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;SET mapreduce.fileoutputcommitter.marksuccessfuljobs false;
SET output.compression.codec org.apache.hadoop.io.compress.SnappyCodec;
SET hbase.zookeeper.quorum '${ZOOKEEPER_QUORUM}';
SET oozie.use.system.libpath true
SET oozie.libpath '${PATH_LIB_OOZIE}'
------------------------------------------------------------


-- hcat
register 'hive-hcatalog-core-0.13.1-cdh5.3.0.jar';
register 'hive-hcatalog-core.jar';
register 'hive-hcatalog-pig-adapter-0.13.1-cdh5.3.0.jar';
register 'hive-hcatalog-pig-adapter.jar';
register 'hive-metastore-0.13.1-cdh5.3.0.jar';
register 'datanucleus-core-3.2.10.jar';
register 'datanucleus-api-jdo-3.2.6.jar';
register 'datanucleus-rdbms-3.2.9.jar';
register 'commons-dbcp-1.4.jar';
register 'commons-pool-1.5.4.jar';
register 'jdo-api-3.0.1.jar';

-- UDF
REGISTER 'MyStoreUDF-0.3.8.jar';

------------------------------------------------------------------------------------------------------------
----------------------------------------------- input data -------------------------------------------------

var_a= LOAD 'my_database.my_table' USING org.apache.hcatalog.pig.HCatLoader() as 
			(
                        a:chararray ,
                        b:chararray,
                        c:chararray,
			 d:chararray,
			 e:chararray,
			 f:long,
			 g:chararray,
			h:chararray,
			 i:long,
			 j:chararray,
			 k:bag{((name:chararray,value:chararray))},
                         l:chararray,
                         m:chararray  );

var_a_filtered= FILTER var_a BY (a== 'abcd' );

var_a_proj= FOREACH var_a_filtered GENERATE
                        a,
			b,
                        c,
                        d;

 STORE var_a_proj INTO 'hbaseTableName' 
 USING MyStoreUDF('-hbaseTableName1 hbaseTableName1 -hbaseTableName2 -hbaseTableName2 ');&lt;/PRE&gt;&lt;P&gt;Thank you!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Alina GHERMAN&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Nov 2015 17:43:25 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Execute-a-pig-job-on-all-nodes/m-p/33840#M10026</guid>
      <dc:creator>AlinaGHERMAN</dc:creator>
      <dc:date>2015-11-05T17:43:25Z</dc:date>
    </item>
    <item>
      <title>Re: Execute a pig job on all nodes</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Execute-a-pig-job-on-all-nodes/m-p/37083#M10027</link>
      <description>&lt;P&gt;I found out why this was happening.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Since I was on a DEV cluster I stopped and started the services every day.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Also, the data from the table to which I was writing was moving&amp;nbsp;to a single machine from time to time (due to service failing, start and stop etc.)&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;After I balanced the HBase Table the script was distributed.&amp;nbsp;&lt;/STRONG&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 05 Feb 2016 12:49:22 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Execute-a-pig-job-on-all-nodes/m-p/37083#M10027</guid>
      <dc:creator>AlinaGHERMAN</dc:creator>
      <dc:date>2016-02-05T12:49:22Z</dc:date>
    </item>
  </channel>
</rss>

