Hi,
I am running my Spark program on YARN, but I keep getting the message below endlessly on the console:
15/01/22 01:12:13 INFO cluster.YarnClientSchedulerBackend: Application report from ASM:
appMasterRpcPort: -1
appStartTime: 1421868378949
yarnAppState: ACCEPTED
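Since the application seems to stay in ACCEPTED, is there a quick way to confirm that YARN actually has free resources to start the ApplicationMaster? I was thinking of checking with something like the commands below (just my guess that this is the right place to look):
yarn application -list
yarn node -list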
I am using Cloudera 5.2 and have set HADOOP_CONF_DIR:
export HADOOP_CONF_DIR="/etc/hadoop/conf"
I am running it with the command below. Please suggest if I am missing any configuration:
spark-submit --master yarn --class alu.ausdc.analitics.sparkEx.ParallelizeEx sparkanalitics-1.jar
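In case it matters, would explicitly requesting smaller resources help, so the request fits the cluster? For example, a variant like the one below (the 1g values are only guesses on my part, not tuned for this cluster):
spark-submit --master yarn --num-executors 1 --executor-memory 1g --driver-memory 1g --class alu.ausdc.analitics.sparkEx.ParallelizeEx sparkanalitics-1.jar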
ParallelizeEx.java:
package alu.ausdc.analitics.sparkEx;
import java.util.Arrays;
import org.apache.spark.Accumulator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
/**
* @author sangalar
*
*/
public class ParallelizeEx {

    public static void main(String[] args) {
        // Master comes from spark-submit (--master yarn); setMaster("local") is only for local testing
        SparkConf conf = new SparkConf().setAppName("ParallelizeEx")/*.setMaster("local")*/;
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Build a small RDD from an in-memory list
        JavaRDD<String> lines = sc.parallelize(Arrays.asList("pandas", "i like pandas"));
        System.out.println("No. lines: " + lines.count()); // 2

        // Counts how many times the map function is invoked; can be read with i.value() after an action
        final Accumulator<Integer> i = sc.accumulator(0);

        // Map each line to its number of words
        JavaRDD<Integer> line = lines.map(new Function<String, Integer>() {
            @Override
            public Integer call(String s) {
                i.add(1);
                return s.split(" ").length;
            }
        });

        // Sum the per-line word counts
        int totalWords = line.reduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer arg0, Integer arg1) throws Exception {
                return arg0 + arg1;
            }
        });
        System.out.println("totalWords: " + totalWords);

        sc.stop();
    }
}
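For reference, to rule out a problem in the job itself, I assume I could also run the same jar in local mode with something like:
spark-submit --master local[2] --class alu.ausdc.analitics.sparkEx.ParallelizeEx sparkanalitics-1.jar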
Thanks,
Shekhar Reddy.