
Caused by: java.lang.ClassCastException: cannot assign instance of java.lang.invoke.SerializedLambda to field org.apache.spark.api.java.JavaRDDLike


I wrote two different programs in the Eclipse Oxygen IDE. In the first one, the Spark master is local.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class JavaClientLocal {

    public static void main(String[] args) {
        // Run Spark locally inside a single JVM.
        SparkConf sc = new SparkConf().setAppName("SparkTest").setMaster("local");
        JavaSparkContext jsc = new JavaSparkContext(sc);
        System.out.println(jsc.master() + " : " + jsc.version());

        // Build an RDD from an in-memory list of sentences.
        JavaRDD<String> textFile = jsc.parallelize(
                Arrays.asList("spark rdd example", "sample example", "sit on spark"));
        System.out.println(textFile.collect());

        // Split each line into words.
        JavaRDD<String> words = textFile.flatMap((String str) -> Arrays.asList(str.split(" ")).iterator());

        // Pair each word with a count of 1.
        JavaPairRDD<String, Integer> wcPair = words.mapToPair((String t) -> new Tuple2<>(t, 1));

        // Sum the counts per word.
        JavaPairRDD<String, Integer> result = wcPair.reduceByKey((Integer c1, Integer c2) -> c1 + c2);
        System.out.println(result.collect());
    }

}

The code above runs without any exception, and System.out.println(result.collect()) prints the expected values. However, the code below throws an exception.

import java.net.URI;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class JavaClientYarn {

    private static final String srcDir = "/home/m_usr/sparkData/";
    private static final String srcFile = "data.txt";
    private static final String dstSrc = "hdfs://master:9000/user/m_usr/data.txt";
    private static final String dstFile = "hdfs://master:9000/user/m_usr/result.txt";

    public static void main(String[] args) throws Exception {
        // Run against YARN in client mode.
        SparkConf sc = new SparkConf().setAppName("SparkTest").setMaster("yarn-client");
        JavaSparkContext jsc = new JavaSparkContext(sc);
        System.out.println(jsc.master() + " : " + jsc.version());

        // Copy the local input file to HDFS.
        Path srcPath = new Path(srcDir + srcFile);
        Path dstPath = new Path(dstSrc);
        FileSystem fs = FileSystem.get(URI.create(dstSrc), new Configuration());
        fs.copyFromLocalFile(srcPath, dstPath);

        // Read the file back from HDFS as an RDD of lines.
        JavaRDD<String> textFile = jsc.textFile(dstSrc);
        System.out.println(textFile.collect());

        // Split each line into words.
        JavaRDD<String> words = textFile.flatMap((String str) -> Arrays.asList(str.split(" ")).iterator());

        // Pair each word with a count of 1.
        JavaPairRDD<String, Integer> wcPair = words.mapToPair((String t) -> new Tuple2<>(t, 1));

        // Sum the counts per word.
        JavaPairRDD<String, Integer> result = wcPair.reduceByKey((Integer c1, Integer c2) -> c1 + c2);
        System.out.println(result.collect());

        jsc.stop();
    }
}

The exception is thrown as shown below:

Caused by: java.lang.ClassCastException: cannot assign instance of java.lang.invoke.SerializedLambda to field org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$1$1.f$3 of type org.apache.spark.api.java.function.FlatMapFunction in instance of org.apache.spark.api.java.JavaRDDLike$$anonfun$fn$1$1
    at java.io.ObjectStreamClass$FieldReflector.setObjFieldValues(ObjectStreamClass.java:2133)
    at java.io.ObjectStreamClass.setObjFieldValues(ObjectStreamClass.java:1305)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2251)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2245)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2245)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2245)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:422)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
    at org.apache.spark.scheduler.Task.run(Task.scala:108)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:748)

The difference between the two programs is the master: the former uses local, while the latter uses yarn-client. But the latter does not recognize the Java 8 lambda expressions and raises the error above. I have no idea which Spark-on-YARN configuration causes this. Any reply will be deeply appreciated. Thanks in advance.
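
For reference, here is a minimal sketch of one possible workaround, assuming the root cause is that the application jar containing the compiled lambda classes is never shipped to the YARN executors (a common reason for this SerializedLambda ClassCastException when launching directly from an IDE). The jar path below is hypothetical and must point to the jar actually built from this project:

    // Assumption: the executors fail because they cannot load the classes
    // the lambdas were compiled into. Registering the application jar on
    // the SparkConf distributes it to the executors.
    // The path "/home/m_usr/sparkApps/spark-test.jar" is hypothetical.
    SparkConf sc = new SparkConf()
            .setAppName("SparkTest")
            .setMaster("yarn-client")
            .setJars(new String[] { "/home/m_usr/sparkApps/spark-test.jar" });

Alternatively, packaging the application and launching it with spark-submit --master yarn (which distributes the jar automatically), or replacing each lambda with a named class implementing FlatMapFunction, PairFunction, or Function2, are common ways to rule out the lambda-serialization path.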
