<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question spark join with udf fails in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/spark-join-with-udf-fails/m-p/122872#M85625</link>
    <description>&lt;P&gt;Hi, &lt;/P&gt;&lt;P&gt;I am trying to do a join in spark using udfs in the join condition, but I am getting the error shown below:&lt;/P&gt;&lt;P&gt;The joins work fine without udfs. Is it possible to use udfs in the manner below? The udfs work fine in select etc. &lt;/P&gt;&lt;P&gt;result_df = t1_df.join(t2_df, _udf1(t1_df['col1']) == _udf2(t2_df['col1']), "inner") &lt;/P&gt;&lt;P&gt;  File "/usr/hdp/2.3.4.14-9/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py", line 584, in join
  File "/usr/hdp/2.3.4.14-9/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line 538, in __call__
  File "/usr/hdp/2.3.4.14-9/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 36, in deco
  File "/usr/hdp/2.3.4.14-9/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 300, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o79.join.
: java.lang.ClassCastException: org.apache.spark.sql.catalyst.plans.logical.Project cannot be cast to org.apache.spark.sql.catalyst.plans.logical.Join
        at org.apache.spark.sql.DataFrame.join(DataFrame.scala:554)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
        at py4j.Gateway.invoke(Gateway.java:259)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:207)
        at java.lang.Thread.run(Thread.java:745)&lt;/P&gt;</description>
    <pubDate>Mon, 11 Jul 2016 21:00:35 GMT</pubDate>
    <dc:creator>xrcsblue</dc:creator>
    <dc:date>2016-07-11T21:00:35Z</dc:date>
  </channel>
</rss>

