<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Can I join 2 dataframe with condition in column value? in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Can-I-join-2-dataframe-with-condition-in-column-value/m-p/168264#M53924</link>
    <description>&lt;P&gt;I have 2 DataFrames and I would like to show one of the DataFrames if my conditions are satisfied. I want to match the first column of both DataFrames and also apply the condition SEV_LVL='3'. Can I get some guidance or help, please?&lt;/P&gt;&lt;PRE&gt;
scala&amp;gt; input_file.show()
+-----------+--------+-----+----+-------+
|     ckt_id|location|usage|port|machine|
+-----------+--------+-----+----+-------+
|     ckt_id|location|usage|port|machine|
| AXZCSD21DF|     USA|  2GB| 101|   MAC1|
| ABZCSD21DF|     OTH|  4GB| 101|   MAC2|
| AXZCSD21DF|     USA|  6GB| 101|   MAC4|
| BXZCSD21DF|     USA|  7GB| 101|   MAC6|
| CXZCSD21DF|     IND|  2GB| 101|   MAC9|
| AXZCSD21DF|     USA|  1GB| 101|   MAC0|
| AXZCSD22DF|     IND|  9GB| 101|   MAC3|
|ADZZCSD21DF|     USA|  1GB| 101|   MAC4|
| AXZCSD21DF|     USA|  2GB| 101|   MAC5|
| XZDCSD21DF|     OTH|  2GB| 101|   MAC1|
+-----------+--------+-----+----+-------+

scala&amp;gt; gsam.show()
+-----------+-------+
|    CCKT_NO|SEV_LVL|
+-----------+-------+
| AXZCSD21DF|      1|
| BXZCSD21DF|      1|
| ABZCSD21DF|      3|
| CXZCSD21DF|      2|
| AXZCSD22DF|      2|
| XZDCSD21DF|      3|
|ADZZCSD21DF|      1|
+-----------+-------+





scala&amp;gt; val gsamjoin = gsam.join(input_file,(gsam("CCKT_NO") &amp;lt;=&amp;gt; input_file("ckt_id")));
gsamjoin: org.apache.spark.sql.DataFrame = [CCKT_NO: string, SEV_LVL: decimal(38,0), ckt_id: string, location: string, usage: string, port: string, machine: string]


scala&amp;gt; gsamjoin.show()
+-----------+-------+-----------+--------+-----+----+-------+
|    CCKT_NO|SEV_LVL|     ckt_id|location|usage|port|machine|
+-----------+-------+-----------+--------+-----+----+-------+
| CXZCSD21DF|      2| CXZCSD21DF|     IND|  2GB| 101|   MAC9|
| ABZCSD21DF|      3| ABZCSD21DF|     OTH|  4GB| 101|   MAC2|
| XZDCSD21DF|      3| XZDCSD21DF|     OTH|  2GB| 101|   MAC1|
| AXZCSD22DF|      2| AXZCSD22DF|     IND|  9GB| 101|   MAC3|
|ADZZCSD21DF|      1|ADZZCSD21DF|     USA|  1GB| 101|   MAC4|
| BXZCSD21DF|      1| BXZCSD21DF|     USA|  7GB| 101|   MAC6|
| AXZCSD21DF|      1| AXZCSD21DF|     USA|  2GB| 101|   MAC1|
| AXZCSD21DF|      1| AXZCSD21DF|     USA|  6GB| 101|   MAC4|
| AXZCSD21DF|      1| AXZCSD21DF|     USA|  1GB| 101|   MAC0|
| AXZCSD21DF|      1| AXZCSD21DF|     USA|  2GB| 101|   MAC5|
+-----------+-------+-----------+--------+-----+----+-------+
&lt;/PRE&gt;</description>
    <pubDate>Thu, 09 Feb 2017 23:42:30 GMT</pubDate>
    <dc:creator>das_dineshk</dc:creator>
    <dc:date>2017-02-09T23:42:30Z</dc:date>
    <item>
      <title>Can I join 2 dataframe with condition in column value?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Can-I-join-2-dataframe-with-condition-in-column-value/m-p/168264#M53924</link>
      <description>&lt;P&gt;I have 2 DataFrames and I would like to show one of the DataFrames if my conditions are satisfied. I want to match the first column of both DataFrames and also apply the condition SEV_LVL='3'. Can I get some guidance or help, please?&lt;/P&gt;&lt;PRE&gt;
scala&amp;gt; input_file.show()
+-----------+--------+-----+----+-------+
|     ckt_id|location|usage|port|machine|
+-----------+--------+-----+----+-------+
|     ckt_id|location|usage|port|machine|
| AXZCSD21DF|     USA|  2GB| 101|   MAC1|
| ABZCSD21DF|     OTH|  4GB| 101|   MAC2|
| AXZCSD21DF|     USA|  6GB| 101|   MAC4|
| BXZCSD21DF|     USA|  7GB| 101|   MAC6|
| CXZCSD21DF|     IND|  2GB| 101|   MAC9|
| AXZCSD21DF|     USA|  1GB| 101|   MAC0|
| AXZCSD22DF|     IND|  9GB| 101|   MAC3|
|ADZZCSD21DF|     USA|  1GB| 101|   MAC4|
| AXZCSD21DF|     USA|  2GB| 101|   MAC5|
| XZDCSD21DF|     OTH|  2GB| 101|   MAC1|
+-----------+--------+-----+----+-------+

scala&amp;gt; gsam.show()
+-----------+-------+
|    CCKT_NO|SEV_LVL|
+-----------+-------+
| AXZCSD21DF|      1|
| BXZCSD21DF|      1|
| ABZCSD21DF|      3|
| CXZCSD21DF|      2|
| AXZCSD22DF|      2|
| XZDCSD21DF|      3|
|ADZZCSD21DF|      1|
+-----------+-------+





scala&amp;gt; val gsamjoin = gsam.join(input_file,(gsam("CCKT_NO") &amp;lt;=&amp;gt; input_file("ckt_id")));
gsamjoin: org.apache.spark.sql.DataFrame = [CCKT_NO: string, SEV_LVL: decimal(38,0), ckt_id: string, location: string, usage: string, port: string, machine: string]


scala&amp;gt; gsamjoin.show()
+-----------+-------+-----------+--------+-----+----+-------+
|    CCKT_NO|SEV_LVL|     ckt_id|location|usage|port|machine|
+-----------+-------+-----------+--------+-----+----+-------+
| CXZCSD21DF|      2| CXZCSD21DF|     IND|  2GB| 101|   MAC9|
| ABZCSD21DF|      3| ABZCSD21DF|     OTH|  4GB| 101|   MAC2|
| XZDCSD21DF|      3| XZDCSD21DF|     OTH|  2GB| 101|   MAC1|
| AXZCSD22DF|      2| AXZCSD22DF|     IND|  9GB| 101|   MAC3|
|ADZZCSD21DF|      1|ADZZCSD21DF|     USA|  1GB| 101|   MAC4|
| BXZCSD21DF|      1| BXZCSD21DF|     USA|  7GB| 101|   MAC6|
| AXZCSD21DF|      1| AXZCSD21DF|     USA|  2GB| 101|   MAC1|
| AXZCSD21DF|      1| AXZCSD21DF|     USA|  6GB| 101|   MAC4|
| AXZCSD21DF|      1| AXZCSD21DF|     USA|  1GB| 101|   MAC0|
| AXZCSD21DF|      1| AXZCSD21DF|     USA|  2GB| 101|   MAC5|
+-----------+-------+-----------+--------+-----+----+-------+
&lt;/PRE&gt;</description>
      <pubDate>Thu, 09 Feb 2017 23:42:30 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Can-I-join-2-dataframe-with-condition-in-column-value/m-p/168264#M53924</guid>
      <dc:creator>das_dineshk</dc:creator>
      <dc:date>2017-02-09T23:42:30Z</dc:date>
    </item>
    <item>
      <title>Re: Can I join 2 dataframe with condition in column value?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Can-I-join-2-dataframe-with-condition-in-column-value/m-p/168265#M53925</link>
      <description>&lt;P&gt;Definitely possible!  Here is some sample code:&lt;/P&gt;&lt;PRE&gt;gsam.join(input_file, (gsam("CCKT_NO")===input_file("ckt_id")) &amp;amp;&amp;amp; (gsam("SEV_LVL") === 3), "inner")

&lt;/PRE&gt;&lt;P&gt;Notice the double &amp;amp;&amp;amp; sign.  You can put as many conditions as you'd like in.&lt;/P&gt;</description>
      <pubDate>Fri, 10 Feb 2017 08:35:51 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Can-I-join-2-dataframe-with-condition-in-column-value/m-p/168265#M53925</guid>
      <dc:creator>jwiden</dc:creator>
      <dc:date>2017-02-10T08:35:51Z</dc:date>
    </item>
    <item>
      <title>Re: Can I join 2 dataframe with condition in column value?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Can-I-join-2-dataframe-with-condition-in-column-value/m-p/168266#M53926</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/472/jwiden.html" nodeid="472"&gt;@Joe Widen&lt;/A&gt; &lt;/P&gt;&lt;P&gt;Thank you, Sir, but I think if we do a join on a larger dataset, memory issues will happen. So in such a case, can we use if/else or a lookup function here?&lt;/P&gt;&lt;P&gt;My aim is to match the input_file DF with the gsam DF, and if CCKT_NO = ckt_id and SEV_LVL = 3, then print the complete row for that ckt_id.&lt;/P&gt;</description>
      <pubDate>Fri, 10 Feb 2017 20:00:45 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Can-I-join-2-dataframe-with-condition-in-column-value/m-p/168266#M53926</guid>
      <dc:creator>das_dineshk</dc:creator>
      <dc:date>2017-02-10T20:00:45Z</dc:date>
    </item>
  </channel>
</rss>

