<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Data transformations at Spark level in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Data-transformations-at-Spark-level/m-p/29508#M6550</link>
    <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm trying to work with Spark and Cassandra to extract data from the data lake&amp;nbsp;and transform it. Transformation may be done before or after loading in Cassandra.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;About transformations, I'm wondering: what would be the transformation tool that would allow me to do transformations without worrying about data storage? I mean that, if tomorrow I don't want to use Cassandra anymore but Hadoop, I would like my transformations to remain valid. So, I would like my transformation tool to work with Spark directly and not depend on the tool Spark works with.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Please, could you recommend a tool that would work with Spark and does not depend on underlying tools like Cassandra and Hadoop?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 16 Sep 2022 09:33:57 GMT</pubDate>
    <dc:creator>Nyphel</dc:creator>
    <dc:date>2022-09-16T09:33:57Z</dc:date>
    <item>
      <title>Data transformations at Spark level</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Data-transformations-at-Spark-level/m-p/29508#M6550</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'm trying to work with Spark and Cassandra to extract data from the data lake&amp;nbsp;and transform it. Transformation may be done before or after loading in Cassandra.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;About transformations, I'm wondering: what would be the transformation tool that would allow me to do transformations without worrying about data storage? I mean that, if tomorrow I don't want to use Cassandra anymore but Hadoop, I would like my transformations to remain valid. So, I would like my transformation tool to work with Spark directly and not depend on the tool Spark works with.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Please, could you recommend a tool that would work with Spark and does not depend on underlying tools like Cassandra and Hadoop?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Sep 2022 09:33:57 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Data-transformations-at-Spark-level/m-p/29508#M6550</guid>
      <dc:creator>Nyphel</dc:creator>
      <dc:date>2022-09-16T09:33:57Z</dc:date>
    </item>
    <item>
      <title>Re: Data transformations at Spark level</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Data-transformations-at-Spark-level/m-p/29653#M6551</link>
      <description>&lt;P&gt;Do these&amp;nbsp;&lt;A href="https://spark.apache.org/docs/latest/programming-guide.html#transformations" target="_blank"&gt;transformations &lt;/A&gt;not work for you? Anything that you write in Spark can be adjusted to work with different storage underneath.&lt;/P&gt;&lt;P&gt;What else would you be looking for?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Wilfred&lt;/P&gt;</description>
      <pubDate>Thu, 16 Jul 2015 05:36:21 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Data-transformations-at-Spark-level/m-p/29653#M6551</guid>
      <dc:creator>Wilfred</dc:creator>
      <dc:date>2015-07-16T05:36:21Z</dc:date>
    </item>
    <item>
      <title>Re: Data transformations at Spark level</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Data-transformations-at-Spark-level/m-p/29661#M6552</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks for your answer.&lt;/P&gt;&lt;P&gt;Spark allows me to do some transformations but it is not the main goal of Spark. A transformation tool would offer me more capabilities and would be more productive for tons of transformation rules to be produced, case by case.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Jul 2015 08:28:40 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Data-transformations-at-Spark-level/m-p/29661#M6552</guid>
      <dc:creator>Nyphel</dc:creator>
      <dc:date>2015-07-16T08:28:40Z</dc:date>
    </item>
    <item>
      <title>Re: Data transformations at Spark level</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Data-transformations-at-Spark-level/m-p/29799#M6553</link>
      <description>&lt;P&gt;In Spark a transformation works directly on the RDD. Transforms are implemented lazily and closely coupled to the RDDs. You cannot use them separately.&lt;/P&gt;&lt;P&gt;What you are looking for is a tool that can generate Spark code for you based on the transformation rule. I don't think that something like that exists.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Wilfred&lt;/P&gt;</description>
      <pubDate>Mon, 20 Jul 2015 00:54:28 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Data-transformations-at-Spark-level/m-p/29799#M6553</guid>
      <dc:creator>Wilfred</dc:creator>
      <dc:date>2015-07-20T00:54:28Z</dc:date>
    </item>
  </channel>
</rss>

