<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Apache Atlas Spark Data lineage in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/322116#M228684</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/59873"&gt;@vnandigam&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Good news. Now Spark Atlas integration is supported using CDP cluster.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;References:&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;1.&amp;nbsp;&lt;A href="https://docs.cloudera.com/cdp-private-cloud-base/7.1.6/atlas-reference/topics/atlas-spark-metadata-collection.html" target="_blank"&gt;https://docs.cloudera.com/cdp-private-cloud-base/7.1.6/atlas-reference/topics/atlas-spark-metadata-collection.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;2.&amp;nbsp;&lt;A href="https://docs.cloudera.com/cdp-private-cloud-upgrade/latest/upgrade-hdp/topics/amb-enable-spark-cm.html" target="_blank"&gt;https://docs.cloudera.com/cdp-private-cloud-upgrade/latest/upgrade-hdp/topics/amb-enable-spark-cm.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 06 Aug 2021 04:17:01 GMT</pubDate>
    <dc:creator>RangaReddy</dc:creator>
    <dc:date>2021-08-06T04:17:01Z</dc:date>
    <item>
      <title>Apache Atlas Spark Data lineage</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181214#M143443</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I am trying to implement data lineage for my Spark application. I have a Kafka topic; Spark Streaming reads data from Kafka and places it in a data source. When I checked Apache Atlas, it doesn't provide any hooks for Spark. I guess we have to use the REST API for this implementation. Can someone point me to some documentation or an example for this?&lt;/P&gt;</description>
      <pubDate>Mon, 22 May 2017 17:00:18 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181214#M143443</guid>
      <dc:creator>vnandigam</dc:creator>
      <dc:date>2017-05-22T17:00:18Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Atlas Spark Data lineage</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181215#M143444</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/13176/vnandigam.html" nodeid="13176"&gt;@vnandigam&lt;/A&gt;&lt;/P&gt;&lt;P&gt;You are correct, Atlas does not currently provide lineage for Spark.  This is something engineering/community is working on.&lt;/P&gt;&lt;P&gt;You can, however, create your own entities and use the REST API to populate them.  Here is some documentation and examples:&lt;/P&gt;&lt;P&gt;&lt;A href="http://atlas.apache.org/0.7.0-incubating/AtlasTechnicalUserGuide.pdf" target="_blank"&gt;http://atlas.apache.org/0.7.0-incubating/AtlasTechnicalUserGuide.pdf&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Please note that while this documentation also applies to Atlas 0.7-0.8 (in HDP 2.5-2.6), it does use APIs that have been deprecated in that version and will be removed in future ones.  Still, it's good to get you started with your implementation.&lt;/P&gt;&lt;P&gt;As always, if you find any responses here useful, don't forget to "accept" an answer. &lt;/P&gt;</description>
      <pubDate>Mon, 22 May 2017 20:38:56 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181215#M143444</guid>
      <dc:creator>egarelnabi</dc:creator>
      <dc:date>2017-05-22T20:38:56Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Atlas Spark Data lineage</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181216#M143445</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/445/egarelnabi.html" nodeid="445"&gt;@Eyad Garelnabi&lt;/A&gt; &lt;/P&gt;&lt;P&gt;Thanks for the answer. So I created metadata for my custom object using the REST API, then once I retrieved my event from Spark Streaming I added it as an entity using the REST API. Will Atlas take care of the lineage, or do I need to add event modifications manually each and every time?&lt;/P&gt;</description>
      <pubDate>Tue, 23 May 2017 12:16:35 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181216#M143445</guid>
      <dc:creator>vnandigam</dc:creator>
      <dc:date>2017-05-23T12:16:35Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Atlas Spark Data lineage</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181217#M143446</link>
      <description>&lt;P&gt;Take a look at the "Create Lineage amongst data sets" section (p. 46) in the document link I shared above.  It also has a detailed example.&lt;/P&gt;</description>
      <pubDate>Tue, 23 May 2017 21:03:41 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181217#M143446</guid>
      <dc:creator>egarelnabi</dc:creator>
      <dc:date>2017-05-23T21:03:41Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Atlas Spark Data lineage</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181218#M143447</link>
      <description>&lt;P&gt;yes. Got it &lt;A rel="user" href="https://community.cloudera.com/users/445/egarelnabi.html" nodeid="445"&gt;@Eyad Garelnabi&lt;/A&gt;. Thanks&lt;/P&gt;</description>
      <pubDate>Wed, 24 May 2017 12:55:23 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/181218#M143447</guid>
      <dc:creator>vnandigam</dc:creator>
      <dc:date>2017-05-24T12:55:23Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Atlas Spark Data lineage</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/322116#M228684</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/59873"&gt;@vnandigam&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Good news. Now Spark Atlas integration is supported using CDP cluster.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;References:&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;1.&amp;nbsp;&lt;A href="https://docs.cloudera.com/cdp-private-cloud-base/7.1.6/atlas-reference/topics/atlas-spark-metadata-collection.html" target="_blank"&gt;https://docs.cloudera.com/cdp-private-cloud-base/7.1.6/atlas-reference/topics/atlas-spark-metadata-collection.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;2.&amp;nbsp;&lt;A href="https://docs.cloudera.com/cdp-private-cloud-upgrade/latest/upgrade-hdp/topics/amb-enable-spark-cm.html" target="_blank"&gt;https://docs.cloudera.com/cdp-private-cloud-upgrade/latest/upgrade-hdp/topics/amb-enable-spark-cm.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 06 Aug 2021 04:17:01 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Spark-Data-lineage/m-p/322116#M228684</guid>
      <dc:creator>RangaReddy</dc:creator>
      <dc:date>2021-08-06T04:17:01Z</dc:date>
    </item>
  </channel>
</rss>

