<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Apache Atlas - Unable to collect HDFS metadata in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358792#M237934</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/77463"&gt;@pkr&lt;/a&gt;&amp;nbsp;, Thanks for the solution. Much Appreciated&lt;/P&gt;</description>
    <pubDate>Fri, 02 Dec 2022 06:20:14 GMT</pubDate>
    <dc:creator>Nigal</dc:creator>
    <dc:date>2022-12-02T06:20:14Z</dc:date>
    <item>
      <title>Apache Atlas - Unable to collect HDFS metadata</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358541#M237877</link>
      <description>&lt;P&gt;I want to collect the metadata from HDFS. But when I searched, it looks like there is no Atlas hook available for HDFS like there is for Kafka, Hive and other data sources. Can I get the HDFS hook&amp;nbsp; install steps or the steps to create a custom hook?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 29 Nov 2022 12:23:22 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358541#M237877</guid>
      <dc:creator>Nigal</dc:creator>
      <dc:date>2022-11-29T12:23:22Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Atlas - Unable to collect HDFS metadata</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358611#M237896</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/102136"&gt;@Nigal&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;Yes right. There is no 'HDFS hook' pre-defined in Atlas.&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;Atlas mainly collects information from Hive - Spark - Hbase - Impala&lt;/DIV&gt;&lt;DIV&gt;&lt;A href="https://docs.cloudera.com/cdp-private-cloud-base/7.1.6/cdp-governance-overview/topics/atlas-metadata-collection-overview.html" target="_blank" rel="noopener"&gt;https://docs.cloudera.com/cdp-private-cloud-base/7.1.6/cdp-governance-overview/topics/atlas-metadata-collection-overview.html&lt;/A&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;hdfs_path is synced only if this belongs to a Hive table's lineage (as is explained in &lt;A href="https://issues.apache.org/jira/browse/ATLAS-599" target="_blank" rel="noopener"&gt;https://issues.apache.org/jira/browse/ATLAS-599&lt;/A&gt;). By default, Atlas won't fetch HDFS paths.&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;Unlike HIVE entities, HDFS entities within Atlas are created manually using the Create Entity link within the Atlas Web UI.&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;Please check out the list of available 'hooks' in Atlas:&lt;/DIV&gt;&lt;DIV&gt;&lt;A href="https://atlas.apache.org" target="_blank" rel="noopener"&gt;https://atlas.apache.org&lt;/A&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;Here's a document on creating hdfs_path manually in Atlas:&lt;/DIV&gt;&lt;DIV&gt;&lt;A href="https://atlas.apache.org/2.0.0/Export-HDFS-API.html" target="_blank" rel="noopener"&gt;https://atlas.apache.org/2.0.0/Export-HDFS-API.html&lt;/A&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 30 Nov 2022 07:56:53 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358611#M237896</guid>
      <dc:creator>pkr</dc:creator>
      <dc:date>2022-11-30T07:56:53Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Atlas - Unable to collect HDFS metadata</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358647#M237905</link>
      <description>&lt;P&gt;Thanks for the solution. But I didn't clearly understand this point: "&lt;STRONG&gt;h&lt;/STRONG&gt;&lt;SPAN&gt;&lt;STRONG&gt;dfs_path is synced only if this belongs to a Hive table's lineage&lt;/STRONG&gt;". What I understood from this is that since Hive runs on top of HDFS, on creating Hive lineage, the lineage will show the HDFS path of the Hive warehouse directory. Is that correct?&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 30 Nov 2022 13:45:57 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358647#M237905</guid>
      <dc:creator>Nigal</dc:creator>
      <dc:date>2022-11-30T13:45:57Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Atlas - Unable to collect HDFS metadata</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358787#M237932</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/102136"&gt;@Nigal&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;SPAN&gt;Currently, when you create a hive/sqoop/falcon/storm entity which has an association to an HDFS path, it shows up in Atlas. &lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;Otherwise, any file/folder created in HDFS doesn't show up in Atlas.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;For example, when you create a directory in HDFS, Atlas doesn't ingest it.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;But when you create a Hive table like:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;"CREATE EXTERNAL TABLE test_table ( id int,value string) LOCATION '/user/cloudera/text' "&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;Atlas creates a lineage graph which shows the relationship between the Hive table and the HDFS path.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;You can see the HDFS directories by searching "hdfs_path" and the Hive tables by searching "hive_table".&lt;/SPAN&gt;&lt;/DIV&gt;</description>
      <pubDate>Fri, 02 Dec 2022 05:57:26 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358787#M237932</guid>
      <dc:creator>pkr</dc:creator>
      <dc:date>2022-12-02T05:57:26Z</dc:date>
    </item>
    <item>
      <title>Re: Apache Atlas - Unable to collect HDFS metadata</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358792#M237934</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/77463"&gt;@pkr&lt;/a&gt;&amp;nbsp;, Thanks for the solution. Much Appreciated&lt;/P&gt;</description>
      <pubDate>Fri, 02 Dec 2022 06:20:14 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Apache-Atlas-Unable-to-collect-HDFS-metadata/m-p/358792#M237934</guid>
      <dc:creator>Nigal</dc:creator>
      <dc:date>2022-12-02T06:20:14Z</dc:date>
    </item>
  </channel>
</rss>

