<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Iterate over ADLS files using spark? in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Iterate-over-ADLS-files-using-spark/m-p/229462#M82435</link>
    <description>&lt;P&gt;There are many ways to iterate HDFS files using spark.  Is there any way to iterate over files in ADLS?&lt;/P&gt;&lt;P&gt;Here is my code:&lt;/P&gt;&lt;PRE&gt;import org.apache.hadoop.fs.Path
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
 
val path = "adl://mylake.azuredatalakestore.net/"
val conf = new Configuration()
val fs = FileSystem.get(conf)
val p = new Path(path)
val ls = fs.listStatus(p)
 
ls.foreach( x =&amp;gt; {
val f = x.getPath.toString
println(f)
val content = spark.read.option("delimiter","|").csv(f)
content.show(1)
} )


&lt;/PRE&gt;&lt;P&gt;and I get the following error:&lt;/P&gt;&lt;PRE&gt;java.lang.IllegalArgumentException: Wrong FS: adl://mylake.azuredatalakestore.net/, expected: hdfs://sparky-m1.klqj4twfp4tehiuq3c3entk04g.jx.internal.cloudapp.net:8020&lt;/PRE&gt;&lt;P&gt;It expects hdfs, but the prefix for ADLS is adl.  Any ideas?&lt;/P&gt;</description>
    <pubDate>Tue, 21 Aug 2018 01:44:04 GMT</pubDate>
    <dc:creator>sunile_manjee</dc:creator>
    <dc:date>2018-08-21T01:44:04Z</dc:date>
    <item>
      <title>Iterate over ADLS files using spark?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Iterate-over-ADLS-files-using-spark/m-p/229462#M82435</link>
      <description>&lt;P&gt;There are many ways to iterate HDFS files using spark.  Is there any way to iterate over files in ADLS?&lt;/P&gt;&lt;P&gt;Here is my code:&lt;/P&gt;&lt;PRE&gt;import org.apache.hadoop.fs.Path
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
 
val path = "adl://mylake.azuredatalakestore.net/"
val conf = new Configuration()
val fs = FileSystem.get(conf)
val p = new Path(path)
val ls = fs.listStatus(p)
 
ls.foreach( x =&amp;gt; {
val f = x.getPath.toString
println(f)
val content = spark.read.option("delimiter","|").csv(f)
content.show(1)
} )


&lt;/PRE&gt;&lt;P&gt;and I get the following error:&lt;/P&gt;&lt;PRE&gt;java.lang.IllegalArgumentException: Wrong FS: adl://mylake.azuredatalakestore.net/, expected: hdfs://sparky-m1.klqj4twfp4tehiuq3c3entk04g.jx.internal.cloudapp.net:8020&lt;/PRE&gt;&lt;P&gt;It expects hdfs, but the prefix for ADLS is adl.  Any ideas?&lt;/P&gt;</description>
      <pubDate>Tue, 21 Aug 2018 01:44:04 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Iterate-over-ADLS-files-using-spark/m-p/229462#M82435</guid>
      <dc:creator>sunile_manjee</dc:creator>
      <dc:date>2018-08-21T01:44:04Z</dc:date>
    </item>
    <item>
      <title>Re: Iterate over ADLS files using spark?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Iterate-over-ADLS-files-using-spark/m-p/229463#M82436</link>
      <description>&lt;P&gt;I found a solution:&lt;/P&gt;&lt;PRE&gt;import scala.sys.process._ 


val lsResult = Seq("hadoop","fs","-ls","adl://mylake.azuredatalakestore.net/").!!
&lt;/PRE&gt;</description>
      <pubDate>Tue, 21 Aug 2018 03:26:23 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Iterate-over-ADLS-files-using-spark/m-p/229463#M82436</guid>
      <dc:creator>sunile_manjee</dc:creator>
      <dc:date>2018-08-21T03:26:23Z</dc:date>
    </item>
    <item>
      <title>Re: Iterate over ADLS files using spark?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Iterate-over-ADLS-files-using-spark/m-p/229464#M82437</link>
      <description>&lt;P&gt;I am facing a similar issue. Is it possible for you to post the complete code? For example, to which function did you pass lsResult?&lt;/P&gt;</description>
      <pubDate>Sun, 26 Aug 2018 14:03:13 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Iterate-over-ADLS-files-using-spark/m-p/229464#M82437</guid>
      <dc:creator>chourasiasakshi</dc:creator>
      <dc:date>2018-08-26T14:03:13Z</dc:date>
    </item>
  </channel>
</rss>

