<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Reading CSV File Spark - Issue with Backslash in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/367296#M239858</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/103409"&gt;@ShobhitSingh&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You need to adjust the csv file&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;sample.csv&lt;BR /&gt;=========&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;COL1|COL2|COL3|COL4
1st Data|2nd|3rd data|4th data
1st Data|2nd \\P data|3rd data|4th data
"1st Data"|"2nd '\\P' data"|"3rd data"|"4th data"
"1st Data"|"2nd '\\\\P' data"|"3rd data"|"4th data"&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Spark Code:&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="java"&gt;spark.read.format("csv").option("header","true").option("inferSchema","true").option("delimiter","|").load("/tmp/sample.csv").show(false)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Output:&lt;/STRONG&gt;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;+--------+--------------+----------+--------+
|COL1 |COL2 |COL3 |COL4 |
+--------+--------------+----------+--------+
|1st Data|2nd |3rd data |4th data|
|1st Data|2nd \\P data |3rd data |4th data|
|1st Data|2nd '\P' data |3rd data |4th data|
|1st Data|2nd '\\P' data|3rd data |4th data|
+--------+--------------+----------+--------+&lt;/LI-CODE&gt;</description>
    <pubDate>Thu, 30 Mar 2023 11:26:31 GMT</pubDate>
    <dc:creator>RangaReddy</dc:creator>
    <dc:date>2023-03-30T11:26:31Z</dc:date>
    <item>
      <title>Reading CSV File Spark - Issue with Backslash</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/363473#M238996</link>
      <description>&lt;P&gt;I'm facing weird issue, not sure why Spark is behaving like this.&lt;/P&gt;&lt;P&gt;samplefile.txt:&lt;/P&gt;&lt;PRE&gt;COL1|COL2|COL3|COL4 
"1st Data"|"2nd ""\\\\P"" data"|"3rd data"|"4th data"&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This is my spark code to read data:&lt;/P&gt;&lt;PRE&gt;val df = spark.read.format("csv").option("header","true").option("inferSchema","true").option("delimiter","|").load("\samplefile.xtx")
df.show(false)&lt;/PRE&gt;&lt;P&gt;Somehow it is combining the data of two columns into one. Spark (Scala) version: 2.4&lt;/P&gt;&lt;P&gt;Any idea why Spark is behaving like this?&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="IMG-6762.JPG" style="width: 999px;"&gt;&lt;img src="https://community.cloudera.com/t5/image/serverpage/image-id/36750iCDF9FC08D0869A65/image-size/large?v=v2&amp;amp;px=999" role="button" title="IMG-6762.JPG" alt="IMG-6762.JPG" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 09 Feb 2023 11:45:19 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/363473#M238996</guid>
      <dc:creator>ShobhitSingh</dc:creator>
      <dc:date>2023-02-09T11:45:19Z</dc:date>
    </item>
    <item>
      <title>Re: Reading CSV File Spark - Issue with Backslash</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/363493#M239002</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/103409"&gt;@ShobhitSingh&lt;/a&gt;&amp;nbsp;You need to handle the escape with another option:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;SPAN&gt;.option("escape", "\\")&lt;/SPAN&gt;&lt;/PRE&gt;&lt;P&gt;You may need to experiment with the actual string passed as the escape argument ("\\") to suit your needs.&amp;nbsp; Be sure to check the Spark docs specific to your version.&amp;nbsp; For example:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://spark.apache.org/docs/latest/sql-data-sources-csv.html" target="_blank"&gt;https://spark.apache.org/docs/latest/sql-data-sources-csv.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 09 Feb 2023 13:53:51 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/363493#M239002</guid>
      <dc:creator>steven-matison</dc:creator>
      <dc:date>2023-02-09T13:53:51Z</dc:date>
    </item>
    <item>
      <title>Re: Reading CSV File Spark - Issue with Backslash</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/363505#M239006</link>
      <description>&lt;P&gt;Hi Steven,&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Even if my data is like this, it's causing an issue.&lt;/SPAN&gt;&lt;/P&gt;&lt;PRE&gt;&lt;SPAN class="hljs-string"&gt;"1st Data"&lt;/SPAN&gt;|&lt;SPAN class="hljs-string"&gt;"2nd "&lt;/SPAN&gt;&lt;SPAN class="hljs-string"&gt;"\P"&lt;/SPAN&gt;&lt;SPAN class="hljs-string"&gt;" data"&lt;/SPAN&gt;|&lt;SPAN class="hljs-string"&gt;"3rd data"&lt;/SPAN&gt;|&lt;SPAN class="hljs-string"&gt;"4th data"&lt;/SPAN&gt;&lt;/PRE&gt;&lt;P&gt;&lt;SPAN&gt;What is causing the issue? Any idea? &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;I know Spark uses backslash as its default escape character&lt;STRONG&gt;.&lt;/STRONG&gt; But why is it behaving like this?&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 09 Feb 2023 14:29:55 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/363505#M239006</guid>
      <dc:creator>ShobhitSingh</dc:creator>
      <dc:date>2023-02-09T14:29:55Z</dc:date>
    </item>
    <item>
      <title>Re: Reading CSV File Spark - Issue with Backslash</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/363506#M239007</link>
      <description>&lt;P&gt;Click into that doc and check out the other escape option.&amp;nbsp; I think you need to handle the quotes too.&lt;/P&gt;</description>
      <pubDate>Thu, 09 Feb 2023 14:31:30 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/363506#M239007</guid>
      <dc:creator>steven-matison</dc:creator>
      <dc:date>2023-02-09T14:31:30Z</dc:date>
    </item>
    <item>
      <title>Re: Reading CSV File Spark - Issue with Backslash</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/367296#M239858</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/103409"&gt;@ShobhitSingh&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You need to adjust the csv file&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;sample.csv&lt;BR /&gt;=========&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;COL1|COL2|COL3|COL4
1st Data|2nd|3rd data|4th data
1st Data|2nd \\P data|3rd data|4th data
"1st Data"|"2nd '\\P' data"|"3rd data"|"4th data"
"1st Data"|"2nd '\\\\P' data"|"3rd data"|"4th data"&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Spark Code:&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="java"&gt;spark.read.format("csv").option("header","true").option("inferSchema","true").option("delimiter","|").load("/tmp/sample.csv").show(false)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Output:&lt;/STRONG&gt;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;+--------+--------------+----------+--------+
|COL1 |COL2 |COL3 |COL4 |
+--------+--------------+----------+--------+
|1st Data|2nd |3rd data |4th data|
|1st Data|2nd \\P data |3rd data |4th data|
|1st Data|2nd '\P' data |3rd data |4th data|
|1st Data|2nd '\\P' data|3rd data |4th data|
+--------+--------------+----------+--------+&lt;/LI-CODE&gt;</description>
      <pubDate>Thu, 30 Mar 2023 11:26:31 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Reading-CSV-File-Spark-Issue-with-Backslash/m-p/367296#M239858</guid>
      <dc:creator>RangaReddy</dc:creator>
      <dc:date>2023-03-30T11:26:31Z</dc:date>
    </item>
  </channel>
</rss>

