<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Loading a JSON File from URL into a Spark DataFrame with Python in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216476#M178385</link>
    <description>&lt;P&gt;I'd like to suggest &lt;A href="https://jsonformatter.org/json-editor"&gt;JSON Editor&lt;/A&gt;: it helps you open or load files and URLs, and create, update, and validate JSON data.&lt;/P&gt;</description>
    <pubDate>Tue, 15 Aug 2017 03:03:52 GMT</pubDate>
    <dc:creator>james_malvi</dc:creator>
    <dc:date>2017-08-15T03:03:52Z</dc:date>
    <item>
      <title>Loading a JSON File from URL into a Spark DataFrame with Python</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216474#M178383</link>
      <description>&lt;P&gt;I'm trying to load a JSON file from a URL into a DataFrame. The data is loaded and parsed correctly into the Python JSON type, but passing it as an argument to sc.parallelize() throws an exception:&lt;/P&gt;&lt;P&gt;The Code:&lt;/P&gt;&lt;PRE&gt;url = "http://api.luftdaten.info/static/v1/data.json"
response = urlopen(url)
data = str(response.read())
json_data = json.loads(data)
json_string = json.dumps(json_data)
rdd = sc.parallelize(json_string)
df = sqlContext.read.json(rdd)
&lt;/PRE&gt;&lt;P&gt;The Error:&lt;/P&gt;&lt;PRE&gt;root 

|-- _corrupt_record: string (nullable = true)&lt;/PRE&gt;&lt;P&gt;Does anyone have an idea what is wrong?&lt;/P&gt;</description>
      <pubDate>Mon, 14 Aug 2017 18:10:49 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216474#M178383</guid>
      <dc:creator>lukas_mueller02</dc:creator>
      <dc:date>2017-08-14T18:10:49Z</dc:date>
    </item>
    <item>
      <title>Re: Loading a JSON File from URL into a Spark DataFrame with Python</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216475#M178384</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/29248/lukasmueller0289.html" nodeid="29248"&gt;@Lukas Müller&lt;/A&gt;, try the approach below to create a DataFrame from data.json:&lt;/P&gt;&lt;PRE&gt;import json
import requests

r = requests.get("http://api.luftdaten.info/static/v1/data.json")
df = sqlContext.createDataFrame([json.loads(line) for line in r.iter_lines()])&lt;/PRE&gt;&lt;P&gt;Reference: &lt;A href="https://stackoverflow.com/questions/32418829/using-pyspark-to-read-json-file-directly-from-a-website"&gt;https://stackoverflow.com/questions/32418829/using-pyspark-to-read-json-file-directly-from-a-website&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 15 Aug 2017 02:36:44 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216475#M178384</guid>
      <dc:creator>yvora</dc:creator>
      <dc:date>2017-08-15T02:36:44Z</dc:date>
    </item>
    <item>
      <title>Re: Loading a JSON File from URL into a Spark DataFrame with Python</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216476#M178385</link>
      <description>&lt;P&gt;I'd like to suggest &lt;A href="https://jsonformatter.org/json-editor"&gt;JSON Editor&lt;/A&gt;: it helps you open or load files and URLs, and create, update, and validate JSON data.&lt;/P&gt;</description>
      <pubDate>Tue, 15 Aug 2017 03:03:52 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216476#M178385</guid>
      <dc:creator>james_malvi</dc:creator>
      <dc:date>2017-08-15T03:03:52Z</dc:date>
    </item>
    <item>
      <title>Re: Loading a JSON File from URL into a Spark DataFrame with Python</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216477#M178386</link>
      <description>&lt;P&gt;Unfortunately this only works if the API returns a single JSON object per line. I reformatted the data into a string with line breaks and tried to apply this to the inline function. It still doesn't work.&lt;/P&gt;&lt;PRE&gt;def convert_single_object_per_line(json_list):
    json_string = ""
    for line in json_list:
        json_string += json.dumps(line) + "\n"
    return json_string&lt;/PRE&gt;&lt;PRE&gt;df = sqlContext.createDataFrame([json.loads(line) for line in r.splitlines()])&lt;/PRE&gt;</description>
      <pubDate>Tue, 15 Aug 2017 16:04:17 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216477#M178386</guid>
      <dc:creator>lukas_mueller02</dc:creator>
      <dc:date>2017-08-15T16:04:17Z</dc:date>
    </item>
    <item>
      <title>Re: Loading a JSON File from URL into a Spark DataFrame with Python</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216478#M178387</link>
      <description>&lt;P&gt;In case anyone else wants to know, I've found something that works for me:&lt;/P&gt;&lt;PRE&gt;def convert_single_object_per_line(json_list):
    json_string = ""
    for line in json_list:
        json_string += json.dumps(line) + "\n"
    return json_string


def parse_dataframe(json_data):
    r = convert_single_object_per_line(json_data)
    mylist = []
    for line in r.splitlines():
        mylist.append(line)
    rdd = sc.parallelize(mylist)
    df = sqlContext.jsonRDD(rdd)
    return df


url = "myurl.json"
response = urlopen(url)
data = str(response.read())
json_data = json.loads(data)
df = parse_dataframe(json_data)&lt;/PRE&gt;</description>
      <pubDate>Tue, 15 Aug 2017 19:18:48 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Loading-a-JSON-File-from-URL-into-a-Spark-DataFrame-with/m-p/216478#M178387</guid>
      <dc:creator>lukas_mueller02</dc:creator>
      <dc:date>2017-08-15T19:18:48Z</dc:date>
    </item>
  </channel>
</rss>

