Member since
04-11-2017
1
Post
0
Kudos Received
0
Solutions
04-12-2017
08:09 AM
# Load the iris CSV: the first row supplies the column names (header=True)
# and Spark infers each column's type from the data (inferSchema=True).
# NOTE(review): `spark` is presumably an existing SparkSession — it is not
# defined anywhere in this snippet; confirm against the surrounding session.
iris = spark.read.csv("/tmp/iris.csv", header=True, inferSchema=True)
# Print the resulting schema (column names, types, nullability).
iris.printSchema()
Result:
root
|-- sepalLength: double (nullable = true)
|-- sepalWidth: double (nullable = true)
|-- petalLength: double (nullable = true)
|-- petalWidth: double (nullable = true)
|-- species: string (nullable = true)
Write parquet file:
iris.write.parquet("/tmp/iris.parquet")
... and create Hive table:
spark.sql("""
create external table iris_p (
sepalLength double,
sepalWidth double,
petalLength double,
petalWidth double,
species string
)
STORED AS PARQUET
location "/tmp/iris.parquet"
""")
... View more