# Load the iris CSV, treating the first row as column names and letting
# Spark infer each column's type from the data.
iris = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("/tmp/iris.csv")
)
iris.printSchema()
Result:
root
|-- sepalLength: double (nullable = true)
|-- sepalWidth: double (nullable = true)
|-- petalLength: double (nullable = true)
|-- petalWidth: double (nullable = true)
|-- species: string (nullable = true)
Write the data out as a Parquet file ...
# Persist the DataFrame in Parquet format at the given path.
iris.write.format("parquet").save("/tmp/iris.parquet")
... and create an external Hive table on top of it
# Register an external Hive table over the Parquet files so the data can be
# queried with SQL without copying it into the warehouse.
# Fixes vs. original:
#  - LOCATION uses single quotes: double quotes are identifier delimiters in
#    ANSI SQL/HiveQL and fail under Spark's ANSI mode.
#  - IF NOT EXISTS makes the statement idempotent, so re-running the script
#    does not raise TableAlreadyExistsException.
#  - Keywords uppercased consistently (original mixed lowercase and uppercase).
spark.sql("""
CREATE EXTERNAL TABLE IF NOT EXISTS iris_p (
    sepalLength DOUBLE,
    sepalWidth  DOUBLE,
    petalLength DOUBLE,
    petalWidth  DOUBLE,
    species     STRING
)
STORED AS PARQUET
LOCATION '/tmp/iris.parquet'
""")