<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Rowwise manipulation of a DataFrame in PySpark. in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Rowwise-manipulation-of-a-DataFrame-in-PySpark/m-p/226334#M188194</link>
    <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/29248/lukasmueller0289.html" nodeid="29248"&gt;@Lukas Müller&lt;/A&gt;
&lt;/P&gt;&lt;P&gt;This should work for you: &lt;/P&gt;&lt;PRE&gt;from pyspark.sql.types import *
from pyspark.sql.functions import udf, struct

# Create your UDF object (which accepts your python function called "my_udf")
udf_object = udf(my_udf, ArrayType(StringType()))

# Apply the UDF to your Dataframe (called "df")
new_df = df.withColumn("new_column", udf_object(struct([df[x] for x in df.columns])))&lt;/PRE&gt;&lt;P&gt;If you want a more precise return type, replace "ArrayType(StringType())" with an explicit schema that matches what your function returns, such as:&lt;/P&gt;&lt;PRE&gt;schema = ArrayType(StructType([
    StructField("mychar", StringType(), False),
    StructField("myint", IntegerType(), False)
])) &lt;/PRE&gt;&lt;P&gt;Hope this helps!&lt;/P&gt;</description>
    <pubDate>Tue, 22 Aug 2017 20:47:40 GMT</pubDate>
    <dc:creator>dzaratsian</dc:creator>
    <dc:date>2017-08-22T20:47:40Z</dc:date>
  </channel>
</rss>

