{
  "display_name": "PySpark",
  "language": "python",
  "argv": [
    "python",
    "-m",
    "ipykernel",
    "-f",
    "{connection_file}"
  ],
  "env": {
    "JAVA_HOME": "/etc/alternatives/java_sdk_1.8.0",
    "SPARK_MAJOR_VERSION": "2",
    "SPARK_HOME": "/usr/hdp/current/spark2-client/",
    "PYTHONPATH": "/usr/hdp/current/spark2-client/python:/usr/hdp/current/spark2-client/python/lib/py4j-0.10.4-src.zip",
    "PYTHONSTARTUP": "/usr/hdp/current/spark2-client/python/pyspark/shell.py",
    "PYSPARK_SUBMIT_ARGS": "--master yarn --conf spark.dynamicAllocation.enabled=true --conf spark.driver.memory=50G --conf spark.dynamicAllocation.initialExecutors=1 --conf spark.dynamicAllocation.maxExecutors=40 --conf spark.dynamicAllocation.minExecutors=1 --conf spark.executor.heartbeatInterval=600s --conf spark.executor.memory=50G --conf spark.kryoserializer.buffer=64k --conf spark.kryoserializer.buffer.max=64m --conf spark.network.timeout=800s --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.shuffle.service.enabled=true --conf spark.sql.broadcastTimeout=1800 --conf spark.yarn.am.nodeLabelExpression=spark-am-worker-nodes --conf spark.yarn.driver.memoryOverhead=3072 --conf spark.yarn.executor.memoryOverhead=3072 --conf spark.yarn.queue=data-science-queue pyspark-shell"
  }
}