"""Spark job that copies the Oracle table NE.INTER_APP_EVENT to HDFS as text.

The whole table is read over JDBC and each row is written as one line of
text under a directory named after the moment the job started:

    hdfs:///tmp/<year>/<month name>/<day>/<hour>/<minute>

NOTE(review): the original header described this as a job that bins data
into 100 m wide hexagons in the WebMercator spatial reference.  No binning
is performed by this script -- it only dumps the table.  Confirm whether
the binning step is still to be written or lives elsewhere.
"""
import datetime

JDBC_DRIVER = "oracle.jdbc.OracleDriver"
# NOTE(security): the database user and password are embedded in this URL in
# clear text.  Move them to a protected configuration source (for example the
# "user"/"password" JDBC options fed from a secrets store) and rotate the
# password, since it has been exposed in source form.
JDBC_URL = "jdbc:oracle:thin:NE/Network_147@10.77.1.147:1521/ELLDEV"
SOURCE_TABLE = "NE.INTER_APP_EVENT"

# Kept from the original script (it was already commented out there): a
# sub-query that selects only the row with the highest OBJECTID.
# query = "(select * from NE.INTER_APP_EVENT where objectid = (Select MAX(Objectid) from NE.INTER_APP_EVENT))"


def build_output_path(now):
    """Return the HDFS output directory for the timestamp *now*.

    *now* is a ``datetime.datetime``.  All five path components are taken
    from this single value so they always describe the same instant; the
    original code sampled the clock five separate times, which could mix
    components from two different minutes, hours or days.

    The month is the full month name (``%B``), the day is zero-padded
    (``%d``), and hour and minute are NOT zero-padded -- this matches the
    layout the job has always produced.
    """
    parts = [
        now.strftime("%Y"),
        now.strftime("%B"),
        now.strftime("%d"),
        str(now.hour),
        str(now.minute),
    ]
    return "hdfs:///tmp/" + "/".join(parts)


def main():
    """Read SOURCE_TABLE over JDBC and save it as text files on HDFS."""
    # Imported here rather than at module level so that build_output_path
    # stays importable (and testable) on machines without Spark installed.
    from pyspark import SparkContext
    from pyspark.sql import SQLContext

    # Snapshot the clock once, before any Spark start-up delay.
    output_path = build_output_path(datetime.datetime.now())

    sc = SparkContext()
    try:
        sql_context = SQLContext(sc)
        df = (
            sql_context.read.format("jdbc")
            .option("driver", JDBC_DRIVER)
            .option("url", JDBC_URL)
            .option("dbtable", SOURCE_TABLE)
            .load()
        )
        # saveAsTextFile is called for its side effect only; its result is
        # not needed, so it is no longer bound to a name.
        df.rdd.saveAsTextFile(output_path)
    finally:
        # Always release the Spark application, even if the read or the
        # write fails.
        sc.stop()


if __name__ == "__main__":
    main()