This article explains how to build a Spark application in Eclipse that runs in local mode against a Kerberized Hadoop cluster.
Environment:
Eclipse: Scala IDE
Prerequisites:
- The cluster's krb5.conf and its client configuration files (core-site.xml, hdfs-site.xml, yarn-site.xml).
- A Kerberos principal and its keytab for the user the application will run as.
- Spark and Hadoop client libraries on the Eclipse project's build path.
Steps:
1. Add the NameNode hostname(s) to the hosts file on your local machine (in an HA cluster, add both NameNodes).
2. Add the hostname of the KDC server to the hosts file on your local machine (sample entries are shown after this list).
3. In Eclipse, right-click your project, add a new source folder, and name it resources.
4. Copy core-site.xml, hdfs-site.xml and yarn-site.xml from the cluster into the resources folder on your local machine.
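For reference, the hosts entries for steps 1 and 2 might look like the following; the IP addresses and hostnames here are placeholders for your cluster's actual NameNode and KDC hosts, not values from this article.

# /etc/hosts (or C:\Windows\System32\drivers\etc\hosts on Windows)
10.0.0.11   nn1.example.com
10.0.0.12   nn2.example.com
10.0.0.21   kdc.example.com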
The following is sample Scala code that logs in from a keytab and reads a file from HDFS in local mode.
import org.apache.hadoop.security.UserGroupInformation
import org.apache.spark.{SparkConf, SparkContext}

object testkerbconn {
  def main(args: Array[String]): Unit = {
    // Point the JVM at the cluster's krb5.conf
    System.setProperty("java.security.krb5.conf", "/Users/hmatta/krb5.conf")

    // Log in to Kerberos from the keytab before creating the SparkContext
    UserGroupInformation.loginUserFromKeytab(
      "ambari-qa-ok@EXAMPLE.COM",
      "/Users/hmatta/smokeuser.headless.keytab")

    val sparkconf = new SparkConf()
      .setAppName("TestKerb")
      .setMaster("local[*]")
    val sc = new SparkContext(sparkconf)

    // Read a file from the Kerberized HDFS and print its contents
    val data = sc.textFile("/tmp/idtest.ambari-qa.1502551141.07.in")
    data.collect.foreach(println)
  }
}
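Before bringing Spark into the picture, it can help to confirm that the keytab login and the configuration files in the resources folder work on their own. The sketch below is an illustration rather than part of the original article: it reuses the krb5.conf, principal and keytab paths from the sample code above, while the object name KerbLoginCheck and the listing of /tmp are arbitrary choices for a sanity check.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.UserGroupInformation

object KerbLoginCheck {
  def main(args: Array[String]): Unit = {
    // Same krb5.conf and keytab paths as in the sample above; replace with your own
    System.setProperty("java.security.krb5.conf", "/Users/hmatta/krb5.conf")

    // new Configuration() picks up core-site.xml and hdfs-site.xml from the
    // resources folder on the classpath (steps 3 and 4 above)
    val conf = new Configuration()
    UserGroupInformation.setConfiguration(conf)
    UserGroupInformation.loginUserFromKeytab(
      "ambari-qa-ok@EXAMPLE.COM",
      "/Users/hmatta/smokeuser.headless.keytab")

    val ugi = UserGroupInformation.getCurrentUser
    println(s"Logged in as ${ugi.getUserName}, Kerberos credentials: ${ugi.hasKerberosCredentials}")

    // A simple HDFS call that fails fast if the login or the configs are wrong
    val fs = FileSystem.get(conf)
    fs.listStatus(new Path("/tmp")).foreach(status => println(status.getPath))
  }
}

If this program lists /tmp successfully, the same login and configuration should also work from the SparkContext in the sample above.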