Created on 02-08-2016 01:43 PM - edited 09-16-2022 03:02 AM
After updating java version to 1.8 . we constantly see the permgen depreciated warning for every hadoop command that is being run. How can I suppress this. the perm gen value in my hadoop-env file is min 128m and max 256m . Not sure where to modify the value that occurs in the warning 512m.
Regards,
Sundar
Created 02-08-2016 01:52 PM
Which version of HDP are you running? The hadoop command environment settings are added in the hadoop environment
hadoop-env template in ambari in Advanced core-site.xml under HDFS
HADOOP_OPTS and HADOOP_CLIENT_OPTS
Here I have the sections of HDP 2.3.4. Using Java 1.8 perhaps you can compare them with yours? I see one line that has the settings. But its in the if java version < 8 section of the template.
export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m -XX:MaxPermSize=512m $HADOOP_CLIENT_OPTS"
# Set Hadoop-specific environment variables here. # The only required environment variable is JAVA_HOME. All others are # optional. When running a distributed configuration it is best to # set JAVA_HOME in this file, so that it is correctly defined on # remote nodes. # The java implementation to use. Required. export JAVA_HOME={{java_home}} export HADOOP_HOME_WARN_SUPPRESS=1 # Hadoop home directory export HADOOP_HOME=${HADOOP_HOME:-{{hadoop_home}}} # Hadoop Configuration Directory {# this is different for HDP1 #} # Path to jsvc required by secure HDP 2.0 datanode export JSVC_HOME={{jsvc_path}} # The maximum amount of heap to use, in MB. Default is 1000. export HADOOP_HEAPSIZE="{{hadoop_heapsize}}" export HADOOP_NAMENODE_INIT_HEAPSIZE="-Xms{{namenode_heapsize}}" # Extra Java runtime options. Empty by default. export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true ${HADOOP_OPTS}" # Command specific options appended to HADOOP_OPTS when specified HADOOP_JOBTRACKER_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{jtnode_opt_newsize}} -XX:MaxNewSize={{jtnode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xmx{{jtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dhadoop.mapreduce.jobsummary.logger=INFO,JSA ${HADOOP_JOBTRACKER_OPTS}" HADOOP_TASKTRACKER_OPTS="-server -Xmx{{ttnode_heapsize}} -Dhadoop.security.logger=ERROR,console -Dmapred.audit.logger=ERROR,console ${HADOOP_TASKTRACKER_OPTS}" {% if java_version < 8 %} SHARED_HADOOP_NAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -XX:PermSize={{namenode_opt_permsize}} -XX:MaxPermSize={{namenode_opt_maxpermsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{namenode_heapsize}} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT" export HADOOP_NAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-namenode/bin/kill-name-node\" -Dorg.mortbay.jetty.Request.maxFormContentSize=-1 ${HADOOP_NAMENODE_OPTS}" export HADOOP_DATANODE_OPTS="-server -XX:ParallelGCThreads=4 -XX:+UseConcMarkSweepGC -XX:ErrorFile=/var/log/hadoop/$USER/hs_err_pid%p.log -XX:NewSize=200m -XX:MaxNewSize=200m -XX:PermSize=128m -XX:MaxPermSize=256m -Xloggc:/var/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{dtnode_heapsize}} -Xmx{{dtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_DATANODE_OPTS}" export HADOOP_SECONDARYNAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-secondarynamenode/bin/kill-secondary-name-node\" ${HADOOP_SECONDARYNAMENODE_OPTS}" # The following applies to multiple commands (fs, dfs, fsck, distcp etc) export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m -XX:MaxPermSize=512m $HADOOP_CLIENT_OPTS" {% else %} SHARED_HADOOP_NAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{namenode_heapsize}} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT" export HADOOP_NAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-namenode/bin/kill-name-node\" -Dorg.mortbay.jetty.Request.maxFormContentSize=-1 ${HADOOP_NAMENODE_OPTS}" export HADOOP_DATANODE_OPTS="-server -XX:ParallelGCThreads=4 -XX:+UseConcMarkSweepGC -XX:ErrorFile=/var/log/hadoop/$USER/hs_err_pid%p.log -XX:NewSize=200m -XX:MaxNewSize=200m -Xloggc:/var/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{dtnode_heapsize}} -Xmx{{dtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_DATANODE_OPTS}" export HADOOP_SECONDARYNAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-secondarynamenode/bin/kill-secondary-name-node\" ${HADOOP_SECONDARYNAMENODE_OPTS}" # The following applies to multiple commands (fs, dfs, fsck, distcp etc) export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m $HADOOP_CLIENT_OPTS" {% endif %} HADOOP_NFS3_OPTS="-Xmx{{nfsgateway_heapsize}}m -Dhadoop.security.logger=ERROR,DRFAS ${HADOOP_NFS3_OPTS}" HADOOP_BALANCER_OPTS="-server -Xmx{{hadoop_heapsize}}m ${HADOOP_BALANCER_OPTS}" # On secure datanodes, user to run the datanode as after dropping privileges export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER:-{{hadoop_secure_dn_user}}} # Extra ssh options. Empty by default. export HADOOP_SSH_OPTS="-o ConnectTimeout=5 -o SendEnv=HADOOP_CONF_DIR" # Where log files are stored. $HADOOP_HOME/logs by default. export HADOOP_LOG_DIR={{hdfs_log_dir_prefix}}/$USER # History server logs export HADOOP_MAPRED_LOG_DIR={{mapred_log_dir_prefix}}/$USER # Where log files are stored in the secure data environment. export HADOOP_SECURE_DN_LOG_DIR={{hdfs_log_dir_prefix}}/$HADOOP_SECURE_DN_USER # File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default. # export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves # host:path where hadoop code should be rsync'd from. Unset by default. # export HADOOP_MASTER=master:/home/$USER/src/hadoop # Seconds to sleep between slave commands. Unset by default. This # can be useful in large clusters, where, e.g., slave rsyncs can # otherwise arrive faster than the master can service them. # export HADOOP_SLAVE_SLEEP=0.1 # The directory where pid files are stored. /tmp by default. export HADOOP_PID_DIR={{hadoop_pid_dir_prefix}}/$USER export HADOOP_SECURE_DN_PID_DIR={{hadoop_pid_dir_prefix}}/$HADOOP_SECURE_DN_USER # History server pid export HADOOP_MAPRED_PID_DIR={{mapred_pid_dir_prefix}}/$USER YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY" # A string representing this instance of hadoop. $USER by default. export HADOOP_IDENT_STRING=$USER # The scheduling priority for daemon processes. See 'man nice'. # export HADOOP_NICENESS=10 # Use libraries from standard classpath JAVA_JDBC_LIBS="" #Add libraries required by mysql connector for jarFile in `ls /usr/share/java/*mysql* 2>/dev/null` do JAVA_JDBC_LIBS=${JAVA_JDBC_LIBS}:$jarFile done # Add libraries required by oracle connector for jarFile in `ls /usr/share/java/*ojdbc* 2>/dev/null` do JAVA_JDBC_LIBS=${JAVA_JDBC_LIBS}:$jarFile done export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${JAVA_JDBC_LIBS} # Setting path to hdfs command line export HADOOP_LIBEXEC_DIR={{hadoop_libexec_dir}} # Mostly required for hadoop 2.0 export JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH} export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS" {% if is_datanode_max_locked_memory_set %} # Fix temporary bug, when ulimit from conf files is not picked up, without full relogin. # Makes sense to fix only when runing DN as root if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then ulimit -l {{datanode_max_locked_memory}} fi {% endif %}
Created 02-08-2016 01:52 PM
Which version of HDP are you running? The hadoop command environment settings are added in the hadoop environment
hadoop-env template in ambari in Advanced core-site.xml under HDFS
HADOOP_OPTS and HADOOP_CLIENT_OPTS
Here I have the sections of HDP 2.3.4. Using Java 1.8 perhaps you can compare them with yours? I see one line that has the settings. But its in the if java version < 8 section of the template.
export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m -XX:MaxPermSize=512m $HADOOP_CLIENT_OPTS"
# Set Hadoop-specific environment variables here. # The only required environment variable is JAVA_HOME. All others are # optional. When running a distributed configuration it is best to # set JAVA_HOME in this file, so that it is correctly defined on # remote nodes. # The java implementation to use. Required. export JAVA_HOME={{java_home}} export HADOOP_HOME_WARN_SUPPRESS=1 # Hadoop home directory export HADOOP_HOME=${HADOOP_HOME:-{{hadoop_home}}} # Hadoop Configuration Directory {# this is different for HDP1 #} # Path to jsvc required by secure HDP 2.0 datanode export JSVC_HOME={{jsvc_path}} # The maximum amount of heap to use, in MB. Default is 1000. export HADOOP_HEAPSIZE="{{hadoop_heapsize}}" export HADOOP_NAMENODE_INIT_HEAPSIZE="-Xms{{namenode_heapsize}}" # Extra Java runtime options. Empty by default. export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true ${HADOOP_OPTS}" # Command specific options appended to HADOOP_OPTS when specified HADOOP_JOBTRACKER_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{jtnode_opt_newsize}} -XX:MaxNewSize={{jtnode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xmx{{jtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dhadoop.mapreduce.jobsummary.logger=INFO,JSA ${HADOOP_JOBTRACKER_OPTS}" HADOOP_TASKTRACKER_OPTS="-server -Xmx{{ttnode_heapsize}} -Dhadoop.security.logger=ERROR,console -Dmapred.audit.logger=ERROR,console ${HADOOP_TASKTRACKER_OPTS}" {% if java_version < 8 %} SHARED_HADOOP_NAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -XX:PermSize={{namenode_opt_permsize}} -XX:MaxPermSize={{namenode_opt_maxpermsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{namenode_heapsize}} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT" export HADOOP_NAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-namenode/bin/kill-name-node\" -Dorg.mortbay.jetty.Request.maxFormContentSize=-1 ${HADOOP_NAMENODE_OPTS}" export HADOOP_DATANODE_OPTS="-server -XX:ParallelGCThreads=4 -XX:+UseConcMarkSweepGC -XX:ErrorFile=/var/log/hadoop/$USER/hs_err_pid%p.log -XX:NewSize=200m -XX:MaxNewSize=200m -XX:PermSize=128m -XX:MaxPermSize=256m -Xloggc:/var/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{dtnode_heapsize}} -Xmx{{dtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_DATANODE_OPTS}" export HADOOP_SECONDARYNAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-secondarynamenode/bin/kill-secondary-name-node\" ${HADOOP_SECONDARYNAMENODE_OPTS}" # The following applies to multiple commands (fs, dfs, fsck, distcp etc) export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m -XX:MaxPermSize=512m $HADOOP_CLIENT_OPTS" {% else %} SHARED_HADOOP_NAMENODE_OPTS="-server -XX:ParallelGCThreads=8 -XX:+UseConcMarkSweepGC -XX:ErrorFile={{hdfs_log_dir_prefix}}/$USER/hs_err_pid%p.log -XX:NewSize={{namenode_opt_newsize}} -XX:MaxNewSize={{namenode_opt_maxnewsize}} -Xloggc:{{hdfs_log_dir_prefix}}/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{namenode_heapsize}} -Xmx{{namenode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT" export HADOOP_NAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-namenode/bin/kill-name-node\" -Dorg.mortbay.jetty.Request.maxFormContentSize=-1 ${HADOOP_NAMENODE_OPTS}" export HADOOP_DATANODE_OPTS="-server -XX:ParallelGCThreads=4 -XX:+UseConcMarkSweepGC -XX:ErrorFile=/var/log/hadoop/$USER/hs_err_pid%p.log -XX:NewSize=200m -XX:MaxNewSize=200m -Xloggc:/var/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms{{dtnode_heapsize}} -Xmx{{dtnode_heapsize}} -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HADOOP_DATANODE_OPTS}" export HADOOP_SECONDARYNAMENODE_OPTS="${SHARED_HADOOP_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop-hdfs-secondarynamenode/bin/kill-secondary-name-node\" ${HADOOP_SECONDARYNAMENODE_OPTS}" # The following applies to multiple commands (fs, dfs, fsck, distcp etc) export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m $HADOOP_CLIENT_OPTS" {% endif %} HADOOP_NFS3_OPTS="-Xmx{{nfsgateway_heapsize}}m -Dhadoop.security.logger=ERROR,DRFAS ${HADOOP_NFS3_OPTS}" HADOOP_BALANCER_OPTS="-server -Xmx{{hadoop_heapsize}}m ${HADOOP_BALANCER_OPTS}" # On secure datanodes, user to run the datanode as after dropping privileges export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER:-{{hadoop_secure_dn_user}}} # Extra ssh options. Empty by default. export HADOOP_SSH_OPTS="-o ConnectTimeout=5 -o SendEnv=HADOOP_CONF_DIR" # Where log files are stored. $HADOOP_HOME/logs by default. export HADOOP_LOG_DIR={{hdfs_log_dir_prefix}}/$USER # History server logs export HADOOP_MAPRED_LOG_DIR={{mapred_log_dir_prefix}}/$USER # Where log files are stored in the secure data environment. export HADOOP_SECURE_DN_LOG_DIR={{hdfs_log_dir_prefix}}/$HADOOP_SECURE_DN_USER # File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default. # export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves # host:path where hadoop code should be rsync'd from. Unset by default. # export HADOOP_MASTER=master:/home/$USER/src/hadoop # Seconds to sleep between slave commands. Unset by default. This # can be useful in large clusters, where, e.g., slave rsyncs can # otherwise arrive faster than the master can service them. # export HADOOP_SLAVE_SLEEP=0.1 # The directory where pid files are stored. /tmp by default. export HADOOP_PID_DIR={{hadoop_pid_dir_prefix}}/$USER export HADOOP_SECURE_DN_PID_DIR={{hadoop_pid_dir_prefix}}/$HADOOP_SECURE_DN_USER # History server pid export HADOOP_MAPRED_PID_DIR={{mapred_pid_dir_prefix}}/$USER YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY" # A string representing this instance of hadoop. $USER by default. export HADOOP_IDENT_STRING=$USER # The scheduling priority for daemon processes. See 'man nice'. # export HADOOP_NICENESS=10 # Use libraries from standard classpath JAVA_JDBC_LIBS="" #Add libraries required by mysql connector for jarFile in `ls /usr/share/java/*mysql* 2>/dev/null` do JAVA_JDBC_LIBS=${JAVA_JDBC_LIBS}:$jarFile done # Add libraries required by oracle connector for jarFile in `ls /usr/share/java/*ojdbc* 2>/dev/null` do JAVA_JDBC_LIBS=${JAVA_JDBC_LIBS}:$jarFile done export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${JAVA_JDBC_LIBS} # Setting path to hdfs command line export HADOOP_LIBEXEC_DIR={{hadoop_libexec_dir}} # Mostly required for hadoop 2.0 export JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH} export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS" {% if is_datanode_max_locked_memory_set %} # Fix temporary bug, when ulimit from conf files is not picked up, without full relogin. # Makes sense to fix only when runing DN as root if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then ulimit -l {{datanode_max_locked_memory}} fi {% endif %}