Support Questions
Find answers, ask questions, and share your expertise

Severe Hive Performance Problems- Hortonworks Sandbox 3.0.1 VMware

Severe Hive Performance Problems- Hortonworks Sandbox 3.0.1 VMware

New Contributor

Hi, 

 

I'm trying to get through the basic HDP Sandbox tutorials and finding performance is totally unusable on many tasks. 

 

My environment: VMware Workstation 15 Player, with 56GB RAM and 6 or 8 cores (tried both ways) assigned to the VM. The underlying system has NVMe SSD storage, 16 Xeon cores (Sandy Bridge), and 192GB physical RAM. The initial configuration was unusable for even basic queries: "Describe" on a sample tutorial table ran for >10 minutes before I killed it, "SELECT COUNT(*) from trucks" took 15 minutes, etc. Once I took a minute and adjusted many Hive and Tez parameters to recommended values, I could at least get through the first few queries. 

 

But one of the queries in the tutorial is as follows: 

SELECT truckid, avg(mpg) avgmpg FROM truckmileage GROUP BY truckid;

This query never runs to completion. These datasets are of course tiny -- a few thousand rows, and this performance is so bad that the product is basically unusable.

 

How can I fix this? I'm wondering whether I can reduce the amount of swap the VM uses for Linux, to keep things in real RAM. Since the default allocation is 10GB RAM I assume things can normally run, if slowly, in that amount of resources. I can't think of much else to do, as I'm no Linux or Hadoop expert. 

 

I've appended my Hive and Tez configurations below. 

Thanks for any pointers!

 

  <configuration  xmlns:xi="http://www.w3.org/2001/XInclude">
    
    <property>
      <name>ambari.hive.db.schema.name</name>
      <value>hive</value>
    </property>
    
    <property>
      <name>atlas.hook.hive.maxThreads</name>
      <value>1</value>
    </property>
    
    <property>
      <name>atlas.hook.hive.minThreads</name>
      <value>1</value>
    </property>
    
    <property>
      <name>datanucleus.autoCreateSchema</name>
      <value>false</value>
    </property>
    
    <property>
      <name>datanucleus.cache.level2.type</name>
      <value>none</value>
    </property>
    
    <property>
      <name>datanucleus.fixedDatastore</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.auto.convert.join</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.auto.convert.join.noconditionaltask</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.auto.convert.join.noconditionaltask.size</name>
      <value>4294967296</value>
    </property>
    
    <property>
      <name>hive.auto.convert.sortmerge.join</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.auto.convert.sortmerge.join.to.mapjoin</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.cbo.enable</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.cli.print.header</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.cluster.delegation.token.store.class</name>
      <value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
    </property>
    
    <property>
      <name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
      <value>sandbox-hdp.hortonworks.com:2181</value>
    </property>
    
    <property>
      <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
      <value>/hive/cluster/delegation</value>
    </property>
    
    <property>
      <name>hive.compactor.abortedtxn.threshold</name>
      <value>1000</value>
    </property>
    
    <property>
      <name>hive.compactor.check.interval</name>
      <value>300</value>
    </property>
    
    <property>
      <name>hive.compactor.delta.num.threshold</name>
      <value>10</value>
    </property>
    
    <property>
      <name>hive.compactor.delta.pct.threshold</name>
      <value>0.1f</value>
    </property>
    
    <property>
      <name>hive.compactor.initiator.on</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.compactor.worker.threads</name>
      <value>1</value>
    </property>
    
    <property>
      <name>hive.compactor.worker.timeout</name>
      <value>86400</value>
    </property>
    
    <property>
      <name>hive.compute.query.using.stats</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.conf.restricted.list</name>
      <value>hive.security.authenticator.manager,hive.security.authorization.manager,hive.users.in.admin.role</value>
    </property>
    
    <property>
      <name>hive.convert.join.bucket.mapjoin.tez</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.create.as.insert.only</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.default.fileformat</name>
      <value>TextFile</value>
    </property>
    
    <property>
      <name>hive.default.fileformat.managed</name>
      <value>ORC</value>
    </property>
    
    <property>
      <name>hive.driver.parallel.compilation</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.enforce.sortmergebucketmapjoin</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.exec.compress.intermediate</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.exec.compress.output</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.exec.dynamic.partition</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.exec.dynamic.partition.mode</name>
      <value>nonstrict</value>
    </property>
    
    <property>
      <name>hive.exec.failure.hooks</name>
      <value>org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook</value>
    </property>
    
    <property>
      <name>hive.exec.max.created.files</name>
      <value>100000</value>
    </property>
    
    <property>
      <name>hive.exec.max.dynamic.partitions</name>
      <value>5000</value>
    </property>
    
    <property>
      <name>hive.exec.max.dynamic.partitions.pernode</name>
      <value>2000</value>
    </property>
    
    <property>
      <name>hive.exec.orc.split.strategy</name>
      <value>HYBRID</value>
    </property>
    
    <property>
      <name>hive.exec.parallel</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.exec.parallel.thread.number</name>
      <value>8</value>
    </property>
    
    <property>
      <name>hive.exec.post.hooks</name>
      <value>org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook,org.apache.atlas.hive.hook.HiveHook</value>
    </property>
    
    <property>
      <name>hive.exec.pre.hooks</name>
      <value>org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook</value>
    </property>
    
    <property>
      <name>hive.exec.reducers.bytes.per.reducer</name>
      <value>330544432</value>
    </property>
    
    <property>
      <name>hive.exec.reducers.max</name>
      <value>1009</value>
    </property>
    
    <property>
      <name>hive.exec.scratchdir</name>
      <value>/tmp/hive</value>
    </property>
    
    <property>
      <name>hive.exec.submit.local.task.via.child</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.exec.submitviachild</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.execution.engine</name>
      <value>tez</value>
    </property>
    
    <property>
      <name>hive.execution.mode</name>
      <value>container</value>
    </property>
    
    <property>
      <name>hive.fetch.task.aggr</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.fetch.task.conversion</name>
      <value>more</value>
    </property>
    
    <property>
      <name>hive.fetch.task.conversion.threshold</name>
      <value>1073741824</value>
    </property>
    
    <property>
      <name>hive.heapsize</name>
      <value>1024</value>
    </property>
    
    <!-- Base directory setting for the proto logging hook; presumably this is
         where HiveProtoLoggingHook (set as the pre/post/failure exec hook in
         this file) writes its output. TODO confirm.
         NOTE(review): the value still contains the literal placeholder
         {hive_metastore_warehouse_dir}. Confirm that the management tool
         expands it before the file is deployed, and that the hive-site.xml
         actually on disk holds a real path. -->
    <property>
      <name>hive.hook.proto.base-directory</name>
      <value>{hive_metastore_warehouse_dir}/sys.db/query_data/</value>
    </property>
    
    <property>
      <name>hive.limit.optimize.enable</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.limit.pushdown.memory.usage</name>
      <value>0.04</value>
    </property>
    
    <property>
      <name>hive.load.data.owner</name>
      <value>hive</value>
    </property>
    
    <property>
      <name>hive.lock.manager</name>
      <value></value>
    </property>
    
    <property>
      <name>hive.map.aggr</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
      <value>0.9</value>
    </property>
    
    <property>
      <name>hive.map.aggr.hash.min.reduction</name>
      <value>0.5</value>
    </property>
    
    <property>
      <name>hive.map.aggr.hash.percentmemory</name>
      <value>0.5</value>
    </property>
    
    <property>
      <name>hive.mapjoin.bucket.cache.size</name>
      <value>10000</value>
    </property>
    
    <property>
      <name>hive.mapjoin.hybridgrace.hashtable</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.mapjoin.optimized.hashtable</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.mapred.reduce.tasks.speculative.execution</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.materializedview.rewriting.incremental</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.merge.mapfiles</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.merge.mapredfiles</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.merge.orcfile.stripe.level</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.merge.rcfile.block.level</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.merge.size.per.task</name>
      <value>256000000</value>
    </property>
    
    <property>
      <name>hive.merge.smallfiles.avgsize</name>
      <value>16000000</value>
    </property>
    
    <property>
      <name>hive.merge.tezfiles</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.metastore.authorization.storage.checks</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.metastore.cache.pinobjtypes</name>
      <value>Table,Database,Type,FieldSchema,Order</value>
    </property>
    
    <property>
      <name>hive.metastore.client.connect.retry.delay</name>
      <value>5s</value>
    </property>
    
    <property>
      <name>hive.metastore.client.socket.timeout</name>
      <value>1800s</value>
    </property>
    
    <property>
      <name>hive.metastore.connect.retries</name>
      <value>24</value>
    </property>
    
    <property>
      <name>hive.metastore.db.type</name>
      <value>mysql</value>
    </property>
    
    <property>
      <name>hive.metastore.dml.events</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.metastore.event.listeners</name>
      <value></value>
    </property>
    
    <property>
      <name>hive.metastore.execute.setugi</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.metastore.failure.retries</name>
      <value>24</value>
    </property>
    
    <property>
      <name>hive.metastore.kerberos.keytab.file</name>
      <value>/etc/security/keytabs/hive.service.keytab</value>
    </property>
    
    <property>
      <name>hive.metastore.kerberos.principal</name>
      <value>hive/_HOST@EXAMPLE.COM</value>
    </property>
    
    <property>
      <name>hive.metastore.pre.event.listeners</name>
      <value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
    </property>
    
    <property>
      <name>hive.metastore.sasl.enabled</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.metastore.server.max.threads</name>
      <value>100000</value>
    </property>
    
    <property>
      <name>hive.metastore.transactional.event.listeners</name>
      <value>org.apache.hive.hcatalog.listener.DbNotificationListener</value>
    </property>
    
    <property>
      <name>hive.metastore.uris</name>
      <value>thrift://sandbox-hdp.hortonworks.com:9083</value>
    </property>
    
    <property>
      <name>hive.metastore.warehouse.dir</name>
      <value>/warehouse/tablespace/managed/hive</value>
    </property>
    
    <property>
      <name>hive.metastore.warehouse.external.dir</name>
      <value>/warehouse/tablespace/external/hive</value>
    </property>
    
    <property>
      <name>hive.optimize.bucketmapjoin</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.optimize.bucketmapjoin.sortedmerge</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.optimize.constant.propagation</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.optimize.dynamic.partition.hashjoin</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.optimize.index.filter</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.optimize.metadataonly</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.optimize.null.scan</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.optimize.reducededuplication</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.optimize.reducededuplication.min.reducer</name>
      <value>4</value>
    </property>
    
    <property>
      <name>hive.optimize.sort.dynamic.partition</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.orc.compute.splits.num.threads</name>
      <value>10</value>
    </property>
    
    <property>
      <name>hive.orc.splits.include.file.footer</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.prewarm.enabled</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.prewarm.numcontainers</name>
      <value>3</value>
    </property>
    
    <property>
      <name>hive.repl.cm.enabled</name>
      <value></value>
    </property>
    
    <property>
      <name>hive.repl.cmrootdir</name>
      <value></value>
    </property>
    
    <property>
      <name>hive.repl.rootdir</name>
      <value></value>
    </property>
    
    <property>
      <name>hive.security.authenticator.manager</name>
      <value>org.apache.hadoop.hive.ql.security.ProxyUserAuthenticator</value>
    </property>
    
    <property>
      <name>hive.security.authorization.enabled</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.security.authorization.manager</name>
      <value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value>
    </property>
    
    <property>
      <name>hive.security.metastore.authenticator.manager</name>
      <value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
    </property>
    
    <property>
      <name>hive.security.metastore.authorization.auth.reads</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.security.metastore.authorization.manager</name>
      <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
    </property>
    
    <property>
      <name>hive.server2.allow.user.substitution</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.server2.authentication</name>
      <value>NONE</value>
    </property>
    
    <!-- Keytab file that holds the credentials of the SPNEGO principal
         configured in the next property. The value must be a filesystem
         path, not a principal name. -->
    <property>
      <name>hive.server2.authentication.spnego.keytab</name>
      <value>/etc/security/keytabs/spnego.service.keytab</value>
    </property>
    
    <!-- SPNEGO service principal in primary/instance@REALM form.
         NOTE(review): hive.server2.authentication is NONE in this file, so
         this pair is presumably not consulted at present; verify before
         enabling Kerberos or HTTP transport. -->
    <property>
      <name>hive.server2.authentication.spnego.principal</name>
      <value>HTTP/_HOST@EXAMPLE.COM</value>
    </property>
    
    <property>
      <name>hive.server2.enable.doAs</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.server2.idle.operation.timeout</name>
      <value>6h</value>
    </property>
    
    <property>
      <name>hive.server2.idle.session.timeout</name>
      <value>1d</value>
    </property>
    
    <property>
      <name>hive.server2.logging.operation.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.server2.logging.operation.log.location</name>
      <value>/tmp/hive/operation_logs</value>
    </property>
    
    <property>
      <name>hive.server2.max.start.attempts</name>
      <value>5</value>
    </property>
    
    <property>
      <name>hive.server2.support.dynamic.service.discovery</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.server2.table.type.mapping</name>
      <value>CLASSIC</value>
    </property>
    
    <property>
      <name>hive.server2.tez.default.queues</name>
      <value>default</value>
    </property>
    
    <property>
      <name>hive.server2.tez.initialize.default.sessions</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.server2.tez.sessions.per.default.queue</name>
      <value>1</value>
    </property>
    
    <property>
      <name>hive.server2.thrift.http.path</name>
      <value>cliservice</value>
    </property>
    
    <property>
      <name>hive.server2.thrift.http.port</name>
      <value>10001</value>
    </property>
    
    <property>
      <name>hive.server2.thrift.max.worker.threads</name>
      <value>500</value>
    </property>
    
    <property>
      <name>hive.server2.thrift.port</name>
      <value>10000</value>
    </property>
    
    <property>
      <name>hive.server2.thrift.sasl.qop</name>
      <value>auth</value>
    </property>
    
    <property>
      <name>hive.server2.transport.mode</name>
      <value>binary</value>
    </property>
    
    <property>
      <name>hive.server2.use.SSL</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.server2.webui.cors.allowed.headers</name>
      <value>X-Requested-With,Content-Type,Accept,Origin,X-Requested-By,x-requested-by</value>
    </property>
    
    <property>
      <name>hive.server2.webui.enable.cors</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.server2.webui.port</name>
      <value>10002</value>
    </property>
    
    <property>
      <name>hive.server2.webui.use.ssl</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.server2.zookeeper.namespace</name>
      <value>hiveserver2</value>
    </property>
    
    <property>
      <name>hive.service.metrics.codahale.reporter.classes</name>
      <value>org.apache.hadoop.hive.common.metrics.metrics2.JsonFileMetricsReporter,org.apache.hadoop.hive.common.metrics.metrics2.JmxMetricsReporter,org.apache.hadoop.hive.common.metrics.metrics2.Metrics2Reporter</value>
    </property>
    
    <property>
      <name>hive.smbjoin.cache.rows</name>
      <value>10000</value>
    </property>
    
    <property>
      <name>hive.stats.autogather</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.stats.dbclass</name>
      <value>fs</value>
    </property>
    
    <property>
      <name>hive.stats.fetch.column.stats</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.stats.fetch.partition.stats</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.strict.managed.tables</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.support.concurrency</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.tez.auto.reducer.parallelism</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.tez.bucket.pruning</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.tez.cartesian-product.enabled</name>
      <value>true</value>
    </property>
    
    <!-- Size requested for each Hive-on-Tez task container (presumably in MB;
         confirm against the Hive configuration reference).
         NOTE(review): 15360 is the same figure used for
         tez.am.resource.memory.mb and tez.task.resource.memory.mb in the Tez
         configuration later in this file, so a single query would need about
         30 GB of YARN memory for one AM plus one task. The YARN limits
         (yarn.nodemanager.resource.memory-mb,
         yarn.scheduler.maximum-allocation-mb) are not shown here; verify that
         they can satisfy these requests, otherwise containers may never be
         granted and the query would wait indefinitely. -->
    <property>
      <name>hive.tez.container.size</name>
      <value>15360</value>
    </property>
    
    <property>
      <name>hive.tez.cpu.vcores</name>
      <value>-1</value>
    </property>
    
    <property>
      <name>hive.tez.dynamic.partition.pruning</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.tez.dynamic.partition.pruning.max.data.size</name>
      <value>104857600</value>
    </property>
    
    <property>
      <name>hive.tez.dynamic.partition.pruning.max.event.size</name>
      <value>1048576</value>
    </property>
    
    <property>
      <name>hive.tez.exec.print.summary</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.tez.input.format</name>
      <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
    </property>
    
    <property>
      <name>hive.tez.input.generate.consistent.splits</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.tez.java.opts</name>
      <value>-server -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseG1GC -XX:+ResizeTLAB -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps</value>
    </property>
    
    <property>
      <name>hive.tez.log.level</name>
      <value>INFO</value>
    </property>
    
    <property>
      <name>hive.tez.max.partition.factor</name>
      <value>2.0</value>
    </property>
    
    <property>
      <name>hive.tez.min.partition.factor</name>
      <value>0.25</value>
    </property>
    
    <property>
      <name>hive.tez.smb.number.waves</name>
      <value>0.5</value>
    </property>
    
    <property>
      <name>hive.txn.manager</name>
      <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
    </property>
    
    <property>
      <name>hive.txn.max.open.batch</name>
      <value>1000</value>
    </property>
    
    <property>
      <name>hive.txn.strict.locking.mode</name>
      <value>false</value>
    </property>
    
    <property>
      <name>hive.txn.timeout</name>
      <value>300</value>
    </property>
    
    <property>
      <name>hive.user.install.directory</name>
      <value>/user/</value>
    </property>
    
    <property>
      <name>hive.vectorized.execution.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.vectorized.execution.mapjoin.minmax.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.vectorized.execution.mapjoin.native.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.vectorized.execution.reduce.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>hive.vectorized.groupby.checkinterval</name>
      <value>4096</value>
    </property>
    
    <property>
      <name>hive.vectorized.groupby.flush.percent</name>
      <value>0.1</value>
    </property>
    
    <property>
      <name>hive.vectorized.groupby.maxentries</name>
      <value>100000</value>
    </property>
    
    <property>
      <name>hive.zookeeper.client.port</name>
      <value>2181</value>
    </property>
    
    <property>
      <name>hive.zookeeper.namespace</name>
      <value>hive_zookeeper_namespace</value>
    </property>
    
    <property>
      <name>hive.zookeeper.quorum</name>
      <value>sandbox-hdp.hortonworks.com:2181</value>
    </property>
    
    <property>
      <name>javax.jdo.option.ConnectionDriverName</name>
      <value>com.mysql.jdbc.Driver</value>
    </property>
    
    <property>
      <name>javax.jdo.option.ConnectionURL</name>
      <value>jdbc:mysql://sandbox-hdp.hortonworks.com/hive?createDatabaseIfNotExist=true</value>
    </property>
    
    <property>
      <name>javax.jdo.option.ConnectionUserName</name>
      <value>hive</value>
    </property>
    
    <property>
      <name>metastore.create.as.acid</name>
      <value>true</value>
    </property>
    
  </configuration>




# The heap size of the JVM, and the JVM args started by the hive shell script, can be controlled via:
# $SERVICE selects which branch below applies. For any other value (or when it
# is unset) this file leaves HADOOP_HEAPSIZE and HADOOP_OPTS untouched.
# NOTE(review): 8052 MB (metastore) plus 24158 MB (HiveServer2) is a large
# share of the 56GB VM described in this post; confirm it leaves enough room
# for the YARN containers the queries need.
if [ "$SERVICE" = "metastore" ]; then

  export HADOOP_HEAPSIZE=8052 # Setting for HiveMetastore
  export HADOOP_OPTS="$HADOOP_OPTS -Xloggc:/var/log/hive/hivemetastore-gc-%t.log -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCCause -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/hive/hms_heapdump.hprof -Dhive.log.dir=/var/log/hive -Dhive.log.file=hivemetastore.log"

fi

if [ "$SERVICE" = "hiveserver2" ]; then

  export HADOOP_HEAPSIZE=24158 # Setting for HiveServer2 and Client
  export HADOOP_OPTS="$HADOOP_OPTS -Xloggc:/var/log/hive/hiveserver2-gc-%t.log -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCCause -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/hive/hs2_heapdump.hprof -Dhive.log.dir=/var/log/hive -Dhive.log.file=hiveserver2.log"

fi

# Only pass -Xmx when a heap size is actually known. Without this guard, a
# SERVICE other than metastore/hiveserver2 with HADOOP_HEAPSIZE unset would
# receive the malformed JVM option "-Xmxm".
if [ -n "${HADOOP_HEAPSIZE:-}" ]; then
  HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS  -Xmx${HADOOP_HEAPSIZE}m"
fi
# Always export, even when nothing was appended, so child processes see the
# variable exactly as they did before the guard was added.
export HADOOP_CLIENT_OPTS

# Larger heap size may be required when running queries over large number of files or partitions.
# By default hive shell scripts use a heap size of 256 (MB).  Larger heap size would also be
# appropriate for hive server (hwi etc).


# Set HADOOP_HOME to point to a specific hadoop install directory
# NOTE(review): assigned but not exported here, so it is only visible to the
# shell that sources this file unless it was already exported by the caller.
HADOOP_HOME=${HADOOP_HOME:-/usr/hdp/current/hadoop-client}

export HIVE_HOME=${HIVE_HOME:-/usr/hdp/current/hive-client}

# Hive Configuration Directory can be controlled by:
export HIVE_CONF_DIR=${HIVE_CONF_DIR:-/usr/hdp/current/hive-client/conf}

# Folder containing extra libraries required for hive compilation/execution can be controlled by:
# A caller-supplied HIVE_AUX_JARS_PATH is kept only when it names a regular
# file; otherwise (unset, or set to something that is not a regular file) the
# hcatalog core jar is used whenever the hcatalog directory exists.
if [ "${HIVE_AUX_JARS_PATH}" != "" ]; then
  if [ -f "${HIVE_AUX_JARS_PATH}" ]; then
    export HIVE_AUX_JARS_PATH=${HIVE_AUX_JARS_PATH}
  elif [ -d "/usr/hdp/current/hive-webhcat/share/hcatalog" ]; then
    export HIVE_AUX_JARS_PATH=/usr/hdp/current/hive-webhcat/share/hcatalog/hive-hcatalog-core.jar
  fi
elif [ -d "/usr/hdp/current/hive-webhcat/share/hcatalog" ]; then
  export HIVE_AUX_JARS_PATH=/usr/hdp/current/hive-webhcat/share/hcatalog/hive-hcatalog-core.jar
fi

# Port for the Hive Metastore; matches the port in hive.metastore.uris.
export METASTORE_PORT=9083

----------

  <configuration  xmlns:xi="http://www.w3.org/2001/XInclude">
    
    <property>
      <name>tez.am.am-rm.heartbeat.interval-ms.max</name>
      <value>250</value>
    </property>
    
    <property>
      <name>tez.am.container.idle.release-timeout-max.millis</name>
      <value>20000</value>
    </property>
    
    <property>
      <name>tez.am.container.idle.release-timeout-min.millis</name>
      <value>10000</value>
    </property>
    
    <property>
      <name>tez.am.container.reuse.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.am.container.reuse.locality.delay-allocation-millis</name>
      <value>250</value>
    </property>
    
    <property>
      <name>tez.am.container.reuse.non-local-fallback.enabled</name>
      <value>false</value>
    </property>
    
    <property>
      <name>tez.am.container.reuse.rack-fallback.enabled</name>
      <value>true</value>
    </property>
    
    <!-- JVM options for the Tez Application Master. -Xmx12288m caps the AM
         heap at 12288 MB, which is 80% of the 15360 MB set in
         tez.am.resource.memory.mb in this file. If that container size is
         changed, this heap value must be changed with it. -->
    <property>
      <name>tez.am.java.opts</name>
      <value>-server -Xmx12288m -Djava.net.preferIPv4Stack=true</value>
    </property>
    
    <property>
      <name>tez.am.launch.cluster-default.cmd-opts</name>
      <value>-server -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
    </property>
    
    <property>
      <name>tez.am.launch.cmd-opts</name>
      <value>-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseG1GC -XX:+ResizeTLAB</value>
    </property>
    
    <property>
      <name>tez.am.launch.env</name>
      <value>LD_LIBRARY_PATH=/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-amd64-64</value>
    </property>
    
    <property>
      <name>tez.am.log.level</name>
      <value>INFO</value>
    </property>
    
    <property>
      <name>tez.am.max.app.attempts</name>
      <value>2</value>
    </property>
    
    <property>
      <name>tez.am.maxtaskfailures.per.node</name>
      <value>10</value>
    </property>
    
    <!-- Memory, in MB, requested for the Tez Application Master container.
         NOTE(review): 15360 MB for the AM alone is unusually large for the
         few-thousand-row tables described in this post. Verify that the YARN
         queue and NodeManager limits (not shown here) can grant this in
         addition to at least one task container of
         tez.task.resource.memory.mb. -->
    <property>
      <name>tez.am.resource.memory.mb</name>
      <value>15360</value>
    </property>
    
    <property>
      <name>tez.am.tez-ui.history-url.template</name>
      <value>__HISTORY_URL_BASE__?viewPath=%2F%23%2Ftez-app%2F__APPLICATION_ID__</value>
    </property>
    
    <property>
      <name>tez.am.view-acls</name>
      <value>*</value>
    </property>
    
    <property>
      <name>tez.cluster.additional.classpath.prefix</name>
      <value>/usr/hdp/${hdp.version}/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure</value>
    </property>
    
    <property>
      <name>tez.counters.max</name>
      <value>10000</value>
    </property>
    
    <property>
      <name>tez.counters.max.groups</name>
      <value>3000</value>
    </property>
    
    <property>
      <name>tez.generate.debug.artifacts</name>
      <value>false</value>
    </property>
    
    <property>
      <name>tez.grouping.max-size</name>
      <value>1073741824</value>
    </property>
    
    <property>
      <name>tez.grouping.min-size</name>
      <value>16777216</value>
    </property>
    
    <property>
      <name>tez.grouping.split-waves</name>
      <value>1.7</value>
    </property>
    
    <!-- Base directory setting for the proto history logging service named in
         tez.history.logging.service.class in this file.
         NOTE(review): the value still contains the literal placeholder
         {hive_metastore_warehouse_dir}. Confirm that it is expanded to a real
         path in the tez-site.xml that is actually deployed. -->
    <property>
      <name>tez.history.logging.proto-base-dir</name>
      <value>{hive_metastore_warehouse_dir}/sys.db</value>
    </property>
    
    <property>
      <name>tez.history.logging.service.class</name>
      <value>org.apache.tez.dag.history.logging.proto.ProtoHistoryLoggingService</value>
    </property>
    
    <property>
      <name>tez.history.logging.timeline-cache-plugin.old-num-dags-per-group</name>
      <value>5</value>
    </property>
    
    <property>
      <name>tez.lib.uris</name>
      <value>/hdp/apps/${hdp.version}/tez/tez.tar.gz</value>
    </property>
    
    <property>
      <name>tez.queue.name</name>
      <value>default</value>
    </property>
    
    <property>
      <name>tez.runtime.compress</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.runtime.compress.codec</name>
      <value>org.apache.hadoop.io.compress.SnappyCodec</value>
    </property>
    
    <property>
      <name>tez.runtime.convert.user-payload.to.history-text</name>
      <value>false</value>
    </property>
    
    <property>
      <name>tez.runtime.io.sort.mb</name>
      <value>4055</value>
    </property>
    
    <property>
      <name>tez.runtime.optimize.local.fetch</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.runtime.pipelined.sorter.sort.threads</name>
      <value>2</value>
    </property>
    
    <property>
      <name>tez.runtime.shuffle.fetch.buffer.percent</name>
      <value>0.6</value>
    </property>
    
    <property>
      <name>tez.runtime.shuffle.memory.limit.percent</name>
      <value>0.25</value>
    </property>
    
    <property>
      <name>tez.runtime.sorter.class</name>
      <value>PIPELINED</value>
    </property>
    
    <property>
      <name>tez.runtime.unordered.output.buffer.size-mb</name>
      <value>1152</value>
    </property>
    
    <property>
      <name>tez.session.am.dag.submit.timeout.secs</name>
      <value>600</value>
    </property>
    
    <property>
      <name>tez.session.client.timeout.secs</name>
      <value>-1</value>
    </property>
    
    <property>
      <name>tez.shuffle-vertex-manager.max-src-fraction</name>
      <value>0.4</value>
    </property>
    
    <property>
      <name>tez.shuffle-vertex-manager.min-src-fraction</name>
      <value>0.2</value>
    </property>
    
    <property>
      <name>tez.staging-dir</name>
      <value>/tmp/${user.name}/staging</value>
    </property>
    
    <property>
      <name>tez.task.am.heartbeat.counter.interval-ms.max</name>
      <value>4000</value>
    </property>
    
    <property>
      <name>tez.task.generate.counters.per.io</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.task.get-task.sleep.interval-ms.max</name>
      <value>200</value>
    </property>
    
    <property>
      <name>tez.task.launch.cluster-default.cmd-opts</name>
      <value>-server -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
    </property>
    
    <property>
      <name>tez.task.launch.cmd-opts</name>
      <value>-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseG1GC -XX:+ResizeTLAB</value>
    </property>
    
    <property>
      <name>tez.task.launch.env</name>
      <value>LD_LIBRARY_PATH=/usr/hdp/${hdp.version}/hadoop/lib/native:/usr/hdp/${hdp.version}/hadoop/lib/native/Linux-amd64-64</value>
    </property>
    
    <property>
      <name>tez.task.max-events-per-heartbeat</name>
      <value>500</value>
    </property>
    
    <!-- Memory, in MB, requested for each Tez task container.
         NOTE(review): 15360 MB per task equals tez.am.resource.memory.mb in
         this file, so the AM plus one task need about 30 GB of YARN memory.
         Verify this against yarn.nodemanager.resource.memory-mb and
         yarn.scheduler.maximum-allocation-mb (not shown here); a request that
         cannot be satisfied would leave tasks waiting for a container. -->
    <property>
      <name>tez.task.resource.memory.mb</name>
      <value>15360</value>
    </property>
    
    <property>
      <name>tez.use.cluster.hadoop-libs</name>
      <value>false</value>
    </property>
    
    <property>
      <name>yarn.timeline-service.enabled</name>
      <value>false</value>
    </property>
    
  </configuration>