<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Error closing output stream Warning in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375290#M242374</link>
    <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95875"&gt;@AndreaCavenago&lt;/a&gt;&amp;nbsp;Does this error appear every time we run this spark-submit command?&lt;/P&gt;&lt;P&gt;As this is a warning message, and it does not have any real impact, we can avoid it by changing the log level.&lt;/P&gt;&lt;P&gt;In the &lt;FONT face="andale mono,times"&gt;script.py&lt;/FONT&gt; file, add the following two lines:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;from pyspark import SparkContext
SparkContext.setLogLevel("ERROR")&lt;/LI-CODE&gt;&lt;P&gt;This will avoid the WARN message. But it will still be good to address the actual issue.&lt;/P&gt;</description>
    <pubDate>Wed, 16 Aug 2023 20:06:48 GMT</pubDate>
    <dc:creator>smruti</dc:creator>
    <dc:date>2023-08-16T20:06:48Z</dc:date>
    <item>
      <title>Error closing output stream Warning</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375252#M242352</link>
      <description>&lt;P&gt;Hello,&lt;BR /&gt;we are performing Hive queries with PySpark using the HWC in JDBC_CLUSTER mode.&lt;/P&gt;&lt;P&gt;Everything is running fine and we get the results for the queries, but we also receive a warning message saying that connection has been closed:&lt;/P&gt;&lt;PRE&gt;23/08/16 09:59:05 WARN conf.HiveConf: HiveConf of name hive.masking.algo does not exist&lt;BR /&gt;23/08/16 09:59:05 WARN transport.TIOStreamTransport: Error closing output stream.&lt;BR /&gt;java.net.SocketException: Connection or outbound has closed&lt;BR /&gt;at sun.security.ssl.SSLSocketImpl$AppOutputStream.write(SSLSocketImpl.java:1181)&lt;BR /&gt;at java.io.BufferedOutputStream.flushBuffer(BufferedOutputStream.java:82)&lt;BR /&gt;at java.io.BufferedOutputStream.flush(BufferedOutputStream.java:140)&lt;BR /&gt;at java.io.FilterOutputStream.close(FilterOutputStream.java:158)&lt;BR /&gt;at org.apache.thrift.transport.TIOStreamTransport.close(TIOStreamTransport.java:110)&lt;BR /&gt;at org.apache.thrift.transport.TSocket.close(TSocket.java:235)&lt;BR /&gt;at org.apache.thrift.transport.TSaslTransport.close(TSaslTransport.java:400)&lt;BR /&gt;at org.apache.thrift.transport.TSaslClientTransport.close(TSaslClientTransport.java:37)&lt;BR /&gt;at org.apache.hadoop.hive.metastore.security.TFilterTransport.close(TFilterTransport.java:52)&lt;BR /&gt;at org.apache.hive.jdbc.HiveConnection.close(HiveConnection.java:1153)&lt;BR /&gt;at org.apache.commons.dbcp2.DelegatingConnection.closeInternal(DelegatingConnection.java:239)&lt;BR /&gt;at org.apache.commons.dbcp2.PoolableConnection.reallyClose(PoolableConnection.java:232)&lt;BR /&gt;at org.apache.commons.dbcp2.PoolableConnectionFactory.destroyObject(PoolableConnectionFactory.java:367)&lt;BR /&gt;at org.apache.commons.pool2.impl.GenericObjectPool.destroy(GenericObjectPool.java:921)&lt;BR /&gt;at org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:468)&lt;BR /&gt;at 
org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:365)&lt;BR /&gt;at org.apache.commons.dbcp2.PoolingDataSource.getConnection(PoolingDataSource.java:134)&lt;BR /&gt;at org.apache.commons.dbcp2.BasicDataSource.getConnection(BasicDataSource.java:1563)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.JDBCWrapper.getConnector(HS2JDBCWrapper.scala:481)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.DefaultJDBCWrapper.getConnector(HS2JDBCWrapper.scala)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.util.QueryExecutionUtil.getConnection(QueryExecutionUtil.java:96)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.JdbcDataSourceReader.getTableSchema(JdbcDataSourceReader.java:116)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.JdbcDataSourceReader.readSchema(JdbcDataSourceReader.java:128)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.JdbcDataSourceReader.&amp;lt;init&amp;gt;(JdbcDataSourceReader.java:72)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.HiveWarehouseConnector.getDataSourceReader(HiveWarehouseConnector.java:72)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.HiveWarehouseConnector.createReader(HiveWarehouseConnector.java:40)&lt;BR /&gt;at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation$SourceHelpers.createReader(DataSourceV2Relation.scala:161)&lt;BR /&gt;at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation$.create(DataSourceV2Relation.scala:178)&lt;BR /&gt;at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:224)&lt;BR /&gt;at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:187)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.HiveWarehouseSessionImpl.executeJdbcInternal(HiveWarehouseSessionImpl.java:295)&lt;BR /&gt;at com.hortonworks.spark.sql.hive.llap.HiveWarehouseSessionImpl.sql(HiveWarehouseSessionImpl.java:159)&lt;BR /&gt;at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)&lt;BR /&gt;at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)&lt;BR /&gt;at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)&lt;BR /&gt;at java.lang.reflect.Method.invoke(Method.java:498)&lt;BR /&gt;at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)&lt;BR /&gt;at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)&lt;BR /&gt;at py4j.Gateway.invoke(Gateway.java:282)&lt;BR /&gt;at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)&lt;BR /&gt;at py4j.commands.CallCommand.execute(CallCommand.java:79)&lt;BR /&gt;at py4j.GatewayConnection.run(GatewayConnection.java:238)&lt;BR /&gt;at java.lang.Thread.run(Thread.java:750)&lt;/PRE&gt;&lt;P&gt;Once thrown, execution continues and ends with no errors or missing data.&lt;/P&gt;&lt;P&gt;The spark-submit command is the following:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;spark-submit --master yarn --driver-memory 1g --queue &amp;lt;queue_name&amp;gt; --conf spark.pyspark.python=/opt/venv/pdr/bin/python3.6 --conf spark.pyspark.driver.python=/opt/venv/pdr/bin/python3.6 --jars /opt/cloudera/parcels/CDH/jars/hive-warehouse-connector-assembly-1.0.0.7.1.7.1000-141.jar --py-files /opt/cloudera/parcels/CDH/lib/hive_warehouse_connector/pyspark_hwc-1.0.0.7.1.7.1000-141.zip /home/&amp;lt;path_to_Python_script&amp;gt;/script.py&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Configuration settings inside the Python script (script.py) are the following:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;from pyspark.sql import SparkSession
spark = SparkSession \
        .builder \
        .enableHiveSupport() \
        .appName(appname) \
        .config("spark.yarn.queue","&amp;lt;queue_name&amp;gt;") \
        .config("spark.datasource.hive.warehouse.read.via.llap","false") \
        .config("spark.sql.hive.hiveserver2.jdbc.url.principal","hive/_HOST@&amp;lt;domain&amp;gt;") \
        .config("spark.datasource.hive.warehouse.read.mode","JDBC_CLUSTER") \
        .config("spark.sql.extensions","com.hortonworks.spark.sql.rule.Extensions") \
        .config("hive.support.concurrency", "true") \
        .config("hive.enforce.bucketing","true") \
        .config("hive.exec.dynamic.partition.mode", "nonstrict") \
        .config("hive.txn.manager","org.apache.hadoop.hive.ql.lockmgr.DbTxnManager") \
        .config("hive.compactor.initiator.on", "true") \
        .config("hive.compactor.worker.threads","1") \
        .config("hive.tez.container.size", "12288") \
        .config("tez.queue.name","&amp;lt;queue_name&amp;gt;") \
        .config("mapred.job.queuename","&amp;lt;queue_name&amp;gt;") \
        .config("spark.executor.core",3) \
        .config("spark.executor.memory","6g") \
        .config("spark.shuffle.service.enabled","true") \
        .config("spark.dynamicAllocation.enabled","true") \
        .config("spark.dynamicAllocation.minExecutors",0) \
        .config("spark.dynamicAllocation.initialExecutors",1) \
        .config("spark.dynamicAllocation.maxExecutors",20) \
        .config("spark.kryo.registrator","com.qubole.spark.hiveacid.util.HiveAcidKyroRegistrator") \
     .config('spark.kryoserializer.buffer.max', '128m')\
    .config('spark.sql.autoBroadcastJoinThreshold', -1)\
.config("spark.sql.hive.hiveserver2.jdbc.url","jdbc:hive2://&amp;lt;hive2_jdbc_URL&amp;gt;:10000/default;tez.queue.name=&amp;lt;queue_name&amp;gt;;ssl=true") &lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;As said, script is correctly executed and results are returned.&lt;BR /&gt;Changing driver-memory and/or spark.executor.core / spark.executor.memory does not change the fact that the warning is still thrown.&lt;/P&gt;&lt;P&gt;Any idea?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you,&lt;BR /&gt;Andrea&lt;/P&gt;</description>
      <pubDate>Wed, 16 Aug 2023 09:08:09 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375252#M242352</guid>
      <dc:creator>AndreaCavenago</dc:creator>
      <dc:date>2023-08-16T09:08:09Z</dc:date>
    </item>
    <item>
      <title>Re: Error closing output stream Warning</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375290#M242374</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95875"&gt;@AndreaCavenago&lt;/a&gt;&amp;nbsp;Does this error appear every time we run this spark-submit command?&lt;/P&gt;&lt;P&gt;As this is a warning message, and it does not have any real impact, we can avoid it by changing the log level.&lt;/P&gt;&lt;P&gt;In the &lt;FONT face="andale mono,times"&gt;script.py&lt;/FONT&gt; file, add the following two lines:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;from pyspark import SparkContext
SparkContext.setLogLevel("ERROR")&lt;/LI-CODE&gt;&lt;P&gt;This will avoid the WARN message. But it will still be good to address the actual issue.&lt;/P&gt;</description>
      <pubDate>Wed, 16 Aug 2023 20:06:48 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375290#M242374</guid>
      <dc:creator>smruti</dc:creator>
      <dc:date>2023-08-16T20:06:48Z</dc:date>
    </item>
    <item>
      <title>Re: Error closing output stream Warning</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375315#M242387</link>
      <description>&lt;P&gt;Thank you &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/82698"&gt;@smruti&lt;/a&gt;. Yes, it happens each time I run the spark-submit.&lt;/P&gt;&lt;P&gt;I will make a test with LogLevel set to ERROR, and keep looking for a solution.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you,&lt;/P&gt;&lt;P&gt;Andrea&lt;/P&gt;</description>
      <pubDate>Thu, 17 Aug 2023 10:29:09 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375315#M242387</guid>
      <dc:creator>AndreaCavenago</dc:creator>
      <dc:date>2023-08-17T10:29:09Z</dc:date>
    </item>
    <item>
      <title>Re: Error closing output stream Warning</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375444#M242453</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95875"&gt;@AndreaCavenago&lt;/a&gt;, If &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/82698"&gt;@smruti&lt;/a&gt;'s reply has helped resolve your issue,&amp;nbsp;can you kindly mark the appropriate reply as the solution?&amp;nbsp;It will make it easier for others to find the answer in the future.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 21 Aug 2023 10:22:09 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375444#M242453</guid>
      <dc:creator>VidyaSargur</dc:creator>
      <dc:date>2023-08-21T10:22:09Z</dc:date>
    </item>
    <item>
      <title>Re: Error closing output stream Warning</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375712#M242616</link>
      <description>&lt;P&gt;Hello &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/32119"&gt;@VidyaSargur&lt;/a&gt;, the solution provided by &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/82698"&gt;@smruti&lt;/a&gt; hides the warning message, and I'm fine with that, but it did not solve the issue that generates the message.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you,&lt;/P&gt;&lt;P&gt;Andrea&lt;/P&gt;</description>
      <pubDate>Tue, 29 Aug 2023 07:34:23 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375712#M242616</guid>
      <dc:creator>AndreaCavenago</dc:creator>
      <dc:date>2023-08-29T07:34:23Z</dc:date>
    </item>
    <item>
      <title>Re: Error closing output stream Warning</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375730#M242624</link>
      <description>&lt;P&gt;&lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/95875"&gt;@AndreaCavenago&lt;/a&gt;&amp;nbsp;For that you will have to check if the connection is getting interrupted/closed between the client and hiveserver2. Without thorough log analysis, it will be difficult to answer that. Could you open a support case for the same?&lt;/P&gt;</description>
      <pubDate>Tue, 29 Aug 2023 10:56:53 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375730#M242624</guid>
      <dc:creator>smruti</dc:creator>
      <dc:date>2023-08-29T10:56:53Z</dc:date>
    </item>
    <item>
      <title>Re: Error closing output stream Warning</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375816#M242671</link>
      <description>&lt;P&gt;Hello &lt;a href="https://community.cloudera.com/t5/user/viewprofilepage/user-id/82698"&gt;@smruti&lt;/a&gt; , unfortunately, I have limited access to the cluster (no CM access) and not able to create a support case at the moment. I suppose log analysis should be performed on HS2 log only, correct?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you,&lt;/P&gt;&lt;P&gt;Andrea&lt;/P&gt;</description>
      <pubDate>Thu, 31 Aug 2023 09:11:25 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Error-closing-output-stream-Warning/m-p/375816#M242671</guid>
      <dc:creator>AndreaCavenago</dc:creator>
      <dc:date>2023-08-31T09:11:25Z</dc:date>
    </item>
  </channel>
</rss>

