Created 04-30-2021 12:14 AM
from impala.dbapi import connect
import pandas as pd
class IMPALA:
    """Small helper that runs SQL statements against Impala.

    A new connection is opened for every call to ``ExecQuery`` /
    ``ExecNonQuery`` and closed again before the call returns, so no
    connection is kept around between statements.

    Args:
        host: Host name or IP address passed to ``connect``.
        port: Port passed to ``connect``.
        user: User name passed to ``connect``.
        db: Database name passed to ``connect``; must be non-empty.
    """

    def __init__(self, host, port, user, db):
        self.host = host
        self.port = port
        self.user = user
        self.db = db

    def __GetConnect(self):
        """Open a connection, store it on ``self.conn`` and return a cursor.

        Raises:
            NameError: If no database name was configured, or if the
                connection did not yield a usable cursor.
        """
        if not self.db:
            # Raise a real exception instance: raising a tuple is a
            # TypeError in Python 3 and would hide this message.
            raise NameError("没有设置数据库信息")
        self.conn = connect(
            host=self.host,
            port=self.port,
            user=self.user,
            database=self.db,
        )
        try:
            cur = self.conn.cursor()
        except Exception:
            # Do not leave the freshly opened connection dangling when the
            # cursor cannot be created.
            self.conn.close()
            raise
        if not cur:
            self.conn.close()
            raise NameError("连接数据库失败")
        return cur

    def ExecQuery(self, sql):
        """Execute ``sql`` and return all rows from ``fetchall()``.

        The connection is closed even when executing or fetching fails.
        """
        cur = self.__GetConnect()
        try:
            cur.execute(sql)
            return cur.fetchall()
        finally:
            self.conn.close()

    def ExecNonQuery(self, sql):
        """Execute ``sql`` that returns no rows, then commit.

        The connection is closed even when executing or committing fails.
        """
        cur = self.__GetConnect()
        try:
            cur.execute(sql)
            self.conn.commit()
        finally:
            self.conn.close()
# Build the helper for the target Impala endpoint and database.
user = IMPALA(
    host='172.17.200.112',
    port=21050,
    user='q_cluster',
    db='rawdata',
)

# Count the distinct user_id values in `events` for the current date.
# The trailing /*SA(production)*/ comment is sent as part of the query text.
sql = (
    'select count(DISTINCT(user_id)) as cnt '
    'from events where date = CURRENT_DATE() '
    '/*SA(production)*/'
)

# Run the query and show the fetched rows.
re = user.ExecQuery(sql)
print(re)
When the line `cur.execute(sql)` is executed, the following error is reported:
impala.error.HiveServer2Error: Invalid query handle: 284e18bfc37e4cd7:ea75d31a00000000
Created 05-15-2021 10:14 PM
Hi
I think you can try the two options below:
1. Increase the idle_session_timeout value in Impala.
2. Rewrite the script so that the Impala connection is opened just before the Impala-related commands are invoked.
Created 02-03-2022 04:07 AM
Hi
Could you elaborate on how to write the queries with the idle_session_timeout value? I am facing the same error. The GitHub issue https://github.com/cloudera/impyla/issues/278 says there is no way to set it on the connection and that it would need to be put in the query. Thanks.