from impala.dbapi import connect
import pandas as pd
class IMPALA:
    """Small helper that runs SQL statements against an Impala server.

    Every call to :meth:`ExecQuery` or :meth:`ExecNonQuery` opens a fresh
    connection through ``impala.dbapi.connect`` and closes it again before
    returning, whether the statement succeeded or raised.
    """

    def __init__(self, host, port, user, db):
        """Store the connection settings; no connection is opened here.

        Args:
            host: Server host name or IP address passed to ``connect``.
            port: Server port passed to ``connect``.
            user: User name passed to ``connect``.
            db: Database name passed to ``connect`` as ``database``.
        """
        self.host = host
        self.port = port
        self.user = user
        self.db = db
        # Set by __GetConnect(); stays None until the first statement is run.
        self.conn = None

    def __GetConnect(self):
        """Open a new connection and return a cursor on it.

        The connection is kept on ``self.conn`` so the caller can commit
        and close it.

        Raises:
            NameError: If no database name was configured, or if the
                connection did not yield a usable cursor.
        """
        if not self.db:
            # Must raise an exception instance: raising a tuple is a
            # TypeError on Python 3 and would hide this message.
            raise NameError("没有设置数据库信息")
        self.conn = connect(
            host=self.host,
            port=self.port,
            user=self.user,
            database=self.db,
        )
        cur = self.conn.cursor()
        if not cur:
            # Do not leave the just-opened connection dangling.
            self.conn.close()
            raise NameError("连接数据库失败")
        return cur

    def ExecQuery(self, sql):
        """Execute ``sql`` and return everything ``fetchall()`` yields.

        Args:
            sql: The query text, passed unchanged to ``cursor.execute``.

        Returns:
            The result of ``cursor.fetchall()``.

        Raises:
            NameError: Propagated from the connection step.
        """
        cur = self.__GetConnect()
        try:
            cur.execute(sql)
            return cur.fetchall()
        finally:
            # Close even when execute()/fetchall() raises.
            self.conn.close()

    def ExecNonQuery(self, sql):
        """Execute a statement that returns no rows, then commit.

        Args:
            sql: The statement text, passed unchanged to ``cursor.execute``.

        Raises:
            NameError: Propagated from the connection step.
        """
        cur = self.__GetConnect()
        try:
            cur.execute(sql)
            self.conn.commit()
        finally:
            # Close even when execute()/commit() raises.
            self.conn.close()
# Example run: count today's distinct users in the `events` table.
# Keyword arguments make it explicit which setting each literal is.
user = IMPALA(
    host='172.17.200.112',
    port=21050,
    user='q_cluster',
    db='rawdata',
)
# The trailing /*SA(production)*/ comment is part of the query text and is
# sent to the server unchanged.
sql = 'select count(DISTINCT(user_id)) as cnt from events where date = CURRENT_DATE() /*SA(production)*/'
# Run the query and show the fetched rows.
re = user.ExecQuery(sql)
print(re)
When the line `cur.execute(sql)` is executed, the following error is raised:
impala.error.HiveServer2Error: Invalid query handle: 284e18bfc37e4cd7:ea75d31a00000000