Member since
04-29-2021
3
Posts
0
Kudos Received
0
Solutions
04-30-2021
12:14 AM
from impala.dbapi import connect
import pandas as pd


class IMPALA:
    """Small convenience wrapper around an impyla DB-API connection.

    Every query opens a new connection through ``connect`` and closes it
    again once the statement has finished (or failed).
    """

    def __init__(self, host, port, user, db):
        """Store the connection settings; no connection is opened here."""
        self.host = host
        self.port = port
        self.user = user
        self.db = db

    def __GetConnect(self):
        """Open a connection, keep it on ``self.conn`` and return a cursor.

        Raises:
            NameError: if no database name is configured, or if no cursor
                could be obtained from the new connection.
        """
        if not self.db:
            # Message: "database information has not been set".
            # Raise an exception instance; raising a tuple is a TypeError
            # on Python 3 and would hide the intended message.
            raise NameError("没有设置数据库信息")
        self.conn = connect(
            host=self.host,
            port=self.port,
            user=self.user,
            database=self.db,
        )
        cur = self.conn.cursor()
        if not cur:
            # Do not leave the freshly opened connection dangling.
            self.conn.close()
            # Message: "failed to connect to the database".
            raise NameError("连接数据库失败")
        return cur

    def ExecQuery(self, sql):
        """Run ``sql`` and return every row from ``cursor.fetchall()``.

        The connection is closed even when the statement raises.
        """
        cur = self.__GetConnect()
        try:
            cur.execute(sql)
            return cur.fetchall()
        finally:
            self.conn.close()

    def ExecNonQuery(self, sql):
        """Run ``sql`` for its side effects and commit; returns nothing.

        The connection is closed even when the statement raises.
        """
        cur = self.__GetConnect()
        try:
            cur.execute(sql)
            self.conn.commit()
        finally:
            self.conn.close()


user = IMPALA('172.17.200.112', 21050, 'q_cluster', 'rawdata')
sql = 'select count(DISTINCT(user_id)) as cnt from events where date = CURRENT_DATE() /*SA(production)*/'
re = user.ExecQuery(sql)
print(re)

# When the line 'cur.execute(sql)' is executed, an error is reported:
# impala.error.HiveServer2Error: Invalid query handle: 284e18bfc37e4cd7:ea75d31a00000000
... View more
Labels:
- Labels:
-
Apache Hive
-
Apache Impala