Member since
01-24-2018
1
Post
0
Kudos Received
0
Solutions
01-25-2018
04:52 AM
Hi All,
I am trying to create a HIVE table from JSON file. Here , JSON file is extracted from Cloudera Manager (JSON file Content: IMPALA query report).
We use CDH5.9
Could you please help me on how to create a hive/impala table which reads the data from JSON file as underlying file?
JSON file content will look like below,
{ "queries" : [ { "queryId" : "75462e3e4c5d7694:37c50ff300000000", "statement" : "select * from XXXXXXXXXXXXX", "queryType" : "QUERY", "queryState" : "EXCEPTION", "startTime" : "2018-01-22T15:51:24.231Z", "endTime" : "2018-01-22T16:33:27.962Z", "rowsProduced" : 0, "attributes" : { "thread_storage_wait_time" : "1", "session_id" : "fd49d2f82ac32576:bcb0c36d65a6d98f", "hdfs_bytes_read_remote_percentage" : "0", "stats_missing" : "false", "thread_total_time" : "361", "thread_network_send_wait_time_percentage" : "0", "delegated_user" : "XXXXXX", "thread_network_receive_wait_time_percentage" : "9", "network_address" : "XXXXXXXXXXX:38571", "pool" : "default-pool", "hdfs_bytes_read_from_cache" : "0", "hdfs_bytes_read_local_percentage" : "100", "hdfs_scanner_average_bytes_read_per_second" : "0.0", "hdfs_bytes_read_from_cache_percentage" : "0", "bytes_streamed" : "12535", "session_type" : "HIVESERVER2", "cm_cpu_milliseconds" : "326.997635", "thread_network_receive_wait_time" : "31", "estimated_per_node_peak_memory" : "33554432", "query_status" : "Query 75462e3e4c5d7694:37c50ff300000000 expired due to client inactivity (timeout is 10m)\n", "thread_storage_wait_time_percentage" : "1", "memory_aggregate_peak" : "574801.92", "admission_result" : "Admitted immediately", "hdfs_average_scan_range" : "229336.0", "planning_wait_time" : "7", "oom" : "false", "memory_accrual" : "8.015872E7", "memory_spilled" : "0", "thread_cpu_time_percentage" : "90", "admission_wait" : "0", "file_formats" : "TEXT/NONE", "hdfs_bytes_read_local" : "229336", "hdfs_bytes_read_short_circuit" : "229336", "hdfs_bytes_read_short_circuit_percentage" : "100", "planning_wait_time_percentage" : "0", "client_fetch_wait_time" : "2523686", "client_fetch_wait_time_percentage" : "100", "memory_per_node_peak_node" : "XXXXXX:22000", "memory_per_node_peak" : "574801.92", "connected_user" : "hue/XXX@xx", "hdfs_bytes_read_remote" : "0", "thread_network_send_wait_time" : "0", "impala_version" : "impalad version 2.7.0-cdh5.9.2 RELEASE (build 2f7871169d894fab16f8a2fb99f2e34f0df8763d)", "thread_cpu_time" : "326", "hdfs_bytes_read" : "229336" }, "user" : "XXXXXX", "coordinator" : { "hostId" : "XXXXXXXX" }, "detailsAvailable" : true, "database" : "XXXXX", "durationMillis" : 2523731 } ], "warnings" : [ ] }
Thanks,
Venkatesh Kumar
... View more
Labels:
- Labels:
-
Apache Hive
-
Apache Impala
-
Cloudera Manager
-
HDFS