Support Questions

Find answers, ask questions, and share your expertise
Announcements
Now Live: Explore expert insights and technical deep dives on the new Cloudera Community BlogsRead the Announcement

Query To Return Result Only If Data Exists in Multiple Tables Specific Partition

avatar
Rising Star

Given 6 tables with identical column and all partitioned by date

Need to get result of a union results from all 6 tables

only if all tables have data for the given date i.e. partition

else return nothing.

 

 

1 ACCEPTED SOLUTION

avatar
Super Collaborator

@ChineduLB 

WITH data_counts AS (
SELECT
COUNT(*) AS count_table1,
COUNT(*) AS count_table2,
COUNT(*) AS count_table3,
COUNT(*) AS count_table4,
COUNT(*) AS count_table5,
COUNT(*) AS count_table6
FROM
table1
WHERE
date_partition = 'your_date' -- Replace 'your_date' with the specific date you're interested in
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table2
WHERE
date_partition = 'your_date'
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table3
WHERE
date_partition = 'your_date'
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table4
WHERE
date_partition = 'your_date'
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table5
WHERE
date_partition = 'your_date'
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table6
WHERE
date_partition = 'your_date'
)
SELECT
CASE
WHEN SUM(count_table1) > 0 AND SUM(count_table2) > 0 AND SUM(count_table3) > 0
AND SUM(count_table4) > 0 AND SUM(count_table5) > 0 AND SUM(count_table6) > 0 THEN
(SELECT * FROM table1 WHERE date_partition = 'your_date')
ELSE NULL -- or whatever you want to return if data doesn't exist in all tables
END AS result
FROM
data_counts;

View solution in original post

1 REPLY 1

avatar
Super Collaborator

@ChineduLB 

WITH data_counts AS (
SELECT
COUNT(*) AS count_table1,
COUNT(*) AS count_table2,
COUNT(*) AS count_table3,
COUNT(*) AS count_table4,
COUNT(*) AS count_table5,
COUNT(*) AS count_table6
FROM
table1
WHERE
date_partition = 'your_date' -- Replace 'your_date' with the specific date you're interested in
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table2
WHERE
date_partition = 'your_date'
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table3
WHERE
date_partition = 'your_date'
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table4
WHERE
date_partition = 'your_date'
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table5
WHERE
date_partition = 'your_date'
UNION ALL
SELECT
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*),
COUNT(*)
FROM
table6
WHERE
date_partition = 'your_date'
)
SELECT
CASE
WHEN SUM(count_table1) > 0 AND SUM(count_table2) > 0 AND SUM(count_table3) > 0
AND SUM(count_table4) > 0 AND SUM(count_table5) > 0 AND SUM(count_table6) > 0 THEN
(SELECT * FROM table1 WHERE date_partition = 'your_date')
ELSE NULL -- or whatever you want to return if data doesn't exist in all tables
END AS result
FROM
data_counts;