Created on
05-10-2020
04:06 PM
- last edited on
05-11-2020
12:27 AM
by
VidyaSargur
-- I adapted from this example www.cloudera.com/documentation
-- impala side
CREATE TABLE flat_struct_array (continent STRING, country STRING, city STRING);
INSERT INTO flat_struct_array VALUES
('North America', 'Canada', 'Toronto') , ('North America', 'Canada', 'Vancouver')
, ('North America', 'Canada', "St. John\'s") , ('North America', 'Canada', 'Saint John')
, ('North America', 'Canada', 'Montreal') , ('North America', 'Canada', 'Halifax')
, ('North America', 'Canada', 'Winnipeg') , ('North America', 'Canada', 'Calgary')
, ('North America', 'Canada', 'Saskatoon') , ('North America', 'Canada', 'Ottawa')
, ('North America', 'Canada', 'Yellowknife') , ('Europe', 'France', 'Paris')
, ('Europe', 'France', 'Nice') , ('Europe', 'France', 'Marseilles')
, ('Europe', 'France', 'Cannes') , ('Europe', 'Greece', 'Athens')
, ('Europe', 'Greece', 'Piraeus') , ('Europe', 'Greece', 'Hania')
, ('Europe', 'Greece', 'Heraklion') , ('Europe', 'Greece', 'Rethymnon')
, ('Europe', 'Greece', 'Fira');
CREATE TABLE complex_struct_array2
(continent STRING, country array< STRUCT <name: STRING, city: ARRAY <string> > > ) STORED AS PARQUET;
-- hive side
INSERT INTO complex_struct_array2
select continent, collect_list(struct1)
from (
SELECT continent, named_struct('name', country, 'city', collect_list(city)) as struct1 FROM flat_struct_array GROUP BY continent, country
) a group by continent
select * from complex_struct_array2
-- you'll see table with 2 records, and in Europe it has 2 countries
-- France has 4 cities and Greece has 6 cities
--back to impala side
select *
from complex_struct_array2 t, t.country t2, t.country.city t3
-- you'll see the result that France contains 10 records (with Greece cities) and vice versa
continent |name |item |
--------------|-------|------------|
Europe |France |Paris |
Europe |France |Nice |
Europe |France |Marseilles |
Europe |France |Cannes |
Europe |France |Athens |<-- should not be shown
Europe |France |Piraeus |<-- should not be shown
Europe |France |Hania |<-- should not be shown
Europe |France |Heraklion |<-- should not be shown
Europe |France |Rethymnon |<-- should not be shown
Europe |France |Fira |<-- should not be shown
...
any help please
Created 05-10-2020 05:43 PM
Impala SQL treats nested collections essentially as tables. If you want to "join" the nested collection with the containing table or collection, you need to use the same alias that you gave that table previously in the FROM list (otherwise it considers it a separate reference to the nested collection)
I.e. instead of
from complex_struct_array2 t, t.country t2, t.country.city t3
you want to write the following to do the implicit join:
from complex_struct_array2 t, t.country t2, t2.city t3
Created 05-10-2020 05:43 PM
Impala SQL treats nested collections essentially as tables. If you want to "join" the nested collection with the containing table or collection, you need to use the same alias that you gave that table previously in the FROM list (otherwise it considers it a separate reference to the nested collection)
I.e. instead of
from complex_struct_array2 t, t.country t2, t.country.city t3
you want to write the following to do the implicit join:
from complex_struct_array2 t, t.country t2, t2.city t3
Created 05-11-2020 02:50 AM
Thanks a lot TIM ..it now ok