Created on 05-10-2020 04:06 PM - last edited on 05-11-2020 12:27 AM by VidyaSargur
-- I adapted from this example www.cloudera.com/documentation -- impala side CREATE TABLE flat_struct_array (continent STRING, country STRING, city STRING); INSERT INTO flat_struct_array VALUES ('North America', 'Canada', 'Toronto') , ('North America', 'Canada', 'Vancouver') , ('North America', 'Canada', "St. John\'s") , ('North America', 'Canada', 'Saint John') , ('North America', 'Canada', 'Montreal') , ('North America', 'Canada', 'Halifax') , ('North America', 'Canada', 'Winnipeg') , ('North America', 'Canada', 'Calgary') , ('North America', 'Canada', 'Saskatoon') , ('North America', 'Canada', 'Ottawa') , ('North America', 'Canada', 'Yellowknife') , ('Europe', 'France', 'Paris') , ('Europe', 'France', 'Nice') , ('Europe', 'France', 'Marseilles') , ('Europe', 'France', 'Cannes') , ('Europe', 'Greece', 'Athens') , ('Europe', 'Greece', 'Piraeus') , ('Europe', 'Greece', 'Hania') , ('Europe', 'Greece', 'Heraklion') , ('Europe', 'Greece', 'Rethymnon') , ('Europe', 'Greece', 'Fira'); CREATE TABLE complex_struct_array2 (continent STRING, country array< STRUCT <name: STRING, city: ARRAY <string> > > ) STORED AS PARQUET; -- hive side INSERT INTO complex_struct_array2 select continent, collect_list(struct1) from ( SELECT continent, named_struct('name', country, 'city', collect_list(city)) as struct1 FROM flat_struct_array GROUP BY continent, country ) a group by continent select * from complex_struct_array2 -- you'll see table with 2 records, and in Europe it has 2 countries -- France has 4 cities and Greece has 6 cities --back to impala side select * from complex_struct_array2 t, t.country t2, t.country.city t3 -- you'll see the result that France contains 10 records (with Greece cities) and vice versa continent |name |item | --------------|-------|------------| Europe |France |Paris | Europe |France |Nice | Europe |France |Marseilles | Europe |France |Cannes | Europe |France |Athens |<-- should not be shown Europe |France |Piraeus |<-- should not be shown Europe |France |Hania |<-- should not be shown Europe |France |Heraklion |<-- should not be shown Europe |France |Rethymnon |<-- should not be shown Europe |France |Fira |<-- should not be shown ...
any help please
Created 05-10-2020 05:43 PM
Impala SQL treats nested collections essentially as tables. If you want to "join" the nested collection with the containing table or collection, you need to use the same alias that you gave that table previously in the FROM list (otherwise it considers it a separate reference to the nested collection)
I.e. instead of
from complex_struct_array2 t, t.country t2, t.country.city t3
you want to write the following to do the implicit join:
from complex_struct_array2 t, t.country t2, t2.city t3
Created 05-10-2020 05:43 PM
Impala SQL treats nested collections essentially as tables. If you want to "join" the nested collection with the containing table or collection, you need to use the same alias that you gave that table previously in the FROM list (otherwise it considers it a separate reference to the nested collection)
I.e. instead of
from complex_struct_array2 t, t.country t2, t.country.city t3
you want to write the following to do the implicit join:
from complex_struct_array2 t, t.country t2, t2.city t3
Created 05-11-2020 02:50 AM
Thanks a lot TIM ..it now ok