<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Why Impala return cross join on Array and struct data elements in Support Questions</title>
    <link>https://community.cloudera.com/t5/Support-Questions/Why-Impala-return-cross-join-on-Array-and-struct-data/m-p/295717#M217908</link>
    <description>&lt;P&gt;Impala SQL treats nested collections essentially as tables. If you want to "join" the nested collection with the containing table or collection, you need to use the same alias that you gave that table previously in the FROM list (otherwise Impala treats it as a separate reference to the nested collection).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I.e. instead of&lt;/P&gt;&lt;PRE&gt;from complex_struct_array2 t, t.country t2, t.country.city t3&lt;/PRE&gt;&lt;P&gt;you want to write the following to do the implicit join:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;from complex_struct_array2 t, t.country t2, t2.city t3&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 11 May 2020 00:43:46 GMT</pubDate>
    <dc:creator>Tim Armstrong</dc:creator>
    <dc:date>2020-05-11T00:43:46Z</dc:date>
    <item>
      <title>Why Impala return cross join on Array and struct data elements</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Why-Impala-return-cross-join-on-Array-and-struct-data/m-p/295716#M217907</link>
      <description>&lt;PRE&gt;-- I adapted this from the example at www.cloudera.com/documentation

-- impala side

CREATE TABLE flat_struct_array (continent STRING, country STRING, city STRING);

INSERT INTO flat_struct_array VALUES
    ('North America', 'Canada', 'Toronto') , ('North America', 'Canada', 'Vancouver')
  , ('North America', 'Canada', "St. John\'s") , ('North America', 'Canada', 'Saint John')
  , ('North America', 'Canada', 'Montreal') , ('North America', 'Canada', 'Halifax')
  , ('North America', 'Canada', 'Winnipeg') , ('North America', 'Canada', 'Calgary')
  , ('North America', 'Canada', 'Saskatoon') , ('North America', 'Canada', 'Ottawa')
  , ('North America', 'Canada', 'Yellowknife') , ('Europe', 'France', 'Paris')
  , ('Europe', 'France', 'Nice') , ('Europe', 'France', 'Marseilles')
  , ('Europe', 'France', 'Cannes') , ('Europe', 'Greece', 'Athens')
  , ('Europe', 'Greece', 'Piraeus') , ('Europe', 'Greece', 'Hania')
  , ('Europe', 'Greece', 'Heraklion') , ('Europe', 'Greece', 'Rethymnon')
  , ('Europe', 'Greece', 'Fira');

CREATE TABLE complex_struct_array2
(continent STRING, country array&amp;lt; STRUCT &amp;lt;name: STRING, city: ARRAY &amp;lt;string&amp;gt; &amp;gt;   &amp;gt; ) STORED AS PARQUET;

-- hive side

INSERT INTO complex_struct_array2
select continent, collect_list(struct1)
from (
SELECT continent, named_struct('name', country, 'city', collect_list(city)) as struct1 FROM flat_struct_array GROUP BY continent, country
) a group by continent

select * from complex_struct_array2
-- you'll see a table with 2 records; the Europe record has 2 countries
-- France has 4 cities and Greece has 6 cities

--back to impala side

select * 
from complex_struct_array2 t, t.country t2, t.country.city t3

-- you'll see that France contains 10 records (including the Greek cities), and vice versa
continent     |name   |item        |
--------------|-------|------------|
Europe        |France |Paris       |
Europe        |France |Nice        |
Europe        |France |Marseilles  |
Europe        |France |Cannes      |
Europe        |France |Athens      |&amp;lt;-- should not be shown
Europe        |France |Piraeus     |&amp;lt;-- should not be shown
Europe        |France |Hania       |&amp;lt;-- should not be shown
Europe        |France |Heraklion   |&amp;lt;-- should not be shown
Europe        |France |Rethymnon   |&amp;lt;-- should not be shown
Europe        |France |Fira        |&amp;lt;-- should not be shown
...&lt;BR /&gt;&lt;BR /&gt;Any&amp;nbsp;help,&amp;nbsp;please?&lt;/PRE&gt;</description>
      <pubDate>Tue, 21 Apr 2026 11:29:43 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Why-Impala-return-cross-join-on-Array-and-struct-data/m-p/295716#M217907</guid>
      <dc:creator>omran</dc:creator>
      <dc:date>2026-04-21T11:29:43Z</dc:date>
    </item>
    <item>
      <title>Re: Why Impala return cross join on Array and struct data elements</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Why-Impala-return-cross-join-on-Array-and-struct-data/m-p/295717#M217908</link>
      <description>&lt;P&gt;Impala SQL treats nested collections essentially as tables. If you want to "join" the nested collection with the containing table or collection, you need to use the same alias that you gave that table previously in the FROM list (otherwise Impala treats it as a separate reference to the nested collection).&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I.e. instead of&lt;/P&gt;&lt;PRE&gt;from complex_struct_array2 t, t.country t2, t.country.city t3&lt;/PRE&gt;&lt;P&gt;you want to write the following to do the implicit join:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;from complex_struct_array2 t, t.country t2, t2.city t3&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 11 May 2020 00:43:46 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Why-Impala-return-cross-join-on-Array-and-struct-data/m-p/295717#M217908</guid>
      <dc:creator>Tim Armstrong</dc:creator>
      <dc:date>2020-05-11T00:43:46Z</dc:date>
    </item>
    <item>
      <title>Re: Why Impala return cross join on Array and struct data elements</title>
      <link>https://community.cloudera.com/t5/Support-Questions/Why-Impala-return-cross-join-on-Array-and-struct-data/m-p/295738#M217925</link>
      <description>&lt;P&gt;Thanks a lot, Tim. It is OK now.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 11 May 2020 09:50:53 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Support-Questions/Why-Impala-return-cross-join-on-Array-and-struct-data/m-p/295738#M217925</guid>
      <dc:creator>omran</dc:creator>
      <dc:date>2020-05-11T09:50:53Z</dc:date>
    </item>
  </channel>
</rss>

