<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>question Re: Is there a way to create Hive table based on Avro data directly ? in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119481#M22372</link>
    <description>&lt;P&gt;This statement misses the 'LOCATION' clause so it is not an external table&lt;/P&gt;</description>
    <pubDate>Fri, 23 Sep 2016 18:07:06 GMT</pubDate>
    <dc:creator>jknulst</dc:creator>
    <dc:date>2016-09-23T18:07:06Z</dc:date>
    <item>
      <title>Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119473#M22364</link>
      <description>&lt;P&gt;I have a dataset that is almost 600GB in Avro format in HDFS. What is the most efficient way to create a Hive table directly on this dataset ? &lt;/P&gt;&lt;P&gt;For smaller datasets, I can move my data to disk, use Avro tools to extract schema, upload schema to HDFS and create Hive table based on that schema. Is there a way to directly extract Avro schema from a dataset in HDFS without writing java code ?&lt;/P&gt;</description>
      <pubDate>Wed, 09 Mar 2016 23:42:39 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119473#M22364</guid>
      <dc:creator>shishir_saxena4</dc:creator>
      <dc:date>2016-03-09T23:42:39Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119474#M22365</link>
      <description>&lt;PRE&gt;CREATE EXTERNAL TABLE avro_hive_table
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' TBLPROPERTIES
('avro.schema.url'='hdfs://localdomain/user/avro/schemas/activity.avsc')
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
LOCATION
'/user/avro/applog_avro';&lt;/PRE&gt;</description>
      <pubDate>Wed, 09 Mar 2016 23:45:05 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119474#M22365</guid>
      <dc:creator>amcbarnett</dc:creator>
      <dc:date>2016-03-09T23:45:05Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119475#M22366</link>
      <description>&lt;P&gt;Try this:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;CREATE EXTERNAL TABLE  tableName 
PARTITIONED BY (ingestiondatetime BIGINT, recordtype STRING)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.url'='hdfs:///user/file.avsc');&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Wed, 09 Mar 2016 23:46:24 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119475#M22366</guid>
      <dc:creator>sunile_manjee</dc:creator>
      <dc:date>2016-03-09T23:46:24Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119476#M22367</link>
      <description>&lt;P&gt;Thanks &lt;A rel="user" href="https://community.cloudera.com/users/369/amcbarnett.html" nodeid="369"&gt;@Ancil McBarnett&lt;/A&gt; &lt;A rel="user" href="https://community.cloudera.com/users/1486/smanjee.html" nodeid="1486"&gt;@Sunile Manjee&lt;/A&gt;. I don't have .avsc file for schema. How can I extract Avro schema for this data ?&lt;/P&gt;</description>
      <pubDate>Wed, 09 Mar 2016 23:57:23 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119476#M22367</guid>
      <dc:creator>shishir_saxena4</dc:creator>
      <dc:date>2016-03-09T23:57:23Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119477#M22368</link>
      <description>&lt;P&gt;You can try the following: cat your large file, grab a few lines, and output them to a new file on the local fs. I'll be curious to know if that works with avro serialization.&lt;/P&gt;&lt;P&gt;&lt;A href="http://stackoverflow.com/questions/22852063/how-to-copy-first-few-lines-of-a-large-file-in-hadoop-to-a-new-file#22852192" target="_blank"&gt;http://stackoverflow.com/questions/22852063/how-to-copy-first-few-lines-of-a-large-file-in-hadoop-to-a-new-file#22852192&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Then use avro-tools to extract schema.&lt;/P&gt;</description>
      <pubDate>Thu, 10 Mar 2016 00:00:38 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119477#M22368</guid>
      <dc:creator>aervits</dc:creator>
      <dc:date>2016-03-10T00:00:38Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119478#M22369</link>
      <description>&lt;P&gt;Thank You &lt;A rel="user" href="https://community.cloudera.com/users/393/aervits.html" nodeid="393"&gt;@Artem Ervits&lt;/A&gt; &lt;A rel="user" href="https://community.cloudera.com/users/1486/smanjee.html" nodeid="1486"&gt;@Sunile Manjee&lt;/A&gt; &lt;A rel="user" href="https://community.cloudera.com/users/369/amcbarnett.html" nodeid="369"&gt;@Ancil McBarnett&lt;/A&gt;. I was able to get my solution using a combination of commands.&lt;/P&gt;&lt;PRE&gt;hdfs dfs -cat $1 | head --bytes 10K &amp;gt; $SAMPLE_FILE 
java -jar $AVRO_TOOLS_PATH/avro-tools-1.7.7.jar getschema $SAMPLE_FILE &amp;gt; $AVRO_SCHEMA_FILE 
hdfs dfs -put $AVRO_SCHEMA_FILE $AVRO_SCHEMA_DIR&lt;/PRE&gt;&lt;P&gt;head command needs to be used with --bytes option to get first 10K bytes. Then I used Avro tools to retrieve schema and copied schema back to HDFS.&lt;/P&gt;</description>
      <pubDate>Thu, 10 Mar 2016 02:22:44 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119478#M22369</guid>
      <dc:creator>shishir_saxena4</dc:creator>
      <dc:date>2016-03-10T02:22:44Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119479#M22370</link>
      <description>&lt;P&gt;You taught me something new, awesome community!&lt;/P&gt;</description>
      <pubDate>Thu, 10 Mar 2016 07:25:11 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119479#M22370</guid>
      <dc:creator>aervits</dc:creator>
      <dc:date>2016-03-10T07:25:11Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119480#M22371</link>
      <description>&lt;P&gt;&lt;A rel="user" href="https://community.cloudera.com/users/369/amcbarnett.html" nodeid="369"&gt;@Ancil McBarnett&lt;/A&gt; &lt;/P&gt;&lt;P&gt;Are you sure AVRO backed tables can be created as external tables? If I run your statement I get problems on the LOCATION predicate. Hive does not expect the LOCATION clause it seems&lt;/P&gt;&lt;P&gt;Edit:&lt;/P&gt;&lt;P&gt;Never mind, you can but the order of the statements matters:&lt;/P&gt;&lt;P&gt;This works:&lt;/P&gt;&lt;PRE&gt;CREATE EXTERNAL TABLE as_avro 
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' 
STORED as INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' 
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' 
LOCATION '/user/root/as_avro' 
TBLPROPERTIES ('avro.schema.url'='hdfs:///user/root/avro.avsc');

&lt;/PRE&gt;</description>
      <pubDate>Fri, 23 Sep 2016 17:45:02 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119480#M22371</guid>
      <dc:creator>jknulst</dc:creator>
      <dc:date>2016-09-23T17:45:02Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119481#M22372</link>
      <description>&lt;P&gt;This statement misses the 'LOCATION' clause so it is not an external table&lt;/P&gt;</description>
      <pubDate>Fri, 23 Sep 2016 18:07:06 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119481#M22372</guid>
      <dc:creator>jknulst</dc:creator>
      <dc:date>2016-09-23T18:07:06Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119482#M22373</link>
      <description>&lt;P&gt;Perhaps this page can help you:&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="https://cwiki.apache.org/confluence/display/Hive/AvroSerDe#AvroSerDe-CreatingAvro-backedHivetables"&gt;create hive table based avro&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 22 Dec 2016 13:41:31 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119482#M22373</guid>
      <dc:creator>strong_young_cn</dc:creator>
      <dc:date>2016-12-22T13:41:31Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to create Hive table based on Avro data directly ?</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119483#M22374</link>
      <description>&lt;P&gt;hadoop jar avro-tools-1.8.2.jar getschema hdfs_archive/mydoc.avro&lt;/P&gt;&lt;P&gt;would also do the job&lt;/P&gt;&lt;P&gt;instead of java -jar, you can directly run it on hdfs thanks to :&lt;/P&gt;&lt;P&gt;hadoop jar avro-tools-1.8.2.jar getschema hdfsPathTOAvroFile.avro&lt;/P&gt;</description>
      <pubDate>Sun, 03 Sep 2017 00:07:47 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Is-there-a-way-to-create-Hive-table-based-on-Avro-data/m-p/119483#M22374</guid>
      <dc:creator>niparisco</dc:creator>
      <dc:date>2017-09-03T00:07:47Z</dc:date>
    </item>
  </channel>
</rss>

