<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>Question: Real-time Analysis of Twitter using Impala in Archives of Support Questions (Read Only)</title>
    <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Real-time-Analysis-of-Twitter-using-Impala/m-p/797#M99</link>
    <description>&lt;P&gt;As many of you reading this may already know, Cloudera has previously provided some excellent examples of how to use Flume to ingest Twitter data into HADOOP,&amp;nbsp; and analyze with Hue.&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://blog.cloudera.com/blog/2012/09/analyzing-twitter-data-with-hadoop/"&gt;http://blog.cloudera.com/blog/2012/09/analyzing-twitter-data-with-hadoop/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://blog.cloudera.com/blog/2013/03/how-to-analyze-twitter-data-with-hue/"&gt;http://blog.cloudera.com/blog/2013/03/how-to-analyze-twitter-data-with-hue/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="https://github.com/cloudera/cdh-twitter-example"&gt;https://github.com/cloudera/cdh-twitter-example&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;As an alternative to writing to HDFS, I’ve written a small prototype (available on &lt;A target="_blank" href="https://github.com/AronMacDonald/Twitter_Hbase_Impala"&gt;GitHub&lt;/A&gt;), using Flume, to write the tweets to Hbase and then report directly in real-time via Impala.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;If you wish to setup this prototype then:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;1. 
Setup Hadoop and follow Cloudera’s Twitter example: setting up Flume and Twitter4J API to write tweets to HDFS:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;---------------------------------------------------------------------------------------------------------------------------------------------&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;Cloudera’s Steps:&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://blog.cloudera.com/blog/2013/03/how-to-create-a-cdh-cluster-on-amazon-ec2-via-cloudera-manager/"&gt;http://blog.cloudera.com/blog/2013/03/how-to-create-a-cdh-cluster-on-amazon-ec2-via-cloudera-manager/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="https://github.com/cloudera/cdh-twitter-example"&gt;https://github.com/cloudera/cdh-twitter-example&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Dan Sander (&lt;A target="_blank" href="http://www.datadansandler.com/"&gt;www.datadansandler.com&lt;/A&gt;) has also created a document and videos walking through the entire process in detail&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://www.datadansandler.com/2013/03/making-clouderas-twitter-stream-real.html"&gt;http://www.datadansandler.com/2013/03/making-clouderas-twitter-stream-real.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://www.youtube.com/watch?v=2xO_8P09M38&amp;amp;list=PLPrplWpTfYTPU2topP8hJwpekrFj4wF8G"&gt;http://www.youtube.com/watch?v=2xO_8P09M38&amp;amp;list=PLPrplWpTfYTPU2topP8hJwpekrFj4wF8G&lt;/A&gt;&lt;/P&gt;&lt;P&gt;I found this a useful additional reference if I got stuck following Clouderas Steps.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;2. 
Setup&amp;nbsp; Flume to write to HBASE, Impala&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;---------------------------------------------------&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="https://github.com/AronMacDonald/Twitter_Hbase_Impala"&gt;https://github.com/AronMacDonald/Twitter_Hbase_Impala&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;Note: My Flume Hbase Sink code was inspired by Dan Sandler’s Apache Web Log Flume Hbase example&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;A target="_blank" 
href="https://github.com/DataDanSandler/log_analysis"&gt;https://github.com/DataDanSandler/log_analysis&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;In Hbase you need to create a table to store the tweets:&lt;/P&gt;&lt;P&gt;&amp;nbsp; sudo -u hdfs hbase shell&lt;/P&gt;&lt;P&gt;&amp;nbsp; create 'tweets', {NAME =&amp;gt; 'tweet'}, {NAME =&amp;gt; 'retweeted_status'}, {NAME =&amp;gt; 'entities'}, {NAME =&amp;gt; 'user'}&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;In Impala you create a table linked to the HBASE table:&lt;/P&gt;&lt;P&gt;CREATE EXTERNAL TABLE HB_IMPALA_TWEETS (&lt;/P&gt;&lt;P&gt;&amp;nbsp; id &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; int,&lt;/P&gt;&lt;P&gt;&amp;nbsp; id_str &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; text &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; created_at &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; timestamp,&lt;/P&gt;&lt;P&gt;&amp;nbsp; geo_latitude &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; double,&lt;/P&gt;&lt;P&gt;&amp;nbsp; geo_longitude &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;double,&lt;/P&gt;&lt;P&gt;&amp;nbsp; user_screen_name &amp;nbsp; &amp;nbsp; &amp;nbsp; string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; user_location &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; user_followers_count &amp;nbsp; string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; user_profile_image_url string&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;)&lt;/P&gt;&lt;P&gt;STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'&lt;/P&gt;&lt;P&gt;WITH SERDEPROPERTIES (&lt;/P&gt;&lt;P&gt;"hbase.columns.mapping" 
=&lt;/P&gt;&lt;P&gt;":key,tweet:id_str,tweet:text,tweet:created_at,tweet:geo_latitude,tweet:geo_longitude, user:screen_name,user:location,user:followers_count,user:profile_image_url"&lt;/P&gt;&lt;P&gt;)&lt;/P&gt;&lt;P&gt;TBLPROPERTIES("&lt;A target="_blank" href="http://hbase.table.name/"&gt;hbase.table.name&lt;/A&gt;" = "tweets");&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;For those that are interested in integrating with SAP HANA I’ve also added some logic in the Flume event to write a subset of fields to SAP HANA as well.&lt;/P&gt;&lt;P&gt;Further details on that are on an SAP blog&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://scn.sap.com/community/developer-center/hana/blog/2013/08/07/streaming-real-time-data-to-hadoop-and-hana"&gt;http://scn.sap.com/community/developer-center/hana/blog/2013/08/07/streaming-real-time-data-to-hadoop-and-hana&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I’m still working on other parts of the prototype, to make use of the Tweet information both within Impala and HANA&lt;/P&gt;&lt;P&gt;Hopefully I’ll be able to share that as well if/when I get it working. 
&amp;nbsp; &amp;nbsp; &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;In the mean time I’ve recently seen 2 other examples of using Twitter data for &lt;STRONG&gt;Sentiment Analysis &lt;/STRONG&gt;which may&amp;nbsp;interest:&lt;/P&gt;&lt;P&gt;Hortonworks&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;A target="_blank" href="http://www.youtube.com/watch?feature=player_embedded&amp;amp;v=y3nFfsTnY3M"&gt;http://www.youtube.com/watch?feature=player_embedded&amp;amp;v=y3nFfsTnY3M&lt;/A&gt;&lt;/P&gt;&lt;P&gt;SAP HANA SCN&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;A target="_blank" href="http://scn.sap.com/community/developer-center/hana/blog/2013/06/19/real-time-sentiment-rating-of-movies-on-sap-hana-one"&gt;http://scn.sap.com/community/developer-center/hana/blog/2013/06/19/real-time-sentiment-rating-of-movies-on-sap-hana-one&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 21 Apr 2026 14:02:50 GMT</pubDate>
    <dc:creator>Aron</dc:creator>
    <dc:date>2026-04-21T14:02:50Z</dc:date>
    <item>
      <title>Real-time Analysis of Twitter using Impala</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Real-time-Analysis-of-Twitter-using-Impala/m-p/797#M99</link>
      <description>&lt;P&gt;As many of you reading this may already know, Cloudera has previously provided some excellent examples of how to use Flume to ingest Twitter data into HADOOP,&amp;nbsp; and analyze with Hue.&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://blog.cloudera.com/blog/2012/09/analyzing-twitter-data-with-hadoop/"&gt;http://blog.cloudera.com/blog/2012/09/analyzing-twitter-data-with-hadoop/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://blog.cloudera.com/blog/2013/03/how-to-analyze-twitter-data-with-hue/"&gt;http://blog.cloudera.com/blog/2013/03/how-to-analyze-twitter-data-with-hue/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="https://github.com/cloudera/cdh-twitter-example"&gt;https://github.com/cloudera/cdh-twitter-example&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;As an alternative to writing to HDFS, I’ve written a small prototype (available on &lt;A target="_blank" href="https://github.com/AronMacDonald/Twitter_Hbase_Impala"&gt;GitHub&lt;/A&gt;), using Flume, to write the tweets to Hbase and then report directly in real-time via Impala.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;If you wish to setup this prototype then:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;1. 
Setup Hadoop and follow Cloudera’s Twitter example: setting up Flume and Twitter4J API to write tweets to HDFS:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;---------------------------------------------------------------------------------------------------------------------------------------------&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;Cloudera’s Steps:&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://blog.cloudera.com/blog/2013/03/how-to-create-a-cdh-cluster-on-amazon-ec2-via-cloudera-manager/"&gt;http://blog.cloudera.com/blog/2013/03/how-to-create-a-cdh-cluster-on-amazon-ec2-via-cloudera-manager/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="https://github.com/cloudera/cdh-twitter-example"&gt;https://github.com/cloudera/cdh-twitter-example&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Dan Sander (&lt;A target="_blank" href="http://www.datadansandler.com/"&gt;www.datadansandler.com&lt;/A&gt;) has also created a document and videos walking through the entire process in detail&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://www.datadansandler.com/2013/03/making-clouderas-twitter-stream-real.html"&gt;http://www.datadansandler.com/2013/03/making-clouderas-twitter-stream-real.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://www.youtube.com/watch?v=2xO_8P09M38&amp;amp;list=PLPrplWpTfYTPU2topP8hJwpekrFj4wF8G"&gt;http://www.youtube.com/watch?v=2xO_8P09M38&amp;amp;list=PLPrplWpTfYTPU2topP8hJwpekrFj4wF8G&lt;/A&gt;&lt;/P&gt;&lt;P&gt;I found this a useful additional reference if I got stuck following Clouderas Steps.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;2. 
Setup&amp;nbsp; Flume to write to HBASE, Impala&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;---------------------------------------------------&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="https://github.com/AronMacDonald/Twitter_Hbase_Impala"&gt;https://github.com/AronMacDonald/Twitter_Hbase_Impala&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN style="line-height: 14px;"&gt;Note: My Flume Hbase Sink code was inspired by Dan Sandler’s Apache Web Log Flume Hbase example&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;A target="_blank" 
href="https://github.com/DataDanSandler/log_analysis"&gt;https://github.com/DataDanSandler/log_analysis&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;In Hbase you need to create a table to store the tweets:&lt;/P&gt;&lt;P&gt;&amp;nbsp; sudo -u hdfs hbase shell&lt;/P&gt;&lt;P&gt;&amp;nbsp; create 'tweets', {NAME =&amp;gt; 'tweet'}, {NAME =&amp;gt; 'retweeted_status'}, {NAME =&amp;gt; 'entities'}, {NAME =&amp;gt; 'user'}&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;In Impala you create a table linked to the HBASE table:&lt;/P&gt;&lt;P&gt;CREATE EXTERNAL TABLE HB_IMPALA_TWEETS (&lt;/P&gt;&lt;P&gt;&amp;nbsp; id &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; int,&lt;/P&gt;&lt;P&gt;&amp;nbsp; id_str &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; text &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; created_at &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; timestamp,&lt;/P&gt;&lt;P&gt;&amp;nbsp; geo_latitude &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; double,&lt;/P&gt;&lt;P&gt;&amp;nbsp; geo_longitude &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;double,&lt;/P&gt;&lt;P&gt;&amp;nbsp; user_screen_name &amp;nbsp; &amp;nbsp; &amp;nbsp; string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; user_location &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; user_followers_count &amp;nbsp; string,&lt;/P&gt;&lt;P&gt;&amp;nbsp; user_profile_image_url string&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;)&lt;/P&gt;&lt;P&gt;STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'&lt;/P&gt;&lt;P&gt;WITH SERDEPROPERTIES (&lt;/P&gt;&lt;P&gt;"hbase.columns.mapping" 
=&lt;/P&gt;&lt;P&gt;":key,tweet:id_str,tweet:text,tweet:created_at,tweet:geo_latitude,tweet:geo_longitude, user:screen_name,user:location,user:followers_count,user:profile_image_url"&lt;/P&gt;&lt;P&gt;)&lt;/P&gt;&lt;P&gt;TBLPROPERTIES("&lt;A target="_blank" href="http://hbase.table.name/"&gt;hbase.table.name&lt;/A&gt;" = "tweets");&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;For those that are interested in integrating with SAP HANA I’ve also added some logic in the Flume event to write a subset of fields to SAP HANA as well.&lt;/P&gt;&lt;P&gt;Further details on that are on an SAP blog&lt;/P&gt;&lt;P&gt;&lt;A target="_blank" href="http://scn.sap.com/community/developer-center/hana/blog/2013/08/07/streaming-real-time-data-to-hadoop-and-hana"&gt;http://scn.sap.com/community/developer-center/hana/blog/2013/08/07/streaming-real-time-data-to-hadoop-and-hana&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I’m still working on other parts of the prototype, to make use of the Tweet information both within Impala and HANA&lt;/P&gt;&lt;P&gt;Hopefully I’ll be able to share that as well if/when I get it working. 
&amp;nbsp; &amp;nbsp; &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;In the mean time I’ve recently seen 2 other examples of using Twitter data for &lt;STRONG&gt;Sentiment Analysis &lt;/STRONG&gt;which may&amp;nbsp;interest:&lt;/P&gt;&lt;P&gt;Hortonworks&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;A target="_blank" href="http://www.youtube.com/watch?feature=player_embedded&amp;amp;v=y3nFfsTnY3M"&gt;http://www.youtube.com/watch?feature=player_embedded&amp;amp;v=y3nFfsTnY3M&lt;/A&gt;&lt;/P&gt;&lt;P&gt;SAP HANA SCN&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;A target="_blank" href="http://scn.sap.com/community/developer-center/hana/blog/2013/06/19/real-time-sentiment-rating-of-movies-on-sap-hana-one"&gt;http://scn.sap.com/community/developer-center/hana/blog/2013/06/19/real-time-sentiment-rating-of-movies-on-sap-hana-one&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 21 Apr 2026 14:02:50 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Real-time-Analysis-of-Twitter-using-Impala/m-p/797#M99</guid>
      <dc:creator>Aron</dc:creator>
      <dc:date>2026-04-21T14:02:50Z</dc:date>
    </item>
    <item>
      <title>Re: Real-time Analysis of Twitter using Impala</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Real-time-Analysis-of-Twitter-using-Impala/m-p/2315#M100</link>
      <description>&lt;P&gt;Thanks for the post, Aron.&lt;/P&gt;</description>
      <pubDate>Wed, 16 Oct 2013 19:23:17 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Real-time-Analysis-of-Twitter-using-Impala/m-p/2315#M100</guid>
      <dc:creator>Clint</dc:creator>
      <dc:date>2013-10-16T19:23:17Z</dc:date>
    </item>
    <item>
      <title>Re: Real-time Analysis of Twitter using Impala</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Real-time-Analysis-of-Twitter-using-Impala/m-p/14018#M101</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;When I try to link the external table to impala from hbase i get:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;CREATE EXTERNAL TABLE HB_IMPALA_TWEETS (&lt;BR /&gt;&amp;gt; id int,&lt;BR /&gt;&amp;gt; id_str string,&lt;BR /&gt;&amp;gt; text string,&lt;BR /&gt;&amp;gt; created_at timestamp,&lt;BR /&gt;&amp;gt; geo_latitude double,&lt;BR /&gt;&amp;gt; geo_longitude double,&lt;BR /&gt;&amp;gt; user_screen_name string,&lt;BR /&gt;&amp;gt; user_location string,&lt;BR /&gt;&amp;gt; user_followers_count string,&lt;BR /&gt;&amp;gt; user_profile_image_url string&lt;BR /&gt;&amp;gt;&lt;BR /&gt;&amp;gt; )&lt;BR /&gt;&amp;gt; STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'&lt;BR /&gt;&amp;gt; WITH SERDEPROPERTIES (&lt;BR /&gt;&amp;gt; "hbase.columns.mapping" =&lt;BR /&gt;&amp;gt; ":key,tweet:id_str,tweet:text,tweet:created_at,tweet:geo_latitude,tweet:geo_longitude, user:screen_name,user:location,user:followers_count,user:profile_image_url"&lt;BR /&gt;&amp;gt; )&lt;BR /&gt;&amp;gt; TBLPROPERTIES("hbase.table.name" = "tweets");&lt;BR /&gt;Query: create EXTERNAL TABLE HB_IMPALA_TWEETS ( id int, id_str string, text string, created_at timestamp, geo_latitude double, geo_longitude double, user_screen_name string, user_location string, user_followers_count string, user_profile_image_url string ) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( "hbase.columns.mapping" = ":key,tweet:id_str,tweet:text,tweet:created_at,tweet:geo_latitude,tweet:geo_longitude, user:screen_name,user:location,user:followers_count,user:profile_image_url" ) TBLPROPERTIES("hbase.table.name" = "tweets")&lt;BR /&gt;ERROR: AnalysisException: Syntax error in line 1:&lt;BR /&gt;...image_url string ) STORED &lt;U&gt;&lt;STRONG&gt;BY&lt;/STRONG&gt;&lt;/U&gt; 'org.apache.hadoop.hive.h...&lt;BR /&gt;^&lt;BR /&gt;Encountered: BY&lt;BR /&gt;Expected: AS&lt;/P&gt;&lt;P&gt;CAUSED BY: Exception: 
Syntax error&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any Idea why it is not working ? Do I need to add a JAR ?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Jun 2014 14:44:12 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Real-time-Analysis-of-Twitter-using-Impala/m-p/14018#M101</guid>
      <dc:creator>Kulssaka</dc:creator>
      <dc:date>2014-06-23T14:44:12Z</dc:date>
    </item>
    <item>
      <title>Re: Real-time Analysis of Twitter using Impala</title>
      <link>https://community.cloudera.com/t5/Archives-of-Support-Questions/Real-time-Analysis-of-Twitter-using-Impala/m-p/14562#M102</link>
      <description>&lt;P&gt;We just need to use Hive to create the impala table...&lt;/P&gt;</description>
      <pubDate>Tue, 01 Jul 2014 09:18:06 GMT</pubDate>
      <guid>https://community.cloudera.com/t5/Archives-of-Support-Questions/Real-time-Analysis-of-Twitter-using-Impala/m-p/14562#M102</guid>
      <dc:creator>Kulssaka</dc:creator>
      <dc:date>2014-07-01T09:18:06Z</dc:date>
    </item>
  </channel>
</rss>

