- Subscribe to RSS Feed
- Mark as New
- Mark as Read
- Bookmark
- Subscribe
- Printer Friendly Page
- Report Inappropriate Content
Created on 02-10-2019 10:15 PM
This is an update to a previous article on accessing Philadelphia Open Crime Data and storing it in Apache Phoenix on HBase.
For nosql summit
Update this
http://princeton0.field.hortonworks.com:16010/master-status
Philly Crime Data
City of Philly
App Token
76MVJDcTksxeS1uYPf8D0XdUF
Secret Token
WZnlB_YJ5r9rjj_alWVdc_yqnxRpnIk5BHgb
crime
https://data.phila.gov/resource/sspu-uyfa.json?$$app_token=76MVJDcTksxeS1uYPf8D0XdUF&$limit=5000
https://www.opendataphilly.org/dataset/crime-incidents/resource/f6e5998d-9d33-4a45-8397-3a6bb8607d10
https://www.opendataphilly.org/dataset/crime-incidents
https://data.phila.gov/resource/sspu-uyfa.json
"dc_dist":"18",
"dc_key":"200918067518",
"dispatch_date":"2009-10-02",
"dispatch_date_time":"2009-10-02T14:24:00.000",
"dispatch_time":"14:24:00",
"hour":"14",
"location_block":"S 38TH ST / MARKETUT ST",
"psa":"3",
"text_general_code":"Other Assaults",
"ucr_general":"800"}
CREATE EXTERNAL TABLE
crime (dc_dist STRING,
dc_key STRING,dispatch_date STRING,dispatch_date_time STRING,hour STRING,location_block STRING,psa STRING,
text_general_code STRING,ucr_general STRING)
CLUSTERED BY (psa)INTO 4 BUCKETS
ROW FORMAT DELIMITED
STORED AS ORC
LOCATION '/crime/hive'
TBLPROPERTIES('transactional'='true');
dc_dist,dc_key ,dispatch_date,dispatch_date_time,hour,location_block,psa,text_general_code,ucr_general
Today
&dispatch_date=${now():format('yyyy-MM-dd')}
311 service
https://data.phila.gov/resource/4t9v-rppq.json?$$app_token=76MVJDcTksxeS1uYPf8D0XdUF&$limit=5000
http://arcgis.dvrpc.org/arcgis/rest/services/Transportation/PedestrianCounts/FeatureServer/1
http://arcgis.dvrpc.org/arcgis/rest/services/Transportation/TrafficCounts/FeatureServer/0
see: https://community.hortonworks.com/articles/52856/stream-data-into-hive-like-a-king-using-nifi.html
Perhaps adapt this
https://github.com/socrata/soda-java
https://www.opendataphilly.org/
create table crime
hdfs dfs -mkdir -p /crime/fail
hdfs dfs -mkdir -p /crime
https://phoenix.apache.org/faq.html
/usr/hdp/current/phoenix-client/bin/sqlline.py localhost:2181:/hbase-unsecure
CREATE TABLE phillycrime (dc_dist varchar,
dc_key varchar not null primary key,dispatch_date varchar,dispatch_date_time varchar,dispatch_time varchar,hour varchar,location_block varchar,psa varchar,
text_general_code varchar,ucr_general varchar);
{"dc_dist":"18","dc_key":"200918067518","dispatch_date":"2009-10-02","dispatch_date_time":"2009-10-02T14:24:00.000","dispatch_time":"14:24:00","hour":"14","location_block":"S 38TH ST / MARKETUT ST","psa":"3","text_general_code":"Other Assaults","ucr_general":"800"}
upsert into phillycrime values ('18', '200918067518', '2009-10-02','2009-10-02T14:24:00.000','14:24:00','14', 'S 38TH ST / MARKETUT ST','3','Other Assaults','800');
!tables
!describe phillycrime
upsert into test values
org.apache.phoenix.jdbc.PhoenixDriver
jdbc:phoenix:localhost:2181:/hbase-unsecure
/usr/hdp/2.4.0.0-169/phoenix/phoenix-client.jar
/usr/hdp/2.4.0.0-169/hbase/lib/hbase-client.jar
/etc/hbase/conf/hbase-site.xml
plus hadoop ones
https://community.hortonworks.com/articles/19016/connect-to-phoenix-hbase-using-dbvisualizer.html
0: jdbc:phoenix:localhost:2181:/hbase-unsecur> !describe phillycrime
+------------------------------------------+------------------------------------------+------------------------------------------+------------------------------------------+----------------+
| TABLE_CAT | TABLE_SCHEM | TABLE_NAME | COLUMN_NAME | |
+------------------------------------------+------------------------------------------+------------------------------------------+------------------------------------------+----------------+
| | | PHILLYCRIME | DC_DIST | 12 |
| | | PHILLYCRIME | DC_KEY | 12 |
| | | PHILLYCRIME | DISPATCH_DATE | 12 |
| | | PHILLYCRIME | DISPATCH_DATE_TIME | 12 |
| | | PHILLYCRIME | HOUR | 12 |
| | | PHILLYCRIME | LOCATION_BLOCK | 12 |
| | | PHILLYCRIME | PSA | 12 |
| | | PHILLYCRIME | TEXT_GENERAL_CODE | 12 |
| | | PHILLYCRIME | UCR_GENERAL | 12 |
+------------------------------------------+------------------------------------------+------------------------------------------+------------------------------------------+----------------+
0: jdbc:phoenix:localhost:2181:/hbase-unsecur>
0: jdbc:phoenix:localhost:2181:/hbase-unsecur> upsert into phillycrime values ('18', '200918067518', '2009-10-02','2009-10-02T14:24:00.000','14', 'S 38TH ST / MARKETUT ST','3','Other Assaults','800');
1 row affected (0.115 seconds)
0: jdbc:phoenix:localhost:2181:/hbase-unsecur> select * from phillycrime;
+------------------------------------------+------------------------------------------+------------------------------------------+------------------------------------------+----------------+
| DC_DIST | DC_KEY | DISPATCH_DATE | DISPATCH_DATE_TIME | |
+------------------------------------------+------------------------------------------+------------------------------------------+------------------------------------------+----------------+
| 18 | 200918067518 | 2009-10-02 | 2009-10-02T14:24:00.000 | 14 |
+------------------------------------------+------------------------------------------+------------------------------------------+------------------------------------------+----------------+
1 row selected (0.197 seconds)
upsert into phillycrime values ('${'dc_dist'}','${'dc_key'}','${'dispatch_date'}','${'dispatch_date_time'}','${'dispatch_time'}','${'hour'}','${'location_block'}','${'psa'}','${'text_general_code'}','${'ucr_general'}')
jdbc:phoenix:localhost:2181:/hbase-unsecur> select distinct(text_general_code) from phillycrime;
<value>phillycrime${now():toNumber()}.json</value>
upsert into phillycrime values ('${'dc_dist'}','${'dc_key'}','${'dispatch_date'}','${'dispatch_date_time'}','${'dispatch_time'}','${'hour'}','${'location_block'}','${'psa'}','${'text_general_code'}','${'ucr_general'}')
<name>URL</name>
<value>https://data.phila.gov/resource/sspu-uyfa.json?$$$app_token=76MVJDcTksxeS1uYPf8D0XdUF&$$limit=5000&dispatch_date=${now():format('yyyy-MM-dd')}</value>
</property>
<property>
<name>Filename</name>
<value>phillycrime${now():toNumber()}.json</value>
</property>
<name>Regular Expression</name>
<value>(?s:^.*$)</value>
</property>
<property>
<name>Replacement Value</name>
<value>upsert into phillycrime values ('${'dc_dist'}','${'dc_key'}','${'dispatch_date'}','${'dispatch_date_time'}','${'dispatch_time'}','${'hour'}','${'location_block'}','${'psa'}','${'text_general_code'}','${'ucr_general'}')</value>
<comment/>
<class>org.apache.nifi.dbcp.DBCPConnectionPool</class>
<enabled>true</enabled>
<property>
<name>Database Connection URL</name>
<value>jdbc:phoenix:localhost:2181:/hbase-unsecure</value>
</property>
<property>
<name>Database Driver Class Name</name>
<value>org.apache.phoenix.jdbc.PhoenixDriver</value>
</property>
<property>
<name>Database Driver Jar Url</name>
<value>file:///usr/hdp/2.4.0.0-169/phoenix/phoenix-client.jar</value>
https://data.phila.gov/resource/sspu-uyfa.json
https://www.opendataphilly.org/dataset/crime-incidents
Run spring boot …
https://www.opendataphilly.org/dataset/crime-incidents
https://github.com/tspannhw/phoenix
org.apache.phoenix.jdbc.PhoenixDriver |
jdbc:phoenix:princeton0.field.hortonworks.com:/hbase-unsecure |
phoenixuser |