Created on 11-04-2020 08:24 AM - edited 12-02-2020 02:46 PM
As explained in this Parent article, here is the first step in our hybrid cloud implementation: replicating bank branches and employee data from an on-prem cluster to CDP Public Cloud.
Note: you can find the datasets used for this exercise here
$ scp -i [your_key_location] ssh_tunnel_setup_files.zip [your_user_with_sudo_privileges]@[your_host]:/home/[your_user_with_sudo_privileges]
$ ssh -i [your_key_location] [your_user_with_sudo_privileges]@[your_host]
$ sudo su
$ wget https://archive.cloudera.com/ccm/0.1/ccm-autossh-client-0.1-20191023211905gitd03880c.x86_64.rpm
$ yum -y --nogpgcheck localinstall ccm-autossh-client-0.1-20191023211905gitd03880c.x86_64.rpm
$ unzip ssh_tunnel_setup_files.zip
$ ./install.sh
==========================================================================================
SSH tunnel for CM established successfully.
Run 'ccm-tunnel status' for status
Run 'journalctl -f -u ccm-tunnel@CM.service' or 'journalctl -xe' for logs.
==========================================================================================
create database if not exists worldwidebank;
use worldwidebank;
CREATE EXTERNAL TABLE if not exists worldwidebank.employees_ext(
number int,
location int,
gender string,
title string,
givenname string,
middleinitial string,
surname string,
streetaddress string,
city string,
state string,
statefull string,
zipcode string,
country string,
countryfull string,
emailaddress string,
username string,
password string,
telephonenumber string,
telephonecountrycode string,
mothersmaiden string,
birthday string,
age int,
tropicalzodiac string,
cctype string,
ccnumber string,
cvv2 string,
ccexpires string,
ssn string,
insuranceid string,
salary string,
bloodtype string,
weight double,
height int)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 's3a://pvi-e2e-cdp-bucket/vizbank/raw/employees/'
tblproperties("skip.header.line.count"="1");
CREATE EXTERNAL TABLE if not exists worldwidebank.locations_ext(
LOCATION_ID int,
ADDRESS string,
BKCLASS string,
CBSA string,
CBSA_DIV string,
CBSA_DIV_FLG string,
CBSA_DIV_NO string,
CBSA_METRO string,
CBSA_METRO_FLG string,
CBSA_METRO_NAME string,
CBSA_MICRO_FLG string,
CBSA_NO string,
CERT string,
CITY string,
COUNTY string,
CSA string,
CSA_FLG string,
CSA_NO string,
ESTYMD string,
FI_UNINUM string,
MAINOFF string,
NAME string,
OFFNAME string,
OFFNUM string,
RUNDATE string,
SERVTYPE string,
STALP string,
STCNTY string,
STNAME string,
UNINUM string,
ZIP int)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION 's3a://pvi-e2e-cdp-bucket/vizbank/raw/locations/'
tblproperties("skip.header.line.count"="1");
create table worldwidebank.employees as select * from worldwidebank.employees_ext;
create table worldwidebank.locations as select * from worldwidebank.locations_ext;
CREATE MATERIALIZED VIEW worldwidebank.employees_per_state as select locations.stname, count(*) as num_employees from employees, locations where employees.location=locations.location_id GROUP BY locations.stname;
The next step will be to profile sensitive data to protect our employees' data.