Created on 09-16-2016 02:41 PM - edited 09-16-2022 03:39 AM
With the following configuration file, the build dies.
This is patterned off of the example provided by Cloudera. It builds fine when the hdfs-master count = 1, but when I make it 2 to have more than a single NameNode, etc., it fails.
cloudera-manager {
    instance: ${instances.m42x} {
        tags {
            # add any additional tags as needed
            application: "Cloudera Manager 5"
        }
    }
    repository: "http://archive.cloudera.com/cm5/redhat/6/x86_64/cm/5.8/"
    repositoryKeyUrl: "http://archive.cloudera.com/cm5/redhat/6/x86_64/cm/RPM-GPG-KEY-cloudera"
    enableEnterpriseTrial: true
}
cluster {
    products {
        CDH: 5
    }
    configs {
        # HDFS fencing should be set to true for HA configurations
        HDFS {
            dfs_ha_fencing_methods: "shell(true)"
        }
    }
    services: [HDFS, ZOOKEEPER, HBASE, HIVE]
    hdfsmaster-1 {
        count: 2
        instance: ${instances.m42x} {
            tags {
                group: hdfsmaster-1
            }
        }
        roles {
            HDFS: [NAMENODE, JOURNALNODE, FAILOVERCONTROLLER, BALANCER]
            ZOOKEEPER: [SERVER]
            HBASE: [MASTER]
        }
        # NameNode nameservice, autofailover, and quorum journal name must be configured for high availability
        configs {
            HDFS {
                NAMENODE {
                    dfs_federation_namenode_nameservice: hanameservice
                    autofailover_enabled: true
                    dfs_namenode_quorum_journal_name: hanameservice
                }
            }
        }
    }
    hdfsmaster-2 {
        count: 1
        instance: ${instances.m42x} {
            tags {
                group: hdfsmaster-2
            }
        }
        roles {
            HDFS: [JOURNALNODE]
            ZOOKEEPER: [SERVER]
        }
    }
    master-1 {
        count: 1
        instance: ${instances.m42x} {
            tags {
                group: master
            }
        }
        roles {
            HBASE: [MASTER]
            ZOOKEEPER: [SERVER]
        }
    }
    workers {
        count: 3
        minCount: 1
        instance: ${instances.c34x} {
            # placementGroup: REPLACE-ME
            tags {
                group: worker
            }
        }
        roles {
            HDFS: [DATANODE]
            HBASE: [REGIONSERVER]
        }
    }
    gateways {
        count: 1
        instance: ${instances.t2l} {
            tags {
                group: gateway
            }
        }
        roles {
            HIVE: [GATEWAY]
        }
    }
    postCreateScripts: ["""#!/bin/sh
echo 'Hello World!'
exit 0
""",
        """#!/usr/bin/python
print 'Hello again!'
"""]
    preTerminateScripts: ["""#!/bin/sh
echo 'Goodbye World!'
exit 0
""",
        """#!/usr/bin/python
print 'Goodbye again!'
"""]
}
Created 09-16-2016 07:08 PM
I used the same HA config file as shown in this topic, and it created the instances correctly, but Cloudera Director fails with a bootstrap error. Is there a sample bootstrap script we could add to the sample HA config file to populate the instances with the required roles and services?
Created 09-23-2016 08:18 AM
Below is the script I got running, with AWS credentials, network, and security details sanitized. It shows how to create, attach, format, mount, and tag additional EBS volumes from the bootstrap script.
################################################################################
# begin
#
provider {
    type: aws
    region: us-west-2
    subnetId: INSERT AWS Subnet ID
    securityGroupsIds: INSERT SG-ID or create IAM Profile
    instanceNamePrefix: INSERT-NAME-OF-CLUSTER
    rootVolumeSizeGB: 100
    rootVolumeType: gp2
    # associatePublicIpAddresses: true
    # spotBidUSDPerHr: 0.50
}
ssh {
    username: ec2-user # for RHEL image
    privateKey: keypair.pem # with an absolute path to .pem file
}
instances {
    m42x {
        type: m4.2xlarge # requires an HVM AMI
        image: INSERT AMI
        rootVolumeSizeGB: 100
        # iamProfileName: iam-profile-REPLACE-ME
        tags {
            owner: ${?USER}
            Project: "INSERT Project name"
            Vertical: "Insert Project vertical"
        }
        bootstrapScript: """#!/bin/sh
#
# This is an embedded bootstrap script that runs as root and can be used to customize
# the instances immediately after boot and before any other Cloudera Director action
export AWS_ACCESS_KEY_ID=INSERT AWS Access Key
export AWS_SECRET_ACCESS_KEY=INSERT AWS Secret Key
export INSTANCE_ID=$(curl http://instance-data/latest/meta-data/instance-id)
export AVAILABILITY_ZONE=$(curl http://instance-data/latest/meta-data/placement/availability-zone)
export VOL1_ID=$(aws ec2 create-volume --size 15 --region us-west-2 --availability-zone ${AVAILABILITY_ZONE} --volume-type gp2 --query "VolumeId" | tr -d '"')
export VOL2_ID=$(aws ec2 create-volume --size 15 --region us-west-2 --availability-zone ${AVAILABILITY_ZONE} --volume-type gp2 --query "VolumeId" | tr -d '"')
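# Note: the fixed sleep below is a simple heuristic for letting the new volumes
# become available; 'aws ec2 wait volume-available --volume-ids ${VOL1_ID} ${VOL2_ID}'
# would be a more robust alternative if the installed AWS CLI supports waiters.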
echo 'Sleeping 30'
sleep 30
echo 'Attach additional EBS volumes'
aws ec2 attach-volume --region us-west-2 --volume-id ${VOL1_ID} --instance-id ${INSTANCE_ID} --device /dev/sdf
aws ec2 attach-volume --region us-west-2 --volume-id ${VOL2_ID} --instance-id ${INSTANCE_ID} --device /dev/sdg
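# On Xen-based instance types such as m4, devices attached as /dev/sdf and /dev/sdg
# surface in the guest as /dev/xvdf and /dev/xvdg, hence the xvd* names below.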
# Create filesystems on the devices
echo 'Create filesystems'
mkfs.ext4 /dev/xvdf
mkfs.ext4 /dev/xvdg
# Create directories for the mount points
mkdir /data1
mkdir /data2
# Add the mount points to /etc/fstab
echo "/dev/xvdf /data1 ext4 defaults 0 0" >> /etc/fstab
echo "/dev/xvdg /data2 ext4 defaults 0 0" >> /etc/fstab
sleep 10
# Mount all the devices
echo 'Mount drives'
mount -a
#
# modify attributes of new volumes to delete on terminate
echo 'Modify volume attributes'
aws ec2 modify-instance-attribute --instance-id ${INSTANCE_ID} --region us-west-2 --block-device-mappings "[{\"DeviceName\": \"/dev/sdf\",\"Ebs\":{\"DeleteOnTermination\":true}}]"
aws ec2 modify-instance-attribute --instance-id ${INSTANCE_ID} --region us-west-2 --block-device-mappings "[{\"DeviceName\": \"/dev/sdg\",\"Ebs\":{\"DeleteOnTermination\":true}}]"
#
# tag the volumes
echo 'Tag resources'
aws ec2 create-tags --region us-west-2 --resources ${INSTANCE_ID} --tags Key=Application,Value=BIGDATA Key=Vertical,Value=VerticalName Key=Project,Value="Project name"
aws ec2 create-tags --region us-west-2 --resources ${VOL1_ID} --tags Key=Application,Value=BIGDATA Key=Vertical,Value=VerticalName Key=Project,Value="Project name"
aws ec2 create-tags --region us-west-2 --resources ${VOL2_ID} --tags Key=Application,Value=BIGDATA Key=Vertical,Value=VerticalName Key=Project,Value="Project name"
#
# If the exit code is not zero Cloudera Director will automatically retry
echo 'This is the last line of the script'
exit 0
"""
        #
        # Flag indicating whether to normalize the instance. Not setting normalization here implies that your
        # bootstrap script will take care of normalization. This is an advanced configuration that will require
        # assistance from Cloudera support.
        #
        # Normalization includes:
        #   downloading and installing packages
        #   minimizing swappiness
        #   increasing the maximum number of open files
        #   mounting ephemeral disks
        #   resizing the root partition
        #
        # Defaults to true
        #
        normalizeInstance: true
    }
    m44x {
        type: m4.4xlarge
        image: ami
        tags {
            owner: ${?USER}
        }
    }
    c34x {
        type: c3.4xlarge
        image: ami
        tags {
            owner: ${?USER}
        }
    }
    i2x {
        type: i2.xlarge
        image: ami
        tags {
            owner: ${?USER}
        }
    }
    i22x {
        type: i2.2xlarge
        image: ami
        tags {
            owner: ${?USER}
        }
    }
    r32x {
        type: r3.2xlarge
        image: ami
        tags {
            owner: ${?USER}
        }
    }
    t2l { # only suitable as a gateway
        type: t2.large
        image: ami
        tags {
            owner: ${?USER}
        }
    }
}
cloudera-manager {
    instance: ${instances.m42x} {
        tags {
            # add any additional tags as needed
            Application: "Cloudera Manager 5"
            Project: "place project name here"
            Vertical: "place vertical name here"
        }
    }
    repository: "http://archive.cloudera.com/cm5/redhat/6/x86_64/cm/5.8/"
    repositoryKeyUrl: "http://archive.cloudera.com/cm5/redhat/6/x86_64/cm/RPM-GPG-KEY-cloudera"
    enableEnterpriseTrial: true
}
cluster {
    products {
        CDH: 5
    }
    configs {
        # HDFS fencing should be set to true for HA configurations
        HDFS {
            dfs_ha_fencing_methods: "shell(true)"
        }
    }
    services: [HDFS, ZOOKEEPER, HBASE]
    hdfsmaster-1 {
        count: 2
        instance: ${instances.m44x} {
            tags {
                group: hdfsmaster-1
            }
        }
        roles {
            HDFS: [NAMENODE, JOURNALNODE, FAILOVERCONTROLLER]
            ZOOKEEPER: [SERVER]
        }
        # NameNode nameservice, autofailover, and quorum journal name must be configured for high availability
        configs {
            HDFS {
                NAMENODE {
                    dfs_federation_namenode_nameservice: hanameservice
                    autofailover_enabled: true
                    dfs_namenode_quorum_journal_name: hanameservice
                }
            }
        }
    }
    hdfsmaster-2 {
        count: 1
        instance: ${instances.m44x} {
            tags {
                group: hdfsmaster-2
            }
        }
        roles {
            HDFS: [JOURNALNODE, BALANCER]
            ZOOKEEPER: [SERVER]
            HBASE: [MASTER]
        }
    }
    master-1 {
        count: 1
        instance: ${instances.m44x} {
            tags {
                group: master
            }
        }
        roles {
            HBASE: [MASTER]
            ZOOKEEPER: [SERVER]
        }
    }
    workers {
        count: 3
        minCount: 1
        instance: ${instances.m42x} {
            # placementGroup: REPLACE-ME
            tags {
                group: worker
            }
        }
        roles {
            HDFS: [DATANODE]
            HBASE: [REGIONSERVER]
        }
    }
    postCreateScripts: ["""#!/bin/sh
echo 'Hello World!'
exit 0
""",
        """#!/usr/bin/python
print 'Hello again!'
"""]
    preTerminateScripts: ["""#!/bin/sh
echo 'Goodbye World!'
exit 0
""",
        """#!/usr/bin/python
print 'Goodbye again!'
"""]
}
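For anyone trying this: a config file like the one above is normally handed to the Cloudera Director client. A minimal sketch, assuming a Director 2.x server is already running; the hostname, port, credentials, and file name here are placeholders:

cloudera-director bootstrap-remote cluster.conf \
    --lp.remote.hostAndPort=director-host:7189 \
    --lp.remote.username=admin \
    --lp.remote.password=admin

The client validates the file and then drives instance allocation, the bootstrap script, and cluster setup from it.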
Created 09-23-2016 08:47 AM
jnlkds:
Thanks, I'll try out the code you pasted here and see if it works in my environment.
Created 09-29-2016 12:54 PM
So far, I have not been successful. I tried it in my AWS environment, and the script you provided needs a few adjustments, so I'm modifying it to match my AWS environment. I will update later; it is still a work in progress.
Created 09-29-2016 01:29 PM
Let me know how it goes. I know it works, so no worries!